Bash Backup script: Run if CPU usage is below threshold!


#!/bin/bash
#
# Idle-time backup: waits until the CPU usage measured over a one-second
# window drops below CPU_THRESHOLD percent, then archives /vol03, /homes and
# /etc into BACKUP_DIR and prunes archives that are about a month old.

# Usage percentage below which the backup is started; the script keeps
# sampling until the measured usage is lower than this value.
CPU_THRESHOLD=5

# Current date (dd-mm-yy), used in the archive file name.
DATE=$(/bin/date +%d-%m-%y)

# Directory that receives the archives and is pruned after a backup.
BACKUP_DIR=/vol03/backups

# Counters from the previous and the current /proc/stat sample.
PREV_TOTAL=0
PREV_IDLE=0
TOTAL=0
IDLE=0

#######################################
# Read the aggregate "cpu" line of /proc/stat.
# Globals:   TOTAL (written) - sum of all counters on the line
#            IDLE  (written) - fourth counter on the line (idle time)
# Returns:   exits the script with status 1 if the line cannot be parsed
#######################################
read_cpu_sample() {
  local value
  local -a fields=()

  # fields[0] is the literal "cpu" label; the counters start at index 1.
  read -r -a fields < <(grep '^cpu ' /proc/stat)
  if (( ${#fields[@]} < 5 )); then
    echo "Unable to read CPU statistics from /proc/stat." >&2
    exit 1
  fi

  IDLE=${fields[4]}
  TOTAL=0
  for value in "${fields[@]:1}"; do
    TOTAL=$(( TOTAL + value ))
  done
}

# Take a baseline sample first. Comparing the first reading against zero
# would yield the average usage since boot instead of the current load.
read_cpu_sample
PREV_TOTAL=$TOTAL
PREV_IDLE=$IDLE

while true; do
  # Wait before checking again.
  sleep 1

  read_cpu_sample

  # Calculate the CPU usage since we last checked.
  DIFF_IDLE=$(( IDLE - PREV_IDLE ))
  DIFF_TOTAL=$(( TOTAL - PREV_TOTAL ))

  # Remember the total and idle CPU times for the next check.
  PREV_TOTAL=$TOTAL
  PREV_IDLE=$IDLE

  # No counter progress between samples: skip to avoid dividing by zero.
  if (( DIFF_TOTAL <= 0 )); then
    continue
  fi

  # Usage in percent, rounded to the nearest integer.
  DIFF_USAGE=$(( (1000 * (DIFF_TOTAL - DIFF_IDLE) / DIFF_TOTAL + 5) / 10 ))

  if (( DIFF_USAGE < CPU_THRESHOLD )); then
    echo "CPU usage below $CPU_THRESHOLD%. Backing up now.."
    sleep 2

    # Perform backup.
    # NOTE(review): BACKUP_DIR lives inside /vol03, so earlier archives are
    # themselves part of every new archive - confirm this is intended or
    # exclude the directory.
    tar -czvf "$BACKUP_DIR/stone-archive-$DATE.tar.gz" /vol03 /homes /etc
    tar_status=$?

    # GNU tar reports files that changed while being read with status 1;
    # anything higher is a fatal error, so keep the old archives in that case.
    if (( tar_status > 1 )); then
      echo "Backup failed (tar exit status $tar_status). Old archives kept." >&2
      exit "$tar_status"
    fi
    if (( tar_status == 1 )); then
      echo "Warning: some files changed while they were being archived." >&2
    fi

    echo "Backup is complete."

    # Delete archives that are approximately a month old. Modification time
    # is used because reading the old archives during a backup can refresh
    # their access time.
    find "$BACKUP_DIR/" -type f -name '*.tar.gz' -mtime +28 -exec rm -f -- {} +

    # Quit script.
    exit 0
  fi
done

Nagios failover setup

Once you have Nagios configured, you can set up failover: if the Nagios master goes offline, a standby slave enables notifications and checks. Here are the setup notes and a custom script I wrote in Python to achieve this.

the following command checks if nagios is running locally

[root@scrappy nagios]# /usr/local/nagios/libexec/check_nagios -F /usr/local/nagios/var/status.dat -e 1 -C '/usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg'

NAGIOS OK: 1 process, status log updated 5 seconds ago

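Now add the following command definition to the NRPE config (nrpe.cfg) on the remote master/slave, so that the other host can verify that Nagios is running there. NRPE must be restarted on the machine it is installed on so that it rereads nrpe.cfg.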

command[check_nagios_failover]=/usr/local/nagios/libexec/check_nagios -F /usr/local/nagios/var/status.dat -e 1 -C '/usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg'

test nrpe remote command.

[root@scrappy nagios]# /usr/local/nagios/libexec/check_nrpe -H james -c check_nagios_failover

NAGIOS OK: 15 processes, status log updated 0 seconds ago

modify nagios.cfg on nagios slave. restart nagios.

execute_service_checks=0

enable_notifications=0

check_external_commands=1

now set crontab on slave to check for master failure.

[root@scrappy etc]# crontab -l

* * * * * nagios /usr/local/nagios/set_slave_status.py > /dev/null

Create script and modify master ip.

#!/usr/bin/python
import os
import commands
import sys

# Host names of the Nagios master and of this standby slave.
master='james'
slave='scrappy'

# File in which the failover state of the previous run is kept.
tmp_file='/tmp/nagios-failover-state.txt'
# Nagios external command file that the state-change commands are written to.
commandfile='/usr/local/nagios/var/rw/nagios.cmd'
# Current Unix timestamp as a string, used to stamp the external commands.
now=commands.getoutput('date +%s')

# Address that receives the failover / restore notification mails.
email='email@domain.com'
# Ask the master, through NRPE, whether its Nagios process is running.
out=commands.getoutput('/usr/local/nagios/libexec/check_nrpe -H '+master+' -c check_nagios_failover')

# Position of 'OK' in the check output; str.find returns -1 when the text is
# absent, which means the master check did not succeed.
master_return_val=out.find('OK')
#store state information
def failover_save_state(x):
    """Store the failover state string ``x`` in ``tmp_file``.

    The file is overwritten on every call, so it only ever holds the most
    recently saved state.
    """
    # The with-block closes the file even if the write raises.
    with open(tmp_file,'w') as fh:
        fh.write(x)

def sync_nagios_files():
    """Copy the master's Nagios object files to this host and reload Nagios.

    Nagios is only reloaded when rsync reports success, so a failed or
    partial transfer is not announced as a completed sync.
    """
    rsync_status=os.system('rsync -av '+master+':/usr/local/nagios/etc/objects/* /usr/local/nagios/etc/objects/')
    if rsync_status != 0:
        print("Sync of master conf files failed (rsync status "+str(rsync_status)+").")
        return
    os.system('/etc/init.d/nagios reload')
    print("Master conf files synced with Slave.")


# Read the state saved by the previous run. This runs at module level because
# the failover decision below needs current_state.
try:
    with open(tmp_file,'r') as fh:
        # Only the first line is used; an empty file yields ''.
        current_state=fh.readline().strip()
except IOError:
    # No readable state file (e.g. first run): treat the state as unknown.
    current_state=''

def send_nagios_command(name):
    """Write the external command ``name`` to the Nagios command file.

    The line has the form "[<timestamp>] <NAME>" followed by a newline; the
    timestamp is the module-level ``now`` value.
    """
    os.system('/usr/bin/printf "[%lu] '+name+'\\n" '+now+' > '+commandfile)


# str.find returned -1 when the master's check output did not contain 'OK'.
master_is_down = master_return_val < 0

if master_is_down:
    if current_state=='enabled':
        print("Nagios slave ("+slave+") is active. Nagios master ("+master+") is down. No state change")
    else:
        # Take over: turn on notifications and active checks on this slave.
        for command_name in ('ENABLE_NOTIFICATIONS',
                             'START_EXECUTING_HOST_CHECKS',
                             'START_EXECUTING_SVC_CHECKS'):
            send_nagios_command(command_name)
        os.system('echo elvis has left the building | mail -s "[Nagios] Master Down! Slave Enabled." '+email)
        print("Nagios Failover enabled")
        failover_save_state('enabled')
        sync_nagios_files()
else:
    if current_state=='disabled':
        print("Nagios master ("+master+") is enabled. No state change.")
    else:
        # Master is back: return this slave to passive standby.
        for command_name in ('DISABLE_NOTIFICATIONS',
                             'STOP_EXECUTING_HOST_CHECKS',
                             'STOP_EXECUTING_SVC_CHECKS'):
            send_nagios_command(command_name)
        os.system('echo We are all out of donuts. | mail -s "[Nagios] Master Restored. Slave Disabled." '+email)
        print("Nagios master restored")
        failover_save_state('disabled')

# Keep the slave's object files in step with the master on every run.
# NOTE(review): this also runs when the master was just found to be down,
# and a second time right after the failover branch - confirm that is wanted.
sync_nagios_files()

make executable

[root@scrappy nagios]# chown nagios.nagios /usr/local/nagios/etc/set_slave_status.py
[root@scrappy etc]# chmod o=rwx set_slave_status.py