Your IP : 18.224.54.118
#!/usr/bin/bash
## This check measures the age of the nc_cp_backup.sh process and raises an alarm
## if it takes longer than expected
# Forked from nagios_hosting_check_backup-splited (check_cpbackup_process.sh) by Vladimir Kuprikov
# Print the help text (purpose, synopsis and option summary) to stdout.
# The script name shown in the synopsis is derived from $0.
usage() {
  local self
  self="$(basename "${0}")"
  printf '%s\n' \
    "This script measures age of pkgacct and rsync processes which have nc_cp_backup.sh as parent" \
    "and rises alarm if it takes longer than expected" \
    "Usage: ${self} -c CTIME[D|H|M] -w WTIME[D|H|M]" \
    "-h Print this help" \
    "-c CRITICAL status if backups are older than CTIME (default CTIME = 5 days)(default assumes Days)" \
    "-w WARNING status if backups are older than WTIME (default WTIME = 3 days)(default assumes Days)"
}
# --- Configuration -----------------------------------------------------------
# Command-line arguments joined into a single space-separated string.
PARAMS="${@}"

# Backup driver script named in the status messages, and the worker commands
# whose processes are inspected.
NCCPBACKUP="/root/bin/nc_cp_backup.sh"
PKGACCT="pkgacct"
RSYNC="rsync"

# Flag file whose age is checked when no worker process is found.
CPBACKUP_PAUSED="/backup/CPBACKUP_PAUSED"

# Seconds per supported time unit.
declare -A UNITS=(
  [days]=86400
  [hours]=3600
  [minutes]=60
)

# PID -> runtime in seconds for processes over the critical / warning threshold.
declare -A CPROC WPROC

# --- Exit codes ----------------------------------------------------------------
OK=0 WARN=1 CRIT=2 UNKN=3
RC="${UNKN}"   # stays "unknown" until a verdict is reached

# --- Default thresholds --------------------------------------------------------
UNIT="days"
WUNIT="${UNIT}"
CUNIT="${UNIT}"
WTIME=259200   # 3 days, in seconds
CTIME=432000   # 5 days, in seconds
#######################################
# Convert a threshold argument such as "3", "3d", "12H" or "90m" to seconds.
# Globals:
#   UNITS (read)  - map of unit name -> seconds per unit
#   UNKN  (read)  - exit status used when the argument cannot be parsed
#   TIME  (write) - the threshold converted to seconds
#   UNIT  (write) - unit the threshold was given in: days, hours or minutes
# Arguments:
#   $1 - non-negative integer, optionally followed by D, H or M (any case).
#        A bare number is interpreted as days.
# Outputs:
#   An error message on stdout when $1 cannot be parsed.
# Returns:
#   0 on success; terminates the script with ${UNKN} on invalid input.
#######################################
ident_time () {
  local raw="${1}"
  local amount

  unset TIME UNIT
  case "${raw: -1}" in
    [Dd])
      UNIT="days"
      amount="${raw%?}"
      ;;
    [Hh])
      UNIT="hours"
      amount="${raw%?}"
      ;;
    [Mm])
      UNIT="minutes"
      amount="${raw%?}"
      ;;
    [0-9])
      # No unit suffix: the whole argument is the number of days.
      UNIT="days"
      amount="${raw}"
      ;;
    *)
      echo "Can not recognize time ${raw}. Please use identifier [D|H|M]"
      exit "${UNKN}"
      ;;
  esac

  # Only whole numbers are accepted, with or without a unit suffix. A value
  # like "1.5" or "x3" would otherwise yield a threshold the integer
  # comparisons later in the script cannot evaluate.
  if ! [[ "${amount}" =~ ^[0-9]+$ ]] ; then
    echo "Can not recognize time ${raw}."
    exit "${UNKN}"
  fi

  # 10# forces base 10 so that values such as "08" are not parsed as octal.
  TIME="$(( 10#${amount} * ${UNITS[${UNIT}]} ))"
}
#######################################
# Parse the command-line options and set the warning/critical thresholds.
# The arguments are passed through as "$@" rather than a flattened string,
# so quoted arguments stay intact and are not subject to word splitting or
# globbing.
# Globals:
#   WTIME, WUNIT (write) - warning threshold in seconds and its unit (-w)
#   CTIME, CUNIT (write) - critical threshold in seconds and its unit (-c)
#   TIME, UNIT   (read)  - results left behind by ident_time
#   WARN, UNKN   (read)  - exit codes
# Arguments:
#   The script's command-line arguments.
# Returns:
#   0 when all options were parsed. Exits with ${WARN} after printing help
#   for -h, and with ${UNKN} on a missing argument or an unknown option.
#######################################
parse_args() {
  # Local getopts state so the function can be called more than once.
  local OPTIND=1 OPTARG OPTS

  while getopts ":hw:c:" OPTS "$@"; do
    case "${OPTS}" in
      h)
        usage
        # NOTE(review): help exits with WARN; kept as-is for compatibility.
        exit "${WARN}"
        ;;
      w)
        ident_time "${OPTARG}"
        WTIME="${TIME}"
        WUNIT="${UNIT}"
        ;;
      c)
        ident_time "${OPTARG}"
        CTIME="${TIME}"
        CUNIT="${UNIT}"
        ;;
      :)
        echo "Option -${OPTARG} requires an argument" >&2
        usage
        exit "${UNKN}"
        ;;
      \?)
        echo "Unknown option -${OPTARG}" >&2
        usage
        exit "${UNKN}"
        ;;
    esac
  done
}
parse_args "$@"
# Snapshot of candidate processes. Each kept line has the form
#   <lstart>|<PPID>|<PID>|<command>
# A line is kept when it mentions pkgacct or rsync and does not contain the
# text "|awk", which drops the filtering awk process itself.
# (An earlier revision filtered on ${NCCPBACKUP} instead of the worker names.)
CPBACKUPPROC="$(
  ps ax -o lstart -o "|%P|%p|" -o command \
    | awk -v pkgacct="${PKGACCT}" -v rsync="${RSYNC}" \
        '(match($0, pkgacct) || match($0, rsync)) && ! match($0, "\\|awk")'
)"
# Print the command line of process $1 with the NUL argument separators
# replaced by spaces. Prints nothing if the process is gone or unreadable.
read_cmdline() {
  { tr '\0' ' ' < "/proc/${1}/cmdline" ; } 2>/dev/null
}

# --- Evaluate the captured process list and report a status ------------------
#
# Expected tree of nc_cp_backup.sh processes (see task TO-6668):
#
#   bash /root/bin/nc_cp_backup.sh -a                  <- level 1
#    \_ bash /root/bin/nc_cp_backup.sh -a              <- level 2
#    |   \_ bash /root/bin/nc_cp_backup.sh -a          <- level 3
#    |       \_ pkgacct - <user> - av: 4               <- worker
#    |       \_ tail -1
#    \_ sleep 1
#
# A worker (pkgacct or rsync) is counted when:
#   * its own command line starts with pkgacct or rsync,
#   * its parent and grandparent have the same, non-empty command line, and
#   * that command line mentions the nc_cp_backup.sh script.
# The last condition keeps unrelated process chains from being reported,
# e.g. a local rsync that forks copies of itself.

flagSubProcRunning=0      # becomes 1 once a backup worker is found
NOW="$(date +"%s")"       # one timestamp for the whole run

if [[ -n "${CPBACKUPPROC}" ]] ; then
  # IFS is changed for `read` only, so the rest of the script keeps default
  # word splitting. -r keeps backslashes in command lines literal.
  while IFS='|' read -r mySDATE myP2ID myPID _ ; do
    myPID="${myPID// /}"      # the PID columns are space padded
    myP2ID="${myP2ID// /}"

    # Skip malformed lines and processes that ended after the snapshot.
    [[ "${myPID}" =~ ^[0-9]+$ && "${myP2ID}" =~ ^[0-9]+$ ]] || continue
    [[ -d "/proc/${myPID}" ]] || continue

    CMDLINE="$(read_cmdline "${myPID}")"
    P2CMDLINE="$(read_cmdline "${myP2ID}")"                           # parent
    myP3ID="$(awk '{print $4}' "/proc/${myP2ID}/stat" 2>/dev/null)"   # grandparent PID
    P3CMDLINE=""
    if [[ -n "${myP3ID}" ]] ; then
      P3CMDLINE="$(read_cmdline "${myP3ID}")"
    fi

    [[ "${CMDLINE}" =~ ^${PKGACCT} || "${CMDLINE}" =~ ^${RSYNC} ]] || continue
    [[ -n "${P2CMDLINE}" && "${P2CMDLINE}" == "${P3CMDLINE}" ]] || continue
    # Match on the script's file name so an alternative path still qualifies.
    [[ "${P2CMDLINE}" == *"${NCCPBACKUP##*/}"* ]] || continue

    flagSubProcRunning=1

    STARTDATE="$(date -d "${mySDATE}" +"%s" 2>/dev/null)"
    if ! [[ "${STARTDATE}" =~ ^-?[0-9]+$ ]] ; then
      echo "ERROR: can not get start date of process"
      exit "${UNKN}"
    fi

    RUNTIME=$(( NOW - STARTDATE ))
    if (( RUNTIME >= CTIME )) ; then
      ((CFLAG++))
      CPROC[${myPID}]="${RUNTIME}"
    elif (( RUNTIME >= WTIME )) ; then
      ((WFLAG++))
      WPROC[${myPID}]="${RUNTIME}"
    fi
  done <<<"${CPBACKUPPROC}"
fi

# No worker is running: check whether the backup has been left paused.
#   WARNING:  ${CPBACKUP_PAUSED} older than 8 hours
#   CRITICAL: ${CPBACKUP_PAUSED} older than 12 hours
# This check also runs when the snapshot is empty, because a paused backup
# has no pkgacct/rsync process.
# NOTE(review): whether nc_cp_backup.sh itself is still running is not
# verified here; only the age of the flag file is used.
if (( flagSubProcRunning == 0 )) && [[ -e "${CPBACKUP_PAUSED}" ]] ; then
  pausedSince="$(date +%s -r "${CPBACKUP_PAUSED}" 2>/dev/null)"
  # An empty result means the flag file disappeared in the meantime.
  if [[ "${pausedSince}" =~ ^[0-9]+$ ]] ; then
    pauseAge=$(( (NOW - pausedSince) / 3600 ))   # in hours
    if (( pauseAge >= 12 )) ; then
      echo "CRITICAL. CPBACKUP is PAUSED for more than 12 hours"
      exit "${CRIT}"
    elif (( pauseAge >= 8 )) ; then
      echo "WARNING. CPBACKUP is PAUSED for more than 8 hours"
      exit "${WARN}"
    fi
  fi
fi

if [[ -n "${CFLAG}" ]] ; then
  STATUS="[CRITICAL]"
  STATUSTXT="${NCCPBACKUP} process(es): PID(s)={${!CPROC[*]}} exceeded running time of $(( CTIME / ${UNITS[${CUNIT}]} )) ${CUNIT}"
  RC="${CRIT}"
elif [[ -n "${WFLAG}" ]] ; then
  STATUS="[WARNING]"
  STATUSTXT="${NCCPBACKUP} process(es): PID(s)={${!WPROC[*]}} exceeded running time of $(( WTIME / ${UNITS[${WUNIT}]} )) ${WUNIT}"
  RC="${WARN}"
elif [[ -n "${CPBACKUPPROC}" ]] ; then
  STATUS="[OK]"
  STATUSTXT="${NCCPBACKUP} process is OK"
  RC="${OK}"
else
  STATUS="[OK]"
  STATUSTXT="${NCCPBACKUP} is not running"
  RC="${OK}"
fi
echo "${STATUS} ${STATUSTXT}"
exit "${RC}"