hdf5/bin/timekeeper
Albert Cheng 28bb8b4694 [svn-r11428] Purpose:
Bug fix.

Description:
In some racing condition, the process ID file might be removed just
the moment before timekeeper cats its content. When that happens,
the $pid is a blank.  Timekeeper was not prepared for this.

Solution:
Added code to verify $pid is some number before proceeding.

Platforms tested:
Tested by hand only.
2005-09-17 20:41:57 -05:00

129 lines
3.8 KiB
Bash
Executable File

#!/bin/sh
##
## Copyright by the Board of Trustees of the University of Illinois.
## All rights reserved.
##
## This file is part of HDF5. The full HDF5 copyright notice, including
## terms governing use, modification, and redistribution, is contained in
## the files COPYING and Copyright.html. COPYING can be found at the root
## of the source code distribution tree; Copyright.html can be found at the
## root level of an installed copy of the electronic HDF5 document set and
## is linked from the top-level documents page. It can also be found at
## http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. If you do not have
## access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu.
##
# As a time keeper of the remote daily test process launched by runtest.
# It sleeps for a certain time and then wakes up to hangup those processes
# that are still around, assuming they have run too long.
#
# Programmer: Albert Cheng
# Created Date: 2004/12/23
# variable initialization
waitminutes=300 # default to 5 hours == 300 minutes
debugtimelimit=
debugflag= # no debug
# Function definitions
#
# PRINTMSG
# Print a one line message left justified in a field of 70 characters
# without newline. More output for this line later.
#
PRINTMSG() {
SPACES=" "
echo "$* $SPACES" | cut -c1-70 | tr -d '\012'
}
USAGE()
{
echo "Usage: %0 [-h] [-debug] [<time-limit>]"
echo " Run timekeeper with <time-limit> minutes, default is $waitminutes."
echo " If <time-limit> is in the form of HH:MM, it means wait till then."
echo " -h print this help page"
echo " -debug run debug mode"
}
ParseOption()
{
if [ $# -gt 0 -a "$1" = -h ]; then
shift
USAGE
exit 0
fi
if [ $# -gt 0 -a "$1" = -debug ]; then
shift
debugflag=yes
waitminutes=1 # use shorter time for debug
fi
if [ $# -gt 0 ]; then
targettime=$1
shift
# find out it is minutes to wait or HH:MM to wake up
case $targettime in
*:*) # HH:MM
currenttime=`date +%H:%M`
currenthour=`echo $currenttime | cut -f1 -d:`
currentminute=`echo $currenttime | cut -f2 -d:`
targethour=`echo $targettime | cut -f1 -d:`
targetminute=`echo $targettime | cut -f2 -d:`
waitminutes=`expr \( $targethour - $currenthour \) \* 60 + $targetminute - $currentminute`
if test $waitminutes -le 0; then
# target time is in tomorrow, add 1 day of minutes
waitminutes=`expr 24 \* 60 + $waitminutes`
fi
;;
*)
waitminutes=$targettime
;;
esac
fi
}
# Main body
echo "Timekeeper started at `date`"
ParseOption $*
waitperiod=`expr $waitminutes \* 60` # convert to seconds
if [ -z "$debugflag" ]; then
# normal time keeping mode
# sleep first
echo Timekeeper sleeping for $waitperiod seconds
sleep $waitperiod
# Look for any processes still around
echo "Timekeeper woke up at `date`, looking for processes to terminate..."
for x in PID.* ; do
if [ -f $x ]; then
pid=`cat $x`
# check if process is still around
if test X$pid \!= X && ps -p $pid > /dev/null; then
echo "terminating process $x ($pid)"
kill -HUP $pid
fi
fi
done
else
# Debug mode. Launch two rsh process, one ends before, the other after
# waitperiod. Must launch timekeeper from a subshell, else the debug
# will wait for it too.
myhostname=`hostname`
( $0 $waitminutes &)
debugtimelimit=`expr $waitperiod - 10`
echo rsh $myhostname sleep $debugtimelimit
rsh $myhostname sleep $debugtimelimit &
echo $! > PID.before
debugtimelimit=`expr $waitperiod + 10`
echo rsh $myhostname sleep $debugtimelimit
rsh $myhostname sleep $debugtimelimit &
echo $! > PID.after
wait
rm PID.before PID.after
fi
echo "Timekeeper ended at `date`"