#!/usr/bin/ksh
###### VERITAS CLUSTER SERVER CONTROL SCRIPT
###### NAME: Script.OVPI.VCS.StartStopMonitorClean.ksh
###### SYMLINKS POINTING TO THIS SCRIPT:
######   Run_OVPI.ksh     - INITIATES BRINGING UP ALL RESOURCES
######   Halt_OVPI.ksh    - INITIATES HALTING ALL RESOURCES
######   Monitor_OVPI.ksh - CHECKS PERIODICALLY IF ALL RESOURCES ARE UP AND RUNNING
######   Clean_OVPI.ksh   - CALLED BY HALT TO FORCIBLY CLEAN UP ANY LEFT OVER PROCESSES

###### BEGIN: USER CUSTOMIZABLE PARAMETERS ##################################################

FLOATER_HOSTNAME="PMPICS.IDEACONNECT.COM"  # The hostname of the logical/floating machine
DBTYPE="ORACLE"                            # Must be either "SYBASE" or "ORACLE"
DEBUG_MODE=3                               # 0=none, 1=logfile, 2=stdout, 3=both

# Set $OVPI_SCENARIO to the type of distributed (or non-distributed) scenario:
# This selects whether to monitor both trendtimer and piweb, or just piweb.
# If STANDALONE or DB_ONLY, manage both trendtimer and piweb.
# If WAS_ONLY, manage only piweb.
#
STANDALONE=1   # Both DB+TrendTimer and WAS on this system.
DB_ONLY=2      # Only DB+TrendTimer on this system; WAS on some other system.
WAS_ONLY=3     # Only WAS on this system; DB+TrendTimer on some other system.
OVPI_SCENARIO=${DB_ONLY}  # MUST BE STANDALONE OR DB_ONLY OR WAS_ONLY BASED ON ABOVE LINES

####### END: USER CUSTOMIZABLE PARAMETERS ####################################################

# Set the following to "T" if we should ignore "<defunct>" processes when
# evaluating the results of a kill operation:
#
IGNORE_DEFUNCT="T"

# ORACLE SECTION: Ignore the rest of this if you're using SYBASE.
# *NOTE* - If you're using an older Oracle version, it would be wise to browse
#          through routine Ora_Env_Setup() below and make necessary changes!

# Set this to Y if there are Oracle httpd/Apache processes to be monitored
# and controlled with ${ORACLE_HOME}/Apache/Apache/bin/apachectl
#
ORACLE_HTTPD="N"   # Use "N" if this feature not implemented and/or enabled.
# Set this to Y if there are Oracle dbsnmpwd/dbsnmp agent processes to be
# monitored and controlled with ${ORACLE_HOME}/bin/agentctl
#
ORACLE_DBSNMP="N"  # Use "N" if this feature not implemented and/or enabled.

# If OVPI is using an Oracle db, then we must be able to login as this user
# and obtain 3 critical environ vars: ORACLE_SID ORACLE_BASE ORACLE_HOME
#
ORACLE_USER="oracle"
# Above 2 flags used by ORA_online, ORA_offline, ORA_monitor, but not ORA_clean

# SYBASE SECTION: Ignore the rest of this if you're using ORACLE.
#
SYBASE_USER="sybase"
SYBASE_DBPW=""       # SYBASE Database Password.  Usually it's null.
SYBASE_PORT="2052"   # Port number, as found in /etc/services

###### END: User-customizable defines

## TODO:
##
## 1) Is the level of verbosity too great in the debug messages?  Should
##    there be a "DEBUG_LEVEL" defined and used to cut down on the amount
##    of detail that we log?
## 2) A number of UNIX commands run by us send output to STDOUT and STDERR,
##    and VERITAS redirects all of that to the logs.  Is there too much of
##    this?  If so, then a few of these: ">/dev/null 2>&1" should perhaps
##    be appended to some commands.
## 3) Should more tuning of the timeout values be done, either made shorter
##    for more efficiency or made longer for more reliable functionality?
##    (See all occurrences of the command "sleep ##").

# This flag allows us to debug the DB, OVPI and IP address separately:
# When =0, do everything normally.  I.e. start/stop/monitor both OVPI
#   and the DB when invoked.
# When =1, we will NOT start/stop/monitor OVPI and the DB, but will instead
#   just pretend that we did and do nothing.  This way we can get VERITAS to
#   start up the shared IP address and shared volumes and it will be content
#   with just that.  Once those are up we can play with the DB and/or OVPI
#   via manual startups and shutdowns.
# When =2, we will still start/stop/monitor OVPI, but will not do so with
#   the DB (but pretend that we did to keep VERITAS happy).
#
DEBUG_SERVICES=0   # 0=normal, 1=do nothing, 2=Start/stop/monitor only OVPI
                   # NOTE(review): no function visible in this script reads this flag.

# OVPI installation directory, taken from the trend.home entry of /etc/trend.conf.
PIHOME=$(grep trend.home /etc/trend.conf | sed "s/trend.home=//")

#define where jboss is - this is the directory containing directories log, bin, conf etc
#JBOSS_HOME=${JBOSS_HOME:-"/opt/OV/nonOV/PiJBoss/4.23.000"}
JBOSS_HOME=${JBOSS_HOME:-"/opt/OV/nonOV/PiJBoss/405"}

#make sure java is on your path
JAVAPTH=${JAVAPTH:-"/opt/OV/nonOV/jdk/b/bin"}

#define the classpath for the shutdown class
JBOSSCP=${JBOSSCP:-"$JBOSS_HOME/bin/shutdown.jar:$JBOSS_HOME/client/jnet.jar"}

#define the script to use to start jboss
JBOSSSH=${JBOSSSH:-" nohup $JBOSS_HOME/bin/run.sh -c piweb -b 0.0.0.0 "}

#define what will be done with the console log
JBOSS_CONSOLE=${JBOSS_CONSOLE:-"$PIHOME/log/piweb.log"}

#define the user under which jboss will run, or use RUNASIS to run as the current user
# NOTE(review): JBOSSUS is not consulted by startjboss/stopjboss below.
JBOSSUS=${JBOSSUS:-"root"}

CMD_START="cd $JBOSS_HOME/bin; $JBOSSSH"
CMD_STOP=" nohup $JBOSS_HOME/bin/shutdown.sh -S"

#Add java to path
PATH=$PATH:$JAVAPTH

PIUSER=trendadm

#GREP=/usr/xpg4/bin/grep   # /bin/grep doesn't support -q or -x options.
#AWK=/usr/xpg4/bin/awk
GREP=/usr/bin/grep
AWK=/usr/bin/awk

export LANG=C
export LC_ALL=C

LOGFILE=/tmp/VRTS_OVPI.log

# Exit codes handed back to the cluster framework (see OVPI_monitor header).
RESOURCE_ONLINE=110
RESOURCE_OFFLINE=100
RESOURCE_INLIMBO=105

#######
# logit( Severity=C|E|W|N|I, MessageText )
#
# Writes one timestamped trace line, tagged with ${OVPITAG}, ${MYPID} and
# ${PHYSICAL_HOSTNAME} (all set in the main section at the bottom).
#
# Destination is selected by ${DEBUG_MODE}:
#   0=none, 1=logfile (${LOGFILE}), 2=stdout, 3=both
# (The original notes say that when VERITAS invokes the script, stdout ends
#  up in the cluster's own control log.)
#######
logit()
{
  typeset sev=$1 msg=$2 stamp line

  stamp=$(date)
  line="${stamp} ${OVPITAG}|${MYPID}(${PHYSICAL_HOSTNAME}):Sev=${sev}: ${msg}"

  case "${DEBUG_MODE}" in
    1|3)
      # Redirect order matters: stdout must point at the file BEFORE
      # stderr is duplicated onto it, otherwise errors miss the log.
      printf '%s\n' "${line}" >>"${LOGFILE}" 2>&1
      ;;
  esac

  case "${DEBUG_MODE}" in
    2|3)
      printf '%s\n' "${line}"
      ;;
  esac

  # Kept from the original; presumably guards against typeset variables not
  # being function-local for name() style functions on some ksh versions.
  unset sev msg stamp line
}

#################
# OVPIConnectToDB()
#
# Test connectivity to any DB via OVPI's "ovpi_run_sql" - a front-end to
# both isql and sqlplus.  Runs a tiny query as ${PIUSER} and looks for the
# "local" row of dsi_time_zone in the output.
#
# RETURN: 0 if success, 1 if failure.
#################
OVPIConnectToDB()
{
  typeset OVPI_SQL=ovpi_run_sql                 # This tool calls either isql or sqlplus.
  typeset SQLSCR=/tmp/AgentDbScr.${MYPID}.sql   # Sybase ISQL or Oracle SQL code.
  typeset DBOUT=/tmp/AgentDbScr.${MYPID}.out    # Raw output of (I)SQL goes here.
  typeset ErrMsg SQLCMD

  case "${DBTYPE}" in
    ORACLE) SQLCMD="select name from dsi_time_zone;" ;;
    SYBASE) SQLCMD="select name from dsi_time_zone" ;;   # "go" is added below
  esac

  logit I "OVPIConnectToDB() Testing SQL/ISQL command '${SQLCMD}'"

  # Always (re)create the script file so a stale file from an earlier run
  # with the same PID cannot leak in, and write the query for BOTH database
  # types (previously only ORACLE got its query written).
  {
    case "${DBTYPE}" in
      ORACLE) printf '%s\n' "${SQLCMD}" ;;
      SYBASE) printf '%s\ngo\n' "${SQLCMD}" ;;
    esac
    printf '%s\n' "quit"
  } >"${SQLSCR}"

  /bin/su - ${PIUSER} -c "${PIHOME}/bin/${OVPI_SQL} -sqlscript ${SQLSCR}" >"${DBOUT}" 2>&1

  # Expected output: Should include a short table looking like: (ORACLE vers)
  #   NAME
  #   ------------------------------
  #   gmt
  #   local
  #
  # SYBASE differences: 1) NAME-->name, 2) one blank before each table row.
  # Let's just look for the "local" line to determine success.
  if grep "^ *local *$" "${DBOUT}" >/dev/null 2>&1 ; then
    logit I "OVPIConnectToDB() EXIT 0 = SUCCESS connecting to the $DBTYPE DB"
    rm -f "${SQLSCR}" "${DBOUT}" >/dev/null 2>&1
    return 0
  fi

  # Build an indented copy of the tool's output for the log; the leading
  # empty echo makes the quoted output start on its own line.
  ErrMsg=$(echo ""; egrep -v "(DISPLAY has|^ *$)" "${DBOUT}" | sed "s/^/ > /")
  logit I "OVPIConnectToDB() EXIT 1 = FAILURE. ${DBTYPE} returned:${ErrMsg}"
  rm -f "${SQLSCR}" "${DBOUT}" >/dev/null 2>&1
  return 1
}

#ovpi_httpd start/stop implementation.

# waitForFinish()
# Sleeps 9 x 2 seconds, printing one progress dot per step (no newline).
waitForFinish()
{
  typeset i
  for i in 1 2 3 4 5 6 7 8 9
  do
    sleep 2
    printf '.'
  done
}

# startjboss()
# Starts the piweb JBoss instance in the background via ${CMD_START}, unless
# "ps -ef" already shows a "c piweb" process.  Console output is written
# (truncating) to ${JBOSS_CONSOLE}.  The start result itself is not checked.
startjboss()
{
  typeset jbosschk
  jbosschk=$(ps -ef | grep -i "c piweb" | grep -v "grep")

  if [ -n "${jbosschk}" ] ; then
    echo "OVPI HTTP Server is ALREADY running"
    return
  fi

  # eval is required because CMD_START is a "cd ...; nohup ..." command list.
  eval "${CMD_START}" >"${JBOSS_CONSOLE}" 2>&1 &
  echo "Starting OVPI HTTP Server " && waitForFinish
  echo
  echo "OVPI HTTP Server is now Running"
}

# stopjboss()
# Requests a JBoss shutdown via ${CMD_STOP}, waits (waitForFinish), then
# sends SIGKILL to every process "ps -ef" still lists with "c piweb".
stopjboss()
{
  typeset jbosschk isJBossRunningPid
  jbosschk=$(ps -ef | grep -i "c piweb" | grep -v "grep")

  if [ -z "${jbosschk}" ] ; then
    echo "OVPI HTTP Server is NOT running"
    return
  fi

  # CMD_STOP is intentionally unquoted: it holds a command plus arguments.
  ${CMD_STOP} >"${JBOSS_CONSOLE}" 2>&1 &
  echo "Stopping OVPI HTTP Server" && waitForFinish

  #wait for some time to stop the server, otherwise kill all the instances of piweb.
  ps -ef | grep "c piweb" | grep -v grep | ${AWK} '{print $2}' \
    | while read isJBossRunningPid
      do
        kill -9 ${isJBossRunningPid}
      done
  echo
  echo "OVPI HTTP Server is now Stopped"
}

#############
# OVPI_online()
# FUNCTION: Called by main() when ($MODE = "Run_OVPI.ksh")
#           Start piweb, and optionally trendtimer if not WAS-ONLY.
# RETURN:   Always 110 (${RESOURCE_ONLINE}); start failures are not detected.
#           NOTE(review): the trace message below still says "EXIT 0".
#############
OVPI_online()
{
  typeset Res

  if [ "${OVPI_SCENARIO}" -ne "${WAS_ONLY}" ] ; then
    # Don't start trendtimer if another copy of it is already running:
    #
    Res=$(ps -ef | grep "trendtimer" | grep -v grep)
    if [ -z "${Res}" ] ; then
      #
      # Implement same mechanism used by "/etc/init.d/ovpi_timer start" :
      #
      logit I "OVPI_online() STARTING trendtimer ...."
      # NOTE(review): StartTtimerOUT/ERR land in whatever the current
      # working directory happens to be.
      /bin/su - ${PIUSER} -c "${PIHOME}/bin/trendtimer -s ${PIHOME}/lib/trendtimer.sched" \
        >StartTtimerOUT 2>StartTtimerERR
      sleep 5
    fi
  fi

  #THIS CODE CHECKS THE APPROPRIATE OS AND STARTS PIWEB ACCORDINGLY
  #
  logit I "OVPI_online() STARTING piweb ...."
  startjboss
  logit I "OVPI_online() WAITING 3 seconds after piweb start ...."
  sleep 3

  logit I "OVPI_online() EXIT 0"
  return ${RESOURCE_ONLINE}   # = 110
}

##############
# OVPI_offline()
# FUNCTION: Called by main() when ($MODE = "Halt_OVPI.ksh")
#           Stop piweb, and optionally trendtimer if not WAS-ONLY, then run
#           OVPI_clean as a safety net (its result is ignored).
# RETURN:   Always 100 (${RESOURCE_OFFLINE}).
#           NOTE(review): the trace message below still says "EXIT 0".
##############
OVPI_offline()
{
  typeset TtPids

  # Implement same mechanism used by "/etc/init.d/ovpi_httpd stop" :
  #
  logit I "OVPI_offline() STOPPING piweb ...."
  stopjboss

  if [ "${OVPI_SCENARIO}" -ne "${WAS_ONLY}" ] ; then
    sleep 1
    logit I "OVPI_offline() STOPPING trendtimer ...."
    # As before, signal each trendtimer PID and its parent - but never a
    # parent PID <= 1 (init), and skip kill entirely when nothing matched
    # (a bare "kill -TERM" is a usage error).
    TtPids=$(ps -ef | grep trendtimer | grep -v grep \
             | ${AWK} '{ print $2; if ($3 > 1) print $3 }')
    if [ -n "${TtPids}" ] ; then
      kill -TERM ${TtPids}
    fi
    sleep 1
  fi

  OVPI_clean   #call this just in case all processes are not killed yet

  logit I "OVPI_offline() EXIT 0"
  return ${RESOURCE_OFFLINE}   # = 100
}

############
# OVPI_clean()
# FUNCTION: Called by main() in clean mode, and by OVPI_offline().
#           Sends SIGKILL to every process whose "ps -ef" line matches
#           ${PIUSER}, "timer" or "piweb".
#           NOTE(review): "timer" is a very broad pattern - confirm nothing
#           unrelated on the host matches it.
# RETURN:   1   = processes still present after the kill,
#           100 = everything cleaned up.
############
OVPI_clean()
{
  typeset PidToKill Res

  # Find all relevant PID's and kill with -9 (SIGKILL).
  # Don't need to check $OVPI_SCENARIO:  If he's WAS_ONLY then the trendtimer
  # process by definition can never be present, so no harm in looking for it
  # to kill it.
  #
  logit I "OVPI_clean() FORCED CLEANUP of processes ...."
  ps -ef | egrep "(${PIUSER}|timer|piweb)" | grep -v grep \
    | ${AWK} '{print $2}' | while read PidToKill
      do
        # Send signal 9 to the ${PidToKill} process.
        logit I "OVPI_clean() kill -9 ${PidToKill}"
        kill -9 ${PidToKill}
      done

  sleep 2   # Give the last kill a few seconds to work.

  # Re-scan.  With IGNORE_DEFUNCT=T, "<defunct>" (zombie) entries are not
  # counted as survivors.  The exclusion pattern must name "<defunct>"
  # explicitly: an empty alternative would match - and so discard - every line.
  if [ "${IGNORE_DEFUNCT}" = "T" ] ; then
    Res=$(ps -ef | egrep "(${PIUSER}|timer|piweb)" | egrep -v "(grep|<defunct>)")
  else
    Res=$(ps -ef | egrep "(${PIUSER}|timer|piweb)" | grep -v grep)
  fi

  if [ -n "${Res}" ] ; then
    logit I "OVPI_clean() EXIT 1/FAILURE. 'ps -ef' shows: [${Res}]"
    return 1     # Failed to clean up all processes
  fi

  logit I "OVPI_clean() EXIT 0/SUCCESS"
  return 100     # All cleaned up
}

##############
# OVPI_monitor()
# FUNCTION: Called by OVPI_monitor_loop().
# RETURN:   100=offline, 101-110=online w/ 10-100% confidence,
#           other return value=UNKNOWN
# DECISION MATRIX:
#
# SCENARIO = STANDALONE or DB_ONLY:
# RETURN|trendtimer & piweb|Can connect to Dbase|trendadm procs present?
# ------|------------------|--------------------|-----------------------
#  110  | Both Up          | Yes                | n/a
#  107  | Both Up          | No                 | n/a
#  100  | Both Down        | n/a                | No
#  105  |<--------------(Any Other Combination of Values)-------------->
#
# SCENARIO = WAS_ONLY:
# RETURN|piweb
# ------|------
#  110  | Up
#  100  | Down
##############
OVPI_monitor()
{
  typeset TT="" PW="" DBST TR=""

  # Look for the two key processes:
  #
  PW=$(ps -ef | grep -i "piweb" | grep -v grep)
  if [ "${OVPI_SCENARIO}" -ne "${WAS_ONLY}" ] ; then
    TT=$(ps -ef | grep trendtimer | grep -v grep)
  fi
  logit I "OVPI_monitor() TT=[$TT], PW=[$PW]"

  # SCENARIO: WAS ONLY (piweb)
  #
  if [ "${OVPI_SCENARIO}" -eq "${WAS_ONLY}" ] ; then
    if [ -n "${PW}" ] ; then
      logit I "OVPI_monitor() EXIT 110 : Resource (WAS_ONLY) online"
      return ${RESOURCE_ONLINE}    # = 110 = 100% sure it's online
    fi
    logit I "OVPI_monitor() EXIT 100 : Resource (WAS_ONLY) offline"
    return ${RESOURCE_OFFLINE}     # = 100 = 0% sure it's online
  fi

  # SCENARIO: STANDALONE or DB_ONLY (both trendtimer & piweb)
  #
  # NOTE: At this point we could call ORA_monitor() or SYB_monitor(), but for
  # efficiency's sake we'll just test with OVPIConnectToDB().
  if [ -n "${TT}" ] && [ -n "${PW}" ] ; then
    OVPIConnectToDB; DBST=$?   # Test connectivity to Sybase or Oracle
    if [ ${DBST} -eq 0 ] ; then
      logit I "OVPI_monitor() EXIT 110 : Resource online"
      return ${RESOURCE_ONLINE}    # = 110 = 100% sure
    fi
    logit I "OVPI_monitor() EXIT 107 : Resource mostly online"
    return 107                     # 70% sure
  fi

  # Look for any trendadm "leftovers" (optionally ignoring "<defunct>"
  # entries; the pattern must name "<defunct>" explicitly, an empty
  # alternative would filter out every line):
  #
  if [ "${IGNORE_DEFUNCT}" = "T" ] ; then
    TR=$(ps -ef | grep ${PIUSER} | egrep -v "(grep|<defunct>)")
  else
    TR=$(ps -ef | grep ${PIUSER} | grep -v grep)
  fi
  logit I "OVPI_monitor() TR=[$TR]"

  if [ -z "${TT}" ] && [ -z "${PW}" ] && [ -z "${TR}" ] ; then
    logit I "OVPI_monitor() EXIT 100 : Resource offline"
    return ${RESOURCE_OFFLINE}   # No processes found.  Definitely is offline.
  fi

  logit I "OVPI_monitor() EXIT 105 : Resource partly on-/off- line"
  return ${RESOURCE_INLIMBO}     # Mixed results.
}

###################
# OVPI_monitor_loop()
# FUNCTION: Called by main() when $MODE = "Monitor_OVPI.ksh".
#           Runs OVPI_monitor once (despite the name, it does not loop) and
#           collapses its result to a strict online/offline answer.
# RETURN:   110 ($RESOURCE_ONLINE) if everything is up,
#           else 100 ($RESOURCE_OFFLINE).
###################
OVPI_monitor_loop()
{
  typeset MonRet

  logit I "OVPI_monitor_loop() ENTRY"
  OVPI_monitor; MonRet=$?

  if [ ${MonRet} -eq ${RESOURCE_ONLINE} ] ; then
    return ${RESOURCE_ONLINE}
  fi
  return ${RESOURCE_OFFLINE}
}

##############
### main() ###
##############
MYNAME=$0
MODE=$(basename "$0")   # Run_OVPI.ksh or Halt_OVPI.ksh or Monitor_OVPI.ksh or Clean_OVPI.ksh
PHYSICAL_HOSTNAME=$(hostname)
MYPID=$$

# Set ABBR to the mode abbreviation derived from the name we were invoked
# under: mon, run, halt or clean.  Any other name is kept verbatim and is
# rejected by the dispatcher below.  Both "Clean_OVPI.ksh" (the documented
# symlink name) and the older "clean.ksh" select clean mode.
#
case "${MODE}" in
  Monitor_OVPI.ksh)          ABBR=mon ;;
  Run_OVPI.ksh)              ABBR=run ;;
  Halt_OVPI.ksh)             ABBR=halt ;;
  Clean_OVPI.ksh|clean.ksh)  ABBR=clean ;;
  *)                         ABBR=${MODE} ;;
esac
OVPITAG="OVPI-$ABBR"

logit I "TRACE: AGENT '$MODE' INVOKED WITH ARGS: [$*]"
logit I "DBTYPE=$DBTYPE, FLOATER_HOSTNAME=$FLOATER_HOSTNAME"

case "${ABBR}" in
  # EXIT: 110 if OVPI is fully up, else 100.
  mon)
    OVPI_monitor_loop; exit $?
    ;;
  # EXIT: always 110 (see OVPI_online).
  run)
    OVPI_online; exit $?
    ;;
  # EXIT: always 100 (see OVPI_offline).
  halt)
    OVPI_offline; exit $?
    ;;
  # EXIT: 100 if everything was cleaned up, else 1.
  clean)
    OVPI_clean; exit $?
    ;;
  *)
    logit E "EXIT -1 : Unknown script name '${ABBR}'"
    exit 255   # same status the shell reports for "exit -1"
    ;;
esac
exit