01-19-2010 01:01 PM
I created a test because the same thing happened with the production service group.
Test details:
Application testsvr (
Critical = 0
StartProgram = "/export/export2/vcsbuild/test_script_start"
StopProgram = "/export/export2/vcsbuild/test_script_stop"
MonitorProgram = "/opt/VRTSvcs/bin/Application/test_monitor"
)
-------------------------------------------------------------------------
/opt/VRTSvcs/bin/Application
test_monitor
#!/bin/ksh
# test_monitor modified from ctm_agt_monitor
# Marc Jourdeuil-Jan 19, 2010
# custom script for VCS application agent controlmagt to monitor if controlm server process is running.
#
# This script must exit either 0 or 1. If the exit code is 0 VCS will think the resource state is offline,
# if the exit code is 1 VCS will think the state is online.
# MonitorProgram can return the following VCSagResState values to VCS:
# offline value is 100, online values range from 101 to 110 (depending on the confidence level),
# 110 equals confidence level of 100%. Any other value is=unknown
#
# agent processes to monitor: p_ctmat and p_ctmag
# use controlm server processes p_ctmsl, p_ctmlg for test
#agent_process1=p_ctmsl
#agent_process2=p_ctmlg
agent_process1=test_script_2monitor
#agent_process2=p_ctmag
status=0
#res=$(ps -ef | grep $agent_process1 | grep -v grep)
res=$(pgrep -f $agent_process1)
echo "res=[$res]"
if [[ -z $res ]];then
echo $status
exit $status
fi
#res=$(ps -ef | grep $agent_process2 | grep -v grep)
#if [[ -z $res ]];then
# exit $status
#fi
status=1
# If both the controlm agent processes are running, return status 1, ok, the application agent
# will consider the resource is online, otherwise exit with 0, consider the resource is offline
# echo "status=[$status], if 1, ctm agent pids are running, ok, VCS will consider the app agent online, otherwise offline"
echo $status
exit $status
-------------------------------------------------------------------------
test_script_2monitor
#!/bin/sh
while [ 1=1 ]
do
echo ".\c"
sleep 300
done
-------------------------------------------------------------------------
start script
test_script_start
#!/bin/sh
sh /export/export2/vcsbuild/test_script_2monitor &
pgrep -f /export/export2/vcsbuild/test_script_2monitor
-------------------------------------------------------------------------
stop script
test_script_stop
#!/bin/sh
echo " Check if script test_script_2monitor is running..."
pgrep -f /export/export2/vcsbuild/test_script_2monitor
pkill -9 -f /export/export2/vcsbuild/test_script_2monitor
echo " Check if script test_script_2monitor is running..."
pgrep -f /export/export2/vcsbuild/test_script_2monitor
-------------------------------------------------------------------------
start
mnvpr02b# ./test_script_start
.10172
monitor
/opt/VRTSvcs/bin/Application/test_monitor
mnvpr02b# /opt/VRTSvcs/bin/Application/test_monitor
res=[10172]
1
hares -add testsvr Application ClusterService
hares -modify testsvr StartProgram /export/export2/vcsbuild/test_script_start
hares -modify testsvr StopProgram /export/export2/vcsbuild/test_script_stop
hares -modify testsvr MonitorProgram /opt/VRTSvcs/bin/Application/test_monitor
hares -modify testsvr Critical 0
hares -modify testsvr Enabled 1
hares -probe testsvr -sys `hostname`
2010/01/19 15:34:50 VCS ERROR V-16-10001-60 (mnvpr02a) Application:testsvr:monitor:monitor:Program (/opt/VRTSvcs/bin/Application/test_monitor) does not exist.
2010/01/19 15:34:50 VCS WARNING V-16-10001-51 (mnvpr02a) Application:testsvr:monitor:State returned by Monitor Program (/opt/VRTSvcs/bin/Application/test_monitor):UNKNOWN.
2010/01/19 15:34:51 VCS WARNING V-16-10001-51 (mnvpr02b) Application:testsvr:monitor:State returned by Monitor Program (/opt/VRTSvcs/bin/Application/test_monitor):UNKNOWN.
2010/01/19 15:35:19 VCS INFO V-16-1-50135 User root fired command: MSG_RES_PROBE testsvr mnvpr02b from localhost
2010/01/19 15:35:20 VCS WARNING V-16-10001-51 (mnvpr02b) Application:testsvr:monitor:State returned by Monitor Program (/opt/VRTSvcs/bin/Application/test_monitor):UNKNOWN.
mnvpr02b# hastatus -summ
-- SYSTEM STATE
-- System State Frozen
A mnvpr02a RUNNING 0
A mnvpr02b RUNNING 0
-- GROUP STATE
-- Group System Probed AutoDisabled State
B ClusterService mnvpr02a N N OFFLINE
B ClusterService mnvpr02b N N ONLINE
B sapP20 mnvpr02a Y N OFFLINE
B sapP20 mnvpr02b Y N PARTIAL
-- RESOURCES NOT PROBED
-- Group Type Resource System
D ClusterService Application testsvr mnvpr02a
D ClusterService Application testsvr mnvpr02b
What am I missing?
01-19-2010 02:42 PM
01-19-2010 02:45 PM