03-28-2015 04:51 PM
Dears,
I am facing a weird behaviour in my cluster 6.1. I created files under the mount point on the first node and failed over to the second node; when I checked the same mount point there, I found that my files do not exist. But when I fail back to node1 I can see them again. Do you have any idea about such behaviour?
attached are the tests I did to investigate the issue.
appreciate your response
Regards,
03-29-2015 09:30 PM
Can you post vxprint -rth for the affected disk group, plus vxdisk list for each disk in the disk group.
03-29-2015 11:24 PM
Hi,
hereunder the output of the commands :
[root@rl analysis]# vxprint -Aht
Disk group: rlm_dg
DG NAME NCONFIG NLOG MINORS GROUP-ID
ST NAME STATE DM_CNT SPARE_CNT APPVOL_CNT
DM NAME DEVICE TYPE PRIVLEN PUBLEN STATE
RV NAME RLINK_CNT KSTATE STATE PRIMARY DATAVOLS SRL
RL NAME RVG KSTATE STATE REM_HOST REM_DG REM_RLNK
CO NAME CACHEVOL KSTATE STATE
VT NAME RVG KSTATE STATE NVOLUME
V NAME RVG/VSET/CO KSTATE STATE LENGTH READPOL PREFPLEX UTYPE
PL NAME VOLUME KSTATE STATE LENGTH LAYOUT NCOL/WID MODE
SD NAME PLEX DISK DISKOFFS LENGTH [COL/]OFF DEVICE MODE
SV NAME PLEX VOLNAME NVOLLAYR LENGTH [COL/]OFF AM/NM MODE
SC NAME PLEX CACHE DISKOFFS LENGTH [COL/]OFF DEVICE MODE
DC NAME PARENTVOL LOGVOL
SP NAME SNAPVOL DCO
EX NAME ASSOC VC PERMS MODE STATE
SR NAME KSTATE
dg rlm_dg default default 28000 1413731055.7.rlm1.bh.zain.com
dm vmdk0_6 vmdk0_6 auto 65536 524204656 -
v rlm_vol - ENABLED ACTIVE 522190848 SELECT - fsgen
pl rlm_vol-01 rlm_vol ENABLED ACTIVE 522190848 CONCAT - RW
sd vmdk0_6-01 rlm_vol-01 vmdk0_6 0 522190848 0 vmdk0_6 ENA
######################################################
[root@rlm1 analysis]# vxdisk list
DEVICE TYPE DISK GROUP STATUS
sda auto:LVM - - online invalid
vmdk0_6 auto:cdsdisk vmdk0_6 rlm_dg online
03-30-2015 08:58 AM
Please post
vxdisk list vmdk0_6
FROM BOTH SERVERS
03-30-2015 09:12 AM
Can you provide extract from main.cf (in /etc/VRTSvcs/conf/config) of the service group you are failing over.
Mike
03-30-2015 09:21 AM
here is the output of vxdisk list
Node1:
[root@rlm1 ~]# vxdisk list vmdk0_6
Device: vmdk0_6
devicetag: vmdk0_6
type: auto
hostid:
disk: name= id=1413731025.5.rlm1.bh.zain.com
group: name=rlm_dg id=1413731055.7.rlm1.bh.zain.com
info: format=cdsdisk,privoffset=256,pubslice=3,privslice=3
flags: online ready private autoconfig
pubpaths: block=/dev/vx/dmp/vmdk0_6s3 char=/dev/vx/rdmp/vmdk0_6s3
guid: {1f0e7646-57a1-11e4-83fe-62ff9d3412aa}
udid: VMware%5FVirtual%20disk%5Fvmdk%5F6000C2920A45195080AB527069DB08D4
site: -
version: 3.1
iosize: min=512 (bytes) max=1024 (blocks)
public: slice=3 offset=65792 len=524204656 disk_offset=0
private: slice=3 offset=256 len=65536 disk_offset=0
update: time=1427721868 seqno=0.292
ssb: actual_seqno=0.0
headers: 0 240
configs: count=1 len=51360
logs: count=1 len=4096
Defined regions:
config priv 000048-000239[000192]: copy=01 offset=000000 enabled
config priv 000256-051423[051168]: copy=01 offset=000192 enabled
log priv 051424-055519[004096]: copy=01 offset=000000 enabled
lockrgn priv 055520-055663[000144]: part=00 offset=000000
Multipathing information:
numpaths: 1
sdb state=enabled
[root@rlm1 ~]#
Node2:
[root@rlm2 ~]# vxdisk list vmdk0_2
Device: vmdk0_2
devicetag: vmdk0_2
type: auto
hostid: rlm2.bh.zain.com
disk: name=vmdk0_6 id=1413731025.5.rlm1.bh.zain.com
group: name=rlm_dg id=1413731055.7.rlm1.bh.zain.com
info: format=cdsdisk,privoffset=256,pubslice=3,privslice=3
flags: online ready private autoconfig noautoimport imported
pubpaths: block=/dev/vx/dmp/vmdk0_2s3 char=/dev/vx/rdmp/vmdk0_2s3
guid: {1f0e7646-57a1-11e4-83fe-62ff9d3412aa}
udid: VMware%5FVirtual%20disk%5Fvmdk%5F6000C2920A45195080AB527069DB08D4
site: -
version: 3.1
iosize: min=512 (bytes) max=1024 (blocks)
public: slice=3 offset=65792 len=524204656 disk_offset=0
private: slice=3 offset=256 len=65536 disk_offset=0
update: time=1427732274 seqno=0.280
ssb: actual_seqno=0.0
headers: 0 240
configs: count=1 len=51360
logs: count=1 len=4096
Defined regions:
config priv 000048-000239[000192]: copy=01 offset=000000 enabled
config priv 000256-051423[051168]: copy=01 offset=000192 enabled
log priv 051424-055519[004096]: copy=01 offset=000000 enabled
lockrgn priv 055520-055663[000144]: part=00 offset=000000
Multipathing information:
numpaths: 1
sdb state=enabled
[root@rlm2 ~]#
03-30-2015 09:23 AM
here is the main.cf (in /etc/VRTSvcs/conf/config) of the service group
group Rlm_App (
SystemList = { rlm1 = 0, rlm2 = 1 }
)
Application pricemaker (
CleanProgram = "/home/rlx/rlmapp1/RLX/bin/pricemaker.clean"
MonitorProgram = "/home/rlx/rlmapp1/RLX/bin/pricemaker.probe"
User = rlx
StartProgram = "/home/rlx/working/Scripts/StartPricemaker"
StopProgram = "/home/rlx/rlmapp1/RLX/bin/pricemaker stop"
)
Application tomcat (
CleanProgram = "/home/tomcat/apache-tomcat-6.0.20/bin/tomcat.clean"
MonitorProgram = "/home/tomcat/apache-tomcat-6.0.20/bin/tomcat.prob "
User = tomcat
StartProgram = " /home/tomcat/apache-tomcat-6.0.20/bin/catalina.sh start"
StopProgram = " /home/tomcat/apache-tomcat-6.0.20/bin/catalina.sh stop"
)
DiskGroup rlm_dg (
DiskGroup = rlm_dg
)
IP rlm_lh (
Device = eth0
Address = "172.21.17.153"
NetMask = "255.255.252.0"
)
Mount rlm_mnt (
FsckOpt = "-y"
BlockDevice = "/dev/vx/dsk/rlm_dg/rlm_vol"
MountPoint = "/work"
FSType = vxfs
)
Volume rlm_vol (
Volume = rlm_vol
DiskGroup = rlm_dg
)
pricemaker requires rlm_mnt
rlm_mnt requires rlm_vol
rlm_vol requires rlm_dg
tomcat requires pricemaker
03-30-2015 03:52 PM
I see your disks are VMWare so my guess is that you have not setup shared disks correctly in VMWare. Have a look at https://www-secure.symantec.com/connect/articles/building-vmware-shared-disk - this is for a Windows guest, but the VMWare setting should be the same when using a Linux guest.
Mike
03-31-2015 02:40 AM
03-31-2015 02:43 AM
04-01-2015 07:33 AM
I am with Mike on this one: "... my guess is that you have not setup shared disks correctly in VMWare."
I would at this stage take VCS out of the picture (hastop -all -force).
List disks on both nodes:
vxdisk -o alldgs list
Check disk serial # where disk is imported:
/usr/lib/vxvm/diag.d/vxscsiinq <disk-name>
Unmount filesystem and deport dg.
List disks again on both nodes:
vxdisk -o alldgs list
Import dg on other node.
List disks again on both nodes:
vxdisk -o alldgs list
Check disk serial # where disk is imported:
/usr/lib/vxvm/diag.d/vxscsiinq <disk-name>
04-08-2015 10:32 AM
There is also another possibility if the above hasn't fixed your issue. (I have seen similar behavior on a couple of support cases)....
That you have somehow placed these files in "/work" in such a way that they actually aren't in the shared storage.
To test for this, do an "ls -al" in the directory "/work" when the failover group was not on that system and see if the files are there?
If the files / folders are still present in "/work" while the service group is not online on this node, then those files are not stored on the shared storage — they live on the node's local root filesystem, underneath the mount point.
Solution:
Move these files / folders currently in "/work" elsewhere on the system.
Failover service group to this node and once the mount is online, put the files / folders into "/work" again.
Test failover to see that the files move with the service group...
Hope that is helpful.
Regards,
Ted