I have some kind of a synchronization script transferring files from a production server to a DR one every day via rsync.
however, since a few days the script seems to be crashing.
they were multiple instances of it running, so I killed them all and restarted the process, however, it doesn't seem to do anything.
when tracing the PID with strace, I can see it re-reading the same data again and again.
the commands ran by the wrapper are :
root 4052277 30.5 0.1 61540 19280 pts/0 S+ 11:00 8:32 /opt/csm/64-bit/deps/1.0.6/bin/rsync --archive --compress --recursive --rsh=/usr/bin/ssh -i /root/.ssh/id_rsa_drsync --rsync-path=/usr/bin/rsync --verbose /app/PWC/ xm3p7050vmo://app/PWC
root 4052278 0.0 0.0 52184 6096 pts/0 S+ 11:00 0:00 /usr/bin/ssh -i /root/.ssh/id_rsa_drsync xm3p7050vmo /usr/bin/rsync --server -vlogDtprze.iLsf . //app/PWC
when doing an strace -p4052277, I get :
read(3, ",,N,,N,07/09/2017 04:00:17.000000000,D,pc_exe,D,07/09/2017 04:00:17.000000000,D,pc_exe,D,BASTITLE_T,D,wf_DWH_Global,D,s_"..., 262144) = 262144
read(3, ",N,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,-1,D,,N"..., 262144) = 262144
read(3, ",D,BASTITLE_T,D,wf_DWH_Global,D,s_DWH_T_BASE_TITLE,D,19141,D\n0,D,22529353,D,88380,D,4,D,0174525.001.000.001,D,MC58119990"..., 262144) = 262144
read(3, " 19:54:21.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,"..., 262144) = 262144
read(3, ",N,,N,,N,0,D,0,D,,N,,N,,N,,N,,N,10/29/2016 19:54:16.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,"..., 262144) = 262144
read(3, ",N/A,D,N/A,D,N/A,D,N/A,D,2201,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,-1,D,0,D,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N"..., 262144) = 262144
read(3, "7 04:00:17.000000000,D,pc_exe,D,07/09/2017 04:00:17.000000000,D,pc_exe,D,BASTITLE_T,D,wf_DWH_Global,D,s_DWH_T_BASE_TITLE"..., 262144) = 262144
read(3, ",D,86550,D,6,D,FX.FORW-0419518-L1-GBP,D,,N,,N,,N,,N,FX.FORW-0419518-L1-GBP,D,,N,,N,,N,,N,999,D,GBP,D,2108,D,999,D,N/A,D,"..., 262144) = 262144
read(3, "-Update: 07.07.2017 05:20:41,D,,N,,N,,N,,N,,N,,N,,N,30100,D,30107,D,30108,D,,N,,N,10/29/2016 00:00:00.000000000,D,0,D,,N"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,-1,D,,N,,N,,N,,N,RTW-Update: 07.07.2017 05:21:17,D,,N,,N,,N,,N,,N,0,D,,N,30100,D,30111,D,30112,D,,N,,N,1"..., 262144) = 262144
read(3, ",,N,,N,10/29/2016 19:54:13.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N"..., 262144) = 262144
read(3, ",2105,D,999,D,N/A,D,100,D,CPT-001,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,2201,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,-1,D,0,D,1,"..., 262144) = 262144
read(3, "0,D,,N,30100,D,30101,D,30102,D,,N,,N,10/29/2016 00:00:00.000000000,D,0,D,,N,0,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,RTW-Update: 07.07.2017 05:21:05,D,,N,,N,,N,,N,,N,,N,,N,30100,D,30107,D,30108,D,,N,,N,10/"..., 262144) = 262144
read(3, "7.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017 04:00:1"..., 262144) = 262144
read(3, "00000,D,pc_exe,D,07/09/2017 04:00:17.000000000,D,pc_exe,D,BASTITLE_T,D,wf_DWH_Global,D,s_DWH_T_BASE_TITLE,D,19141,D\n0,D,"..., 262144) = 262144
read(3, "D,N/A,D,N/A,D,N/A,D,N/A,D,0,D,0,D,1,D,0,D,,N,03/14/2016 00:00:00.000000000,D,03/14/2018 00:00:00.000000000,D,,N,,N,,N,,N"..., 262144) = 262144
read(3, ":17.000000000,D,pc_exe,D,BASTITLE_T,D,wf_DWH_Global,D,s_DWH_T_BASE_TITLE,D,19141,D\n0,D,22534150,D,83337,D,6,D,FX.FORW-03"..., 262144) = 262144
read(3, ",N,,N,,N,,N,RTW-Update: 07.07.2017 05:20:51,D,,N,,N,,N,,N,,N,,N,,N,30100,D,30107,D,30108,D,,N,,N,10/29/2016 00:00:00.000"..., 262144) = 262144
read(3, "00,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017 04:00:17.0000000"..., 262144) = 262144
read(3, "N,,N,,N,,N,10/29/2016 19:54:00.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,"..., 262144) = 262144
read(3, ",22535333,D,82141,D,6,D,FX.FORW-0373675-L2-CHF,D,,N,,N,,N,,N,FX.FORW-0373675-L2-CHF,D,,N,,N,,N,,N,999,D,CHF,D,2108,D,999"..., 262144) = 262144
read(3, "N/A,D,N/A,D,N/A,D,-1,D,0,D,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,1,D,0,D,,N,,N,,N,"..., 262144) = 262144
read(3, "4795-L1-USD,D,,N,,N,,N,,N,999,D,USD,D,2108,D,999,D,N/A,D,100,D,CHG-210,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,2204,D,N/A,D,N/A,"..., 262144) = 262144
read(3, "BASE_TITLE,D,19141,D\n0,D,22536220,D,82668,D,6,D,FX.FORW-0385515-L1-USD,D,,N,,N,,N,,N,FX.FORW-0385515-L1-USD,D,,N,,N,,N,,"..., 262144) = 262144
read(3, "N,,N,,N,,N,,N,0,D,0,D,,N,,N,,N,,N,,N,10/29/2016 19:53:33.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N"..., 262144) = 262144
read(3, "2017 05:20:26,D,,N,,N,,N,,N,,N,0,D,,N,30100,D,30101,D,30102,D,,N,,N,10/29/2016 00:00:00.000000000,D,0,D,,N,0,D,0,D,,N,,N"..., 262144) = 262144
read(3, ",N,,N,,N,FX.FORW-0387703-L2-CHF,D,,N,,N,,N,,N,999,D,CHF,D,2108,D,999,D,N/A,D,100,D,CHG-210,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A"..., 262144) = 262144
read(3, "obal,D,s_DWH_T_BASE_TITLE,D,19141,D\n0,D,22537407,D,80668,D,4,D,0177715.001.000.756,D,MC5811999000010177715000426,D,,N,,N"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,RTW-Update: 07.07.2017 05:20:05,D,,N,,N,,N,,N,,N,"..., 262144) = 262144
read(3, "N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017 04:00:17.000000000,D,pc_exe,D,07/09/2017 04:00:17.00"..., 262144) = 262144
read(3, "10/29/2016 00:00:00.000000000,D,0,D,,N,0,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,,N,,N,,N,10/29/2016 19:53:33.000"..., 262144) = 262144
read(3, "/A,D,N/A,D,N/A,D,2201,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,-1,D,0,D,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,"..., 262144) = 262144
read(3, "1,D,,N,,N,,N,,N,RTW-Update: 07.07.2017 05:20:03,D,,N,,N,,N,,N,,N,0,D,,N,30100,D,30101,D,30102,D,,N,,N,10/29/2016 00:00:0"..., 262144) = 262144
read(3, "0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017 04:00:17.000000000,D,pc_exe,D,07/09/2"..., 262144) = 262144
read(3, "9141,D\n0,D,22539492,D,77738,D,4,D,0171560.001.000.001,D,MC5811999000010171560000159,D,,N,,N,,N,0171560.001.000.001,D,,N,"..., 262144) = 262144
just flooding this same block again and again (the FS is about 150Gbytes)
how can I investigate on why it's doing this ? (it was perfectly working before) I can't reboot the machine as it's in prod.
could it be related to a bad link or anything in the folder ? or a file being in use ? (apparently none are in use, looking at the processes running), is there additional RSYNC options I could add to fix that ?
thanks for your hints regards,