0

My server was working fine for 173 days, then processes started hanging with "blocked for more than 120 seconds" messages (kernel log below). Upgrading the CentOS 6.6 kernel to the latest one didn't help.

Rebooting helps for about 10 minutes, then the hangs start all over again.

I'm running a dedicated server: Intel Core i7 980X, MSI X58-E Pro board, 2x Intel SSD 240 GB in S/W RAID 0 + 2x WD RE4 3 TB in S/W RAID 0.

I've replaced all the hardware except the SSD drives and it still happens ;(

 INFO: task qmail-queue:7091 blocked for more than 120 seconds.
      Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
qmail-queue   D 0000000000000003     0  7091   7090 0x00000080
 ffff88031b30fdc8 0000000000000082 ffff88031b30fd90 ffff88031b30fd8c
 000000000000000e ffff88063fc24780 000000a2c09d83a0 ffff8800282f58c0
 0000000000000400 00000001000618c3 ffff8806333d1068 ffff88031b30ffd8
Call Trace:
 [<ffffffffa02255a5>] jbd2_log_wait_commit+0xc5/0x140 [jbd2]
 [<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
 [<ffffffffa0225938>] jbd2_complete_transaction+0x68/0xb0 [jbd2]
 [<ffffffffa0256061>] ext4_sync_file+0x121/0x1d0 [ext4]
 [<ffffffff811c0c51>] vfs_fsync_range+0xa1/0x100
 [<ffffffff811c0d1d>] vfs_fsync+0x1d/0x20
 [<ffffffff811c0d5e>] do_fsync+0x3e/0x60
 [<ffffffff811c0db0>] sys_fsync+0x10/0x20
 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task qmail-send:1606 blocked for more than 120 seconds.
      Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
qmail-send    D 0000000000000003     0  1606   1594 0x00000080
 ffff8806327b9dc8 0000000000000086 ffff8806327b9e70 0000000000000001
 000000000000000e ffff8806327b9de8 ffff8806327b9d88 ffffffff81123f80
 ffff88036fa7c220 ffff8806327b9de8 ffff88062fc31ad8 ffff8806327b9fd8
Call Trace:
 [<ffffffff81123f80>] ? find_get_pages_tag+0x40/0x130
 [<ffffffff8109ef4e>] ? prepare_to_wait+0x4e/0x80
 [<ffffffffa02255a5>] jbd2_log_wait_commit+0xc5/0x140 [jbd2]
 [<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
 [<ffffffffa0225938>] jbd2_complete_transaction+0x68/0xb0 [jbd2]
 [<ffffffffa0256061>] ext4_sync_file+0x121/0x1d0 [ext4]
 [<ffffffff811c0c51>] vfs_fsync_range+0xa1/0x100
 [<ffffffff811c0d1d>] vfs_fsync+0x1d/0x20
 [<ffffffff811c0d5e>] do_fsync+0x3e/0x60
 [<ffffffff811c0db0>] sys_fsync+0x10/0x20
 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4141 blocked for more than 120 seconds.
      Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx         D 0000000000000003     0  4141   4140 0x00000080
 ffff880633079be8 0000000000000082 0000000000000000 ffff880632a50ae8
 0000000000000000 ffff8800282b5928 000000bc7bda8648 0000000000000003
 ffff880633079b88 000000010007c912 ffff88063214c5f8 ffff880633079fd8
Call Trace:
 [<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
 [<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
 [<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
 [<ffffffff811f1eac>] ? dqput+0x5c/0x200
 [<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
 [<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
 [<ffffffff8119d9e6>] vfs_create+0xe6/0x110
 [<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
 [<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
 [<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
 [<ffffffff811ae392>] ? alloc_fd+0x92/0x160
 [<ffffffff8118b157>] do_sys_open+0x67/0x130
 [<ffffffff8118b260>] sys_open+0x20/0x30
 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4147 blocked for more than 120 seconds.
      Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx         D 0000000000000001     0  4147   4140 0x00000080
 ffff880633395be8 0000000000000086 ffff880633395b48 0000000000000000
 0000000000000000 ffff880633395cc4 0000000000000000 ffff880458b6b4e8
 ffff880633395c18 ffffffffa025d3de ffff88063214dad8 ffff880633395fd8
Call Trace:
 [<ffffffffa025d3de>] ? ext4_getblk+0xee/0x1f0 [ext4]
 [<ffffffff8109ef4e>] ? prepare_to_wait+0x4e/0x80
 [<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
 [<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
 [<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
 [<ffffffff811f1eac>] ? dqput+0x5c/0x200
 [<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
 [<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
 [<ffffffff8119d9e6>] vfs_create+0xe6/0x110
 [<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
 [<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
 [<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
 [<ffffffff811ae392>] ? alloc_fd+0x92/0x160
 [<ffffffff8118b157>] do_sys_open+0x67/0x130
 [<ffffffff8118b260>] sys_open+0x20/0x30
 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4149 blocked for more than 120 seconds.
      Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx         D 0000000000000003     0  4149   4140 0x00000080
 ffff8805efc7bbe8 0000000000000086 ffff8805efc7bb48 0000000000000000
 0000000000000000 ffff8805efc7bcc4 0000000000000000 ffff88044b34d4e8
 ffff8805efc7bc18 ffffffffa025d3de ffff8805d117c5f8 ffff8805efc7bfd8
Call Trace:
 [<ffffffffa025d3de>] ? ext4_getblk+0xee/0x1f0 [ext4]
 [<ffffffff8109ef4e>] ? prepare_to_wait+0x4e/0x80
 [<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
 [<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
 [<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
 [<ffffffff811f1eac>] ? dqput+0x5c/0x200
 [<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
 [<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
 [<ffffffff8119d9e6>] vfs_create+0xe6/0x110
 [<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
 [<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
 [<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
 [<ffffffff811ae392>] ? alloc_fd+0x92/0x160
 [<ffffffff8118b157>] do_sys_open+0x67/0x130
 [<ffffffff8118b260>] sys_open+0x20/0x30
 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4155 blocked for more than 120 seconds.
      Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx         D 0000000000000006     0  4155   4140 0x00000080
 ffff8805d2a0fd18 0000000000000082 0000000000000000 ffff8805a643b840
 ffff8805d2a0fc88 ffff8805d2a0fe08 000000b880966530 ffffffff8122ec0f
 ffff8805d2a0fd68 0000000100078618 ffff88063318bad8 ffff8805d2a0ffd8
Call Trace:
 [<ffffffff8122ec0f>] ? security_inode_permission+0x1f/0x30
 [<ffffffff8152b486>] __mutex_lock_slowpath+0x96/0x210
 [<ffffffff8152afab>] mutex_lock+0x2b/0x50
 [<ffffffff811a10b6>] do_filp_open+0x2d6/0xd20
 [<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
 [<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
 [<ffffffff811ae392>] ? alloc_fd+0x92/0x160
 [<ffffffff8118b157>] do_sys_open+0x67/0x130
 [<ffffffff8118b260>] sys_open+0x20/0x30
 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4160 blocked for more than 120 seconds.
      Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx         D 0000000000000006     0  4160   4140 0x00000080
 ffff8805efcd3be8 0000000000000086 ffff8805efcd3bb0 ffff8805efcd3bac
 0000000000000000 ffff88063fc24d80 000000b7c632b48c ffff8800282158c0
 00000000000007ff 00000001000779f0 ffff880632ddc5f8 ffff8805efcd3fd8
Call Trace:
 [<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
 [<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
 [<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
 [<ffffffff811f1eac>] ? dqput+0x5c/0x200
 [<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
 [<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
 [<ffffffff8119d9e6>] vfs_create+0xe6/0x110
 [<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
 [<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
 [<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
 [<ffffffff811ae392>] ? alloc_fd+0x92/0x160
 [<ffffffff8118b157>] do_sys_open+0x67/0x130
 [<ffffffff8118b260>] sys_open+0x20/0x30
 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4164 blocked for more than 120 seconds.
      Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx         D 0000000000000003     0  4164   4140 0x00000080
 ffff880633055be8 0000000000000082 ffff880633055bb0 ffff880633055bac
 0000000000000000 ffff88063fc24780 000000be5103f4f7 ffff8800282158c0
 00000000000005ff 000000010007e7db ffff88062fd63ad8 ffff880633055fd8
Call Trace:
 [<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
 [<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
 [<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
 [<ffffffff811f1eac>] ? dqput+0x5c/0x200
 [<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
 [<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
 [<ffffffff8119d9e6>] vfs_create+0xe6/0x110
 [<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
 [<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
 [<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
 [<ffffffff811ae392>] ? alloc_fd+0x92/0x160
 [<ffffffff8118b157>] do_sys_open+0x67/0x130
 [<ffffffff8118b260>] sys_open+0x20/0x30
 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b

Here is the strace output of a simple command, `userdel username`; the trace stops at the final, unfinished `fsync(10` call:

    open("/etc/group", O_RDONLY|O_CLOEXEC)  = 10
fstat(10, {st_mode=S_IFREG|0644, st_size=4253, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb3b4957000
read(10, "root:x:0:root\nbin:x:1:root,bin,d"..., 4096) = 4096
close(10)                               = 0
munmap(0x7fb3b4957000, 4096)            = 0
open("/etc/passwd", O_RDONLY|O_CLOEXEC) = 10
fstat(10, {st_mode=S_IFREG|0644, st_size=16685, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb3b4957000
lseek(10, 0, SEEK_CUR)                  = 0
read(10, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 4096
read(10, "stem User for edzia-r:/home/edzi"..., 4096) = 4096
read(10, "stros:/home/stros/:/sbin/nologin"..., 4096) = 4096
read(10, "x:7923:7923:System User for neox"..., 4096) = 4096
read(10, "me:x:8042:8042:System User for s"..., 4096) = 301
read(10, "", 4096)                      = 0
close(10)                               = 0
munmap(0x7fb3b4957000, 4096)            = 0
ioctl(0, SNDCTL_TMR_TIMEBASE or TCGETS, {B38400 opost isig icanon echo ...}) = 0
readlink("/proc/self/fd/0", "/dev/pts/0"..., 31) = 10
lstat("/dev/pts/0", {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
sendto(3, "x\0\0\0]\4\5\0\2\0\0\0\0\0\0\0op=deleting grou"..., 120, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 120
poll([{fd=3, events=POLLIN}], 1, 500)   = 1 ([{fd=3, revents=POLLIN}])
recvfrom(3, "$\0\0\0\2\0\0\0\2\0\0\0YR\0\0\0\0\0\0x\0\0\0]\4\5\0\2\0\0\0"..., 8988, MSG_PEEK|MSG_DONTWAIT, {sa_family=AF_NETLINK, pid=0, groups=00000000}, [12]) = 36
recvfrom(3, "$\0\0\0\2\0\0\0\2\0\0\0YR\0\0\0\0\0\0x\0\0\0]\4\5\0\2\0\0\0"..., 8988, MSG_DONTWAIT, {sa_family=AF_NETLINK, pid=0, groups=00000000}, [12]) = 36
sendto(9, "<86>Jul 19 13:16:04 userdel[2108"..., 77, MSG_NOSIGNAL, NULL, 0) = 77
fstat(5, {st_mode=S_IFREG|0644, st_size=16685, ...}) = 0
fstat(5, {st_mode=S_IFREG|0644, st_size=16685, ...}) = 0
umask(077)                              = 022
open("/etc/passwd-", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 10
umask(022)                              = 077
lseek(5, 0, SEEK_SET)                   = 0
read(5, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 4096
fstat(10, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb3b4957000
read(5, "stem User for edzia-r:/home/edzi"..., 4096) = 4096
write(10, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 4096
read(5, "stros:/home/stros/:/sbin/nologin"..., 4096) = 4096
write(10, "stem User for edzia-r:/home/edzi"..., 4096) = 4096
read(5, "x:7923:7923:System User for neox"..., 4096) = 4096
write(10, "stros:/home/stros/:/sbin/nologin"..., 4096) = 4096
read(5, "me:x:8042:8042:System User for s"..., 4096) = 301
write(10, "x:7923:7923:System User for neox"..., 4096) = 4096
read(5, "", 4096)                       = 0
write(10, "me:x:8042:8042:System User for s"..., 301) = 301
fsync(10
Marek Zakrzewski
  • 163
  • 1
  • 2
  • 11

0 Answers0