公司线上一台服务器登录上去后,在根目录下执行ls命令后完全卡死,ctrl-c也没办法杀掉,怀疑磁盘问题,另起一个ssh后执行df命令,发现依旧是卡死的。问题是线上所有部署在该服务器上的服务正常运行,并且因为该服务器上有重要服务后文件不能重启。于是开始通过搜索排查问题。
解决思路:
1.strace 命令跟踪,定位问题
首先通过yum install strace来安装相关命令
yum install centos-release-scl-rh -y
yum install strace --enablerepo=centos-sclo-rh -y
执行
strace ls /
execve("/usr/bin/ls", ["ls", "/"], [/* 29 vars */]) = 0
brk(NULL) = 0x10aa000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fdc1f79c000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=28106, ...}) = 0
mmap(NULL, 28106, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fdc1f795000
close(3) = 0
open("/lib64/libselinux.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320i\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=155784, ...}) = 0
mmap(NULL, 2255184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fdc1f355000
mprotect(0x7fdc1f379000, 2093056, PROT_NONE) = 0
mmap(0x7fdc1f578000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x23000) = 0x7fdc1f578000
mmap(0x7fdc1f57a000, 6480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fdc1f57a000
close(3) = 0
open("/lib64/libcap.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \26\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=20032, ...}) = 0
mmap(NULL, 2114112, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fdc1f150000
mprotect(0x7fdc1f154000, 2093056, PROT_NONE) = 0
mmap(0x7fdc1f353000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7fdc1f353000
close(3) = 0
open("/lib64/libacl.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200\37\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=37056, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fdc1f794000
mmap(NULL, 2130560, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fdc1ef47000
mprotect(0x7fdc1ef4e000, 2097152, PROT_NONE) = 0
mmap(0x7fdc1f14e000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7000) = 0x7fdc1f14e000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2127336, ...}) = 0
mmap(NULL, 3940800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fdc1eb84000
mprotect(0x7fdc1ed3c000, 2097152, PROT_NONE) = 0
mmap(0x7fdc1ef3c000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b8000) = 0x7fdc1ef3c000
mmap(0x7fdc1ef42000, 16832, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fdc1ef42000
close(3) = 0
open("/lib64/libpcre.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360\25\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=402384, ...}) = 0
mmap(NULL, 2494984, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fdc1e922000
mprotect(0x7fdc1e982000, 2097152, PROT_NONE) = 0
mmap(0x7fdc1eb82000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x60000) = 0x7fdc1eb82000
close(3) = 0
open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`\16\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=19776, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fdc1f793000
mmap(NULL, 2109744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fdc1e71e000
mprotect(0x7fdc1e720000, 2097152, PROT_NONE) = 0
mmap(0x7fdc1e920000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7fdc1e920000
close(3) = 0
open("/lib64/libattr.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\23\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=19888, ...}) = 0
mmap(NULL, 2113904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fdc1e519000
mprotect(0x7fdc1e51d000, 2093056, PROT_NONE) = 0
mmap(0x7fdc1e71c000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7fdc1e71c000
close(3) = 0
open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0m\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=144792, ...}) = 0
mmap(NULL, 2208904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fdc1e2fd000
mprotect(0x7fdc1e314000, 2093056, PROT_NONE) = 0
mmap(0x7fdc1e513000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7fdc1e513000
mmap(0x7fdc1e515000, 13448, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fdc1e515000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fdc1f792000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fdc1f790000
arch_prctl(ARCH_SET_FS, 0x7fdc1f790800) = 0
mprotect(0x7fdc1ef3c000, 16384, PROT_READ) = 0
mprotect(0x7fdc1e513000, 4096, PROT_READ) = 0
mprotect(0x7fdc1e71c000, 4096, PROT_READ) = 0
mprotect(0x7fdc1e920000, 4096, PROT_READ) = 0
mprotect(0x7fdc1eb82000, 4096, PROT_READ) = 0
mprotect(0x7fdc1f14e000, 4096, PROT_READ) = 0
mprotect(0x7fdc1f353000, 4096, PROT_READ) = 0
mprotect(0x7fdc1f578000, 4096, PROT_READ) = 0
mprotect(0x61a000, 4096, PROT_READ) = 0
mprotect(0x7fdc1f79d000, 4096, PROT_READ) = 0
munmap(0x7fdc1f795000, 28106) = 0
set_tid_address(0x7fdc1f790ad0) = 7685
set_robust_list(0x7fdc1f790ae0, 24) = 0
rt_sigaction(SIGRTMIN, {0x7fdc1e3037e0, [], SA_RESTORER|SA_SIGINFO, 0x7fdc1e30c5e0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x7fdc1e303870, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7fdc1e30c5e0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
statfs("/sys/fs/selinux", {f_type=SELINUX_MAGIC, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096, f_flags=ST_VALID|ST_RELATIME}) = 0
statfs("/sys/fs/selinux", {f_type=SELINUX_MAGIC, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096, f_flags=ST_VALID|ST_RELATIME}) = 0
stat("/sys/fs/selinux", {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
brk(NULL) = 0x10aa000
brk(0x10cb000) = 0x10cb000
access("/etc/selinux/config", F_OK) = 0
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106070960, ...}) = 0
mmap(NULL, 106070960, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fdc17dd4000
close(3) = 0
open("/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=2502, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fdc1f79b000
read(3, "# Locale name alias data base.\n#"..., 4096) = 2502
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7fdc1f79b000, 4096) = 0
open("/usr/lib/locale/UTF-8/LC_CTYPE", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TIOCGWINSZ, {ws_row=70, ws_col=181, ws_xpixel=0, ws_ypixel=0}) = 0
stat("/", {st_mode=S_IFDIR|0555, st_size=4096, ...}) = 0
openat(AT_FDCWD, "/", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 28 entries */, 32768) = 736
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 7), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fdc1f79b000
write(1, "bin boot data deployment dev"..., 163bin boot data deployment dev etc home lib lib64 media mnt nohup.out opt proc productdata root run sbin srv sys testdata tmp usr var workspace
lgetxattr("/testdata",
可以看到,命令执行到 /testdata,停止不动了
也可以使用cat /proc/mounts 查看当前mount状态,发现确实有testdata目录的记录
通过mount 命令查看到/testdata挂载一个nfs目录,似乎这个目录已经不暴露了,解决方案自然是umount这目录
当操作时发现 umount /testdata也是卡死,于是执行如下命令:
umount -lf /testdata
问题得以解决
本文为Lokie.Wang原创文章,转载无需和我联系,但请注明来自lokie博客http://lokie.wang