<span class="Apple-style-span" style="border-collapse: collapse; font-family: arial, sans-serif; font-size: 13px; ">Hi,<div><br></div><div>Does anyone know why mkfs.lustre hangs the system? The problem occurs with Lustre 1.8.5 on SUSE Linux 11 (x86_64); the hardware platform is Amazon EC2. Here's the full strace log: </div>
<div><br></div><div><div>ldiskfs # strace mkfs.lustre --fsname=temp --mgs --mdt /dev/sdf</div><div>execve("/usr/sbin/mkfs.lustre", ["mkfs.lustre", "--fsname=temp", "--mgs", "--mdt", "/dev/sdf"], [/* 51 vars */]) = 0</div>
<div>brk(0) = 0x612000</div><div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124b4000</div><div>access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)</div>
<div>open("/etc/ld.so.cache", O_RDONLY) = 3</div><div>fstat(3, {st_mode=S_IFREG|0644, st_size=50926, ...}) = 0</div><div>mmap(NULL, 50926, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f30124a7000</div><div>close(3) = 0</div>
<div>open("/lib64/libc.so.6", O_RDONLY) = 3</div><div>read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\354\1\0\0\0\0\0"..., 832) = 832</div>
<div>fstat(3, {st_mode=S_IFREG|0755, st_size=1661454, ...}) = 0</div><div>mmap(NULL, 3528776, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3011f39000</div><div>fadvise64(3, 0, 3528776, POSIX_FADV_WILLNEED) = 0</div>
<div>mprotect(0x7f301208d000, 2097152, PROT_NONE) = 0</div><div>mmap(0x7f301228d000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x154000) = 0x7f301228d000</div>
<div>mmap(0x7f3012292000, 18504, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f3012292000</div>
<div>close(3) = 0</div><div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124a6000</div><div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124a5000</div>
<div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124a4000</div><div>arch_prctl(ARCH_SET_FS, 0x7f30124a5700) = 0</div><div>mprotect(0x7f301228d000, 16384, PROT_READ) = 0</div>
<div>mprotect(0x608000, 4096, PROT_READ) = 0</div><div>mprotect(0x7f30124b5000, 4096, PROT_READ) = 0</div><div>munmap(0x7f30124a7000, 50926) = 0</div><div>open("/proc/sys/kernel/osrelease", O_RDONLY) = 3</div>
<div>read(3, "2.6.", 4) = 4</div><div>close(3) = 0</div><div>access("/dev/sdf", F_OK) = 0</div><div>stat("/dev/sdf", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 80), ...}) = 0</div>
<div>access("/usr/sbin/l_getgroups", R_OK|X_OK) = 0</div><div>fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0</div><div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124b3000</div>
<div>write(1, "\n", 1</div><div>) = 1</div><div>write(1, " Permanent disk data:\n", 24 Permanent disk data:</div><div>) = 24</div><div>write(1, "Target: temp-MDTffff\n", 25Target: temp-MDTffff</div>
<div>) = 25</div><div>write(1, "Index: unassigned\n", 23Index: unassigned</div><div>) = 23</div><div>write(1, "Lustre FS: temp\n", 17Lustre FS: temp</div><div>) = 17</div><div>write(1, "Mount type: ldiskfs\n", 20Mount type: ldiskfs</div>
<div>) = 20</div><div>write(1, "Flags: 0x75\n", 17Flags: 0x75</div><div>) = 17</div><div>write(1, " (MDT MGS needs_ind"..., 55 (MDT MGS needs_index first_time update )</div>
<div>) = 55</div><div>write(1, "Persistent mount opts: iopen_nop"..., 65Persistent mount opts: iopen_nopriv,user_xattr,errors=remount-ro</div><div>) = 65</div><div>write(1, "Parameters: mdt.group_upcall=/us"..., 51Parameters: mdt.group_upcall=/usr/sbin/l_getgroups</div>
<div>) = 51</div><div>write(1, "\n", 1</div><div>) = 1</div><div>brk(0) = 0x612000</div><div>brk(0x633000) = 0x633000</div><div>open("/etc/mtab", O_RDONLY) = 3</div>
<div>fstat(3, {st_mode=S_IFREG|0644, st_size=349, ...}) = 0</div><div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124b2000</div><div>read(3, "/dev/sda1 / ext3 rw,acl,user_xat"..., 4096) = 349</div>
<div>read(3, "", 4096) = 0</div><div>close(3) = 0</div><div>munmap(0x7f30124b2000, 4096) = 0</div><div>syscall_293(0x7fff65afd9c0, 0x80000, 0x406af9, 0x7f30124a5700, 0, 0x612100, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820) = 0</div>
<div>clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f30124a59d0) = 3978</div><div>close(4) = 0</div>
<div>fcntl(3, F_SETFD, 0) = 0</div><div>fstat(3, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0</div><div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124b2000</div>
<div>read(3, "", 4096) = 0</div><div>--- SIGCHLD (Child exited) @ 0 (0) ---</div><div>read(3, "", 4096) = 0</div><div>close(3) = 0</div>
<div>wait4(3978, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], 0, NULL) = 3978</div><div>munmap(0x7f30124b2000, 4096) = 0</div><div>syscall_293(0x7fff65afd9c0, 0x80000, 0x406af9, 0x7f30124a5700, 0x1, 0x612100, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820) = 0</div>
<div>clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f30124a59d0) = 3981</div><div>close(4) = 0</div>
<div>fcntl(3, F_SETFD, 0) = 0</div><div>fstat(3, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0</div><div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124b2000</div>
<div>read(3, "", 4096) = 0</div><div>--- SIGCHLD (Child exited) @ 0 (0) ---</div><div>read(3, "", 4096) = 0</div><div>close(3) = 0</div>
<div>wait4(3981, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], 0, NULL) = 3981</div><div>munmap(0x7f30124b2000, 4096) = 0</div><div>write(1, "checking for existing Lustre dat"..., 45checking for existing Lustre data: not found</div>
<div>) = 45</div><div>open("/dev/sdf", O_RDONLY) = 3</div><div>ioctl(3, BLKGETSIZE64, 0x7fff65afc910) = 0</div><div>close(3) = 0</div><div>write(1, "device size = 5120MB\n", 21device size = 5120MB</div>
<div>) = 21</div><div>syscall_293(0x7fff65afb870, 0x80000, 0x406af9, 0x7f30124a5700, 0x1, 0x612100, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210) = 0</div>
<div>clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f30124a59d0) = 3984</div><div>close(4) = 0</div>
<div>fcntl(3, F_SETFD, 0) = 0</div><div>fstat(3, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0</div><div>mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124b2000</div>
<div>read(3, "debugfs 1.41.9 (22-Aug-2009)\n", 4096) = 29</div><div>read(3, "Supported feature: uninit_groups"..., 4096) = 33</div><div>read(3, "", 4096) = 0</div><div>uname({sys="Linux", node="ip-10-196-34-143", ...}) = 0</div>
<div>write(1, "2 6 32\n", 72 6 32</div><div>) = 7</div><div>write(1, "formatting backing filesystem ld"..., 50formatting backing filesystem ldiskfs on /dev/sdf</div>
<div>) = 50</div><div>write(1, "\ttarget name temp-MDTffff\n", 27 target name temp-MDTffff</div><div>) = 27</div><div>write(1, "\t4k blocks 1310720\n", 23 4k blocks 1310720</div><div>
) = 23</div><div>write(1, "\toptions -J size=204 -i 4"..., 76 options -J size=204 -i 4096 -I 512 -q -O dir_index,uninit_groups -F</div><div>) = 76</div>
<div>write(1, "mkfs_cmd = mke2fs -j -b 4096 -L "..., 123mkfs_cmd = mke2fs -j -b 4096 -L temp-MDTffff -J size=204 -i 4096 -I 512 -q -O dir_index,uninit_groups -F /dev/sdf 1310720</div><div>) = 123</div><div>gettimeofday({1307462203, 738431}, NULL) = 0</div>
<div>getpid() = 3977</div><div>open("/tmp/run_command_logiIQsXq", O_RDWR|O_CREAT|O_EXCL, 0600) = 4</div><div>close(4) = 0</div><div>rt_sigaction(SIGINT, {0x1, [], SA_RESTORER, 0x7f3011f6b9e0}, {SIG_DFL, [], 0}, 8) = 0</div>
<div>rt_sigaction(SIGQUIT, {0x1, [], SA_RESTORER, 0x7f3011f6b9e0}, {SIG_DFL, [], 0}, 8) = 0</div><div>rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0</div><div>clone(child_stack=0, flags=CLONE_PARENT_SETTID|SIGCHLD, parent_tidptr=0x7fff65afc828) = 3986</div>
<div>wait4(3986, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 3986</div><div>rt_sigaction(SIGINT, {SIG_DFL, [], SA_RESTORER, 0x7f3011f6b9e0}, NULL, 8) = 0</div><div>rt_sigaction(SIGQUIT, {SIG_DFL, [], SA_RESTORER, 0x7f3011f6b9e0}, NULL, 8) = 0</div>
<div>rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0</div><div>--- SIGCHLD (Child exited) @ 0 (0) ---</div><div>unlink("/tmp/run_command_logiIQsXq") = 0</div><div>mkdir("/tmp/mntFdZiJN", 0700) = 0</div>
<div>mount("/dev/sdf", "/tmp/mntFdZiJN", "ldiskfs", 0, "iopen_nopriv,user_xattr,errors=r"...</div><div><br></div></div><div><br></div><div>Thanks,</div>
<div>-Jon</div><div><a href="mailto:jon.zhu@gmail.com" target="_blank" style="color: rgb(119, 153, 187); ">jon.zhu@gmail.com</a></div></span>