[lustre-discuss] Backup software for Lustre

Brett Lee brettlee.lustre at gmail.com
Sun Mar 19 08:45:39 PDT 2017


Sure, happy to help.  I did not see mknod+setxattr in the strace output.
Included is a trimmed version of the strace output, along with a few more
bits of information.  Thanks!

# cat /proc/fs/lustre/version
lustre: 2.7.19.8
# cat /etc/redhat-release
CentOS Linux release 7.3.1611 (Core)
# uname -r
3.10.0-514.2.2.el7_lustre.x86_64
# rpm -qa|grep tar
tar-1.26-31.el7.x86_64
# sha1sum `which tar` `which gtar`
ea17ec98894212b2e2285eb2dd99aad76185ea7d  /usr/bin/tar
ea17ec98894212b2e2285eb2dd99aad76185ea7d  /usr/bin/gtar

Striping was set on the four directories before creating the files.
mkdir -p /scratch/1; lfs setstripe -c 1 --stripe-size 128K /scratch/1; lfs getstripe /scratch/1
mkdir -p /scratch/2; lfs setstripe -c 2 --stripe-size 256K /scratch/2; lfs getstripe /scratch/2
mkdir -p /scratch/3; lfs setstripe -c 3 --stripe-size 768K /scratch/3; lfs getstripe /scratch/3
mkdir -p /scratch/4; lfs setstripe -c 4 --stripe-size 1M    /scratch/4; lfs getstripe /scratch/4
After the tar restore, all files and directories had the default Lustre striping.

# tar ztvf /scratch.tgz
drwxr-xr-x root/root         0 2017-03-19 10:54 scratch/
drwxr-xr-x root/root         0 2017-03-19 10:57 scratch/4/
-rw-r--r-- root/root   4194304 2017-03-19 10:57 scratch/4/4.dd
drwxr-xr-x root/root         0 2017-03-19 10:57 scratch/3/
-rw-r--r-- root/root   4194304 2017-03-19 10:57 scratch/3/3.dd
drwxr-xr-x root/root         0 2017-03-19 10:57 scratch/1/
-rw-r--r-- root/root   4194304 2017-03-19 10:57 scratch/1/1.dd
drwxr-xr-x root/root         0 2017-03-19 10:57 scratch/2/
-rw-r--r-- root/root   4194304 2017-03-19 10:57 scratch/2/2.dd

# strace tar zxvf /scratch.tgz > strace.out 2>&1
execve("/usr/bin/tar", ["tar", "zxvf", "/scratch.tgz"], [/* 22 vars */]) = 0
...
(-cut - loading libraries)
...
fstat(1, {st_mode=S_IFREG|0644, st_size=10187, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x7f4a63d9f000
write(1, "scratch/\n", 9scratch/
)               = 9
mkdirat(AT_FDCWD, "scratch", 0700)      = -1 EEXIST (File exists)
newfstatat(AT_FDCWD, "scratch", {st_mode=S_IFDIR|0755, st_size=4096, ...},
AT_SYMLINK_NOFOLLOW) = 0
write(1, "scratch/4/\n", 11scratch/4/
)            = 11
mkdirat(AT_FDCWD, "scratch/4", 0700)    = 0
write(1, "scratch/4/4.dd\n", 15scratch/4/4.dd
)        = 15
openat(AT_FDCWD, "scratch/4/4.dd",
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
O_WRONLY|O_CREAT|O_EXCL|O_NOCTTY|O_NONBLOCK|O_CLOEXEC, 0600) = 4
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
5632) = 5632
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
5632) = 5632
...
(-cut)
...
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512)
= 512
dup2(4, 4)                              = 4
fstat(4, {st_mode=S_IFREG|0600, st_size=4194304, ...}) = 0
utimensat(4, NULL, {{1489935825, 0}, {1489935444, 0}}, 0) = 0
fchown(4, 0, 0)                         = 0
fchmod(4, 0644)                         = 0
close(4)                                = 0
write(1, "scratch/3/\n", 11scratch/3/
)            = 11
newfstatat(AT_FDCWD, "scratch/4", {st_mode=S_IFDIR|0700, st_size=4096,
...}, AT_SYMLINK_NOFOLLOW) = 0
utimensat(AT_FDCWD, "scratch/4", {{1489935825, 0}, {1489935444, 0}},
AT_SYMLINK_NOFOLLOW) = 0
fchownat(AT_FDCWD, "scratch/4", 0, 0, AT_SYMLINK_NOFOLLOW) = 0
fchmodat(AT_FDCWD, "scratch/4", 0755)   = 0
mkdirat(AT_FDCWD, "scratch/3", 0700)    = 0
write(1, "scratch/3/3.dd\n", 15scratch/3/3.dd
)        = 15
openat(AT_FDCWD, "scratch/3/3.dd",
O_WRONLY|O_CREAT|O_EXCL|O_NOCTTY|O_NONBLOCK|O_CLOEXEC, 0600) = 4
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
6656) = 6656
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
...
(-cut - pick up with last file...)
...
read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2476,
si_status=0, si_utime=7, si_stime=0} ---
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
10240) = 10240
write(4,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
7680) = 7680
dup2(4, 4)                              = 4
fstat(4, {st_mode=S_IFREG|0600, st_size=4194304, ...}) = 0
utimensat(4, NULL, {{1489935825, 0}, {1489935432, 0}}, 0) = 0
fchown(4, 0, 0)                         = 0
fchmod(4, 0644)                         = 0
close(4)                                = 0
clock_gettime(CLOCK_REALTIME, {1489935825, 628399394}) = 0
clock_gettime(CLOCK_REALTIME, {1489935825, 628414336}) = 0
close(3)                                = 0
wait4(2476, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 2476
newfstatat(AT_FDCWD, "scratch/2", {st_mode=S_IFDIR|0700, st_size=4096,
...}, AT_SYMLINK_NOFOLLOW) = 0
utimensat(AT_FDCWD, "scratch/2", {{1489935825, 0}, {1489935432, 0}},
AT_SYMLINK_NOFOLLOW) = 0
fchownat(AT_FDCWD, "scratch/2", 0, 0, AT_SYMLINK_NOFOLLOW) = 0
fchmodat(AT_FDCWD, "scratch/2", 0755)   = 0
newfstatat(AT_FDCWD, "scratch", {st_mode=S_IFDIR|0755, st_size=4096, ...},
0) = 0
utimensat(AT_FDCWD, "scratch", {{1489934977, 0}, {1489935261, 0}}, 0) = 0
fchownat(AT_FDCWD, "scratch", 0, 0, 0)  = 0
close(1)                                = 0
munmap(0x7f4a63d9f000, 4096)            = 0
close(2)                                = 0
exit_group(0)                           = ?
+++ exited with 0 +++


Brett
--
Protect Yourself Against Cybercrime
PDS Software Solutions LLC
https://www.TrustPDS.com <https://www.trustpds.com/>

On Sun, Mar 19, 2017 at 7:39 AM, Dilger, Andreas <andreas.dilger at intel.com>
wrote:

> I ran a test locally with RHEL 6.8 and the included tar 1.26 using strace,
> and tar is properly using mknod+setxattr to restore the "lov" xattr, and
> the stripe count and stripe size are preserved.
>
> The OST index is not preserved with the xattr restore, since that may
> cause imbalance if the files were backed up in a different filesystem
> (e.g. one with fewer OSTs).  The MDS will balance OST allocation as needed
> for the current OST usage.
>
> Could you please run your tar on RHEL 7 with strace to see if it is doing
> this correctly?
>
> Cheers, Andreas
>
> On Mar 18, 2017, at 21:51, Brett Lee <brettlee.lustre at gmail.com> wrote:
>
> Hi Andreas, I expected that to be the case, but found out it was not.
> Instead, the restore restores everything - unless directed otherwise.
>
> Backup == cmd + add xattrs.
> Restore == cmd + exclude xattrs.
>
> Brett
> --
> Protect Yourself Against Cybercrime
> PDS Software Solutions LLC
> https://www.TrustPDS.com
> On Mar 18, 2017 9:28 PM, "Dilger, Andreas" <andreas.dilger at intel.com>
> wrote:
>
>> Do you need to specify --xattrs (or similar) during the restore phase as
>> well?
>>
>> Cheers, Andreas
>>
>> On Mar 17, 2017, at 15:12, Brett Lee <brettlee.lustre at gmail.com> wrote:
>>
>> Hi.  In what I thought was a valid test, I was unable to confirm that a
>> backup and restore retained the layouts.  Perhaps my expectation or process
>> was incorrect?  The process was:
>>
>> 1.  Create 4 files, each with different stripe sizes and stripe counts
>> (verified with getstripe).
>> 2.  Back up the files using tar-1.26-31.el7.x86_64.
>> 3.  Recreate a file system and restore the files.
>>
>> Backup command:  tar --xattrs -zcvf /scratch.tgz /scratch
>> Restore command:  tar zxvf /scratch.tgz
>>
>> After restoration, getstripe showed that each file had the default stripe
>> count (1) and stripe size (1MB).
>> FWIW:  After restoring, getfattr produced the same result for each file:
>> # getfattr -d -m - -R <file>
>> lustre.lov=0s0AvRCwEAAAAdAAAAAAAAAAAEAAACAAAAAAAQAAEAAAAFAAA
>> AAAAAAAAAAAAAAAAAAAAAAAAAAAA=
>> trusted.link=0s3/HqEQEAAAAuAAAAAAAAAAAAAAAAAAAAABYAAAACAAAEA
>> AAAAAUAAAAAMS5kZA==
>> trusted.lma=0sAAAAAAAAAAAABAAAAgAAAB0AAAAAAAAA
>> trusted.lov=0s0AvRCwEAAAAdAAAAAAAAAAAEAAACAAAAAAAQAAEAAAAFAA
>> AAAAAAAAAAAAAAAAAAAAAAAAAAAAA=
>>
>> Brett
>> --
>> Protect Yourself Against Cybercrime
>> PDS Software Solutions LLC
>> https://www.TrustPDS.com <https://www.trustpds.com/>
>>
>> On Wed, Mar 15, 2017 at 5:03 AM, Dilger, Andreas <
>> andreas.dilger at intel.com> wrote:
>>
>>> I believe Zmanda is already using GNU tar (or RHEL tar) for the actual
>>> backup storage?  In that case it should already work, since we fixed tar
>>> long ago to backup and restore xattrs in a way that preserves Lustre
>>> layouts.
>>>
>>> Cheers, Andreas
>>>
>>> On Mar 14, 2017, at 15:47, Brett Lee <brettlee.lustre at gmail.com> wrote:
>>>
>>> Thanks for the details, Andreas!
>>>
>>> Maybe OpenSFS can fund Zmanda so that their backup software can include
>>> the Lustre metadata... :)
>>>
>>> Brett
>>> --
>>> Protect Yourself Against Cybercrime
>>> PDS Software Solutions LLC
>>> https://www.TrustPDS.com <https://www.trustpds.com/>
>>>
>>> On Tue, Mar 14, 2017 at 3:13 PM, Dilger, Andreas <
>>> andreas.dilger at intel.com> wrote:
>>>
>>>> To reply to this old thread, there are two different kinds of Lustre
>>>> backup solutions:
>>>> - file level backups that traverse the client POSIX filesystem, for
>>>> which any number of
>>>>   commercial solutions exist.  Making these solutions "capable of
>>>> saving Lustre metadata"
>>>>   boils down to two simple things - save the "lustre.lov" xattr during
>>>> backup (at a minimum,
>>>>   other xattrs also should be backed up), and then using mknod(2) +
>>>> setxattr() to restore
>>>>   the "lustre.lov" xattr before opening the file and restoring the data.
>>>>
>>>> - device level backups (e.g. "dd" for ldiskfs, and "zfs send/recv" for
>>>> ZFS).
>>>>
>>>> Using the file level backups allows backup/restore of subsets of the
>>>> filesystem, since many
>>>> HPC sites have Lustre filesystems that are too large to backup
>>>> completely.  I typically do
>>>> not recommend using device-level backups for the OSTs, unless doing an
>>>> OST hardware migration,
>>>> and even then it is probably less disruptive to do Lustre-level file
>>>> migration off the OST
>>>> before swapping it out.
>>>>
>>>> Whether file level backups are used or not, I would recommend sites
>>>> always make periodic
>>>> device level backups of the MDT(s).  The amount of space needed for an
>>>> MDT backup is small
>>>> compared to the rest of the filesystem (e.g. a few TB at most), and can
>>>> avoid the need for
>>>> a full filesystem restore (e.g. multi-PB of data, if a full backup
>>>> exists at all) even
>>>> though all the data is still available on the OSTs.
>>>>
>>>> The MDT device-level backup can use relatively slow SATA drives, since
>>>> they will mostly be
>>>> used for linear writes (or occasionally linear reads for restore), so a
>>>> few multi-TB SATA III
>>>> drives are sufficient for storing a rotating set of MDT device backups.
>>>> At 150MB/s for even
>>>> a single SATA drive, this is about 2h/TB, which is reasonable to do
>>>> once a week (or more often
>>>> for smaller MDTs).
>>>>
>>>> While using an LVM snapshot of the ldiskfs MDT for the backup source is
>>>> desirable for consistency
>>>> reasons, having even an MDT backup from a mounted and in-use MDT is
>>>> better than nothing at
>>>> all when a problem is hit, since e2fsck can repair the in-use
>>>> inconsistencies fairly easily,
>>>> and Lustre can deal with inconsistencies between the MDT and OST
>>>> reasonably (at most returning
>>>> an -ENOENT error to the client for files that were deleted).
>>>>
>>>> Cheers, Andreas
>>>>
>>>> On Feb 7, 2017, at 12:32, Andrew Holway <andrew.holway at gmail.com>
>>>> wrote:
>>>> >
>>>> > Would it be difficult to suspend IO and snapshot all the nodes
>>>> (assuming ZFS)? Could you be sure that your MDS and OSS are synchronised?
>>>> >
>>>> > On 7 February 2017 at 19:52, Mike Selway <mselway at cray.com> wrote:
>>>> >> Hello Brett,
>>>> >>
>>>> >>                Actually, looking for someone who uses a
>>>> commercialized approach (that retains user metadata and Lustre extended
>>>> metadata) and not specifically the manual approaches of Chapter 17.
>>>> >>
>>>> >> Thanks!
>>>> >> Mike
>>>> >>
>>>> >> Mike Selway | Sr. Tiered Storage Architect | Cray Inc.
>>>> >> Work +1-301-332-4116 | mselway at cray.com
>>>> >> 146 Castlemaine Ct,   Castle Rock,  CO  80104 | www.cray.com
>>>> >>
>>>> >>
>>>> >>> From: Brett Lee [mailto:brettlee.lustre at gmail.com]
>>>> >>> Sent: Monday, February 06, 2017 11:45 AM
>>>> >>> To: Mike Selway <mselway at cray.com>
>>>> >>> Cc: lustre-discuss at lists.lustre.org
>>>> >>> Subject: Re: [lustre-discuss] Backup software for Lustre
>>>> >>>
>>>> >>> Hey Mike,
>>>> >>>
>>>> >>> "Chapter 17" and
>>>> >>> http://www.intel.com/content/www/us/en/lustre/backup-and-restore-training.html
>>>> >>>
>>>> >>> both contain methods to backup & restore the entire Lustre file
>>>> system.
>>>> >>>
>>>> >>> Are you looking for a solution that backs up only the (user) data
>>>> files and their associated metadata (e.g. xattrs)?
>>>> >>>
>>>> >>> Brett
>>>> >>> --
>>>> >>> Protect Yourself From Cybercrime
>>>> >>> PDS Software Solutions LLC
>>>> >>> https://www.TrustPDS.com
>>>> >>>
>>>> >>>> On Mon, Feb 6, 2017 at 11:12 AM, Mike Selway <mselway at cray.com>
>>>> wrote:
>>>> >>>>
>>>> >>>> Hello,
>>>> >>>>          Is anyone aware of and/or using a backup software package to
>>>> protect their LFS environment (not referring to the tools/scripts suggested
>>>> in Chapter 17)?
>>>> >>>>
>>>> >>>> Regards,
>>>> >>>> Mike
>>>>
>>>> Cheers, Andreas
>>>> --
>>>> Andreas Dilger
>>>> Lustre Principal Architect
>>>> Intel Corporation
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> lustre-discuss mailing list
>>>> lustre-discuss at lists.lustre.org
>>>> http://lists.lustre.org/listinfo.cgi/lustre-discuss-lustre.org
>>>>
>>>
>>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.lustre.org/pipermail/lustre-discuss-lustre.org/attachments/20170319/684862d4/attachment-0001.htm>


More information about the lustre-discuss mailing list