diff --git a/tests/funcs/exec.sh b/tests/funcs/exec.sh index 379f1ea5..06d4c96b 100644 --- a/tests/funcs/exec.sh +++ b/tests/funcs/exec.sh @@ -39,6 +39,18 @@ t_quiet() t_fail "quiet command failed" } +# +# Quietly run a command during a test. The command and its output are +# appended to $T_TMP.rc.log and only the return code is printed, for +# commands whose output is too invocation specific to filter. +# +t_rc() +{ + echo "# $*" >> "$T_TMP.rc.log" + "$@" >> "$T_TMP.rc.log" 2>&1 + echo "rc: $?" +} + # # redirect test output back to the output of the invoking script intead # of the compared output. diff --git a/tests/golden/change-devices b/tests/golden/change-devices new file mode 100644 index 00000000..46f68e48 --- /dev/null +++ b/tests/golden/change-devices @@ -0,0 +1,27 @@ +== make tmp sparse data dev files +== make scratch fs +== small new data device fails +rc: 1 +== check sees data device errors +rc: 1 +rc: 0 +== preparing while mounted fails +rc: 1 +== preparing without recovery fails +rc: 1 +== check sees metadata errors +rc: 1 +rc: 1 +== preparing with file data fails +rc: 1 +== preparing after emptied +rc: 0 +== checks pass +rc: 0 +rc: 0 +== using prepared +== preparing larger and resizing +rc: 0 +equal_prepared +large_prepared +resized larger test rc: 0 diff --git a/tests/sequence b/tests/sequence index 9a5298a0..0bbf6315 100644 --- a/tests/sequence +++ b/tests/sequence @@ -36,6 +36,7 @@ cross-mount-data-free.sh persistent-item-vers.sh setup-error-teardown.sh resize-devices.sh +change-devices.sh fence-and-reclaim.sh orphan-inodes.sh mount-unmount-race.sh diff --git a/tests/tests/basic-bad-mounts.sh b/tests/tests/basic-bad-mounts.sh index f7bd80c8..b8710b04 100644 --- a/tests/tests/basic-bad-mounts.sh +++ b/tests/tests/basic-bad-mounts.sh @@ -12,7 +12,7 @@ mount_fail() } echo "== prepare devices, mount point, and logs" -SCR="/mnt/scoutfs.extra" +SCR="$T_TMPDIR/mnt.scratch" mkdir -p "$SCR" > $T_TMP.mount.out scoutfs mkfs -f -Q 0,127.0.0.1,53000 
"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \ diff --git a/tests/tests/change-devices.sh b/tests/tests/change-devices.sh new file mode 100644 index 00000000..c346161f --- /dev/null +++ b/tests/tests/change-devices.sh @@ -0,0 +1,76 @@ +# +# test changing devices +# + +echo "== make tmp sparse data dev files" +sz=$(blockdev --getsize64 "$T_EX_DATA_DEV") +large_sz=$((sz * 2)) +touch "$T_TMP."{small,equal,large} +truncate -s 1MB "$T_TMP.small" +truncate -s $sz "$T_TMP.equal" +truncate -s $large_sz "$T_TMP.large" + +echo "== make scratch fs" +t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV" +SCR="$T_TMPDIR/mnt.scratch" +mkdir -p "$SCR" + +echo "== small new data device fails" +t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.small" + +echo "== check sees data device errors" +t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.small" +t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" + +echo "== preparing while mounted fails" +mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR" +t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal" +umount "$SCR" + +echo "== preparing without recovery fails" +mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR" +umount -f "$SCR" +t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal" + +echo "== check sees metadata errors" +t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" +t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal" + +echo "== preparing with file data fails" +mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR" +echo hi > "$SCR"/file +umount "$SCR" +scoutfs print "$T_EX_META_DEV" > "$T_TMP.print" +t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal" + +echo "== preparing after emptied" +mount -t scoutfs -o 
metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR" +rm -f "$SCR"/file +umount "$SCR" +t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal" + +echo "== checks pass" +t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" +t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal" + +echo "== using prepared" +scr_loop=$(losetup --find --show "$T_TMP.equal") +mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR" +touch "$SCR"/equal_prepared +equal_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR") +umount "$SCR" +losetup -d "$scr_loop" + +echo "== preparing larger and resizing" +t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.large" +scr_loop=$(losetup --find --show "$T_TMP.large") +mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR" +touch "$SCR"/large_prepared +ls "$SCR" +scoutfs resize-devices -p "$SCR" -d $large_sz +large_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR") +test "$large_tot" -gt "$equal_tot" ; echo "resized larger test rc: $?" 
+umount "$SCR" +losetup -d "$scr_loop" + +t_pass diff --git a/tests/tests/enospc.sh b/tests/tests/enospc.sh index ab042479..277bfae0 100644 --- a/tests/tests/enospc.sh +++ b/tests/tests/enospc.sh @@ -59,7 +59,7 @@ echo "== make small meta fs" # meta device just big enough for reserves and the metadata we'll fill scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \ t_fail "mkfs failed" -SCR="/mnt/scoutfs.enospc" +SCR="$T_TMPDIR/mnt.scratch" mkdir -p "$SCR" mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \ "$T_EX_DATA_DEV" "$SCR" diff --git a/tests/tests/resize-devices.sh b/tests/tests/resize-devices.sh index 9bd91476..cf7adb67 100644 --- a/tests/tests/resize-devices.sh +++ b/tests/tests/resize-devices.sh @@ -73,7 +73,7 @@ echo "== make initial small fs" scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m $quarter_meta -d $quarter_data \ "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \ t_fail "mkfs failed" -SCR="/mnt/scoutfs.enospc" +SCR="$T_TMPDIR/mnt.scratch" mkdir -p "$SCR" mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \ "$T_EX_DATA_DEV" "$SCR" diff --git a/utils/man/scoutfs.8 b/utils/man/scoutfs.8 index a8a517ff..8cfd2af1 100644 --- a/utils/man/scoutfs.8 +++ b/utils/man/scoutfs.8 @@ -76,6 +76,97 @@ run when the file system will not be mounted. .RE .PD +.TP +.BI "counters [-t|--table] SYSFS-DIR" +.sp +Display the counters and their values for a mounted ScoutFS filesystem. +.RS 1.0i +.PD 0 +.sp +.TP +.B SYSFS-DIR +The mount's sysfs directory in which to find the +.B counters/ +directory when then contains files for each counter. +The sysfs directory is +of the form +.I /sys/fs/scoutfs/f..r./ +\&. +.TP +.B "-t, --table" +Format the counters into a columnar table that fills the width of the display +instead of printing one counter per line. 
+.RE +.PD + +.TP +.BI "data-waiting {-I|--inode} INODE-NUM {-B|--block} BLOCK-NUM [-p|--path PATH]" +.sp +Display all the files and blocks for which there is a task blocked waiting on +offline data. +.sp +The results are sorted by the file's inode number and the +logical block offset that is being waited on. +.sp +Each line of output describes a block in a file that has a task waiting +and is formatted as: +.I "ino iblock ops [str]" +\&. The ops string indicates blocked operations separated by commas and can +include +.B read +for a read operation, +.B write +for a write operation, and +.B change_size +for a truncate or extending write. +.RS 1.0i +.PD 0 +.sp +.TP +.B "-I, --inode INODE-NUM" +Start iterating over waiting tasks from the given inode number. +Value of 0 will show all waiting tasks. +.TP +.B "-B, --block BLOCK-NUM" +Start iterating over waiting tasks from the given logical block number +in the starting inode. Value of 0 will show blocks in the first inode +and then continue to show all blocks with tasks waiting in all the +remaining inodes. +.TP +.B "-p, --path PATH" +A path within a ScoutFS filesystem. +.RE +.PD + +.TP +.BI "data-wait-err {-I|--inode} INODE-NUM {-V|--version} VER-NUM {-F|--offset} OFF-NUM {-C|--count} COUNT {-O|--op} OP {-E|--err} ERR [-p|--path PATH]" +.sp +Return error from matching waiters. +.RS 1.0i +.PD 0 +.sp +.TP +.B "-C, --count COUNT" +Count. +.TP +.B "-E, --err ERR" +Error. +.TP +.B "-F, --offset OFF-NUM" +Offset. May be expressed in bytes, or with KMGTP (Kibi, Mibi, etc.) size +suffixes. +.TP +.B "-I, --inode INODE-NUM" +Inode number. +.TP +.B "-O, --op OP" +Operation. One of: "read", "write", "change_size". +.TP +.B "-p, --path PATH" +A path within a ScoutFS filesystem. +.RE +.PD + .TP .BI "df [-h|--human-readable] [-p|--path PATH]" .sp @@ -93,6 +184,72 @@ A path within a ScoutFS filesystem. 
.RE .PD +.TP +.BI "get-allocated-inos [-i|--ino INO] [-s|--single] [-p|--path PATH]" +.sp +This debugging command prints allocated inode numbers. It only prints +inodes +found in the group that contains the starting inode. The printed inode +numbers aren't necessarily reachable. They could be anywhere in the +process from being unlinked to finally deleted when their items +were found. +.RS 1.0i +.PD 0 +.TP +.sp +.B "-i, --ino INO" +The first 64bit inode number which could be printed. +.TP +.B "-s, --single" +Only print the single starting inode when it is allocated, all other allocated +inode numbers will be ignored. +.TP +.B "-p, --path PATH" +A path within a ScoutFS filesystem. +.RE +.PD + +.TP +.BI "ino-path INODE-NUM [-p|--path PATH]" +.sp +Display all paths that reference an inode number. +.sp +Ongoing filesystem changes, such as renaming a common parent of multiple paths, +can cause displayed paths to be inconsistent. +.RS 1.0i +.PD 0 +.sp +.TP +.B "INODE-NUM" +The inode number of the target inode. +.TP +.B "-p|--path PATH" +A path within a ScoutFS filesystem. +.RE +.PD + +.TP +.BI "list-hidden-xattrs FILE" +.sp +Display extended attributes starting with the +.BR scoutfs. +prefix and containing the +.BR hide. +tag +which makes them invisible to +.BR listxattr (2) . +The names of each attribute are output, one per line. Their order +is not specified. +.RS 1.0i +.PD 0 +.TP +.sp +.B "FILE" +The path to a file within a ScoutFS filesystem. File permissions must allow +reading. +.RE +.PD + .TP .BI "mkfs META-DEVICE DATA-DEVICE {-Q|--quorum-slot} NR,ADDR,PORT [-m|--max-meta-size SIZE] [-d|--max-data-size SIZE] [-z|--data-alloc-zone-blocks BLOCKS] [-f|--force] [-A|--allow-small-size] [-V|--format-version VERS]" .sp @@ -171,6 +328,79 @@ The range of supported versions is visible in the output of .RE .PD +.TP +.BI "prepare-empty-data-device [-c|--check] META-DEVICE [DATA-DEVICE]" +.sp +Prepare an unused device for use as the data device for an existing file +system. 
This will write an initialized super block to the specified +data device, destroying any existing contents. The specified metadata +device will not be modified. The file system must be fully unmounted +and any client mount recovery must be complete. +.sp +The existing metadata device is read to ensure that it's safe to stop +using the old data device. The data block allocators must indicate that +all data blocks are free. If there are still data blocks referenced by +files then the command will fail. The contents of these files must be +freed for the command to proceed. +.sp +A new super block is written to the new data device. The device can +then be used as the data device to mount the file system. As this +switch is made all client mounts must refer to the new device. The old +device is not modified and still contains a valid data super block that +could be mounted, creating data device writes that wouldn't be read by +mounts using the new device. +.sp +The number of data blocks available to the file system will not change +as the new data device is used. The new device must be large enough to +store all the data blocks that were available on the old device. If the +new device is larger then its added capacity can be used by growing the +new data device with the resize-devices command once it is mounted. +.RS 1.0i +.PD 0 +.TP +.sp +.B "-c, --check" +Only check for errors that would prevent a new empty data device from +being used. No changes will be made to the data device. If the data +device is provided then its size will be checked to make sure that it is +large enough. This can be used to test the metadata for data references +before destroying an old empty data device. +.RE +.PD + +.TP +.BI "print [-S|--skip-likely-huge] META-DEVICE" +.sp +Prints out all of the metadata in the file system. This makes no effort +to ensure that the structures are consistent as they're traversed and +can present structures that seem corrupt as they change as they're +output. 
+.RS 1.0i +.PD 0 +.TP +.sp +.B "-S, --skip-likely-huge" +Skip printing structures that are likely to be very large. The +structures that are skipped tend to be global and whose size tends to be +related to the size of the volume. Examples of skipped structures include +the global fs items, srch files, and metadata and data +allocators. Similar structures that are not skipped are related to the +number of mounts and are maintained at a relatively reasonable size. +These include per-mount log trees, srch files, allocators, and the +metadata allocators used by server commits. +.sp +Skipping the larger structures limits the print output to a relatively +constant size rather than being a large multiple of the used metadata +space of the volume making the output much more useful for inspection. +.TP +.B "META-DEVICE" +The path to the metadata device for the filesystem whose metadata will be +printed. An attempt will be made to flush the host's buffer cache for +this device with the BLKFLSBUF ioctl, or with posix_fadvise() if +the path refers to a regular file. +.RE +.PD + .TP .BI "resize-devices [-p|--path PATH] [-m|--meta-size SIZE] [-d|--data-size SIZE]" .sp @@ -229,6 +459,92 @@ kibibytes, mebibytes, etc. .RE .PD +.TP +.BI "search-xattrs XATTR-NAME [-p|--path PATH]" +.sp +Display the inode numbers of inodes in the filesystem which may have +an extended attribute with the given name. +.sp +The results may contain false positives. The returned inode numbers +should be checked to verify that the extended attribute is in fact +present on the inode. +.RS 1.0i +.PD 0 +.TP +.sp +.B XATTR-NAME +The full name of the extended attribute to search for as +described in the +.BR xattr (7) +manual page. +.TP +.B "-p|--path PATH" +A path within a ScoutFS filesystem. +.RE +.PD + +.TP +.BI "setattr FILE [-d, --data-version=VERSION [-s, --size=SIZE [-o, --offline]]] [-t, --ctime=TIMESPEC]" +.sp +Set ScoutFS-specific attributes on a newly created zero-length file. 
+.RS 1.0i +.PD 0 +.sp +.TP +.B "-V, --data-version=VERSION" +Set data version. +.TP +.B "-o, --offline" +Set file contents as offline, not sparse. Requires +.I --size +option also be present. +.TP +.B "-s, --size=SIZE" +Set file size. May be expressed in bytes, or with +KMGTP (Kibi, Mibi, etc.) size suffixes. Requires +.I --data-version +option also be present. +.TP +.B "-t, --ctime=TIMESPEC" +Set creation time using +.I "." +format. +.RE +.PD + +.TP +.BI "stage ARCHIVE-FILE FILE {-V|--version} VERSION [-o, --offset OFF-NUM] [-l, --length LENGTH]" +.sp +.B Stage +(i.e. return to online) the previously-offline contents of a file by copying a +region from another file, the archive, and without updating regular inode +metadata. Any operations that are blocked by the existence of an offline +region will proceed once the region has been staged. +.RS 1.0i +.PD 0 +.TP +.sp +.B "ARCHIVE-FILE" +The source file for the file contents being staged. +.TP +.B "FILE" +The regular file whose contents will be staged. +.TP +.B "-V, --version VERSION" +The data_version of the contents to be staged. It must match the +current data_version of the file. +.TP +.B "-o, --offset OFF-NUM" +The starting byte offset of the region to write. May be expressed in bytes, or with +KMGTP (Kibi, Mibi, etc.) size suffixes. Default is 0. +.TP +.B "-l, --length LENGTH" +Length of range (bytes or KMGTP units) of file to stage. Default is the file's +total size. +.RE +.PD + +.TP .BI "stat FILE [-s|--single-field FIELD-NAME]" .sp Display ScoutFS-specific metadata fields for the given file. @@ -314,221 +630,6 @@ The total number of 4K data blocks in the filesystem. .RE .PD -.TP -.BI "counters [-t|--table] SYSFS-DIR" -.sp -Display the counters and their values for a mounted ScoutFS filesystem. -.RS 1.0i -.PD 0 -.sp -.TP -.B SYSFS-DIR -The mount's sysfs directory in which to find the -.B counters/ -directory when then contains files for each counter. 
-The sysfs directory is -of the form -.I /sys/fs/scoutfs/f..r./ -\&. -.TP -.B "-t, --table" -Format the counters into a columnar table that fills the width of the display -instead of printing one counter per line. -.RE -.PD - -.TP -.BI "search-xattrs XATTR-NAME [-p|--path PATH]" -.sp -Display the inode numbers of inodes in the filesystem which may have -an extended attribute with the given name. -.sp -The results may contain false positives. The returned inode numbers -should be checked to verify that the extended attribute is in fact -present on the inode. -.RS 1.0i -.PD 0 -.TP -.sp -.B XATTR-NAME -The full name of the extended attribute to search for as -described in the -.BR xattr (7) -manual page. -.TP -.B "-p|--path PATH" -A path within a ScoutFS filesystem. -.RE -.PD - -.TP -.BI "list-hidden-xattrs FILE" -.sp -Display extended attributes starting with the -.BR scoutfs. -prefix and containing the -.BR hide. -tag -which makes them invisible to -.BR listxattr (2) . -The names of each attribute are output, one per line. Their order -is not specified. -.RS 1.0i -.PD 0 -.TP -.sp -.B "FILE" -The path to a file within a ScoutFS filesystem. File permissions must allow -reading. -.RE -.PD - -.TP -.BI "walk-inodes {meta_seq|data_seq} FIRST-INODE LAST-INODE [-p|--path PATH]" -.sp -Walk an inode index in the file system and output the inode numbers -that are found between the first and last positions in the index. -.RS 1.0i -.PD 0 -.sp -.TP -.BR meta_seq , data_seq -Which index to walk. -.TP -.B "FIRST-INODE" -An integer index value giving starting position of the index walk. -.I 0 -is the first possible position. -.TP -.B "LAST-INODE" -An integer index value giving the last position to include in the index walk. -.I \-1 -can be given to indicate the last possible position. -.TP -.B "-p|--path PATH" -A path within a ScoutFS filesystem. -.RE -.PD - -.TP -.BI "ino-path INODE-NUM [-p|--path PATH]" -.sp -Display all paths that reference an inode number. 
-.sp -Ongoing filesystem changes, such as renaming a common parent of multiple paths, -can cause displayed paths to be inconsistent. -.RS 1.0i -.PD 0 -.sp -.TP -.B "INODE-NUM" -The inode number of the target inode. -.TP -.B "-p|--path PATH" -A path within a ScoutFS filesystem. -.RE -.PD - -.TP -.BI "data-waiting {-I|--inode} INODE-NUM {-B|--block} BLOCK-NUM [-p|--path PATH]" -.sp -Display all the files and blocks for which there is a task blocked waiting on -offline data. -.sp -The results are sorted by the file's inode number and the -logical block offset that is being waited on. -.sp -Each line of output describes a block in a file that has a task waiting -and is formatted as: -.I "ino iblock ops [str]" -\&. The ops string indicates blocked operations seperated by commas and can -include -.B read -for a read operation, -.B write -for a write operation, and -.B change_size -for a truncate or extending write. -.RS 1.0i -.PD 0 -.sp -.TP -.B "-I, --inode INODE-NUM" -Start iterating over waiting tasks from the given inode number. -Value of 0 will show all waiting tasks. -.TP -.B "-B, --block BLOCK-NUM" -Start iterating over waiting tasks from the given logical block number -in the starting inode. Value of 0 will show blocks in the first inode -and then continue to show all blocks with tasks waiting in all the -remaining inodes. -.TP -.B "-p, --path PATH" -A path within a ScoutFS filesystem. -.RE -.PD - -.TP -.BI "data-wait-err {-I|--inode} INODE-NUM {-V|--version} VER-NUM {-F|--offset} OFF-NUM {-C|--count} COUNT {-O|--op} OP {-E|--err} ERR [-p|--path PATH]" -.sp -Return error from matching waiters. -.RS 1.0i -.PD 0 -.sp -.TP -.B "-C, --count COUNT" -Count. -.TP -.B "-E, --err ERR" -Error. -.TP -.B "-F, --offset OFF-NUM" -Offset. May be expressed in bytes, or with KMGTP (Kibi, Mibi, etc.) size -suffixes. -.TP -.B "-I, --inode INODE-NUM" -Inode number. -.TP -.B "-O, --op OP" -Operation. One of: "read", "write", "change_size". 
-.TP -.B "-p, --path PATH" -A path within a ScoutFS filesystem. -.RE -.PD - -.TP -.BI "stage ARCHIVE-FILE FILE {-V|--version} VERSION [-o, --offset OFF-NUM] [-l, --length LENGTH]" -.sp -.B Stage -(i.e. return to online) the previously-offline contents of a file by copying a -region from another file, the archive, and without updating regular inode -metadata. Any operations that are blocked by the existence of an offline -region will proceed once the region has been staged. -.RS 1.0i -.PD 0 -.TP -.sp -.B "ARCHIVE-FILE" -The source file for the file contents being staged. -.TP -.B "FILE" -The regular file whose contents will be staged. -.TP -.B "-V, --version VERSION" -The data_version of the contents to be staged. It must match the -current data_version of the file. -.TP -.B "-o, --offset OFF-NUM" -The starting byte offset of the region to write. May be expressed in bytes, or with -KMGTP (Kibi, Mibi, etc.) size suffixes. Default is 0. -.TP -.B "-l, --length LENGTH" -Length of range (bytes or KMGTP units) of file to stage. Default is the file's -total size. -.RE -.PD - .TP .BI "release FILE {-V|--version} VERSION [-o, --offset OFF-NUM] [-l, --length LENGTH]" .sp @@ -568,88 +669,28 @@ total size. .PD .TP -.BI "setattr FILE [-d, --data-version=VERSION [-s, --size=SIZE [-o, --offline]]] [-t, --ctime=TIMESPEC]" +.BI "walk-inodes {meta_seq|data_seq} FIRST-INODE LAST-INODE [-p|--path PATH]" .sp -Set ScoutFS-specific attributes on a newly created zero-length file. +Walk an inode index in the file system and output the inode numbers +that are found between the first and last positions in the index. .RS 1.0i .PD 0 .sp .TP -.B "-V, --data-version=VERSION" -Set data version. +.BR meta_seq , data_seq +Which index to walk. .TP -.B "-o, --offline" -Set file contents as offline, not sparse. Requires -.I --size -option also be present. +.B "FIRST-INODE" +An integer index value giving starting position of the index walk. +.I 0 +is the first possible position. 
.TP -.B "-s, --size=SIZE" -Set file size. May be expressed in bytes, or with -KMGTP (Kibi, Mibi, etc.) size suffixes. Requires -.I --data-version -option also be present. +.B "LAST-INODE" +An integer index value giving the last position to include in the index walk. +.I \-1 +can be given to indicate the last possible position. .TP -.B "-t, --ctime=TIMESPEC" -Set creation time using -.I "." -format. -.RE -.PD - -.TP -.BI "print {-S|--skip-likely-huge} META-DEVICE" -.sp -Prints out all of the metadata in the file system. This makes no effort -to ensure that the structures are consistent as they're traversed and -can present structures that seem corrupt as they change as they're -output. -.RS 1.0i -.PD 0 -.TP -.sp -.B "-S, --skip-likely-huge" -Skip printing structures that are likely to be very large. The -structures that are skipped tend to be global and whose size tends to be -related to the size of the volume. Examples of skipped structures include -the global fs items, srch files, and metadata and data -allocators. Similar structures that are not skipped are related to the -number of mounts and are maintained at a relatively reasonable size. -These include per-mount log trees, srch files, allocators, and the -metadata allocators used by server commits. -.sp -Skipping the larger structures limits the print output to a relatively -constant size rather than being a large multiple of the used metadata -space of the volume making the output much more useful for inspection. -.TP -.B "META-DEVICE" -The path to the metadata device for the filesystem whose metadata will be -printed. An attempt will be made to flush the host's buffer cache for -this device with the BLKFLSBUF ioctl, or with posix_fadvise() if -the path refers to a regular file. -.RE -.PD - -.TP -.BI "get-allocated-inos [-i|--ino INO] [-s|--single] [-p|--path PATH]" -.sp -This debugging command prints allocated inode numbers. It only prints -inodes -found in the group that contains the starting inode. 
The printed inode -numbers aren't necessarily reachable. They could be anywhere in the -process from being unlinked to finally deleted when their items -were found. -.RS 1.0i -.PD 0 -.TP -.sp -.B "-i, --ino INO" -The first 64bit inode number which could be printed. -.TP -.B "-s, --single" -Only print the single starting inode when it is allocated, all other allocated -inode numbers will be ignored. -.TP -.B "-p, --path PATH" +.B "-p|--path PATH" A path within a ScoutFS filesystem. .RE .PD diff --git a/utils/src/dev.c b/utils/src/dev.c index 2a0c9eb6..8eae2d40 100644 --- a/utils/src/dev.c +++ b/utils/src/dev.c @@ -12,13 +12,10 @@ #include "sparse.h" #include "dev.h" -int device_size(char *path, int fd, - u64 min_size, u64 max_size, bool allow_small_size, - char *use_type, u64 *size_ret) +int get_device_size(char *path, int fd, u64 *size_ret) { struct stat st; u64 size; - char *target_type; int ret; if (fstat(fd, &st)) { @@ -30,7 +27,6 @@ int device_size(char *path, int fd, if (S_ISREG(st.st_mode)) { size = st.st_size; - target_type = "file"; } else if (S_ISBLK(st.st_mode)) { if (ioctl(fd, BLKGETSIZE64, &size)) { ret = -errno; @@ -38,13 +34,26 @@ int device_size(char *path, int fd, path, strerror(errno), errno); return ret; } - target_type = "device"; } else { fprintf(stderr, "path isn't regular or device file '%s'\n", path); return -EINVAL; } + *size_ret = size; + return 0; +} + +int limit_device_size(char *path, int fd, u64 min_size, u64 max_size, bool allow_small_size, + char *use_type, u64 *size_ret) +{ + u64 size; + int ret; + + ret = get_device_size(path, fd, &size); + if (ret < 0) + return ret; + if (max_size) { if (size > max_size) { printf("Limiting use of "BASE_SIZE_FMT @@ -64,9 +73,9 @@ int device_size(char *path, int fd, if (size < min_size) { fprintf(stderr, - BASE_SIZE_FMT" %s too small for min " + BASE_SIZE_FMT" too small for min " BASE_SIZE_FMT" %s device%s\n", - BASE_SIZE_ARGS(size), target_type, + BASE_SIZE_ARGS(size), BASE_SIZE_ARGS(min_size), 
use_type, allow_small_size ? ", allowing with -A" : ""); diff --git a/utils/src/dev.h b/utils/src/dev.h index d483f70b..3d14c22d 100644 --- a/utils/src/dev.h +++ b/utils/src/dev.h @@ -9,9 +9,9 @@ #define SIZE_FMT "%llu (%.2f %s)" #define SIZE_ARGS(nr, sz) (nr), size_flt(nr, sz), size_str(nr, sz) -int device_size(char *path, int fd, - u64 min_size, u64 max_size, bool allow_small_size, - char *use_type, u64 *size_ret); +int get_device_size(char *path, int fd, u64 *size_ret); +int limit_device_size(char *path, int fd, u64 min_size, u64 max_size, bool allow_small_size, + char *use_type, u64 *size_ret); float size_flt(u64 nr, unsigned size); char *size_str(u64 nr, unsigned size); int flush_device(int fd); diff --git a/utils/src/mkfs.c b/utils/src/mkfs.c index 962927c9..f27a5674 100644 --- a/utils/src/mkfs.c +++ b/utils/src/mkfs.c @@ -206,14 +206,14 @@ static int do_mkfs(struct mkfs_args *args) } /* minumum meta device size to make reserved blocks reasonably large */ - ret = device_size(args->meta_device, meta_fd, 64ULL * (1024 * 1024 * 1024), - args->max_meta_size, args->allow_small_size, "meta", &meta_size); + ret = limit_device_size(args->meta_device, meta_fd, 64ULL * (1024 * 1024 * 1024), + args->max_meta_size, args->allow_small_size, "meta", &meta_size); if (ret) goto out; /* .. 
then arbitrarily the same minimum data device size */ - ret = device_size(args->data_device, data_fd, 64ULL * (1024 * 1024 * 1024), - args->max_data_size, args->allow_small_size, "data", &data_size); + ret = limit_device_size(args->data_device, data_fd, 64ULL * (1024 * 1024 * 1024), + args->max_data_size, args->allow_small_size, "data", &data_size); if (ret) goto out; diff --git a/utils/src/prepare_empty_data_device.c b/utils/src/prepare_empty_data_device.c new file mode 100644 index 00000000..b9732d1a --- /dev/null +++ b/utils/src/prepare_empty_data_device.c @@ -0,0 +1,247 @@ +#define _GNU_SOURCE /* O_DIRECT */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sparse.h" +#include "cmd.h" +#include "util.h" +#include "format.h" +#include "parse.h" +#include "crc.h" +#include "rand.h" +#include "dev.h" +#include "key.h" +#include "bitops.h" +#include "btree.h" +#include "leaf_item_hash.h" +#include "blkid.h" +#include "quorum.h" + +struct prepare_empty_data_dev_args { + char *meta_device; + char *data_device; + bool check; +}; + +static int do_prepare_empty_data_dev(struct prepare_empty_data_dev_args *args) +{ + struct scoutfs_super_block *meta_super = NULL; + struct scoutfs_super_block *data_super = NULL; + char uuid_str[37]; + int meta_fd = -1; + int data_fd = -1; + u64 data_blocks; + u64 data_size; + u64 in_use; + int ret; + + ret = posix_memalign((void **)&data_super, SCOUTFS_BLOCK_SM_SIZE, SCOUTFS_BLOCK_SM_SIZE); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "failed to allocate data super block: %s (%d)\n", + strerror(errno), errno); + goto out; + } + + meta_fd = open(args->meta_device, O_DIRECT | O_SYNC | O_RDONLY | O_EXCL); + if (meta_fd < 0) { + ret = -errno; + fprintf(stderr, "failed to open meta device '%s': %s (%d)\n", + args->meta_device, strerror(errno), errno); + goto out; + } + + ret = 
read_block_verify(meta_fd, SCOUTFS_BLOCK_MAGIC_SUPER, 0, SCOUTFS_SUPER_BLKNO, + SCOUTFS_BLOCK_SM_SHIFT, (void **)&meta_super); + if (ret) { + ret = -errno; + fprintf(stderr, "failed to read meta super block: %s (%d)\n", + strerror(errno), errno); + goto out; + } + + ret = meta_super_in_use(meta_fd, meta_super); + if (ret < 0) { + if (ret == -EBUSY) + fprintf(stderr, "The filesystem must be fully recovered and cleanly unmounted to determine if the data device is empty.\n"); + goto out; + } + + in_use = (le64_to_cpu(meta_super->total_data_blocks) - SCOUTFS_DATA_DEV_START_BLKNO) - + le64_to_cpu(meta_super->data_alloc.total_len); + if (in_use) { + fprintf(stderr, "Data block allocator metadata shows "SIZE_FMT" data blocks used by files. They must be removed, truncated, or released before a new empty data device can be used.\n", + SIZE_ARGS(in_use, SCOUTFS_BLOCK_SM_SIZE)); + ret = -EINVAL; + goto out; + } + + if (args->data_device) { + data_fd = open(args->data_device, O_DIRECT | O_EXCL | + (args->check ? 
O_RDONLY : O_RDWR | O_SYNC)); + if (data_fd < 0) { + ret = -errno; + fprintf(stderr, "failed to open data device '%s': %s (%d)\n", + args->data_device, strerror(errno), errno); + goto out; + } + + ret = get_device_size(args->data_device, data_fd, &data_size); + if (ret < 0) + goto out; + + data_blocks = data_size >> SCOUTFS_BLOCK_SM_SHIFT; + + if (data_blocks < le64_to_cpu(meta_super->total_data_blocks)) { + fprintf(stderr, "new data device %s of size "BASE_SIZE_FMT" has %llu 4KiB blocks, it needs at least "SIZE_FMT" blocks.\n", + args->data_device, + BASE_SIZE_ARGS(data_size), + data_blocks, + SIZE_ARGS(le64_to_cpu(meta_super->total_data_blocks), + SCOUTFS_BLOCK_SM_SIZE)); + ret = -EINVAL; + goto out; + } + } + + if (args->check) { + ret = 0; + goto out; + } + + /* the data device superblock only needs fs identifying fields */ + memset(data_super, 0, sizeof(struct scoutfs_super_block)); + data_super->id = meta_super->id; + data_super->fmt_vers = meta_super->fmt_vers; + data_super->flags = meta_super->flags &~ cpu_to_le64(SCOUTFS_FLAG_IS_META_BDEV); + memcpy(data_super->uuid, meta_super->uuid,sizeof(data_super->uuid)); + data_super->seq = meta_super->seq; + data_super->total_meta_blocks = meta_super->total_meta_blocks; + data_super->total_data_blocks = meta_super->total_data_blocks; + + ret = write_block(data_fd, SCOUTFS_BLOCK_MAGIC_SUPER, meta_super->hdr.fsid, 1, + SCOUTFS_SUPER_BLKNO, SCOUTFS_BLOCK_SM_SHIFT, &data_super->hdr); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "Error writing super block to new data device '%s': %s (%d)\n", + args->data_device, strerror(errno), errno); + goto out; + } + + uuid_unparse(meta_super->uuid, uuid_str); + + printf("Successfully initialized empty data device for scoutfs filesystem:\n" + " meta device path: %s\n" + " data device path: %s\n" + " fsid: %llx\n" + " uuid: %s\n" + " format version: %llu\n" + " 64KB metadata blocks: "SIZE_FMT"\n" + " 4KB data blocks: "SIZE_FMT"\n", + args->meta_device, + args->data_device, + 
le64_to_cpu(meta_super->hdr.fsid), + uuid_str, + le64_to_cpu(meta_super->fmt_vers), + SIZE_ARGS(le64_to_cpu(meta_super->total_meta_blocks), + SCOUTFS_BLOCK_LG_SIZE), + SIZE_ARGS(le64_to_cpu(meta_super->total_data_blocks), + SCOUTFS_BLOCK_SM_SIZE)); + + ret = 0; +out: + if (args->check) { + if (ret == 0) + printf("All checks passed.\n"); + else + printf("Errors were found that must be addressed before a new empty data device could be prepared and used.\n"); + } + + if (meta_super) + free(meta_super); + if (data_super) + free(data_super); + if (meta_fd != -1) + close(meta_fd); + if (data_fd != -1) + close(data_fd); + return ret; +} + +static int parse_opt(int key, char *arg, struct argp_state *state) +{ + struct prepare_empty_data_dev_args *args = state->input; + + switch (key) { + case 'c': + args->check = true; + break; + case ARGP_KEY_ARG: + if (!args->meta_device) + args->meta_device = strdup_or_error(state, arg); + else if (!args->data_device) + args->data_device = strdup_or_error(state, arg); + else + argp_error(state, "more than two device arguments given"); + break; + case ARGP_KEY_FINI: + if (!args->meta_device) + argp_error(state, "no metadata device argument given"); + if (!args->data_device && !args->check) + argp_error(state, "no data device argument given"); + break; + default: + break; + } + + return 0; +} + +static struct argp_option options[] = { + { "check", 'c', NULL, 0, "Only check for errors and do not write", }, + { NULL } +}; + +static struct argp argp = { + options, + parse_opt, + "META-DEVICE DATA-DEVICE", + "Prepare empty data device for use with an existing ScoutFS filesystem" +}; + +static int prepare_empty_data_dev_cmd(int argc, char *argv[]) +{ + struct prepare_empty_data_dev_args prepare_empty_data_dev_args = { + .check = false, + }; + int ret; + + ret = argp_parse(&argp, argc, argv, 0, NULL, &prepare_empty_data_dev_args); + if (ret) + return ret; + + return do_prepare_empty_data_dev(&prepare_empty_data_dev_args); +} + +static void 
__attribute__((constructor)) prepare_empty_data_dev_ctor(void) +{ + cmd_register_argp("prepare-empty-data-device", &argp, GROUP_CORE, + prepare_empty_data_dev_cmd); +}