Merge pull request #118 from versity/zab/prepare_empty_data_dev

Zab/prepare empty data dev
This commit is contained in:
Zach Brown
2023-04-17 14:20:29 -07:00
committed by GitHub
12 changed files with 720 additions and 307 deletions

View File

@@ -39,6 +39,18 @@ t_quiet()
t_fail "quiet command failed"
}
#
# Quietly run a command during a test. The output is logged but only
# the return code is printed, presumably because the output contains
# a lot of invocation specific text that is difficult to filter.
#
t_rc()
{
echo "# $*" >> "$T_TMP.rc.log"
"$@" >> "$T_TMP.rc.log" 2>&1
echo "rc: $?"
}
#
# redirect test output back to the output of the invoking script intead
# of the compared output.

View File

@@ -0,0 +1,27 @@
== make tmp sparse data dev files
== make scratch fs
== small new data device fails
rc: 1
== check sees data device errors
rc: 1
rc: 0
== preparing while mounted fails
rc: 1
== preparing without recovery fails
rc: 1
== check sees metadata errors
rc: 1
rc: 1
== preparing with file data fails
rc: 1
== preparing after emptied
rc: 0
== checks pass
rc: 0
rc: 0
== using prepared
== preparing larger and resizing
rc: 0
equal_prepared
large_prepared
resized larger test rc: 0

View File

@@ -36,6 +36,7 @@ cross-mount-data-free.sh
persistent-item-vers.sh
setup-error-teardown.sh
resize-devices.sh
change-devices.sh
fence-and-reclaim.sh
orphan-inodes.sh
mount-unmount-race.sh

View File

@@ -12,7 +12,7 @@ mount_fail()
}
echo "== prepare devices, mount point, and logs"
SCR="/mnt/scoutfs.extra"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
> $T_TMP.mount.out
scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \

View File

@@ -0,0 +1,76 @@
#
# test changing devices
#
echo "== make tmp sparse data dev files"
sz=$(blockdev --getsize64 "$T_EX_DATA_DEV")
large_sz=$((sz * 2))
touch "$T_TMP."{small,equal,large}
truncate -s 1MB "$T_TMP.small"
truncate -s $sz "$T_TMP.equal"
truncate -s $large_sz "$T_TMP.large"
echo "== make scratch fs"
t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
echo "== small new data device fails"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.small"
echo "== check sees data device errors"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.small"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
echo "== preparing while mounted fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
umount "$SCR"
echo "== preparing without recovery fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
umount -f "$SCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== check sees metadata errors"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"
echo "== preparing with file data fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
echo hi > "$SCR"/file
umount "$SCR"
scoutfs print "$T_EX_META_DEV" > "$T_TMP.print"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== preparing after emptied"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
rm -f "$SCR"/file
umount "$SCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== checks pass"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"
echo "== using prepared"
scr_loop=$(losetup --find --show "$T_TMP.equal")
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
touch "$SCR"/equal_prepared
equal_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
umount "$SCR"
losetup -d "$scr_loop"
echo "== preparing larger and resizing"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.large"
scr_loop=$(losetup --find --show "$T_TMP.large")
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
touch "$SCR"/large_prepared
ls "$SCR"
scoutfs resize-devices -p "$SCR" -d $large_sz
large_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
test "$large_tot" -gt "$equal_tot" ; echo "resized larger test rc: $?"
umount "$SCR"
losetup -d "$scr_loop"
t_pass

View File

@@ -59,7 +59,7 @@ echo "== make small meta fs"
# meta device just big enough for reserves and the metadata we'll fill
scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
t_fail "mkfs failed"
SCR="/mnt/scoutfs.enospc"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"

View File

@@ -73,7 +73,7 @@ echo "== make initial small fs"
scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m $quarter_meta -d $quarter_data \
"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
t_fail "mkfs failed"
SCR="/mnt/scoutfs.enospc"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"

View File

@@ -76,6 +76,97 @@ run when the file system will not be mounted.
.RE
.PD
.TP
.BI "counters [-t|--table] SYSFS-DIR"
.sp
Display the counters and their values for a mounted ScoutFS filesystem.
.RS 1.0i
.PD 0
.sp
.TP
.B SYSFS-DIR
The mount's sysfs directory in which to find the
.B counters/
directory when then contains files for each counter.
The sysfs directory is
of the form
.I /sys/fs/scoutfs/f.<fsid>.r.<rid>/
\&.
.TP
.B "-t, --table"
Format the counters into a columnar table that fills the width of the display
instead of printing one counter per line.
.RE
.PD
.TP
.BI "data-waiting {-I|--inode} INODE-NUM {-B|--block} BLOCK-NUM [-p|--path PATH]"
.sp
Display all the files and blocks for which there is a task blocked waiting on
offline data.
.sp
The results are sorted by the file's inode number and the
logical block offset that is being waited on.
.sp
Each line of output describes a block in a file that has a task waiting
and is formatted as:
.I "ino <nr> iblock <nr> ops [str]"
\&. The ops string indicates blocked operations seperated by commas and can
include
.B read
for a read operation,
.B write
for a write operation, and
.B change_size
for a truncate or extending write.
.RS 1.0i
.PD 0
.sp
.TP
.B "-I, --inode INODE-NUM"
Start iterating over waiting tasks from the given inode number.
Value of 0 will show all waiting tasks.
.TP
.B "-B, --block BLOCK-NUM"
Start iterating over waiting tasks from the given logical block number
in the starting inode. Value of 0 will show blocks in the first inode
and then continue to show all blocks with tasks waiting in all the
remaining inodes.
.TP
.B "-p, --path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "data-wait-err {-I|--inode} INODE-NUM {-V|--version} VER-NUM {-F|--offset} OFF-NUM {-C|--count} COUNT {-O|--op} OP {-E|--err} ERR [-p|--path PATH]"
.sp
Return error from matching waiters.
.RS 1.0i
.PD 0
.sp
.TP
.B "-C, --count COUNT"
Count.
.TP
.B "-E, --err ERR"
Error.
.TP
.B "-F, --offset OFF-NUM"
Offset. May be expressed in bytes, or with KMGTP (Kibi, Mibi, etc.) size
suffixes.
.TP
.B "-I, --inode INODE-NUM"
Inode number.
.TP
.B "-O, --op OP"
Operation. One of: "read", "write", "change_size".
.TP
.B "-p, --path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "df [-h|--human-readable] [-p|--path PATH]"
.sp
@@ -93,6 +184,72 @@ A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "get-allocated-inos [-i|--ino INO] [-s|--single] [-p|--path PATH]"
.sp
This debugging command prints allocated inode numbers. It only prints
inodes
found in the group that contains the starting inode. The printed inode
numbers aren't necessarily reachable. They could be anywhere in the
process from being unlinked to finally deleted when their items
were found.
.RS 1.0i
.PD 0
.TP
.sp
.B "-i, --ino INO"
The first 64bit inode number which could be printed.
.TP
.B "-s, --single"
Only print the single starting inode when it is allocated, all other allocated
inode numbers will be ignored.
.TP
.B "-p, --path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "ino-path INODE-NUM [-p|--path PATH]"
.sp
Display all paths that reference an inode number.
.sp
Ongoing filesystem changes, such as renaming a common parent of multiple paths,
can cause displayed paths to be inconsistent.
.RS 1.0i
.PD 0
.sp
.TP
.B "INODE-NUM"
The inode number of the target inode.
.TP
.B "-p|--path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "list-hidden-xattrs FILE"
.sp
Display extended attributes starting with the
.BR scoutfs.
prefix and containing the
.BR hide.
tag
which makes them invisible to
.BR listxattr (2) .
The names of each attribute are output, one per line. Their order
is not specified.
.RS 1.0i
.PD 0
.TP
.sp
.B "FILE"
The path to a file within a ScoutFS filesystem. File permissions must allow
reading.
.RE
.PD
.TP
.BI "mkfs META-DEVICE DATA-DEVICE {-Q|--quorum-slot} NR,ADDR,PORT [-m|--max-meta-size SIZE] [-d|--max-data-size SIZE] [-z|--data-alloc-zone-blocks BLOCKS] [-f|--force] [-A|--allow-small-size] [-V|--format-version VERS]"
.sp
@@ -171,6 +328,79 @@ The range of supported versions is visible in the output of
.RE
.PD
.TP
.BI "prepare-empty-data-device {-c|--check} META-DEVICE DATA-DEVICE"
.sp
Prepare an unused device for use as the data device for an existing file
system. This will write an initialized super block to the specified
data device, destroying any existing contents. The specified metadata
device will not be modified. The file system must be fully unmounted
and any client mount recovery must be complete.
.sp
The existing metadata device is read to ensure that it's safe to stop
using the old data device. The data block allocators must indicate that
all data blocks are free. If there are still data blocks referenced by
files then the command will fail. The contents of these files must be
freed for the command to proceed.
.sp
A new super block is written to the new data device. The device can
then be used as the data device to mount the file system. As this
switch is made all client mounts must refer to the new device. The old
device is not modified and still contains a valid data super block that
could be mounted, creating data device writes that wouldn't be read by
mounts using the new device.
.sp
The number of data blocks available to the file system will not change
as the new data device is used. The new device must be large enough to
store all the data blocks that were available on the old device. If the
new device is larger then its added capacity can be used by growing the
new data device with the resize-devices command once it is mounted.
.RS 1.0i
.PD 0
.TP
.sp
.B "-c, --check"
Only check for errors that would prevent a new empty data device from
being used. No changes will be made to the data device. If the data
device is provided then its size will be checked to make sure that it is
large enough. This can be used to test the metadata for data references
before destroying an old empty data device.
.RE
.PD
.TP
.BI "print {-S|--skip-likely-huge} META-DEVICE"
.sp
Prints out all of the metadata in the file system. This makes no effort
to ensure that the structures are consistent as they're traversed and
can present structures that seem corrupt as they change as they're
output.
.RS 1.0i
.PD 0
.TP
.sp
.B "-S, --skip-likely-huge"
Skip printing structures that are likely to be very large. The
structures that are skipped tend to be global and whose size tends to be
related to the size of the volume. Examples of skipped structures include
the global fs items, srch files, and metadata and data
allocators. Similar structures that are not skipped are related to the
number of mounts and are maintained at a relatively reasonable size.
These include per-mount log trees, srch files, allocators, and the
metadata allocators used by server commits.
.sp
Skipping the larger structures limits the print output to a relatively
constant size rather than being a large multiple of the used metadata
space of the volume making the output much more useful for inspection.
.TP
.B "META-DEVICE"
The path to the metadata device for the filesystem whose metadata will be
printed. An attempt will be made to flush the host's buffer cache for
this device with the BLKFLSBUF ioctl, or with posix_fadvise() if
the path refers to a regular file.
.RE
.PD
.TP
.BI "resize-devices [-p|--path PATH] [-m|--meta-size SIZE] [-d|--data-size SIZE]"
.sp
@@ -229,6 +459,92 @@ kibibytes, mebibytes, etc.
.RE
.PD
.TP
.BI "search-xattrs XATTR-NAME [-p|--path PATH]"
.sp
Display the inode numbers of inodes in the filesystem which may have
an extended attribute with the given name.
.sp
The results may contain false positives. The returned inode numbers
should be checked to verify that the extended attribute is in fact
present on the inode.
.RS 1.0i
.PD 0
.TP
.sp
.B XATTR-NAME
The full name of the extended attribute to search for as
described in the
.BR xattr (7)
manual page.
.TP
.B "-p|--path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "setattr FILE [-d, --data-version=VERSION [-s, --size=SIZE [-o, --offline]]] [-t, --ctime=TIMESPEC]"
.sp
Set ScoutFS-specific attributes on a newly created zero-length file.
.RS 1.0i
.PD 0
.sp
.TP
.B "-V, --data-version=VERSION"
Set data version.
.TP
.B "-o, --offline"
Set file contents as offline, not sparse. Requires
.I --size
option also be present.
.TP
.B "-s, --size=SIZE"
Set file size. May be expressed in bytes, or with
KMGTP (Kibi, Mibi, etc.) size suffixes. Requires
.I --data-version
option also be present.
.TP
.B "-t, --ctime=TIMESPEC"
Set creation time using
.I "<seconds-since-epoch>.<nanoseconds>"
format.
.RE
.PD
.TP
.BI "stage ARCHIVE-FILE FILE {-V|--version} VERSION [-o, --offset OFF-NUM] [-l, --length LENGTH]"
.sp
.B Stage
(i.e. return to online) the previously-offline contents of a file by copying a
region from another file, the archive, and without updating regular inode
metadata. Any operations that are blocked by the existence of an offline
region will proceed once the region has been staged.
.RS 1.0i
.PD 0
.TP
.sp
.B "ARCHIVE-FILE"
The source file for the file contents being staged.
.TP
.B "FILE"
The regular file whose contents will be staged.
.TP
.B "-V, --version VERSION"
The data_version of the contents to be staged. It must match the
current data_version of the file.
.TP
.B "-o, --offset OFF-NUM"
The starting byte offset of the region to write. May be expressed in bytes, or with
KMGTP (Kibi, Mibi, etc.) size suffixes. Default is 0.
.TP
.B "-l, --length LENGTH"
Length of range (bytes or KMGTP units) of file to stage. Default is the file's
total size.
.RE
.PD
.TP
.BI "stat FILE [-s|--single-field FIELD-NAME]"
.sp
Display ScoutFS-specific metadata fields for the given file.
@@ -314,221 +630,6 @@ The total number of 4K data blocks in the filesystem.
.RE
.PD
.TP
.BI "counters [-t|--table] SYSFS-DIR"
.sp
Display the counters and their values for a mounted ScoutFS filesystem.
.RS 1.0i
.PD 0
.sp
.TP
.B SYSFS-DIR
The mount's sysfs directory in which to find the
.B counters/
directory when then contains files for each counter.
The sysfs directory is
of the form
.I /sys/fs/scoutfs/f.<fsid>.r.<rid>/
\&.
.TP
.B "-t, --table"
Format the counters into a columnar table that fills the width of the display
instead of printing one counter per line.
.RE
.PD
.TP
.BI "search-xattrs XATTR-NAME [-p|--path PATH]"
.sp
Display the inode numbers of inodes in the filesystem which may have
an extended attribute with the given name.
.sp
The results may contain false positives. The returned inode numbers
should be checked to verify that the extended attribute is in fact
present on the inode.
.RS 1.0i
.PD 0
.TP
.sp
.B XATTR-NAME
The full name of the extended attribute to search for as
described in the
.BR xattr (7)
manual page.
.TP
.B "-p|--path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "list-hidden-xattrs FILE"
.sp
Display extended attributes starting with the
.BR scoutfs.
prefix and containing the
.BR hide.
tag
which makes them invisible to
.BR listxattr (2) .
The names of each attribute are output, one per line. Their order
is not specified.
.RS 1.0i
.PD 0
.TP
.sp
.B "FILE"
The path to a file within a ScoutFS filesystem. File permissions must allow
reading.
.RE
.PD
.TP
.BI "walk-inodes {meta_seq|data_seq} FIRST-INODE LAST-INODE [-p|--path PATH]"
.sp
Walk an inode index in the file system and output the inode numbers
that are found between the first and last positions in the index.
.RS 1.0i
.PD 0
.sp
.TP
.BR meta_seq , data_seq
Which index to walk.
.TP
.B "FIRST-INODE"
An integer index value giving starting position of the index walk.
.I 0
is the first possible position.
.TP
.B "LAST-INODE"
An integer index value giving the last position to include in the index walk.
.I \-1
can be given to indicate the last possible position.
.TP
.B "-p|--path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "ino-path INODE-NUM [-p|--path PATH]"
.sp
Display all paths that reference an inode number.
.sp
Ongoing filesystem changes, such as renaming a common parent of multiple paths,
can cause displayed paths to be inconsistent.
.RS 1.0i
.PD 0
.sp
.TP
.B "INODE-NUM"
The inode number of the target inode.
.TP
.B "-p|--path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "data-waiting {-I|--inode} INODE-NUM {-B|--block} BLOCK-NUM [-p|--path PATH]"
.sp
Display all the files and blocks for which there is a task blocked waiting on
offline data.
.sp
The results are sorted by the file's inode number and the
logical block offset that is being waited on.
.sp
Each line of output describes a block in a file that has a task waiting
and is formatted as:
.I "ino <nr> iblock <nr> ops [str]"
\&. The ops string indicates blocked operations seperated by commas and can
include
.B read
for a read operation,
.B write
for a write operation, and
.B change_size
for a truncate or extending write.
.RS 1.0i
.PD 0
.sp
.TP
.B "-I, --inode INODE-NUM"
Start iterating over waiting tasks from the given inode number.
Value of 0 will show all waiting tasks.
.TP
.B "-B, --block BLOCK-NUM"
Start iterating over waiting tasks from the given logical block number
in the starting inode. Value of 0 will show blocks in the first inode
and then continue to show all blocks with tasks waiting in all the
remaining inodes.
.TP
.B "-p, --path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "data-wait-err {-I|--inode} INODE-NUM {-V|--version} VER-NUM {-F|--offset} OFF-NUM {-C|--count} COUNT {-O|--op} OP {-E|--err} ERR [-p|--path PATH]"
.sp
Return error from matching waiters.
.RS 1.0i
.PD 0
.sp
.TP
.B "-C, --count COUNT"
Count.
.TP
.B "-E, --err ERR"
Error.
.TP
.B "-F, --offset OFF-NUM"
Offset. May be expressed in bytes, or with KMGTP (Kibi, Mibi, etc.) size
suffixes.
.TP
.B "-I, --inode INODE-NUM"
Inode number.
.TP
.B "-O, --op OP"
Operation. One of: "read", "write", "change_size".
.TP
.B "-p, --path PATH"
A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "stage ARCHIVE-FILE FILE {-V|--version} VERSION [-o, --offset OFF-NUM] [-l, --length LENGTH]"
.sp
.B Stage
(i.e. return to online) the previously-offline contents of a file by copying a
region from another file, the archive, and without updating regular inode
metadata. Any operations that are blocked by the existence of an offline
region will proceed once the region has been staged.
.RS 1.0i
.PD 0
.TP
.sp
.B "ARCHIVE-FILE"
The source file for the file contents being staged.
.TP
.B "FILE"
The regular file whose contents will be staged.
.TP
.B "-V, --version VERSION"
The data_version of the contents to be staged. It must match the
current data_version of the file.
.TP
.B "-o, --offset OFF-NUM"
The starting byte offset of the region to write. May be expressed in bytes, or with
KMGTP (Kibi, Mibi, etc.) size suffixes. Default is 0.
.TP
.B "-l, --length LENGTH"
Length of range (bytes or KMGTP units) of file to stage. Default is the file's
total size.
.RE
.PD
.TP
.BI "release FILE {-V|--version} VERSION [-o, --offset OFF-NUM] [-l, --length LENGTH]"
.sp
@@ -568,88 +669,28 @@ total size.
.PD
.TP
.BI "setattr FILE [-d, --data-version=VERSION [-s, --size=SIZE [-o, --offline]]] [-t, --ctime=TIMESPEC]"
.BI "walk-inodes {meta_seq|data_seq} FIRST-INODE LAST-INODE [-p|--path PATH]"
.sp
Set ScoutFS-specific attributes on a newly created zero-length file.
Walk an inode index in the file system and output the inode numbers
that are found between the first and last positions in the index.
.RS 1.0i
.PD 0
.sp
.TP
.B "-V, --data-version=VERSION"
Set data version.
.BR meta_seq , data_seq
Which index to walk.
.TP
.B "-o, --offline"
Set file contents as offline, not sparse. Requires
.I --size
option also be present.
.B "FIRST-INODE"
An integer index value giving starting position of the index walk.
.I 0
is the first possible position.
.TP
.B "-s, --size=SIZE"
Set file size. May be expressed in bytes, or with
KMGTP (Kibi, Mibi, etc.) size suffixes. Requires
.I --data-version
option also be present.
.B "LAST-INODE"
An integer index value giving the last position to include in the index walk.
.I \-1
can be given to indicate the last possible position.
.TP
.B "-t, --ctime=TIMESPEC"
Set creation time using
.I "<seconds-since-epoch>.<nanoseconds>"
format.
.RE
.PD
.TP
.BI "print {-S|--skip-likely-huge} META-DEVICE"
.sp
Prints out all of the metadata in the file system. This makes no effort
to ensure that the structures are consistent as they're traversed and
can present structures that seem corrupt as they change as they're
output.
.RS 1.0i
.PD 0
.TP
.sp
.B "-S, --skip-likely-huge"
Skip printing structures that are likely to be very large. The
structures that are skipped tend to be global and whose size tends to be
related to the size of the volume. Examples of skipped structures include
the global fs items, srch files, and metadata and data
allocators. Similar structures that are not skipped are related to the
number of mounts and are maintained at a relatively reasonable size.
These include per-mount log trees, srch files, allocators, and the
metadata allocators used by server commits.
.sp
Skipping the larger structures limits the print output to a relatively
constant size rather than being a large multiple of the used metadata
space of the volume making the output much more useful for inspection.
.TP
.B "META-DEVICE"
The path to the metadata device for the filesystem whose metadata will be
printed. An attempt will be made to flush the host's buffer cache for
this device with the BLKFLSBUF ioctl, or with posix_fadvise() if
the path refers to a regular file.
.RE
.PD
.TP
.BI "get-allocated-inos [-i|--ino INO] [-s|--single] [-p|--path PATH]"
.sp
This debugging command prints allocated inode numbers. It only prints
inodes
found in the group that contains the starting inode. The printed inode
numbers aren't necessarily reachable. They could be anywhere in the
process from being unlinked to finally deleted when their items
were found.
.RS 1.0i
.PD 0
.TP
.sp
.B "-i, --ino INO"
The first 64bit inode number which could be printed.
.TP
.B "-s, --single"
Only print the single starting inode when it is allocated, all other allocated
inode numbers will be ignored.
.TP
.B "-p, --path PATH"
.B "-p|--path PATH"
A path within a ScoutFS filesystem.
.RE
.PD

View File

@@ -12,13 +12,10 @@
#include "sparse.h"
#include "dev.h"
int device_size(char *path, int fd,
u64 min_size, u64 max_size, bool allow_small_size,
char *use_type, u64 *size_ret)
int get_device_size(char *path, int fd, u64 *size_ret)
{
struct stat st;
u64 size;
char *target_type;
int ret;
if (fstat(fd, &st)) {
@@ -30,7 +27,6 @@ int device_size(char *path, int fd,
if (S_ISREG(st.st_mode)) {
size = st.st_size;
target_type = "file";
} else if (S_ISBLK(st.st_mode)) {
if (ioctl(fd, BLKGETSIZE64, &size)) {
ret = -errno;
@@ -38,13 +34,26 @@ int device_size(char *path, int fd,
path, strerror(errno), errno);
return ret;
}
target_type = "device";
} else {
fprintf(stderr, "path isn't regular or device file '%s'\n",
path);
return -EINVAL;
}
*size_ret = size;
return 0;
}
int limit_device_size(char *path, int fd, u64 min_size, u64 max_size, bool allow_small_size,
char *use_type, u64 *size_ret)
{
u64 size;
int ret;
ret = get_device_size(path, fd, &size);
if (ret < 0)
return ret;
if (max_size) {
if (size > max_size) {
printf("Limiting use of "BASE_SIZE_FMT
@@ -64,9 +73,9 @@ int device_size(char *path, int fd,
if (size < min_size) {
fprintf(stderr,
BASE_SIZE_FMT" %s too small for min "
BASE_SIZE_FMT" too small for min "
BASE_SIZE_FMT" %s device%s\n",
BASE_SIZE_ARGS(size), target_type,
BASE_SIZE_ARGS(size),
BASE_SIZE_ARGS(min_size), use_type,
allow_small_size ? ", allowing with -A" : "");

View File

@@ -9,9 +9,9 @@
#define SIZE_FMT "%llu (%.2f %s)"
#define SIZE_ARGS(nr, sz) (nr), size_flt(nr, sz), size_str(nr, sz)
int device_size(char *path, int fd,
u64 min_size, u64 max_size, bool allow_small_size,
char *use_type, u64 *size_ret);
int get_device_size(char *path, int fd, u64 *size_ret);
int limit_device_size(char *path, int fd, u64 min_size, u64 max_size, bool allow_small_size,
char *use_type, u64 *size_ret);
float size_flt(u64 nr, unsigned size);
char *size_str(u64 nr, unsigned size);
int flush_device(int fd);

View File

@@ -206,14 +206,14 @@ static int do_mkfs(struct mkfs_args *args)
}
/* minumum meta device size to make reserved blocks reasonably large */
ret = device_size(args->meta_device, meta_fd, 64ULL * (1024 * 1024 * 1024),
args->max_meta_size, args->allow_small_size, "meta", &meta_size);
ret = limit_device_size(args->meta_device, meta_fd, 64ULL * (1024 * 1024 * 1024),
args->max_meta_size, args->allow_small_size, "meta", &meta_size);
if (ret)
goto out;
/* .. then arbitrarily the same minimum data device size */
ret = device_size(args->data_device, data_fd, 64ULL * (1024 * 1024 * 1024),
args->max_data_size, args->allow_small_size, "data", &data_size);
ret = limit_device_size(args->data_device, data_fd, 64ULL * (1024 * 1024 * 1024),
args->max_data_size, args->allow_small_size, "data", &data_size);
if (ret)
goto out;

View File

@@ -0,0 +1,247 @@
#define _GNU_SOURCE /* O_DIRECT */
#include <unistd.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/time.h>
#include <uuid/uuid.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <assert.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <ctype.h>
#include <inttypes.h>
#include <argp.h>
#include "sparse.h"
#include "cmd.h"
#include "util.h"
#include "format.h"
#include "parse.h"
#include "crc.h"
#include "rand.h"
#include "dev.h"
#include "key.h"
#include "bitops.h"
#include "btree.h"
#include "leaf_item_hash.h"
#include "blkid.h"
#include "quorum.h"
struct prepare_empty_data_dev_args {
char *meta_device;
char *data_device;
bool check;
};
static int do_prepare_empty_data_dev(struct prepare_empty_data_dev_args *args)
{
struct scoutfs_super_block *meta_super = NULL;
struct scoutfs_super_block *data_super = NULL;
char uuid_str[37];
int meta_fd = -1;
int data_fd = -1;
u64 data_blocks;
u64 data_size;
u64 in_use;
int ret;
ret = posix_memalign((void **)&data_super, SCOUTFS_BLOCK_SM_SIZE, SCOUTFS_BLOCK_SM_SIZE);
if (ret < 0) {
ret = -errno;
fprintf(stderr, "failed to allocate data super block: %s (%d)\n",
strerror(errno), errno);
goto out;
}
meta_fd = open(args->meta_device, O_DIRECT | O_SYNC | O_RDONLY | O_EXCL);
if (meta_fd < 0) {
ret = -errno;
fprintf(stderr, "failed to open meta device '%s': %s (%d)\n",
args->meta_device, strerror(errno), errno);
goto out;
}
ret = read_block_verify(meta_fd, SCOUTFS_BLOCK_MAGIC_SUPER, 0, SCOUTFS_SUPER_BLKNO,
SCOUTFS_BLOCK_SM_SHIFT, (void **)&meta_super);
if (ret) {
ret = -errno;
fprintf(stderr, "failed to read meta super block: %s (%d)\n",
strerror(errno), errno);
goto out;
}
ret = meta_super_in_use(meta_fd, meta_super);
if (ret < 0) {
if (ret == -EBUSY)
fprintf(stderr, "The filesystem must be fully recovered and cleanly unmounted to determine if the data device is empty.\n");
goto out;
}
in_use = (le64_to_cpu(meta_super->total_data_blocks) - SCOUTFS_DATA_DEV_START_BLKNO) -
le64_to_cpu(meta_super->data_alloc.total_len);
if (in_use) {
fprintf(stderr, "Data block allocator metadata shows "SIZE_FMT" data blocks used by files. They must be removed, truncated, or released before a new empty data device can be used.\n",
SIZE_ARGS(in_use, SCOUTFS_BLOCK_SM_SIZE));
ret = -EINVAL;
goto out;
}
if (args->data_device) {
data_fd = open(args->data_device, O_DIRECT | O_EXCL |
(args->check ? O_RDONLY : O_RDWR | O_SYNC));
if (data_fd < 0) {
ret = -errno;
fprintf(stderr, "failed to open data device '%s': %s (%d)\n",
args->data_device, strerror(errno), errno);
goto out;
}
ret = get_device_size(args->data_device, data_fd, &data_size);
if (ret < 0)
goto out;
data_blocks = data_size >> SCOUTFS_BLOCK_SM_SHIFT;
if (data_blocks < le64_to_cpu(meta_super->total_data_blocks)) {
fprintf(stderr, "new data device %s of size "BASE_SIZE_FMT" has %llu 4KiB blocks, it needs at least "SIZE_FMT" blocks.\n",
args->data_device,
BASE_SIZE_ARGS(data_size),
data_blocks,
SIZE_ARGS(le64_to_cpu(meta_super->total_data_blocks),
SCOUTFS_BLOCK_SM_SIZE));
ret = -EINVAL;
goto out;
}
}
if (args->check) {
ret = 0;
goto out;
}
/* the data device superblock only needs fs identifying fields */
memset(data_super, 0, sizeof(struct scoutfs_super_block));
data_super->id = meta_super->id;
data_super->fmt_vers = meta_super->fmt_vers;
data_super->flags = meta_super->flags &~ cpu_to_le64(SCOUTFS_FLAG_IS_META_BDEV);
memcpy(data_super->uuid, meta_super->uuid,sizeof(data_super->uuid));
data_super->seq = meta_super->seq;
data_super->total_meta_blocks = meta_super->total_meta_blocks;
data_super->total_data_blocks = meta_super->total_data_blocks;
ret = write_block(data_fd, SCOUTFS_BLOCK_MAGIC_SUPER, meta_super->hdr.fsid, 1,
SCOUTFS_SUPER_BLKNO, SCOUTFS_BLOCK_SM_SHIFT, &data_super->hdr);
if (ret < 0) {
ret = -errno;
fprintf(stderr, "Error writing super block to new data device '%s': %s (%d)\n",
args->data_device, strerror(errno), errno);
goto out;
}
uuid_unparse(meta_super->uuid, uuid_str);
printf("Successfully initialized empty data device for scoutfs filesystem:\n"
" meta device path: %s\n"
" data device path: %s\n"
" fsid: %llx\n"
" uuid: %s\n"
" format version: %llu\n"
" 64KB metadata blocks: "SIZE_FMT"\n"
" 4KB data blocks: "SIZE_FMT"\n",
args->meta_device,
args->data_device,
le64_to_cpu(meta_super->hdr.fsid),
uuid_str,
le64_to_cpu(meta_super->fmt_vers),
SIZE_ARGS(le64_to_cpu(meta_super->total_meta_blocks),
SCOUTFS_BLOCK_LG_SIZE),
SIZE_ARGS(le64_to_cpu(meta_super->total_data_blocks),
SCOUTFS_BLOCK_SM_SIZE));
ret = 0;
out:
if (args->check) {
if (ret == 0)
printf("All checks passed.\n");
else
printf("Errors were found that must be addressed before a new empty data device could be prepared and used.\n");
}
if (meta_super)
free(meta_super);
if (data_super)
free(data_super);
if (meta_fd != -1)
close(meta_fd);
if (data_fd != -1)
close(data_fd);
return ret;
}
static int parse_opt(int key, char *arg, struct argp_state *state)
{
struct prepare_empty_data_dev_args *args = state->input;
switch (key) {
case 'c':
args->check = true;
break;
case ARGP_KEY_ARG:
if (!args->meta_device)
args->meta_device = strdup_or_error(state, arg);
else if (!args->data_device)
args->data_device = strdup_or_error(state, arg);
else
argp_error(state, "more than two device arguments given");
break;
case ARGP_KEY_FINI:
if (!args->meta_device)
argp_error(state, "no metadata device argument given");
if (!args->data_device && !args->check)
argp_error(state, "no data device argument given");
break;
default:
break;
}
return 0;
}
static struct argp_option options[] = {
{ "check", 'c', NULL, 0, "Only check for errors and do not write", },
{ NULL }
};
static struct argp argp = {
options,
parse_opt,
"META-DEVICE DATA-DEVICE",
"Prepare empty data device for use with an existing ScoutFS filesystem"
};
static int prepare_empty_data_dev_cmd(int argc, char *argv[])
{
struct prepare_empty_data_dev_args prepare_empty_data_dev_args = {
.check = false,
};
int ret;
ret = argp_parse(&argp, argc, argv, 0, NULL, &prepare_empty_data_dev_args);
if (ret)
return ret;
return do_prepare_empty_data_dev(&prepare_empty_data_dev_args);
}
static void __attribute__((constructor)) prepare_empty_data_dev_ctor(void)
{
cmd_register_argp("prepare-empty-data-device", &argp, GROUP_CORE,
prepare_empty_data_dev_cmd);
}