mirror of
https://github.com/versity/scoutfs.git
synced 2026-05-03 19:35:43 +00:00
Compare commits
15 Commits
zab/check
...
zab/parall
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
92ba4f5662 | ||
|
|
7754feee3f | ||
|
|
c4b00d059a | ||
|
|
42a60e31c5 | ||
|
|
ea7637b915 | ||
|
|
1324c39372 | ||
|
|
f41bd60ca4 | ||
|
|
70a932614a | ||
|
|
c6a11fcd6b | ||
|
|
3c67805004 | ||
|
|
02fe58b727 | ||
|
|
0519830229 | ||
|
|
4d6e1a14ae | ||
|
|
fc3e061ea8 | ||
|
|
a4bc3fb27d |
@@ -12,17 +12,22 @@ else
|
||||
SP = @:
|
||||
endif
|
||||
|
||||
SCOUTFS_GIT_DESCRIBE := \
|
||||
SCOUTFS_GIT_DESCRIBE ?= \
|
||||
$(shell git describe --all --abbrev=6 --long 2>/dev/null || \
|
||||
echo no-git)
|
||||
|
||||
ESCAPED_GIT_DESCRIBE := \
|
||||
$(shell echo $(SCOUTFS_GIT_DESCRIBE) |sed -e 's/\//\\\//g')
|
||||
|
||||
RPM_GITHASH ?= $(shell git rev-parse --short HEAD)
|
||||
|
||||
SCOUTFS_ARGS := SCOUTFS_GIT_DESCRIBE=$(SCOUTFS_GIT_DESCRIBE) \
|
||||
RPM_GITHASH=$(RPM_GITHASH) \
|
||||
CONFIG_SCOUTFS_FS=m -C $(SK_KSRC) M=$(CURDIR)/src \
|
||||
EXTRA_CFLAGS="-Werror"
|
||||
|
||||
# - We use the git describe from tags to set up the RPM versioning
|
||||
RPM_VERSION := $(shell git describe --long --tags | awk -F '-' '{gsub(/^v/,""); print $$1}')
|
||||
RPM_GITHASH := $(shell git rev-parse --short HEAD)
|
||||
TARFILE = scoutfs-kmod-$(RPM_VERSION).tar
|
||||
|
||||
|
||||
@@ -41,7 +46,8 @@ modules_install:
|
||||
|
||||
%.spec: %.spec.in .FORCE
|
||||
sed -e 's/@@VERSION@@/$(RPM_VERSION)/g' \
|
||||
-e 's/@@GITHASH@@/$(RPM_GITHASH)/g' < $< > $@+
|
||||
-e 's/@@GITHASH@@/$(RPM_GITHASH)/g' \
|
||||
-e 's/@@GITDESCRIBE@@/$(ESCAPED_GIT_DESCRIBE)/g' < $< > $@+
|
||||
mv $@+ $@
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
%define kmod_name scoutfs
|
||||
%define kmod_version @@VERSION@@
|
||||
%define kmod_git_hash @@GITHASH@@
|
||||
%define kmod_git_describe @@GITDESCRIBE@@
|
||||
%define pkg_date %(date +%%Y%%m%%d)
|
||||
|
||||
# Disable the building of the debug package(s).
|
||||
@@ -75,7 +76,7 @@ echo "Building for kernel: %{kernel_version} flavors: '%{flavors_to_build}'"
|
||||
for flavor in %flavors_to_build; do
|
||||
rm -rf obj/$flavor
|
||||
cp -r source obj/$flavor
|
||||
make SK_KSRC=%{kernel_source $flavor} -C obj/$flavor module
|
||||
make RPM_GITHASH=%{kmod_git_hash} SCOUTFS_GIT_DESCRIBE=%{kmod_git_describe} SK_KSRC=%{kernel_source $flavor} -C obj/$flavor module
|
||||
done
|
||||
|
||||
%install
|
||||
@@ -97,10 +98,21 @@ find %{buildroot} -type f -name \*.ko -exec %{__chmod} u+x \{\} \;
|
||||
/lib/modules
|
||||
|
||||
%post
|
||||
weak-modules --add-kernel --no-initramfs
|
||||
echo /lib/modules/%{kversion}/%{install_mod_dir}/scoutfs.ko | weak-modules --add-modules --no-initramfs
|
||||
depmod -a
|
||||
%endif
|
||||
|
||||
%clean
|
||||
rm -rf %{buildroot}
|
||||
|
||||
%preun
|
||||
# stash our modules for postun cleanup
|
||||
SCOUTFS_RPM_NAME=$(rpm -q %{name} | grep "%{version}-%{release}")
|
||||
rpm -ql $SCOUTFS_RPM_NAME | grep '\.ko$' > /var/run/%{name}-modules-%{version}-%{release} || true
|
||||
|
||||
%postun
|
||||
if [ -x /sbin/weak-modules ]; then
|
||||
cat /var/run/%{name}-modules-%{version}-%{release} | /sbin/weak-modules --remove-modules --no-initramfs
|
||||
fi
|
||||
|
||||
rm /var/run/%{name}-modules-%{version}-%{release} || true
|
||||
|
||||
@@ -12,7 +12,8 @@ BIN := src/createmany \
|
||||
src/find_xattrs \
|
||||
src/create_xattr_loop \
|
||||
src/fragmented_data_extents \
|
||||
src/o_tmpfile_umask
|
||||
src/o_tmpfile_umask \
|
||||
src/parallel_restore
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
@@ -22,8 +23,11 @@ ifneq ($(DEPS),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
|
||||
src/parallel_restore_cflags := ../utils/src/scoutfs_parallel_restore.a -lm
|
||||
|
||||
$(BIN): %: %.c Makefile
|
||||
gcc $(CFLAGS) -MD -MP -MF $*.d $< -o $@
|
||||
gcc $(CFLAGS) -MD -MP -MF $*.d $< -o $@ $($(@)_cflags)
|
||||
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
|
||||
782
tests/src/parallel_restore.c
Normal file
782
tests/src/parallel_restore.c
Normal file
@@ -0,0 +1,782 @@
|
||||
#define _GNU_SOURCE /* O_DIRECT */
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/xattr.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <time.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <signal.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
#include "../../utils/src/sparse.h"
|
||||
#include "../../utils/src/util.h"
|
||||
#include "../../utils/src/list.h"
|
||||
#include "../../utils/src/parse.h"
|
||||
#include "../../kmod/src/format.h"
|
||||
#include "../../utils/src/parallel_restore.h"
|
||||
|
||||
/*
|
||||
* XXX:
|
||||
* - add a nice description of what's going on
|
||||
* - mention allocator contention
|
||||
* - test child process dying handling
|
||||
* - root dir entry name length is wrong
|
||||
*/
|
||||
|
||||
/* printf format fragment, and its matching arguments, for reporting errno */
#define ERRF " errno %d (%s)"
#define ERRA errno, strerror(errno)

/*
 * When cond is true print a formatted "error: ..." line (a newline is
 * appended) to stdout and terminate the process with exit status 1.
 * Nothing happens when cond is false.
 */
#define error_exit(cond, fmt, args...)			\
do {							\
	if (cond) {					\
		printf("error: "fmt"\n", ##args);	\
		exit(1);				\
	}						\
} while (0)

/*
 * Debugging printf.  The call is compiled, so its arguments are still
 * checked against the format, but it sits behind a constant false
 * condition and never runs.
 */
#define dprintf(fmt, args...)		\
do {					\
	if (0)				\
		printf(fmt, ##args);	\
} while (0)

/* mode bits used for every generated regular file and directory */
#define REG_MODE (S_IFREG | 0644)
#define DIR_MODE (S_IFDIR | 0755)
|
||||
|
||||
/*
 * Run-time configuration, filled in from defaults and the command line
 * by main() and read by every other function in this file.
 */
struct opts {
	/* size in bytes of the aligned buffer handed to write_bufs() */
	unsigned long long buf_size;

	/* -b: a writer sends a partial result once it has created this many files */
	unsigned long long write_batch;
	/* -d LOW:HIGH: inclusive range of subdirs generated per directory */
	unsigned long long low_dirs;
	unsigned long long high_dirs;
	/* -f LOW:HIGH: inclusive range of files generated per directory */
	unsigned long long low_files;
	unsigned long long high_files;
	/* -m: path of the metadata device that is opened and written */
	char *meta_path;
	/* -n: total number of inode numbers split between the writers */
	unsigned long long total_files;
	/* -r: do all the work except the pwrite() calls */
	bool read_only;
	/* -s: randomization seed */
	unsigned long long seed;
	/* -w: number of writer processes to fork */
	unsigned long long nr_writers;
};
|
||||
|
||||
/*
 * Print the command line options and their defaults to stdout.  The
 * defaults shown in parentheses mirror the initial values that main()
 * gives struct opts.
 */
static void usage(void)
{
	printf("usage:\n"
	       " -b NR | threads write blocks in batches of NR files (1000000)\n"
	       " -d LOW:HIGH | range of subdirs per directory (5:10)\n"
	       " -f LOW:HIGH | range of files per directory (10:20)\n"
	       " -m PATH | path to metadata device\n"
	       " -n NR | total number of files to create (100)\n"
	       " -r | read-only, all work except writing, measure cpu cost\n"
	       " -s NR | randomization seed (random)\n"
	       " -w NR | number of writing processes to fork (online cpus)\n"
	       );
}
|
||||
|
||||
static size_t write_bufs(struct opts *opts, struct scoutfs_parallel_restore_writer *wri,
|
||||
void *buf, size_t buf_size, int dev_fd)
|
||||
{
|
||||
size_t total = 0;
|
||||
size_t count;
|
||||
off_t off;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
ret = scoutfs_parallel_restore_write_buf(wri, buf, buf_size, &off, &count);
|
||||
error_exit(ret, "write buf %d", ret);
|
||||
|
||||
if (count > 0) {
|
||||
if (!opts->read_only)
|
||||
ret = pwrite(dev_fd, buf, count, off);
|
||||
else
|
||||
ret = count;
|
||||
error_exit(ret != count, "pwrite count %zu ret %d", count, ret);
|
||||
total += ret;
|
||||
}
|
||||
} while (count > 0);
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
struct gen_inode {
|
||||
struct scoutfs_parallel_restore_inode inode;
|
||||
struct scoutfs_parallel_restore_xattr **xattrs;
|
||||
u64 nr_xattrs;
|
||||
struct scoutfs_parallel_restore_entry **entries;
|
||||
u64 nr_files;
|
||||
u64 nr_entries;
|
||||
};
|
||||
|
||||
static void free_gino(struct gen_inode *gino)
|
||||
{
|
||||
u64 i;
|
||||
|
||||
if (gino) {
|
||||
if (gino->entries) {
|
||||
for (i = 0; i < gino->nr_entries; i++)
|
||||
free(gino->entries[i]);
|
||||
free(gino->entries);
|
||||
}
|
||||
if (gino->xattrs) {
|
||||
for (i = 0; i < gino->nr_xattrs; i++)
|
||||
free(gino->xattrs[i]);
|
||||
free(gino->xattrs);
|
||||
}
|
||||
free(gino);
|
||||
}
|
||||
}
|
||||
|
||||
static struct scoutfs_parallel_restore_xattr *
|
||||
generate_xattr(struct opts *opts, u64 ino, u64 pos, char *name, int name_len, void *value,
|
||||
int value_len)
|
||||
{
|
||||
struct scoutfs_parallel_restore_xattr *xattr;
|
||||
|
||||
xattr = malloc(sizeof(struct scoutfs_parallel_restore_xattr) + name_len + value_len);
|
||||
error_exit(!xattr, "error allocating generated xattr");
|
||||
|
||||
*xattr = (struct scoutfs_parallel_restore_xattr) {
|
||||
.ino = ino,
|
||||
.pos = pos,
|
||||
.name_len = name_len,
|
||||
.value_len = value_len,
|
||||
};
|
||||
|
||||
xattr->name = (void *)(xattr + 1);
|
||||
xattr->value = (void *)(xattr->name + name_len);
|
||||
|
||||
memcpy(xattr->name, name, name_len);
|
||||
if (value_len)
|
||||
memcpy(xattr->value, value, value_len);
|
||||
|
||||
return xattr;
|
||||
}
|
||||
|
||||
/*
 * Allocate a generated inode with the given inode number and mode.  All
 * four timestamps are set to the current CLOCK_REALTIME time.  Regular
 * files also get a fixed set of xattrs attached.
 *
 * Allocation failures terminate the process, so the return value is
 * never NULL.  The caller releases the result with free_gino().
 */
static struct gen_inode *generate_inode(struct opts *opts, u64 ino, mode_t mode)
{
	struct gen_inode *gino;
	struct timespec now;

	clock_gettime(CLOCK_REALTIME, &now);

	gino = calloc(1, sizeof(struct gen_inode));
	error_exit(!gino, "failure allocating generated inode");

	gino->inode = (struct scoutfs_parallel_restore_inode) {
		.ino = ino,
		.mode = mode,
		.atime = now,
		.ctime = now,
		.mtime = now,
		.crtime = now,
	};

	/*
	 * hacky creation of a bunch of xattrs for now.
	 */
	if ((mode & S_IFMT) == S_IFREG) {
#define NV(n, v) { n, sizeof(n) - 1, v, sizeof(v) - 1, }
		struct name_val {
			char *name;
			int len;
			char *value;
			int value_len;
		} nv[] = {
			NV("scoutfs.hide.totl.acct.8314611887310466424.2.0", "1"),
			NV("scoutfs.hide.srch.sam_vol_E01001L6_4", ""),
			NV("scoutfs.hide.sam_reqcopies", ""),
			NV("scoutfs.hide.sam_copy_2", ""),
			NV("scoutfs.hide.totl.acct.F01030L6.8314611887310466424.7.30", "1"),
			NV("scoutfs.hide.sam_copy_1", ""),
			NV("scoutfs.hide.srch.sam_vol_F01030L6_4", ""),
			NV("scoutfs.hide.srch.sam_release_cand", ""),
			NV("scoutfs.hide.sam_restime", ""),
			NV("scoutfs.hide.sam_uuid", ""),
			NV("scoutfs.hide.totl.acct.8314611887310466424.3.0", "1"),
			NV("scoutfs.hide.srch.sam_vol_F01030L6", ""),
			NV("scoutfs.hide.srch.sam_uuid_865939b7-24d6-472f-b85c-7ce7afeb813a", ""),
			NV("scoutfs.hide.srch.sam_vol_E01001L6", ""),
			NV("scoutfs.hide.totl.acct.E01001L6.8314611887310466424.7.1", "1"),
			NV("scoutfs.hide.totl.acct.8314611887310466424.4.0", "1"),
			NV("scoutfs.hide.totl.acct.8314611887310466424.11.0", "1"),
			NV("scoutfs.hide.totl.acct.8314611887310466424.1.0", "1"),
		};
		unsigned int nr = array_size(nv);
		int i;

		gino->xattrs = calloc(nr, sizeof(struct scoutfs_parallel_restore_xattr *));
		/* the array is indexed immediately below, so it must exist */
		error_exit(!gino->xattrs, "failure allocating generated inode xattrs");

		/* the xattr position is simply its index in the table */
		for (i = 0; i < nr; i++)
			gino->xattrs[i] = generate_xattr(opts, ino, i, nv[i].name, nv[i].len,
							 nv[i].value, nv[i].value_len);

		gino->nr_xattrs = nr;
		gino->inode.nr_xattrs = nr;
	}

	return gino;
}
|
||||
|
||||
static struct scoutfs_parallel_restore_entry *
|
||||
generate_entry(struct opts *opts, char *prefix, u64 nr, u64 dir_ino, u64 pos, u64 ino, mode_t mode)
|
||||
{
|
||||
struct scoutfs_parallel_restore_entry *entry;
|
||||
char buf[PATH_MAX];
|
||||
int bytes;
|
||||
|
||||
bytes = snprintf(buf, sizeof(buf), "%s-%llu", prefix, nr);
|
||||
|
||||
entry = malloc(sizeof(struct scoutfs_parallel_restore_entry) + bytes);
|
||||
error_exit(!entry, "error allocating generated entry");
|
||||
|
||||
*entry = (struct scoutfs_parallel_restore_entry) {
|
||||
.dir_ino = dir_ino,
|
||||
.pos = pos,
|
||||
.ino = ino,
|
||||
.mode = mode,
|
||||
.name = (void *)(entry + 1),
|
||||
.name_len = bytes,
|
||||
};
|
||||
|
||||
memcpy(entry->name, buf, bytes);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
/*
 * Build a 64bit value out of two lrand48() draws.  The draws are taken
 * in separate statements because the operands of '|' may be evaluated
 * in either order, which would let the result for a given seed depend
 * on the compiler.  The first draw always becomes the high half.
 *
 * lrand48() only returns non-negative values below 2^31, so bits 31 and
 * 63 of the result are always clear.
 */
static u64 random64(void)
{
	u64 high = lrand48();
	u64 low = lrand48();

	return (high << 32) | low;
}
|
||||
|
||||
/*
 * Return a pseudo-random value in the inclusive range [low, high].
 * Callers are expected to pass low <= high.
 */
static u64 random_range(u64 low, u64 high)
{
	u64 span = high - low + 1;

	/*
	 * The span wraps to 0 when the range covers every u64 value.  A
	 * modulo by zero is undefined, and any draw is already in range.
	 */
	if (span == 0)
		return random64();

	return low + (random64() % span);
}
|
||||
|
||||
/*
 * Generate directory inode dir_ino and all of its entries.  The number
 * of subdir and file entries is drawn from the configured ranges, with
 * no subdirs at all when no_dirs is set.  If the draw would need more
 * than the ino_len inode numbers still available, the remainder is
 * split between subdirs and files instead.
 *
 * Entries take consecutive inode numbers starting at ino_start, subdirs
 * first and files after them.  Only the entries are created here, not
 * the inodes they refer to.  The caller must pass ino_len >= 1.
 */
static struct gen_inode *generate_dir(struct opts *opts, u64 dir_ino, u64 ino_start, u64 ino_len,
				      bool no_dirs)
{
	struct scoutfs_parallel_restore_entry *entry;
	struct gen_inode *gino;
	u64 nr_files;
	u64 nr_dirs;
	u64 total;
	u64 idx;
	bool is_dir;

	nr_dirs = no_dirs ? 0 : random_range(opts->low_dirs, opts->high_dirs);
	nr_files = random_range(opts->low_files, opts->high_files);

	/* the "1 +" is the inode number accounted to this directory */
	if (1 + nr_dirs + nr_files > ino_len) {
		nr_dirs = no_dirs ? 0 : (ino_len - 1) / 2;
		nr_files = (ino_len - 1) - nr_dirs;
	}

	total = nr_dirs + nr_files;

	gino = generate_inode(opts, dir_ino, DIR_MODE);
	error_exit(!gino, "error allocating generated inode");

	gino->inode.nr_subdirs = nr_dirs;
	gino->nr_files = nr_files;

	if (total > 0) {
		gino->entries = calloc(total, sizeof(struct scoutfs_parallel_restore_entry *));
		error_exit(!gino->entries, "error allocating generated inode entries");

		gino->nr_entries = total;
	}

	for (idx = 0; idx < total; idx++) {
		/* the first nr_dirs entries are subdirs, the rest files */
		is_dir = idx < nr_dirs;

		entry = generate_entry(opts, is_dir ? "dir" : "file", ino_start + idx,
				       gino->inode.ino, SCOUTFS_DIRENT_FIRST_POS + idx,
				       ino_start + idx, is_dir ? DIR_MODE : REG_MODE);

		gino->entries[idx] = entry;
		gino->inode.total_entry_name_bytes += entry->name_len;
	}

	return gino;
}
|
||||
|
||||
/*
|
||||
* Restore a generated inode. If it's a directory then we also restore
|
||||
* all its entries. The caller is going to descend into subdir entries and generate
|
||||
* those dir inodes. We have to generate and restore all non-dir inodes referenced
|
||||
* by this inode's entries.
|
||||
*/
|
||||
static void restore_inode(struct opts *opts, struct scoutfs_parallel_restore_writer *wri,
|
||||
struct gen_inode *gino)
|
||||
{
|
||||
struct gen_inode *nondir;
|
||||
int ret;
|
||||
u64 i;
|
||||
|
||||
ret = scoutfs_parallel_restore_add_inode(wri, &gino->inode);
|
||||
error_exit(ret, "thread add root inode %d", ret);
|
||||
|
||||
for (i = 0; i < gino->nr_entries; i++) {
|
||||
ret = scoutfs_parallel_restore_add_entry(wri, gino->entries[i]);
|
||||
error_exit(ret, "thread add entry %d", ret);
|
||||
|
||||
/* caller only needs subdir entries, generate and free others */
|
||||
if ((gino->entries[i]->mode & S_IFMT) != S_IFDIR) {
|
||||
|
||||
nondir = generate_inode(opts, gino->entries[i]->ino,
|
||||
gino->entries[i]->mode);
|
||||
restore_inode(opts, wri, nondir);
|
||||
free_gino(nondir);
|
||||
|
||||
free(gino->entries[i]);
|
||||
if (i != gino->nr_entries - 1)
|
||||
gino->entries[i] = gino->entries[gino->nr_entries - 1];
|
||||
gino->nr_entries--;
|
||||
gino->nr_files--;
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < gino->nr_xattrs; i++) {
|
||||
ret = scoutfs_parallel_restore_add_xattr(wri, gino->xattrs[i]);
|
||||
error_exit(ret, "thread add xattr %d", ret);
|
||||
}
|
||||
}
|
||||
|
||||
struct writer_args {
|
||||
struct list_head head;
|
||||
|
||||
int dev_fd;
|
||||
int pair_fd;
|
||||
|
||||
struct scoutfs_parallel_restore_slice slice;
|
||||
u64 writer_nr;
|
||||
u64 dir_height;
|
||||
u64 ino_start;
|
||||
u64 ino_len;
|
||||
};
|
||||
|
||||
struct write_result {
|
||||
struct scoutfs_parallel_restore_progress prog;
|
||||
struct scoutfs_parallel_restore_slice slice;
|
||||
__le64 files_created;
|
||||
__le64 bytes_written;
|
||||
};
|
||||
|
||||
static void write_bufs_and_send(struct opts *opts, struct scoutfs_parallel_restore_writer *wri,
|
||||
void *buf, size_t buf_size, int dev_fd,
|
||||
struct write_result *res, bool get_slice, int pair_fd)
|
||||
{
|
||||
size_t total;
|
||||
int ret;
|
||||
|
||||
total = write_bufs(opts, wri, buf, buf_size, dev_fd);
|
||||
le64_add_cpu(&res->bytes_written, total);
|
||||
|
||||
ret = scoutfs_parallel_restore_get_progress(wri, &res->prog);
|
||||
error_exit(ret, "get prog %d", ret);
|
||||
|
||||
if (get_slice) {
|
||||
ret = scoutfs_parallel_restore_get_slice(wri, &res->slice);
|
||||
error_exit(ret, "thread get slice %d", ret);
|
||||
}
|
||||
|
||||
ret = write(pair_fd, res, sizeof(struct write_result));
|
||||
error_exit(ret != sizeof(struct write_result), "result send error");
|
||||
|
||||
memset(res, 0, sizeof(struct write_result));
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the number of bytes in toplevel "dir-%llu" entry names for the given
|
||||
* number of writers.
|
||||
*/
|
||||
static u64 topdir_entry_bytes(u64 nr_writers)
|
||||
{
|
||||
u64 bytes = (3 + 1) * nr_writers;
|
||||
u64 limit;
|
||||
u64 done;
|
||||
u64 wid;
|
||||
u64 nr;
|
||||
|
||||
for (done = 0, wid = 1, limit = 10; done < nr_writers; done += nr, wid++, limit *= 10) {
|
||||
nr = min(limit - done, nr_writers - done);
|
||||
bytes += nr * wid;
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
struct dir_pos {
|
||||
struct gen_inode *gino;
|
||||
u64 pos;
|
||||
};
|
||||
|
||||
static void writer_proc(struct opts *opts, struct writer_args *args)
|
||||
{
|
||||
struct scoutfs_parallel_restore_writer *wri = NULL;
|
||||
struct scoutfs_parallel_restore_entry *entry;
|
||||
struct dir_pos *dirs = NULL;
|
||||
struct write_result res;
|
||||
struct gen_inode *gino;
|
||||
void *buf = NULL;
|
||||
u64 level;
|
||||
u64 ino;
|
||||
int ret;
|
||||
|
||||
memset(&res, 0, sizeof(res));
|
||||
|
||||
dirs = calloc(args->dir_height, sizeof(struct dir_pos));
|
||||
error_exit(errno, "error allocating parent dirs "ERRF, ERRA);
|
||||
|
||||
errno = posix_memalign((void **)&buf, 4096, opts->buf_size);
|
||||
error_exit(errno, "error allocating block buf "ERRF, ERRA);
|
||||
|
||||
ret = scoutfs_parallel_restore_create_writer(&wri);
|
||||
error_exit(ret, "create writer %d", ret);
|
||||
|
||||
ret = scoutfs_parallel_restore_add_slice(wri, &args->slice);
|
||||
error_exit(ret, "add slice %d", ret);
|
||||
|
||||
/* writer 0 creates the root dir */
|
||||
if (args->writer_nr == 0) {
|
||||
gino = generate_inode(opts, SCOUTFS_ROOT_INO, DIR_MODE);
|
||||
gino->inode.nr_subdirs = opts->nr_writers;
|
||||
gino->inode.total_entry_name_bytes = topdir_entry_bytes(opts->nr_writers);
|
||||
|
||||
ret = scoutfs_parallel_restore_add_inode(wri, &gino->inode);
|
||||
error_exit(ret, "thread add root inode %d", ret);
|
||||
free_gino(gino);
|
||||
}
|
||||
|
||||
/* create root entry for our top level dir */
|
||||
ino = args->ino_start++;
|
||||
args->ino_len--;
|
||||
|
||||
entry = generate_entry(opts, "top", args->writer_nr,
|
||||
SCOUTFS_ROOT_INO, SCOUTFS_DIRENT_FIRST_POS + args->writer_nr,
|
||||
ino, DIR_MODE);
|
||||
|
||||
ret = scoutfs_parallel_restore_add_entry(wri, entry);
|
||||
error_exit(ret, "thread top entry %d", ret);
|
||||
free(entry);
|
||||
|
||||
level = args->dir_height - 1;
|
||||
|
||||
while (args->ino_len > 0 && level < args->dir_height) {
|
||||
gino = dirs[level].gino;
|
||||
|
||||
/* generate and restore if we follow entries */
|
||||
if (!gino) {
|
||||
gino = generate_dir(opts, ino, args->ino_start, args->ino_len, level == 0);
|
||||
args->ino_start += gino->nr_entries;
|
||||
args->ino_len -= gino->nr_entries;
|
||||
le64_add_cpu(&res.files_created, gino->nr_files);
|
||||
|
||||
restore_inode(opts, wri, gino);
|
||||
dirs[level].gino = gino;
|
||||
}
|
||||
|
||||
if (dirs[level].pos == gino->nr_entries) {
|
||||
/* ascend if we're done with this dir */
|
||||
dirs[level].gino = NULL;
|
||||
dirs[level].pos = 0;
|
||||
free_gino(gino);
|
||||
level++;
|
||||
|
||||
} else {
|
||||
/* otherwise descend into subdir entry */
|
||||
ino = gino->entries[dirs[level].pos]->ino;
|
||||
dirs[level].pos++;
|
||||
level--;
|
||||
}
|
||||
|
||||
/* do a partial write at batch intervals when there's still more to do */
|
||||
if (le64_to_cpu(res.files_created) >= opts->write_batch && args->ino_len > 0)
|
||||
write_bufs_and_send(opts, wri, buf, opts->buf_size, args->dev_fd,
|
||||
&res, false, args->pair_fd);
|
||||
}
|
||||
|
||||
write_bufs_and_send(opts, wri, buf, opts->buf_size, args->dev_fd,
|
||||
&res, true, args->pair_fd);
|
||||
|
||||
scoutfs_parallel_restore_destroy_writer(&wri);
|
||||
|
||||
free(dirs);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
/*
 * Fork a writer process.  The parent returns straight away.  The child
 * asks to be sent SIGHUP when its parent dies, checks that the parent
 * hasn't already changed, runs writer_proc(), and exits without ever
 * returning to the caller.
 */
static void fork_writer(struct opts *opts, struct writer_args *args)
{
	pid_t parent = getpid();
	pid_t pid = fork();
	int ret;

	error_exit(pid == -1, "fork error");

	if (pid == 0) {
		ret = prctl(PR_SET_PDEATHSIG, SIGHUP);
		error_exit(ret < 0, "failed to set parent death sig");

		printf("pid %u getpid() %u parent %u getppid() %u\n",
		       pid, getpid(), parent, getppid());
		/* the parent could have exited before the prctl took effect */
		error_exit(getppid() != parent, "child parent already changed");

		writer_proc(opts, args);
		exit(0);
	}
}
|
||||
|
||||
static int do_restore(struct opts *opts)
|
||||
{
|
||||
struct scoutfs_parallel_restore_writer *wri = NULL;
|
||||
struct scoutfs_parallel_restore_slice *slices = NULL;
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct write_result res;
|
||||
struct writer_args *args;
|
||||
struct timespec begin;
|
||||
struct timespec end;
|
||||
LIST_HEAD(writers);
|
||||
u64 next_ino;
|
||||
u64 ino_per;
|
||||
u64 avg_dirs;
|
||||
u64 avg_files;
|
||||
u64 dir_height;
|
||||
u64 tot_files;
|
||||
u64 tot_bytes;
|
||||
int pair[2] = {-1, -1};
|
||||
float secs;
|
||||
void *buf = NULL;
|
||||
int dev_fd = -1;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
ret = socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
|
||||
error_exit(ret, "socketpair error "ERRF, ERRA);
|
||||
|
||||
dev_fd = open(opts->meta_path, O_DIRECT | (opts->read_only ? O_RDONLY : (O_RDWR|O_EXCL)));
|
||||
error_exit(dev_fd < 0, "error opening '%s': "ERRF, opts->meta_path, ERRA);
|
||||
|
||||
errno = posix_memalign((void **)&super, 4096, SCOUTFS_BLOCK_SM_SIZE) ?:
|
||||
posix_memalign((void **)&buf, 4096, opts->buf_size);
|
||||
error_exit(errno, "error allocating block bufs "ERRF, ERRA);
|
||||
|
||||
ret = pread(dev_fd, super, SCOUTFS_BLOCK_SM_SIZE,
|
||||
SCOUTFS_SUPER_BLKNO << SCOUTFS_BLOCK_SM_SHIFT);
|
||||
error_exit(ret != SCOUTFS_BLOCK_SM_SIZE, "error reading super, ret %d", ret);
|
||||
|
||||
ret = scoutfs_parallel_restore_create_writer(&wri);
|
||||
error_exit(ret, "create writer %d", ret);
|
||||
|
||||
ret = scoutfs_parallel_restore_import_super(wri, super);
|
||||
error_exit(ret, "import super %d", ret);
|
||||
|
||||
slices = calloc(1 + opts->nr_writers, sizeof(struct scoutfs_parallel_restore_slice));
|
||||
error_exit(!slices, "alloc slices");
|
||||
|
||||
scoutfs_parallel_restore_init_slices(wri, slices, 1 + opts->nr_writers);
|
||||
|
||||
ret = scoutfs_parallel_restore_add_slice(wri, &slices[0]);
|
||||
error_exit(ret, "add slices[0] %d", ret);
|
||||
|
||||
next_ino = (SCOUTFS_ROOT_INO | SCOUTFS_LOCK_INODE_GROUP_MASK) + 1;
|
||||
ino_per = opts->total_files / opts->nr_writers;
|
||||
avg_dirs = (opts->low_dirs + opts->high_dirs) / 2;
|
||||
avg_files = (opts->low_files + opts->high_files) / 2;
|
||||
|
||||
dir_height = 1;
|
||||
tot_files = avg_files * opts->nr_writers;
|
||||
|
||||
while (tot_files < opts->total_files) {
|
||||
dir_height++;
|
||||
tot_files *= avg_dirs;
|
||||
}
|
||||
|
||||
dprintf("height %llu tot %llu total %llu\n", dir_height, tot_files, opts->total_files);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &begin);
|
||||
|
||||
/* start each writing process */
|
||||
for (i = 0; i < opts->nr_writers; i++) {
|
||||
args = calloc(1, sizeof(struct writer_args));
|
||||
error_exit(!args, "alloc writer args");
|
||||
|
||||
args->dev_fd = dev_fd;
|
||||
args->pair_fd = pair[1];
|
||||
args->slice = slices[1 + i];
|
||||
args->writer_nr = i;
|
||||
args->dir_height = dir_height;
|
||||
args->ino_start = next_ino;
|
||||
args->ino_len = ino_per;
|
||||
|
||||
list_add_tail(&args->head, &writers);
|
||||
next_ino += ino_per;
|
||||
|
||||
fork_writer(opts, args);
|
||||
}
|
||||
|
||||
/* read results and watch for writers to finish */
|
||||
tot_files = 0;
|
||||
tot_bytes = 0;
|
||||
i = 0;
|
||||
while (i < opts->nr_writers) {
|
||||
ret = read(pair[0], &res, sizeof(struct write_result));
|
||||
error_exit(ret != sizeof(struct write_result), "result read error");
|
||||
|
||||
ret = scoutfs_parallel_restore_add_progress(wri, &res.prog);
|
||||
error_exit(ret, "add thr prog %d", ret);
|
||||
|
||||
if (res.slice.meta_len != 0) {
|
||||
ret = scoutfs_parallel_restore_add_slice(wri, &res.slice);
|
||||
error_exit(ret, "add thr slice %d", ret);
|
||||
i++;
|
||||
}
|
||||
|
||||
tot_files += le64_to_cpu(res.files_created);
|
||||
tot_bytes += le64_to_cpu(res.bytes_written);
|
||||
}
|
||||
|
||||
tot_bytes += write_bufs(opts, wri, buf, opts->buf_size, dev_fd);
|
||||
|
||||
ret = scoutfs_parallel_restore_export_super(wri, super);
|
||||
error_exit(ret, "update super %d", ret);
|
||||
|
||||
if (!opts->read_only) {
|
||||
ret = pwrite(dev_fd, super, SCOUTFS_BLOCK_SM_SIZE,
|
||||
SCOUTFS_SUPER_BLKNO << SCOUTFS_BLOCK_SM_SHIFT);
|
||||
error_exit(ret != SCOUTFS_BLOCK_SM_SIZE, "error writing super, ret %d", ret);
|
||||
}
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &end);
|
||||
|
||||
scoutfs_parallel_restore_destroy_writer(&wri);
|
||||
|
||||
secs = ((float)end.tv_sec + ((float)end.tv_nsec/NSEC_PER_SEC)) -
|
||||
((float)begin.tv_sec + ((float)begin.tv_nsec/NSEC_PER_SEC));
|
||||
printf("created %llu files in %llu bytes and %f secs => %f bytes/file, %f files/sec\n",
|
||||
tot_files, tot_bytes, secs,
|
||||
(float)tot_bytes / tot_files, (float)tot_files / secs);
|
||||
|
||||
if (dev_fd >= 0)
|
||||
close(dev_fd);
|
||||
if (pair[0] >= 0)
|
||||
close(pair[0]);
|
||||
if (pair[1] >= 0)
|
||||
close(pair[1]);
|
||||
free(super);
|
||||
free(slices);
|
||||
free(buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Parse a "LOW:HIGH" option argument.  Without a ':' only the low value
 * is parsed and *high_ret is left untouched.  The string is modified
 * while parsing but restored before returning.
 *
 * Returns 0 on success, or the first non-zero value returned by
 * parse_u64().
 */
static int parse_low_high(char *str, u64 *low_ret, u64 *high_ret)
{
	/* strchr() is the standard equivalent of the legacy index() */
	char *sep = strchr(str, ':');
	int ret = 0;

	if (sep) {
		/* temporarily split the string in two at the separator */
		*sep = '\0';
		ret = parse_u64(sep + 1, high_ret);
	}

	if (ret == 0)
		ret = parse_u64(str, low_ret);

	if (sep)
		*sep = ':';

	return ret;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct opts opts = {
|
||||
.buf_size = (32 * 1024 * 1024),
|
||||
|
||||
.write_batch = 1000000,
|
||||
.low_dirs = 5,
|
||||
.high_dirs = 10,
|
||||
.low_files = 10,
|
||||
.high_files = 20,
|
||||
.total_files = 100,
|
||||
};
|
||||
int ret;
|
||||
int c;
|
||||
|
||||
opts.seed = random64();
|
||||
opts.nr_writers = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
|
||||
while ((c = getopt(argc, argv, "b:d:f:m:n:rs:w:")) != -1) {
|
||||
switch(c) {
|
||||
case 'b':
|
||||
ret = parse_u64(optarg, &opts.write_batch);
|
||||
error_exit(ret, "error parsing -b '%s'\n", optarg);
|
||||
error_exit(opts.write_batch == 0, "-b can't be 0");
|
||||
break;
|
||||
case 'd':
|
||||
ret = parse_low_high(optarg, &opts.low_dirs, &opts.high_dirs);
|
||||
error_exit(ret, "error parsing -d '%s'\n", optarg);
|
||||
break;
|
||||
case 'f':
|
||||
ret = parse_low_high(optarg, &opts.low_files, &opts.high_files);
|
||||
error_exit(ret, "error parsing -f '%s'\n", optarg);
|
||||
break;
|
||||
case 'm':
|
||||
opts.meta_path = strdup(optarg);
|
||||
break;
|
||||
case 'n':
|
||||
ret = parse_u64(optarg, &opts.total_files);
|
||||
error_exit(ret, "error parsing -n '%s'\n", optarg);
|
||||
break;
|
||||
case 'r':
|
||||
opts.read_only = true;
|
||||
break;
|
||||
case 's':
|
||||
ret = parse_u64(optarg, &opts.seed);
|
||||
error_exit(ret, "error parsing -s '%s'\n", optarg);
|
||||
break;
|
||||
case 'w':
|
||||
ret = parse_u64(optarg, &opts.nr_writers);
|
||||
error_exit(ret, "error parsing -w '%s'\n", optarg);
|
||||
break;
|
||||
case '?':
|
||||
printf("Unknown option '%c'\n", optopt);
|
||||
usage();
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
error_exit(opts.low_dirs > opts.high_dirs, "LOW > HIGH in -d %llu:%llu",
|
||||
opts.low_dirs, opts.high_dirs);
|
||||
error_exit(opts.low_files > opts.high_files, "LOW > HIGH in -f %llu:%llu",
|
||||
opts.low_files, opts.high_files);
|
||||
error_exit(!opts.meta_path, "must specify metadata device path with -m");
|
||||
|
||||
printf("recreate with: -d %llu:%llu -f %llu:%llu -n %llu -s %llu -w %llu\n",
|
||||
opts.low_dirs, opts.high_dirs, opts.low_files, opts.high_files,
|
||||
opts.total_files, opts.seed, opts.nr_writers);
|
||||
|
||||
ret = do_restore(&opts);
|
||||
|
||||
free(opts.meta_path);
|
||||
|
||||
return ret == 0 ? 0 : 1;
|
||||
}
|
||||
@@ -7,7 +7,7 @@ FMTIOC_H := format.h ioctl.h
|
||||
FMTIOC_KMOD := $(addprefix ../kmod/src/,$(FMTIOC_H))
|
||||
|
||||
CFLAGS := -Wall -O2 -Werror -D_FILE_OFFSET_BITS=64 -g -msse4.2 \
|
||||
-I src/ -fno-strict-aliasing \
|
||||
-fno-strict-aliasing \
|
||||
-DSCOUTFS_FORMAT_HASH=0x$(SCOUTFS_FORMAT_HASH)LLU
|
||||
|
||||
ifneq ($(wildcard $(firstword $(FMTIOC_KMOD))),)
|
||||
@@ -15,11 +15,12 @@ CFLAGS += -I../kmod/src
|
||||
endif
|
||||
|
||||
BIN := src/scoutfs
|
||||
OBJ_DIRS := src src/check
|
||||
OBJ := $(foreach dir,$(OBJ_DIRS),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c)))
|
||||
DEPS := $(foreach dir,$(OBJ_DIRS),$(wildcard $(dir)/*.d))
|
||||
OBJ := $(patsubst %.c,%.o,$(wildcard src/*.c))
|
||||
DEPS := $(wildcard */*.d)
|
||||
|
||||
all: $(BIN)
|
||||
AR := src/scoutfs_parallel_restore.a
|
||||
|
||||
all: $(BIN) $(AR)
|
||||
|
||||
ifneq ($(DEPS),)
|
||||
-include $(DEPS)
|
||||
@@ -37,6 +38,10 @@ $(BIN): $(OBJ)
|
||||
$(QU) [BIN $@]
|
||||
$(VE)gcc -o $@ $^ -luuid -lm -lcrypto -lblkid
|
||||
|
||||
$(AR): $(OBJ)
|
||||
$(QU) [AR $@]
|
||||
$(VE)ar rcs $@ $^
|
||||
|
||||
%.o %.d: %.c Makefile sparse.sh
|
||||
$(QU) [CC $<]
|
||||
$(VE)gcc $(CFLAGS) -MD -MP -MF $*.d -c $< -o $*.o
|
||||
|
||||
@@ -54,6 +54,8 @@ cp man/*.8.gz $RPM_BUILD_ROOT%{_mandir}/man8/.
|
||||
install -m 755 -D src/scoutfs $RPM_BUILD_ROOT%{_sbindir}/scoutfs
|
||||
install -m 644 -D src/ioctl.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/ioctl.h
|
||||
install -m 644 -D src/format.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/format.h
|
||||
install -m 644 -D src/parallel_restore.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/parallel_restore.h
|
||||
install -m 644 -D src/scoutfs_parallel_restore.a $RPM_BUILD_ROOT%{_libdir}/scoutfs/libscoutfs_parallel_restore.a
|
||||
install -m 755 -D fenced/scoutfs-fenced $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/scoutfs-fenced
|
||||
install -m 644 -D fenced/scoutfs-fenced.service $RPM_BUILD_ROOT%{_unitdir}/scoutfs-fenced.service
|
||||
install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-fenced.conf.example
|
||||
@@ -70,6 +72,7 @@ install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdi
|
||||
%files -n scoutfs-devel
|
||||
%defattr(644,root,root,755)
|
||||
%{_includedir}/scoutfs
|
||||
%{_libdir}/scoutfs
|
||||
|
||||
%clean
|
||||
rm -rf %{buildroot}
|
||||
|
||||
@@ -10,11 +10,6 @@
|
||||
* Just a quick simple native bitmap.
|
||||
*/
|
||||
|
||||
/* Return 1 if bit nr is set in the bitmap and 0 if it is clear. */
int test_bit(unsigned long *bits, u64 nr)
{
	unsigned long word = bits[nr / BITS_PER_LONG];
	unsigned long mask = 1UL << (nr & (BITS_PER_LONG - 1));

	return (word & mask) != 0;
}
|
||||
|
||||
void set_bit(unsigned long *bits, u64 nr)
|
||||
{
|
||||
bits[nr / BITS_PER_LONG] |= 1UL << (nr & (BITS_PER_LONG - 1));
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#ifndef _BITMAP_H_
|
||||
#define _BITMAP_H_
|
||||
|
||||
int test_bit(unsigned long *bits, u64 nr);
|
||||
void set_bit(unsigned long *bits, u64 nr);
|
||||
void clear_bit(unsigned long *bits, u64 nr);
|
||||
u64 find_next_set_bit(unsigned long *start, u64 from, u64 total);
|
||||
|
||||
20
utils/src/bloom.c
Normal file
20
utils/src/bloom.c
Normal file
@@ -0,0 +1,20 @@
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "hash.h"
|
||||
#include "bloom.h"
|
||||
|
||||
void calc_bloom_nrs(struct scoutfs_key *key, unsigned int *nrs)
|
||||
{
|
||||
u64 hash;
|
||||
int i;
|
||||
|
||||
hash = scoutfs_hash64(key, sizeof(struct scoutfs_key));
|
||||
|
||||
for (i = 0; i < SCOUTFS_FOREST_BLOOM_NRS; i++) {
|
||||
nrs[i] = (u32)hash % SCOUTFS_FOREST_BLOOM_BITS;
|
||||
hash >>= SCOUTFS_FOREST_BLOOM_FUNC_BITS;
|
||||
}
|
||||
}
|
||||
6
utils/src/bloom.h
Normal file
6
utils/src/bloom.h
Normal file
@@ -0,0 +1,6 @@
|
||||
#ifndef _BLOOM_H_
|
||||
#define _BLOOM_H_
|
||||
|
||||
void calc_bloom_nrs(struct scoutfs_key *key, unsigned int *nrs);
|
||||
|
||||
#endif
|
||||
@@ -8,7 +8,7 @@
|
||||
#include "leaf_item_hash.h"
|
||||
#include "btree.h"
|
||||
|
||||
static void init_block(struct scoutfs_btree_block *bt, int level)
|
||||
void btree_init_block(struct scoutfs_btree_block *bt, int level)
|
||||
{
|
||||
int free;
|
||||
|
||||
@@ -33,7 +33,7 @@ void btree_init_root_single(struct scoutfs_btree_root *root,
|
||||
|
||||
memset(bt, 0, SCOUTFS_BLOCK_LG_SIZE);
|
||||
|
||||
init_block(bt, 0);
|
||||
btree_init_block(bt, 0);
|
||||
}
|
||||
|
||||
static void *alloc_val(struct scoutfs_btree_block *bt, int len)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#ifndef _BTREE_H_
|
||||
#define _BTREE_H_
|
||||
|
||||
void btree_init_block(struct scoutfs_btree_block *bt, int level);
|
||||
void btree_init_root_single(struct scoutfs_btree_root *root,
|
||||
struct scoutfs_btree_block *bt,
|
||||
u64 seq, u64 blkno);
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "bitmap.h"
|
||||
#include "key.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "block.h"
|
||||
#include "btree.h"
|
||||
#include "extent.h"
|
||||
#include "iter.h"
|
||||
#include "sns.h"
|
||||
|
||||
/*
|
||||
* We check the list blocks serially.
|
||||
*
|
||||
* XXX:
|
||||
* - compare ref seqs
|
||||
* - detect cycles?
|
||||
*/
|
||||
/*
 * Walk the chain of alloc list blocks starting at the head's ref and
 * call the callback once for each list block's own blkno, with a length
 * of one.  Returns 0 once a zero ref ends the chain, the translated
 * callback error, or the error from reading a list block.
 */
int alloc_list_meta_iter(struct scoutfs_alloc_list_head *lhead, extent_cb_t cb, void *cb_arg)
{
	struct scoutfs_alloc_list_block *list_blk;
	struct scoutfs_block_ref cur = lhead->ref;
	struct block *blk = NULL;
	u64 blkno;
	int err;

	while (cur.blkno) {
		blkno = le64_to_cpu(cur.blkno);

		/* report the list block itself before reading it */
		err = cb(blkno, 1, cb_arg);
		if (err < 0)
			return xlate_iter_errno(err);

		err = block_get(&blk, blkno, 0);
		if (err < 0)
			return err;

		list_blk = block_buf(blk);
		/* XXX verify block */
		/* XXX sort? maybe */

		/* copy the next ref out before dropping our block reference */
		cur = list_blk->next;

		block_put(&blk);
	}

	return 0;
}
|
||||
|
||||
int alloc_root_meta_iter(struct scoutfs_alloc_root *root, extent_cb_t cb, void *cb_arg)
|
||||
{
|
||||
return btree_meta_iter(&root->root, cb, cb_arg);
|
||||
}
|
||||
|
||||
/*
 * Walk the chain of alloc list blocks and call the callback for every
 * blkno stored in each block's blknos array, each as a single-block
 * extent.  Iteration stops at the first negative callback return, which
 * is translated before being returned.
 */
int alloc_list_extent_iter(struct scoutfs_alloc_list_head *lhead, extent_cb_t cb, void *cb_arg)
{
	struct scoutfs_alloc_list_block *list_blk;
	struct scoutfs_block_ref cur = lhead->ref;
	struct block *blk = NULL;
	u64 blkno;
	int err;
	int i;

	while (cur.blkno) {
		blkno = le64_to_cpu(cur.blkno);

		err = block_get(&blk, blkno, 0);
		if (err < 0)
			return err;

		sns_push("alloc_list_block", blkno, 0);

		list_blk = block_buf(blk);
		/* XXX verify block */
		/* XXX sort? maybe */

		/* the loop condition stops as soon as a callback fails */
		err = 0;
		for (i = 0; err >= 0 && i < le32_to_cpu(list_blk->nr); i++) {
			blkno = le64_to_cpu(list_blk->blknos[le32_to_cpu(list_blk->start) + i]);
			err = cb(blkno, 1, cb_arg);
		}

		/* always release the block and pop the sns entry, even on error */
		cur = list_blk->next;

		block_put(&blk);
		sns_pop();

		if (err < 0)
			return xlate_iter_errno(err);
	}

	return 0;
}
|
||||
|
||||
static bool valid_free_extent_key(struct scoutfs_key *key)
|
||||
{
|
||||
return (key->sk_zone == SCOUTFS_FREE_EXTENT_BLKNO_ZONE ||
|
||||
key->sk_zone == SCOUTFS_FREE_EXTENT_ORDER_ZONE) &&
|
||||
(!key->_sk_fourth && !key->sk_type &&
|
||||
(key->sk_zone == SCOUTFS_FREE_EXTENT_ORDER_ZONE || !key->_sk_third));
|
||||
}
|
||||
|
||||
/*
 * Btree item callback that turns a BLKNO free extent key into a
 * (start, len) extent and hands it to the caller's extent callback.
 * Items with values or malformed keys return -EIO, and the first ORDER
 * zone key returns -ECHECK_ITER_DONE.
 */
static int free_item_cb(struct scoutfs_key *key, void *val, u16 val_len, void *cb_arg)
{
	struct extent_cb_arg_t *ecba = cb_arg;
	u64 len;
	u64 start;

	/* XXX not sure these eios are what we want */
	if (val_len != 0 || !valid_free_extent_key(key))
		return -EIO;

	if (key->sk_zone == SCOUTFS_FREE_EXTENT_ORDER_ZONE)
		return -ECHECK_ITER_DONE;

	/* the key stores the last block and the length of the extent */
	len = le64_to_cpu(key->skfb_len);
	start = le64_to_cpu(key->skfb_end) - len + 1;

	return ecba->cb(start, len, ecba->cb_arg);
}
|
||||
|
||||
/*
|
||||
* Call the callback with each of the primary BLKNO free extents stored
|
||||
* in items in the given alloc root.  It doesn't visit the secondary
|
||||
* ORDER extents.
|
||||
*/
|
||||
int alloc_root_extent_iter(struct scoutfs_alloc_root *root, extent_cb_t cb, void *cb_arg)
|
||||
{
|
||||
struct extent_cb_arg_t ecba = { .cb = cb, .cb_arg = cb_arg };
|
||||
|
||||
return btree_item_iter(&root->root, free_item_cb, &ecba);
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_ALLOC_H
|
||||
#define _SCOUTFS_UTILS_CHECK_ALLOC_H
|
||||
|
||||
#include "extent.h"
|
||||
|
||||
int alloc_list_meta_iter(struct scoutfs_alloc_list_head *lhead, extent_cb_t cb, void *cb_arg);
|
||||
int alloc_root_meta_iter(struct scoutfs_alloc_root *root, extent_cb_t cb, void *cb_arg);
|
||||
|
||||
int alloc_list_extent_iter(struct scoutfs_alloc_list_head *lhead, extent_cb_t cb, void *cb_arg);
|
||||
int alloc_root_extent_iter(struct scoutfs_alloc_root *root, extent_cb_t cb, void *cb_arg);
|
||||
|
||||
#endif
|
||||
@@ -1,564 +0,0 @@
|
||||
#define _ISOC11_SOURCE /* aligned_alloc */
|
||||
#define _DEFAULT_SOURCE /* syscall() */
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <linux/aio_abi.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "list.h"
|
||||
#include "cmp.h"
|
||||
#include "hash.h"
|
||||
|
||||
#include "block.h"
|
||||
#include "debug.h"
|
||||
#include "eno.h"
|
||||
|
||||
static struct block_data {
|
||||
struct list_head *hash_lists;
|
||||
size_t hash_nr;
|
||||
|
||||
struct list_head active_head;
|
||||
struct list_head inactive_head;
|
||||
struct list_head dirty_list;
|
||||
size_t nr_active;
|
||||
size_t nr_inactive;
|
||||
size_t nr_dirty;
|
||||
|
||||
int meta_fd;
|
||||
size_t max_cached;
|
||||
size_t nr_events;
|
||||
|
||||
aio_context_t ctx;
|
||||
struct iocb *iocbs;
|
||||
struct iocb **iocbps;
|
||||
struct io_event *events;
|
||||
} global_bdat;
|
||||
|
||||
struct block {
|
||||
struct list_head hash_head;
|
||||
struct list_head lru_head;
|
||||
struct list_head dirty_head;
|
||||
struct list_head submit_head;
|
||||
unsigned long refcount;
|
||||
unsigned long uptodate:1,
|
||||
active:1;
|
||||
u64 blkno;
|
||||
void *buf;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
#define BLK_FMT \
|
||||
"blkno %llu rc %ld d %u a %u"
|
||||
#define BLK_ARG(blk) \
|
||||
(blk)->blkno, (blk)->refcount, !list_empty(&(blk)->dirty_head), blk->active
|
||||
#define debug_blk(blk, fmt, args...) \
|
||||
debug(fmt " " BLK_FMT, ##args, BLK_ARG(blk))
|
||||
|
||||
/*
|
||||
* This just allocates and initializes the block.  The caller is
|
||||
* responsible for putting it on the appropriate initial lists and
|
||||
* managing refcounts.
|
||||
*/
|
||||
static struct block *alloc_block(struct block_data *bdat, u64 blkno, size_t size)
{
	struct block *blk = calloc(1, sizeof(struct block));

	if (!blk)
		return NULL;

	blk->buf = aligned_alloc(4096, size); /* XXX static alignment :/ */
	if (!blk->buf) {
		free(blk);
		return NULL;
	}

	/* every list head starts out empty, the caller links the block in */
	INIT_LIST_HEAD(&blk->hash_head);
	INIT_LIST_HEAD(&blk->lru_head);
	INIT_LIST_HEAD(&blk->dirty_head);
	INIT_LIST_HEAD(&blk->submit_head);
	blk->blkno = blkno;
	blk->size = size;

	return blk;
}
|
||||
|
||||
static void free_block(struct block_data *bdat, struct block *blk)
|
||||
{
|
||||
debug_blk(blk, "free");
|
||||
|
||||
if (!list_empty(&blk->lru_head)) {
|
||||
if (blk->active)
|
||||
bdat->nr_active--;
|
||||
else
|
||||
bdat->nr_inactive--;
|
||||
list_del(&blk->lru_head);
|
||||
}
|
||||
|
||||
if (!list_empty(&blk->dirty_head)) {
|
||||
bdat->nr_dirty--;
|
||||
list_del(&blk->dirty_head);
|
||||
}
|
||||
|
||||
if (!list_empty(&blk->hash_head))
|
||||
list_del(&blk->hash_head);
|
||||
|
||||
if (!list_empty(&blk->submit_head))
|
||||
list_del(&blk->submit_head);
|
||||
|
||||
free(blk->buf);
|
||||
free(blk);
|
||||
}
|
||||
|
||||
static bool blk_is_dirty(struct block *blk)
|
||||
{
|
||||
return !list_empty(&blk->dirty_head);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rebalance the cache.
|
||||
*
|
||||
* First we shrink the cache to limit it to max_cached blocks.
|
||||
* Logically, we walk from oldest to newest in the inactive list and
|
||||
* then in the active list. Since these lists are physically one
|
||||
* list_head list we achieve this with a reverse walk starting from the
|
||||
* active head.
|
||||
*
|
||||
* Then we rebalance the size of the two lists.  The constraint is that
|
||||
* we don't let the active list grow larger than the inactive list. We
|
||||
* move blocks from the oldest tail of the active list to the newest
|
||||
* head of the inactive list.
|
||||
*
|
||||
* <- [active head] <-> [ .. active list .. ] <-> [inactive head] <-> [ .. inactive list .. ] ->
|
||||
*/
|
||||
static void rebalance_cache(struct block_data *bdat)
|
||||
{
|
||||
struct block *blk;
|
||||
struct block *blk_;
|
||||
|
||||
list_for_each_entry_safe_reverse(blk, blk_, &bdat->active_head, lru_head) {
|
||||
if ((bdat->nr_active + bdat->nr_inactive) < bdat->max_cached)
|
||||
break;
|
||||
|
||||
if (&blk->lru_head == &bdat->inactive_head || blk->refcount > 0 ||
|
||||
blk_is_dirty(blk))
|
||||
continue;
|
||||
|
||||
free_block(bdat, blk);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe_reverse(blk, blk_, &bdat->inactive_head, lru_head) {
|
||||
if (bdat->nr_active <= bdat->nr_inactive || &blk->lru_head == &bdat->active_head)
|
||||
break;
|
||||
|
||||
list_move(&blk->lru_head, &bdat->inactive_head);
|
||||
blk->active = 0;
|
||||
bdat->nr_active--;
|
||||
bdat->nr_inactive++;
|
||||
}
|
||||
}
|
||||
|
||||
static void make_active(struct block_data *bdat, struct block *blk)
|
||||
{
|
||||
if (!blk->active) {
|
||||
if (!list_empty(&blk->lru_head)) {
|
||||
list_move(&blk->lru_head, &bdat->active_head);
|
||||
bdat->nr_inactive--;
|
||||
} else {
|
||||
list_add(&blk->lru_head, &bdat->active_head);
|
||||
}
|
||||
|
||||
blk->active = 1;
|
||||
bdat->nr_active++;
|
||||
}
|
||||
}
|
||||
|
||||
static int compar_iocbp(const void *A, const void *B)
|
||||
{
|
||||
struct iocb *a = *(struct iocb **)A;
|
||||
struct iocb *b = *(struct iocb **)B;
|
||||
|
||||
return scoutfs_cmp(a->aio_offset, b->aio_offset);
|
||||
}
|
||||
|
||||
static int submit_and_wait(struct block_data *bdat, struct list_head *list)
|
||||
{
|
||||
struct io_event *event;
|
||||
struct iocb *iocb;
|
||||
struct block *blk;
|
||||
int ret;
|
||||
int err;
|
||||
int nr;
|
||||
int i;
|
||||
|
||||
err = 0;
|
||||
nr = 0;
|
||||
list_for_each_entry(blk, list, submit_head) {
|
||||
iocb = &bdat->iocbs[nr];
|
||||
bdat->iocbps[nr] = iocb;
|
||||
|
||||
memset(iocb, 0, sizeof(struct iocb));
|
||||
|
||||
iocb->aio_data = (intptr_t)blk;
|
||||
iocb->aio_lio_opcode = blk_is_dirty(blk) ? IOCB_CMD_PWRITE : IOCB_CMD_PREAD;
|
||||
iocb->aio_fildes = bdat->meta_fd;
|
||||
iocb->aio_buf = (intptr_t)blk->buf;
|
||||
iocb->aio_nbytes = blk->size;
|
||||
iocb->aio_offset = blk->blkno * blk->size;
|
||||
|
||||
nr++;
|
||||
|
||||
debug_blk(blk, "submit");
|
||||
|
||||
if ((nr < bdat->nr_events) && blk->submit_head.next != list)
|
||||
continue;
|
||||
|
||||
qsort(bdat->iocbps, nr, sizeof(bdat->iocbps[0]), compar_iocbp);
|
||||
|
||||
ret = syscall(__NR_io_submit, bdat->ctx, nr, bdat->iocbps);
|
||||
if (ret != nr) {
|
||||
if (ret >= 0)
|
||||
errno = EIO;
|
||||
ret = -errno;
|
||||
printf("fatal system error submitting async IO: "ENO_FMT"\n",
|
||||
ENO_ARG(-ret));
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = syscall(__NR_io_getevents, bdat->ctx, nr, nr, bdat->events, NULL);
|
||||
if (ret != nr) {
|
||||
if (ret >= 0)
|
||||
errno = EIO;
|
||||
ret = -errno;
|
||||
printf("fatal system error getting IO events: "ENO_FMT"\n",
|
||||
ENO_ARG(-ret));
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
for (i = 0; i < nr; i++) {
|
||||
event = &bdat->events[i];
|
||||
iocb = (struct iocb *)(intptr_t)event->obj;
|
||||
blk = (struct block *)(intptr_t)event->data;
|
||||
|
||||
debug_blk(blk, "complete res %lld", (long long)event->res);
|
||||
|
||||
if (event->res >= 0 && event->res != blk->size)
|
||||
event->res = -EIO;
|
||||
|
||||
/* io errors are fatal */
|
||||
if (event->res < 0) {
|
||||
ret = event->res;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (iocb->aio_lio_opcode == IOCB_CMD_PREAD) {
|
||||
blk->uptodate = 1;
|
||||
} else {
|
||||
list_del_init(&blk->dirty_head);
|
||||
bdat->nr_dirty--;
|
||||
}
|
||||
}
|
||||
nr = 0;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret ?: err;
|
||||
}
|
||||
|
||||
static void inc_refcount(struct block *blk)
|
||||
{
|
||||
blk->refcount++;
|
||||
}
|
||||
|
||||
void block_put(struct block **blkp)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
struct block *blk = *blkp;
|
||||
|
||||
if (blk) {
|
||||
blk->refcount--;
|
||||
*blkp = NULL;
|
||||
|
||||
rebalance_cache(bdat);
|
||||
}
|
||||
}
|
||||
|
||||
static struct list_head *hash_bucket(struct block_data *bdat, u64 blkno)
|
||||
{
|
||||
u32 hash = scoutfs_hash32(&blkno, sizeof(blkno));
|
||||
|
||||
return &bdat->hash_lists[hash % bdat->hash_nr];
|
||||
}
|
||||
|
||||
/*
 * Return the cached block for the given blkno, allocating and inserting
 * a new block if it isn't cached.  The block size is chosen by BF_SM in
 * the flags.  A newly allocated block is hashed and starts on the
 * inactive list.  The returned block has its refcount incremented;
 * NULL is returned if allocation fails.
 */
static struct block *get_or_alloc(struct block_data *bdat, u64 blkno, int bf)
{
	struct list_head *bucket = hash_bucket(bdat, blkno);
	struct block *search;
	struct block *blk;
	size_t size;

	size = (bf & BF_SM) ? SCOUTFS_BLOCK_SM_SIZE : SCOUTFS_BLOCK_LG_SIZE;

	blk = NULL;
	list_for_each_entry(search, bucket, hash_head) {
		/*
		 * The block numbers must be equal.  Testing them with a
		 * logical AND matched any non-zero block that happened
		 * to share the hash bucket.
		 */
		if (search->blkno == blkno && search->size == size) {
			blk = search;
			break;
		}
	}

	if (!blk) {
		blk = alloc_block(bdat, blkno, size);
		if (blk) {
			list_add(&blk->hash_head, bucket);
			list_add(&blk->lru_head, &bdat->inactive_head);
			bdat->nr_inactive++;
		}
	}
	if (blk)
		inc_refcount(blk);

	return blk;
}
|
||||
|
||||
/*
|
||||
* Get a block.
|
||||
*
|
||||
* The caller holds a refcount to the block while it's in use that
|
||||
* prevents it from being removed from the cache. It must be dropped
|
||||
* with block_put();
|
||||
*/
|
||||
int block_get(struct block **blk_ret, u64 blkno, int bf)
{
	struct block_data *bdat = &global_bdat;
	struct block *blk;
	LIST_HEAD(list);
	int ret;

	blk = get_or_alloc(bdat, blkno, bf);
	if (!blk) {
		ret = -ENOMEM;
		goto out;
	}

	/* zeroed contents are as valid as read contents */
	if ((bf & BF_ZERO)) {
		memset(blk->buf, 0, blk->size);
		blk->uptodate = 1;
	}

	/* the caller promises to fill the buffer, so don't bother reading */
	if (bf & BF_OVERWRITE)
		blk->uptodate = 1;

	/* read the single block synchronously through a private submit list */
	if (!blk->uptodate) {
		list_add(&blk->submit_head, &list);
		ret = submit_and_wait(bdat, &list);
		list_del_init(&blk->submit_head);
		if (ret < 0)
			goto out;
	}

	if ((bf & BF_DIRTY) && !blk_is_dirty(blk)) {
		/*
		 * Append the block's node to the global dirty list.  The
		 * arguments are (new entry, list head); passing them the
		 * other way around relinks the global head onto this one
		 * block and drops earlier dirty blocks from the list.
		 */
		list_add_tail(&blk->dirty_head, &bdat->dirty_list);
		bdat->nr_dirty++;
	}

	make_active(bdat, blk);

	rebalance_cache(bdat);
	ret = 0;
out:
	/* block_put() clears blk, so callers see a NULL block on error */
	if (ret < 0)
		block_put(&blk);
	*blk_ret = blk;
	return ret;
}
|
||||
|
||||
void *block_buf(struct block *blk)
|
||||
{
|
||||
return blk->buf;
|
||||
}
|
||||
|
||||
size_t block_size(struct block *blk)
|
||||
{
|
||||
return blk->size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the block from the cache, regardless of if it was free or not.
|
||||
* This is used to avoid writing blocks which were dirtied but then
|
||||
* later freed.
|
||||
*
|
||||
* The block is immediately freed and can't be referenced after this
|
||||
* returns.
|
||||
*/
|
||||
void block_drop(struct block **blkp)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
|
||||
free_block(bdat, *blkp);
|
||||
*blkp = NULL;
|
||||
rebalance_cache(bdat);
|
||||
}
|
||||
|
||||
/*
|
||||
* This doesn't quite work for mixing large and small blocks, but that's
|
||||
* fine, we never do that.
|
||||
*/
|
||||
static int compar_u64(const void *A, const void *B)
|
||||
{
|
||||
u64 a = *((u64 *)A);
|
||||
u64 b = *((u64 *)B);
|
||||
|
||||
return scoutfs_cmp(a, b);
|
||||
}
|
||||
|
||||
/*
|
||||
* This read-ahead is synchronous and errors are ignored. If any of the
|
||||
* blknos aren't present in the cache then we issue concurrent reads for
|
||||
* them and wait. Any existing cached blocks will be left as is.
|
||||
*
|
||||
* We might be trying to read a lot more than the number of events so we
|
||||
* sort the caller's blknos before iterating over them rather than
|
||||
* relying on submission sorting the blocks in each submitted set.
|
||||
*/
|
||||
void block_readahead(u64 *blknos, size_t nr)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
struct block *blk;
|
||||
struct block *blk_;
|
||||
LIST_HEAD(list);
|
||||
size_t i;
|
||||
|
||||
if (nr == 0)
|
||||
return;
|
||||
|
||||
qsort(blknos, nr, sizeof(blknos[0]), compar_u64);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
blk = get_or_alloc(bdat, blknos[i], 0);
|
||||
if (blk) {
|
||||
if (!blk->uptodate)
|
||||
list_add_tail(&blk->submit_head, &list);
|
||||
else
|
||||
block_put(&blk);
|
||||
}
|
||||
}
|
||||
|
||||
(void)submit_and_wait(bdat, &list);
|
||||
|
||||
list_for_each_entry_safe(blk, blk_, &list, submit_head) {
|
||||
list_del_init(&blk->submit_head);
|
||||
block_put(&blk);
|
||||
}
|
||||
|
||||
rebalance_cache(bdat);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller's block changes form a consistent transaction. If the amount of dirty
|
||||
* blocks is large enough we issue a write.
|
||||
*/
|
||||
int block_try_commit(bool force)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
struct block *blk;
|
||||
struct block *blk_;
|
||||
LIST_HEAD(list);
|
||||
int ret;
|
||||
|
||||
if (!force && bdat->nr_dirty < bdat->nr_events)
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(blk, &bdat->dirty_list, dirty_head) {
|
||||
list_add_tail(&blk->submit_head, &list);
|
||||
inc_refcount(blk);
|
||||
}
|
||||
|
||||
ret = submit_and_wait(bdat, &list);
|
||||
|
||||
list_for_each_entry_safe(blk, blk_, &list, submit_head) {
|
||||
list_del_init(&blk->submit_head);
|
||||
block_put(&blk);
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
printf("error writing dirty transaction blocks\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = block_get(&blk, SCOUTFS_SUPER_BLKNO, BF_SM | BF_OVERWRITE | BF_DIRTY);
|
||||
if (ret == 0) {
|
||||
list_add(&blk->submit_head, &list);
|
||||
ret = submit_and_wait(bdat, &list);
|
||||
list_del_init(&blk->submit_head);
|
||||
block_put(&blk);
|
||||
} else {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
if (ret < 0)
|
||||
printf("error writing super block to commit transaction\n");
|
||||
|
||||
out:
|
||||
rebalance_cache(bdat);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int block_setup(int meta_fd, size_t max_cached_bytes, size_t max_dirty_bytes)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
bdat->max_cached = DIV_ROUND_UP(max_cached_bytes, SCOUTFS_BLOCK_LG_SIZE);
|
||||
bdat->hash_nr = bdat->max_cached / 4;
|
||||
bdat->nr_events = DIV_ROUND_UP(max_dirty_bytes, SCOUTFS_BLOCK_LG_SIZE);
|
||||
|
||||
bdat->iocbs = calloc(bdat->nr_events, sizeof(bdat->iocbs[0]));
|
||||
bdat->iocbps = calloc(bdat->nr_events, sizeof(bdat->iocbps[0]));
|
||||
bdat->events = calloc(bdat->nr_events, sizeof(bdat->events[0]));
|
||||
bdat->hash_lists = calloc(bdat->hash_nr, sizeof(bdat->hash_lists[0]));
|
||||
if (!bdat->iocbs || !bdat->iocbps || !bdat->events || !bdat->hash_lists) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&bdat->active_head);
|
||||
INIT_LIST_HEAD(&bdat->inactive_head);
|
||||
INIT_LIST_HEAD(&bdat->dirty_list);
|
||||
bdat->meta_fd = meta_fd;
|
||||
list_add(&bdat->inactive_head, &bdat->active_head);
|
||||
|
||||
for (i = 0; i < bdat->hash_nr; i++)
|
||||
INIT_LIST_HEAD(&bdat->hash_lists[i]);
|
||||
|
||||
ret = syscall(__NR_io_setup, bdat->nr_events, &bdat->ctx);
|
||||
|
||||
out:
|
||||
if (ret < 0) {
|
||||
free(bdat->iocbs);
|
||||
free(bdat->iocbps);
|
||||
free(bdat->events);
|
||||
free(bdat->hash_lists);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void block_shutdown(void)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
|
||||
syscall(SYS_io_destroy, bdat->ctx);
|
||||
|
||||
free(bdat->iocbs);
|
||||
free(bdat->iocbps);
|
||||
free(bdat->events);
|
||||
free(bdat->hash_lists);
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_BLOCK_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_BLOCK_H_
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
struct block;
|
||||
|
||||
#include "sparse.h"
|
||||
|
||||
/* block flags passed to block_get() */
|
||||
enum {
|
||||
BF_ZERO = (1 << 0), /* zero contents buf as block is returned */
|
||||
BF_DIRTY = (1 << 1), /* block will be written with transaction */
|
||||
BF_SM = (1 << 2), /* small 4k block instead of large 64k block */
|
||||
BF_OVERWRITE = (1 << 3), /* caller will overwrite contents, don't read */
|
||||
};
|
||||
|
||||
int block_get(struct block **blk_ret, u64 blkno, int bf);
|
||||
void block_put(struct block **blkp);
|
||||
|
||||
void *block_buf(struct block *blk);
|
||||
size_t block_size(struct block *blk);
|
||||
void block_drop(struct block **blkp);
|
||||
|
||||
void block_readahead(u64 *blknos, size_t nr);
|
||||
int block_try_commit(bool force);
|
||||
|
||||
int block_setup(int meta_fd, size_t max_cached_bytes, size_t max_dirty_bytes);
|
||||
void block_shutdown(void);
|
||||
|
||||
#endif
|
||||
@@ -1,209 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "key.h"
|
||||
#include "avl.h"
|
||||
|
||||
#include "block.h"
|
||||
#include "btree.h"
|
||||
#include "extent.h"
|
||||
#include "iter.h"
|
||||
#include "sns.h"
|
||||
#include "meta.h"
|
||||
#include "problem.h"
|
||||
|
||||
static inline void *item_val(struct scoutfs_btree_block *bt, struct scoutfs_btree_item *item)
|
||||
{
|
||||
return (void *)bt + le16_to_cpu(item->val_off);
|
||||
}
|
||||
|
||||
static void readahead_refs(struct scoutfs_btree_block *bt)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct scoutfs_block_ref *ref;
|
||||
u64 *blknos;
|
||||
u64 blkno;
|
||||
u16 valid = 0;
|
||||
u16 nr = le16_to_cpu(bt->nr_items);
|
||||
int i;
|
||||
|
||||
blknos = calloc(nr, sizeof(blknos[0]));
|
||||
if (!blknos)
|
||||
return;
|
||||
|
||||
node = avl_first(&bt->item_root);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
item = container_of(node, struct scoutfs_btree_item, node);
|
||||
ref = item_val(bt, item);
|
||||
blkno = le64_to_cpu(ref->blkno);
|
||||
|
||||
if (valid_meta_blkno(blkno))
|
||||
blknos[valid++] = blkno;
|
||||
|
||||
node = avl_next(&bt->item_root, &item->node);
|
||||
}
|
||||
|
||||
if (valid > 0)
|
||||
block_readahead(blknos, valid);
|
||||
free(blknos);
|
||||
}
|
||||
|
||||
/*
|
||||
* Call the callback on the referenced block. Then if the block
|
||||
* contains references read it and recurse into all its references.
|
||||
*/
|
||||
static int btree_ref_meta_iter(struct scoutfs_block_ref *ref, unsigned level, extent_cb_t cb,
			       void *cb_arg)
{
	struct scoutfs_btree_item *item;
	struct scoutfs_btree_block *bt;
	struct scoutfs_avl_node *node;
	struct block *blk = NULL;
	u64 blkno;
	int ret;
	int i;

	/* a zero blkno is an empty ref, there's nothing to visit */
	blkno = le64_to_cpu(ref->blkno);
	if (!blkno)
		return 0;

	/*
	 * Return the translated callback error to the caller.  It used
	 * to be stored in ret and then thrown away by returning 0.
	 */
	ret = cb(blkno, 1, cb_arg);
	if (ret < 0)
		return xlate_iter_errno(ret);

	/* level 0 blocks hold no refs, so they don't need to be read */
	if (level == 0)
		return 0;

	ret = block_get(&blk, blkno, 0);
	if (ret < 0)
		return ret;

	sns_push("btree_parent", blkno, 0);

	bt = block_buf(blk);

	/* XXX integrate verification with block cache */
	if (bt->level != level) {
		problem(PB_BTREE_BLOCK_BAD_LEVEL, "expected %u level %u", level, bt->level);
		ret = -EINVAL;
		goto out;
	}

	/* read-ahead last level of parents */
	if (level == 2)
		readahead_refs(bt);

	node = avl_first(&bt->item_root);

	/* each item's value in a parent block is a ref to a child block */
	for (i = 0; i < le16_to_cpu(bt->nr_items); i++) {
		item = container_of(node, struct scoutfs_btree_item, node);
		ref = item_val(bt, item);

		ret = btree_ref_meta_iter(ref, level - 1, cb, cb_arg);
		if (ret < 0)
			goto out;

		node = avl_next(&bt->item_root, &item->node);
	}

	ret = 0;
out:
	block_put(&blk);
	sns_pop();

	return ret;
}
|
||||
|
||||
int btree_meta_iter(struct scoutfs_btree_root *root, extent_cb_t cb, void *cb_arg)
|
||||
{
|
||||
/* XXX check root */
|
||||
if (root->height == 0)
|
||||
return 0;
|
||||
|
||||
return btree_ref_meta_iter(&root->ref, root->height - 1, cb, cb_arg);
|
||||
}
|
||||
|
||||
static int btree_ref_item_iter(struct scoutfs_block_ref *ref, unsigned level,
|
||||
btree_item_cb_t cb, void *cb_arg)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct block *blk = NULL;
|
||||
u64 blkno;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
blkno = le64_to_cpu(ref->blkno);
|
||||
if (!blkno)
|
||||
return 0;
|
||||
|
||||
ret = block_get(&blk, blkno, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (level)
|
||||
sns_push("btree_parent", blkno, 0);
|
||||
else
|
||||
sns_push("btree_leaf", blkno, 0);
|
||||
|
||||
bt = block_buf(blk);
|
||||
|
||||
/* XXX integrate verification with block cache */
|
||||
if (bt->level != level) {
|
||||
problem(PB_BTREE_BLOCK_BAD_LEVEL, "expected %u level %u", level, bt->level);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* read-ahead leaves that contain items */
|
||||
if (level == 1)
|
||||
readahead_refs(bt);
|
||||
|
||||
node = avl_first(&bt->item_root);
|
||||
|
||||
for (i = 0; i < le16_to_cpu(bt->nr_items); i++) {
|
||||
item = container_of(node, struct scoutfs_btree_item, node);
|
||||
|
||||
if (level) {
|
||||
ref = item_val(bt, item);
|
||||
ret = btree_ref_item_iter(ref, level - 1, cb, cb_arg);
|
||||
} else {
|
||||
ret = cb(&item->key, item_val(bt, item),
|
||||
le16_to_cpu(item->val_len), cb_arg);
|
||||
debug("free item key "SK_FMT" ret %d", SK_ARG(&item->key), ret);
|
||||
}
|
||||
if (ret < 0) {
|
||||
ret = xlate_iter_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
node = avl_next(&bt->item_root, &item->node);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
block_put(&blk);
|
||||
sns_pop();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btree_item_iter(struct scoutfs_btree_root *root, btree_item_cb_t cb, void *cb_arg)
|
||||
{
|
||||
/* XXX check root */
|
||||
if (root->height == 0)
|
||||
return 0;
|
||||
|
||||
return btree_ref_item_iter(&root->ref, root->height - 1, cb, cb_arg);
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_BTREE_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_BTREE_H_
|
||||
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
|
||||
#include "extent.h"
|
||||
|
||||
typedef int (*btree_item_cb_t)(struct scoutfs_key *key, void *val, u16 val_len, void *cb_arg);
|
||||
|
||||
int btree_meta_iter(struct scoutfs_btree_root *root, extent_cb_t cb, void *cb_arg);
|
||||
int btree_item_iter(struct scoutfs_btree_root *root, btree_item_cb_t cb, void *cb_arg);
|
||||
|
||||
#endif
|
||||
@@ -1,149 +0,0 @@
|
||||
#define _GNU_SOURCE /* O_DIRECT */
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "ioctl.h"
|
||||
#include "cmd.h"
|
||||
#include "dev.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "block.h"
|
||||
#include "debug.h"
|
||||
#include "meta.h"
|
||||
#include "super.h"
|
||||
|
||||
struct check_args {
|
||||
char *meta_device;
|
||||
char *data_device;
|
||||
char *debug_path;
|
||||
};
|
||||
|
||||
static int do_check(struct check_args *args)
|
||||
{
|
||||
int debug_fd = -1;
|
||||
int meta_fd = -1;
|
||||
int data_fd = -1;
|
||||
int ret;
|
||||
|
||||
if (args->debug_path) {
|
||||
debug_fd = open(args->debug_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
||||
if (debug_fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "error opening debug output file '%s': %s (%d)\n",
|
||||
args->debug_path, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
debug_enable(debug_fd);
|
||||
}
|
||||
|
||||
meta_fd = open(args->meta_device, O_DIRECT | O_RDWR | O_EXCL);
|
||||
if (meta_fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open meta device '%s': %s (%d)\n",
|
||||
args->meta_device, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
data_fd = open(args->data_device, O_DIRECT | O_RDWR | O_EXCL);
|
||||
if (data_fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open data device '%s': %s (%d)\n",
|
||||
args->data_device, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = block_setup(meta_fd, 128 * 1024 * 1024, 32 * 1024 * 1024);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = check_supers() ?:
|
||||
check_meta_alloc();
|
||||
out:
|
||||
/* and tear it all down */
|
||||
block_shutdown();
|
||||
super_shutdown();
|
||||
debug_disable();
|
||||
|
||||
if (meta_fd >= 0)
|
||||
close(meta_fd);
|
||||
if (data_fd >= 0)
|
||||
close(data_fd);
|
||||
if (debug_fd >= 0)
|
||||
close(debug_fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct check_args *args = state->input;
|
||||
|
||||
switch (key) {
|
||||
case 'd':
|
||||
args->debug_path = strdup_or_error(state, arg);
|
||||
break;
|
||||
case 'e':
|
||||
case ARGP_KEY_ARG:
|
||||
if (!args->meta_device)
|
||||
args->meta_device = strdup_or_error(state, arg);
|
||||
else if (!args->data_device)
|
||||
args->data_device = strdup_or_error(state, arg);
|
||||
else
|
||||
argp_error(state, "more than two device arguments given");
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->meta_device)
|
||||
argp_error(state, "no metadata device argument given");
|
||||
if (!args->data_device)
|
||||
argp_error(state, "no data device argument given");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Options accepted by the check command; terminated by an all-zero entry. */
static struct argp_option options[] = {
	{
		.name = "debug",
		.key = 'd',
		.arg = "FILE_PATH",
		.flags = 0,
		.doc = "Path to debug output file, will be created or truncated",
	},
	{ .name = NULL },
};
|
||||
|
||||
static struct argp argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"META-DEVICE DATA-DEVICE",
|
||||
"Check filesystem consistency"
|
||||
};
|
||||
|
||||
static int check_cmd(int argc, char **argv)
|
||||
{
|
||||
struct check_args check_args = {NULL};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&argp, argc, argv, 0, NULL, &check_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_check(&check_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) check_ctor(void)
|
||||
{
|
||||
cmd_register_argp("check", &argp, GROUP_CORE, check_cmd);
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
/* Descriptor that debug() writes to; negative while debugging is off. */
int debug_fd = -1;

/* Direct debug() output to the caller's already open file descriptor. */
void debug_enable(int fd)
{
	debug_fd = fd;
}

/*
 * Turn debug() output off again.  Only the stored descriptor value is
 * reset here; the descriptor itself is not closed.
 */
void debug_disable(void)
{
	if (debug_fd < 0)
		return;

	debug_fd = -1;
}
|
||||
@@ -1,17 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_DEBUG_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_DEBUG_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Write a formatted line to debug_fd when debugging is enabled
 * (debug_fd >= 0).  A newline is appended to the format, which must be
 * a string literal.
 */
#define debug(fmt, ...)							\
do {									\
	if (debug_fd >= 0)						\
		dprintf(debug_fd, fmt "\n", ##__VA_ARGS__);		\
} while (0)
|
||||
|
||||
extern int debug_fd;
|
||||
|
||||
void debug_enable(int fd);
|
||||
void debug_disable(void);
|
||||
|
||||
#endif
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_ENO_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_ENO_H_
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
/*
 * printf helpers for an errno value: ENO_FMT is the format fragment and
 * ENO_ARG(eno) expands to the two matching arguments, the number and its
 * strerror() text.  Note that eno is evaluated twice.
 */
#define ENO_FMT		"%d (%s)"
#define ENO_ARG(eno)	(eno), strerror(eno)
|
||||
|
||||
#endif
|
||||
@@ -1,312 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "lk_rbtree_wrapper.h"
|
||||
|
||||
#include "debug.h"
|
||||
#include "extent.h"
|
||||
|
||||
/*
|
||||
* In-memory extent management in rbtree nodes.
|
||||
*/
|
||||
|
||||
/*
 * True when the ranges [a_start, a_start + a_len) and
 * [b_start, b_start + b_len) share at least one position.  The ends are
 * computed with plain u64 addition.
 */
bool extents_overlap(u64 a_start, u64 a_len, u64 b_start, u64 b_len)
{
	u64 a_end = a_start + a_len;
	u64 b_end = b_start + b_len;

	/* each range has to begin before the other one ends */
	return a_end > b_start && b_end > a_start;
}
|
||||
|
||||
/* True if the range [start, start + len) lies entirely inside the extent. */
static int ext_contains(struct extent_node *ext, u64 start, u64 len)
{
	u64 ext_end = ext->start + ext->len;
	u64 end = start + len;

	return start >= ext->start && end <= ext_end;
}
|
||||
|
||||
/*
|
||||
* True if the given extent is bisected by the given range; there's
|
||||
* leftover containing extents on both the left and right sides of the
|
||||
* range in the extent.
|
||||
*/
|
||||
static int ext_bisected(struct extent_node *ext, u64 start, u64 len)
|
||||
{
|
||||
return ext->start < start && ext->start + ext->len > start + len;
|
||||
}
|
||||
|
||||
static struct extent_node *ext_from_rbnode(struct rb_node *rbnode)
|
||||
{
|
||||
return rbnode ? container_of(rbnode, struct extent_node, rbnode) : NULL;
|
||||
}
|
||||
|
||||
static struct extent_node *next_ext(struct extent_node *ext)
|
||||
{
|
||||
return ext ? ext_from_rbnode(rb_next(&ext->rbnode)) : NULL;
|
||||
}
|
||||
|
||||
static struct extent_node *prev_ext(struct extent_node *ext)
|
||||
{
|
||||
return ext ? ext_from_rbnode(rb_prev(&ext->rbnode)) : NULL;
|
||||
}
|
||||
|
||||
/*
 * State for walk_extents().  bisect_to_leaf is set by the caller before
 * the walk; the remaining fields are filled in by the walk.
 */
struct walk_results {
	/* in: keep descending right after finding a bisected extent */
	unsigned bisect_to_leaf:1;
	/* out: extent overlapping the searched range, if any */
	struct extent_node *found;
	/* out: last extent the walk descended left from, if any */
	struct extent_node *next;
	/* out: last node visited, NULL for an empty tree */
	struct rb_node *parent;
	/* out: child link the walk stopped at */
	struct rb_node **node;
};
|
||||
|
||||
/*
 * Descend the tree looking for an extent that overlaps the range
 * [start, start + len) and record what was seen in wlk.
 *
 * The walk stops at the first overlapping extent, unless the caller set
 * wlk->bisect_to_leaf and the range bisects that extent; then it keeps
 * going right so that wlk->parent and wlk->node end up describing a link
 * where the caller can insert the right-hand remainder.
 */
static void walk_extents(struct extent_root *root, u64 start, u64 len, struct walk_results *wlk)
{
	struct rb_node **link = &root->rbroot.rb_node;
	struct extent_node *cur;
	u64 end = start + len;

	wlk->found = NULL;
	wlk->next = NULL;
	wlk->parent = NULL;

	while (*link != NULL) {
		wlk->parent = *link;
		cur = ext_from_rbnode(*link);

		if (end <= cur->start) {
			/* range ends before this extent begins */
			wlk->next = cur;
			link = &cur->rbnode.rb_left;
			continue;
		}

		if (start >= cur->start + cur->len) {
			/* range begins after this extent ends */
			link = &cur->rbnode.rb_right;
			continue;
		}

		wlk->found = cur;
		if (!wlk->bisect_to_leaf || !ext_bisected(cur, start, len))
			break;

		/* walk right so we can insert greater right from bisection */
		link = &cur->rbnode.rb_right;
	}

	wlk->node = link;
}
|
||||
|
||||
/*
|
||||
* Return an extent that overlaps with the given range.
|
||||
*/
|
||||
int extent_lookup(struct extent_root *root, u64 start, u64 len, struct extent_node *found)
|
||||
{
|
||||
struct walk_results wlk = { 0, };
|
||||
int ret;
|
||||
|
||||
walk_extents(root, start, len, &wlk);
|
||||
if (wlk.found) {
|
||||
memset(found, 0, sizeof(struct extent_node));
|
||||
found->start = wlk.found->start;
|
||||
found->len = wlk.found->len;
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -ENOENT;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Callers can iterate through direct node references and are entirely
|
||||
* responsible for consistency when doing so.
|
||||
*/
|
||||
struct extent_node *extent_first(struct extent_root *root)
|
||||
{
|
||||
struct walk_results wlk = { 0, };
|
||||
|
||||
walk_extents(root, 0, 1, &wlk);
|
||||
|
||||
return wlk.found ?: wlk.next;
|
||||
}
|
||||
|
||||
/* Public iterator step: the extent after ext, or NULL at the end. */
struct extent_node *extent_next(struct extent_node *ext)
{
	struct extent_node *next = next_ext(ext);

	return next;
}
|
||||
|
||||
/* Public iterator step: the extent before ext, or NULL at the start. */
struct extent_node *extent_prev(struct extent_node *ext)
{
	struct extent_node *prev = prev_ext(ext);

	return prev;
}
|
||||
|
||||
/*
|
||||
* Insert a new extent into the tree. We can extend existing nodes,
|
||||
* merge with neighbours, or remove existing extents entirely if we
|
||||
* insert a range that fully spans existing nodes.
|
||||
*/
|
||||
static int walk_insert(struct extent_root *root, u64 start, u64 len, int found_err)
|
||||
{
|
||||
struct walk_results wlk = { 0, };
|
||||
struct extent_node *ext;
|
||||
struct extent_node *nei;
|
||||
int ret;
|
||||
|
||||
walk_extents(root, start, len, &wlk);
|
||||
|
||||
ext = wlk.found;
|
||||
if (ext && found_err) {
|
||||
ret = found_err;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!ext) {
|
||||
ext = malloc(sizeof(struct extent_node));
|
||||
if (!ext) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ext->start = start;
|
||||
ext->len = len;
|
||||
|
||||
rb_link_node(&ext->rbnode, wlk.parent, wlk.node);
|
||||
rb_insert_color(&ext->rbnode, &root->rbroot);
|
||||
}
|
||||
|
||||
/* start by expanding an existing extent if our range is larger */
|
||||
if (start < ext->start) {
|
||||
ext->len += ext->start - start;
|
||||
ext->start = start;
|
||||
}
|
||||
if (ext->start + ext->len < start + len)
|
||||
ext->len += (start + len) - (ext->start + ext->len);
|
||||
|
||||
/* drop any fully spanned neighbors, possibly merging with a final adjacent one */
|
||||
|
||||
while ((nei = prev_ext(ext))) {
|
||||
if (nei->start + nei->len < ext->start)
|
||||
break;
|
||||
|
||||
if (nei->start < ext->start) {
|
||||
ext->len += ext->start - nei->start;
|
||||
ext->start = nei->start;
|
||||
}
|
||||
|
||||
rb_erase(&nei->rbnode, &root->rbroot);
|
||||
free(nei);
|
||||
}
|
||||
|
||||
while ((nei = next_ext(ext))) {
|
||||
if (ext->start + ext->len < nei->start)
|
||||
break;
|
||||
|
||||
if (ext->start + ext->len < nei->start + nei->len)
|
||||
ext->len += (nei->start + nei->len) - (ext->start + ext->len);
|
||||
|
||||
rb_erase(&nei->rbnode, &root->rbroot);
|
||||
free(nei);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
debug("start %llu len %llu ret %d", start, len, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert a new extent. The specified extent must not overlap with any
|
||||
* existing extents or -EEXIST is returned.
|
||||
*/
|
||||
int extent_insert_new(struct extent_root *root, u64 start, u64 len)
|
||||
{
|
||||
return walk_insert(root, start, len, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert an extent, extending any existing extents that may overlap.
|
||||
*/
|
||||
int extent_insert_extend(struct extent_root *root, u64 start, u64 len)
|
||||
{
|
||||
return walk_insert(root, start, len, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the specified extent from an existing node. The given extent must be fully
|
||||
* contained in a single node or -ENOENT is returned.
|
||||
*/
|
||||
int extent_remove(struct extent_root *root, u64 start, u64 len)
|
||||
{
|
||||
struct extent_node *ext;
|
||||
struct extent_node *ins;
|
||||
struct walk_results wlk = {
|
||||
.bisect_to_leaf = 1,
|
||||
};
|
||||
int ret;
|
||||
|
||||
walk_extents(root, start, len, &wlk);
|
||||
|
||||
if (!(ext = wlk.found) || !ext_contains(ext, start, len)) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ext_bisected(ext, start, len)) {
|
||||
debug("found bisected start %llu len %llu", ext->start, ext->len);
|
||||
ins = malloc(sizeof(struct extent_node));
|
||||
if (!ins) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ins->start = start + len;
|
||||
ins->len = (ext->start + ext->len) - ins->start;
|
||||
|
||||
rb_link_node(&ins->rbnode, wlk.parent, wlk.node);
|
||||
rb_insert_color(&ins->rbnode, &root->rbroot);
|
||||
}
|
||||
|
||||
if (start > ext->start) {
|
||||
ext->len = start - ext->start;
|
||||
} else if (len < ext->len) {
|
||||
ext->start += len;
|
||||
ext->len -= len;
|
||||
} else {
|
||||
rb_erase(&ext->rbnode, &root->rbroot);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
debug("start %llu len %llu ret %d", start, len, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void extent_root_init(struct extent_root *root)
|
||||
{
|
||||
root->rbroot = RB_ROOT;
|
||||
root->total = 0;
|
||||
}
|
||||
|
||||
void extent_root_free(struct extent_root *root)
|
||||
{
|
||||
struct extent_node *ext;
|
||||
struct rb_node *node;
|
||||
struct rb_node *tmp;
|
||||
|
||||
for (node = rb_first(&root->rbroot); node && ((tmp = rb_next(node)), 1); node = tmp) {
|
||||
ext = rb_entry(node, struct extent_node, rbnode);
|
||||
rb_erase(&ext->rbnode, &root->rbroot);
|
||||
free(ext);
|
||||
}
|
||||
}
|
||||
|
||||
void extent_root_print(struct extent_root *root)
|
||||
{
|
||||
struct extent_node *ext;
|
||||
struct rb_node *node;
|
||||
struct rb_node *tmp;
|
||||
|
||||
for (node = rb_first(&root->rbroot); node && ((tmp = rb_next(node)), 1); node = tmp) {
|
||||
ext = rb_entry(node, struct extent_node, rbnode);
|
||||
debug(" start %llu len %llu", ext->start, ext->len);
|
||||
}
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_EXTENT_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_EXTENT_H_
|
||||
|
||||
#include "lk_rbtree_wrapper.h"
|
||||
|
||||
struct extent_root {
|
||||
struct rb_root rbroot;
|
||||
u64 total;
|
||||
};
|
||||
|
||||
struct extent_node {
|
||||
struct rb_node rbnode;
|
||||
u64 start;
|
||||
u64 len;
|
||||
};
|
||||
|
||||
typedef int (*extent_cb_t)(u64 start, u64 len, void *arg);
|
||||
|
||||
struct extent_cb_arg_t {
|
||||
extent_cb_t cb;
|
||||
void *cb_arg;
|
||||
};
|
||||
|
||||
bool extents_overlap(u64 a_start, u64 a_len, u64 b_start, u64 b_len);
|
||||
|
||||
int extent_lookup(struct extent_root *root, u64 start, u64 len, struct extent_node *found);
|
||||
struct extent_node *extent_first(struct extent_root *root);
|
||||
struct extent_node *extent_next(struct extent_node *ext);
|
||||
struct extent_node *extent_prev(struct extent_node *ext);
|
||||
int extent_insert_new(struct extent_root *root, u64 start, u64 len);
|
||||
int extent_insert_extend(struct extent_root *root, u64 start, u64 len);
|
||||
int extent_remove(struct extent_root *root, u64 start, u64 len);
|
||||
|
||||
void extent_root_init(struct extent_root *root);
|
||||
void extent_root_free(struct extent_root *root);
|
||||
void extent_root_print(struct extent_root *root);
|
||||
|
||||
#endif
|
||||
@@ -1,540 +0,0 @@
|
||||
#define _GNU_SOURCE /* O_DIRECT */
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "bitmap.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "crc.h"
|
||||
#include "cmd.h"
|
||||
#include "dev.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "block.h"
|
||||
#include "btree.h"
|
||||
#include "log_trees.h"
|
||||
#include "super.h"
|
||||
|
||||
/*
 * Largest positive off_t: all bits set, viewed as unsigned, shifted
 * right once to clear the sign bit.
 */
#define OFF_MAX ((off_t)((u64)((off_t)~0ULL) >> 1))

/* Magic values identifying the image stream header and each block header. */
#define SCOUTFS_META_IMAGE_HEADER_MAGIC		0x8aee00d098fa60c5ULL
#define SCOUTFS_META_IMAGE_BLOCK_HEADER_MAGIC	0x70bd5e9269effd86ULL
|
||||
|
||||
struct scoutfs_meta_image_header {
|
||||
__le64 magic;
|
||||
__le64 total_bytes;
|
||||
__le32 version;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_meta_image_block_header {
|
||||
__le64 magic;
|
||||
__le64 offset;
|
||||
__le32 size;
|
||||
__le32 crc;
|
||||
} __packed;
|
||||
|
||||
struct image_args {
|
||||
char *meta_device;
|
||||
bool is_read;
|
||||
bool show_header;
|
||||
u64 ra_window;
|
||||
};
|
||||
|
||||
struct block_bitmaps {
|
||||
unsigned long *bits;
|
||||
u64 size;
|
||||
u64 count;
|
||||
};
|
||||
|
||||
/* printf-style output straight to the stderr file descriptor. */
#define errf(fmt, ...) \
	dprintf(STDERR_FILENO, fmt, ##__VA_ARGS__)
|
||||
|
||||
/*
 * extent_cb_t callback that marks one large metadata block as referenced
 * in the struct block_bitmaps passed as arg, counting each block the
 * first time it is set.
 *
 * Only single block references are accepted.  The block numbers come
 * from on-disk references, so a number beyond the end of the bitmap is
 * rejected rather than being used to index past the allocation.
 *
 * Returns 0, -EINVAL if len isn't 1, or -ERANGE if start is outside of
 * the bitmap.
 */
static int set_meta_bit(u64 start, u64 len, void *arg)
{
	struct block_bitmaps *bm = arg;

	if (len != 1)
		return -EINVAL;

	if (start >= bm->size)
		return -ERANGE;

	if (!test_bit(bm->bits, start)) {
		set_bit(bm->bits, start);
		bm->count++;
	}

	return 0;
}
|
||||
|
||||
static int get_ref_bits(struct block_bitmaps *bm)
|
||||
{
|
||||
struct scoutfs_super_block *super = global_super;
|
||||
int ret;
|
||||
u64 i;
|
||||
|
||||
/*
|
||||
* There are almost no small blocks we need to read, so we read
|
||||
* them as the large blocks that contain them to simplify the
|
||||
* block reading process.
|
||||
*/
|
||||
set_meta_bit(SCOUTFS_SUPER_BLKNO >> SCOUTFS_BLOCK_SM_LG_SHIFT, 1, bm);
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++)
|
||||
set_meta_bit((SCOUTFS_QUORUM_BLKNO + i) >> SCOUTFS_BLOCK_SM_LG_SHIFT, 1, bm);
|
||||
|
||||
ret = alloc_root_meta_iter(&super->meta_alloc[0], set_meta_bit, bm) ?:
|
||||
alloc_root_meta_iter(&super->meta_alloc[1], set_meta_bit, bm) ?:
|
||||
alloc_root_meta_iter(&super->data_alloc, set_meta_bit, bm) ?:
|
||||
alloc_list_meta_iter(&super->server_meta_avail[0], set_meta_bit, bm) ?:
|
||||
alloc_list_meta_iter(&super->server_meta_avail[1], set_meta_bit, bm) ?:
|
||||
alloc_list_meta_iter(&super->server_meta_freed[0], set_meta_bit, bm) ?:
|
||||
alloc_list_meta_iter(&super->server_meta_freed[1], set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->fs_root, set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->logs_root, set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->log_merge, set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->mounted_clients, set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->srch_root, set_meta_bit, bm) ?:
|
||||
log_trees_meta_iter(set_meta_bit, bm);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that this temporarily modifies the header that it's given.
|
||||
*/
|
||||
static __le32 calc_crc(struct scoutfs_meta_image_block_header *bh, void *buf, size_t size)
|
||||
{
|
||||
__le32 saved = bh->crc;
|
||||
u32 crc = ~0;
|
||||
|
||||
bh->crc = 0;
|
||||
crc = crc32c(crc, bh, sizeof(*bh));
|
||||
crc = crc32c(crc, buf, size);
|
||||
bh->crc = saved;
|
||||
|
||||
return cpu_to_le32(crc);
|
||||
}
|
||||
|
||||
static void printf_header(struct scoutfs_meta_image_header *hdr)
|
||||
{
|
||||
errf("magic: 0x%016llx\n"
|
||||
"total_bytes: %llu\n"
|
||||
"version: %u\n",
|
||||
le64_to_cpu(hdr->magic),
|
||||
le64_to_cpu(hdr->total_bytes),
|
||||
le32_to_cpu(hdr->version));
|
||||
}
|
||||
|
||||
/*
 * Common signature for the four wrappers below so that rw_full_count()
 * can drive read(), pread(), write() and pwrite() the same way.  The
 * stream variants ignore the offset argument.
 */
typedef ssize_t (*rw_func_t)(int fd, void *buf, size_t count, off_t offset);

/* read() at the current file position; offset is unused. */
static inline ssize_t rw_read(int fd, void *buf, size_t count, off_t offset)
{
	(void)offset;
	return read(fd, buf, count);
}

/* pread() at the given offset. */
static inline ssize_t rw_pread(int fd, void *buf, size_t count, off_t offset)
{
	return pread(fd, buf, count, offset);
}

/* write() at the current file position; offset is unused. */
static inline ssize_t rw_write(int fd, void *buf, size_t count, off_t offset)
{
	(void)offset;
	return write(fd, buf, count);
}

/* pwrite() at the given offset. */
static inline ssize_t rw_pwrite(int fd, void *buf, size_t count, off_t offset)
{
	return pwrite(fd, buf, count, offset);
}
|
||||
|
||||
/*
 * Call func repeatedly until exactly count bytes have been transferred
 * to or from buf.
 *
 * func:   one of the rw_* wrappers
 * tot:    optional running total that is increased by every transfer
 * offset: starting file offset for the positional wrappers
 *
 * After a partial transfer the buffer position and the offset advance
 * together so that a positional call continues where the previous one
 * stopped instead of repeating the same offset.
 *
 * Returns 0 on success, -errno if func fails, or -EIO if func returns 0
 * or claims to have transferred more than was asked for.
 */
static int rw_full_count(rw_func_t func, u64 *tot, int fd, void *buf, size_t count, off_t offset)
{
	char *pos = buf;
	ssize_t sret;

	while (count > 0) {
		sret = func(fd, pos, count, offset);
		if (sret < 0)
			return -errno;
		if (sret == 0 || (size_t)sret > count)
			return -EIO;

		if (tot)
			*tot += sret;
		pos += sret;
		offset += sret;
		count -= sret;
	}

	return 0;
}
|
||||
|
||||
static int read_image(struct image_args *args, int fd, struct block_bitmaps *bm)
|
||||
{
|
||||
struct scoutfs_meta_image_block_header bh;
|
||||
struct scoutfs_meta_image_header hdr;
|
||||
u64 opening;
|
||||
void *buf;
|
||||
off_t off;
|
||||
u64 bit;
|
||||
u64 ra;
|
||||
int ret;
|
||||
|
||||
buf = malloc(SCOUTFS_BLOCK_LG_SIZE);
|
||||
if (!buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
hdr.magic = cpu_to_le64(SCOUTFS_META_IMAGE_HEADER_MAGIC);
|
||||
hdr.total_bytes = cpu_to_le64(sizeof(hdr) +
|
||||
(bm->count * (SCOUTFS_BLOCK_LG_SIZE + sizeof(bh))));
|
||||
hdr.version = cpu_to_le32(1);
|
||||
|
||||
if (args->show_header) {
|
||||
printf_header(&hdr);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = rw_full_count(rw_write, NULL, STDOUT_FILENO, &hdr, sizeof(hdr), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
opening = args->ra_window;
|
||||
ra = 0;
|
||||
bit = 0;
|
||||
|
||||
for (bit = 0; (bit = find_next_set_bit(bm->bits, bit, bm->size)) < bm->size; bit++) {
|
||||
|
||||
/* readahead to open the full window, then a block at a time */
|
||||
do {
|
||||
ra = find_next_set_bit(bm->bits, ra, bm->size);
|
||||
if (ra < bm->size) {
|
||||
off = ra << SCOUTFS_BLOCK_LG_SHIFT;
|
||||
posix_fadvise(fd, off, SCOUTFS_BLOCK_LG_SIZE, POSIX_FADV_WILLNEED);
|
||||
ra++;
|
||||
if (opening)
|
||||
opening -= min(opening, SCOUTFS_BLOCK_LG_SIZE);
|
||||
}
|
||||
} while (opening > 0);
|
||||
|
||||
off = bit << SCOUTFS_BLOCK_LG_SHIFT;
|
||||
ret = rw_full_count(rw_pread, NULL, fd, buf, SCOUTFS_BLOCK_LG_SIZE, off);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Might as well try to drop the pages we've used to
|
||||
* reduce memory pressure on our read-ahead pages that
|
||||
* are waiting.
|
||||
*/
|
||||
posix_fadvise(fd, off, SCOUTFS_BLOCK_LG_SIZE, POSIX_FADV_DONTNEED);
|
||||
|
||||
bh.magic = SCOUTFS_META_IMAGE_BLOCK_HEADER_MAGIC;
|
||||
bh.offset = cpu_to_le64(off);
|
||||
bh.size = cpu_to_le32(SCOUTFS_BLOCK_LG_SIZE);
|
||||
bh.crc = calc_crc(&bh, buf, SCOUTFS_BLOCK_LG_SIZE);
|
||||
|
||||
ret = rw_full_count(rw_write, NULL, STDOUT_FILENO, &bh, sizeof(bh), 0) ?:
|
||||
rw_full_count(rw_write, NULL, STDOUT_FILENO, buf, SCOUTFS_BLOCK_LG_SIZE, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
free(buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int invalid_header(struct scoutfs_meta_image_header *hdr)
|
||||
{
|
||||
if (le64_to_cpu(hdr->magic) != SCOUTFS_META_IMAGE_HEADER_MAGIC) {
|
||||
errf("bad image header magic 0x%016llx (!= expected %016llx)\n",
|
||||
le64_to_cpu(hdr->magic), SCOUTFS_META_IMAGE_HEADER_MAGIC);
|
||||
|
||||
} else if (le32_to_cpu(hdr->version) != 1) {
|
||||
errf("unknown image header version %u\n", le32_to_cpu(hdr->version));
|
||||
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Doesn't catch offset+size overflowing, presumes pwrite() will return
|
||||
* an error.
|
||||
*/
|
||||
static int invalid_block_header(struct scoutfs_meta_image_block_header *bh)
|
||||
{
|
||||
if (le64_to_cpu(bh->magic) != SCOUTFS_META_IMAGE_BLOCK_HEADER_MAGIC) {
|
||||
errf("bad block header magic 0x%016llx (!= expected %016llx)\n",
|
||||
le64_to_cpu(bh->magic), SCOUTFS_META_IMAGE_BLOCK_HEADER_MAGIC);
|
||||
|
||||
} else if (le32_to_cpu(bh->size) == 0) {
|
||||
errf("invalid block header size %u\n", le32_to_cpu(bh->size));
|
||||
|
||||
} else if (le32_to_cpu(bh->size) > SIZE_MAX) {
|
||||
errf("block header size %u too large for size_t (> %zu)\n",
|
||||
le32_to_cpu(bh->size), (size_t)SIZE_MAX);
|
||||
|
||||
} else if (le64_to_cpu(bh->offset) > OFF_MAX) {
|
||||
errf("block header offset %llu too large for off_t (> %llu)\n",
|
||||
le64_to_cpu(bh->offset), (u64)OFF_MAX);
|
||||
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int write_image(struct image_args *args, int fd, struct block_bitmaps *bm)
|
||||
{
|
||||
struct scoutfs_meta_image_block_header bh;
|
||||
struct scoutfs_meta_image_header hdr;
|
||||
size_t writeback_batch = (2 * 1024 * 1024);
|
||||
size_t buf_size;
|
||||
size_t dirty;
|
||||
size_t size;
|
||||
off_t first;
|
||||
off_t last;
|
||||
off_t off;
|
||||
__le32 calc;
|
||||
void *buf;
|
||||
u64 tot;
|
||||
int ret;
|
||||
|
||||
tot = 0;
|
||||
|
||||
ret = rw_full_count(rw_read, &tot, STDIN_FILENO, &hdr, sizeof(hdr), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (args->show_header) {
|
||||
printf_header(&hdr);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = invalid_header(&hdr);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
dirty = 0;
|
||||
first = OFF_MAX;
|
||||
last = 0;
|
||||
buf = NULL;
|
||||
buf_size = 0;
|
||||
|
||||
while (tot < le64_to_cpu(hdr.total_bytes)) {
|
||||
|
||||
ret = rw_full_count(rw_read, &tot, STDIN_FILENO, &bh, sizeof(bh), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = invalid_block_header(&bh);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
size = le32_to_cpu(bh.size);
|
||||
if (buf_size < size) {
|
||||
buf = realloc(buf, size);
|
||||
if (!buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
buf_size = size;
|
||||
}
|
||||
|
||||
ret = rw_full_count(rw_read, &tot, STDIN_FILENO, buf, size, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
calc = calc_crc(&bh, buf, size);
|
||||
if (calc != bh.crc) {
|
||||
errf("crc err");
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
off = le64_to_cpu(bh.offset);
|
||||
|
||||
ret = rw_full_count(rw_pwrite, NULL, fd, buf, size, off);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
dirty += size;
|
||||
first = min(first, off);
|
||||
last = max(last, off);
|
||||
if (dirty >= writeback_batch) {
|
||||
posix_fadvise(fd, first, last, POSIX_FADV_DONTNEED);
|
||||
dirty = 0;
|
||||
first = OFF_MAX;
|
||||
last = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ret = fsync(fd);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int do_image(struct image_args *args)
|
||||
{
|
||||
struct block_bitmaps bm = { .bits = NULL };
|
||||
int meta_fd = -1;
|
||||
u64 dev_size;
|
||||
mode_t mode;
|
||||
int ret;
|
||||
|
||||
mode = args->is_read ? O_RDONLY : O_RDWR;
|
||||
|
||||
meta_fd = open(args->meta_device, mode);
|
||||
if (meta_fd < 0) {
|
||||
ret = -errno;
|
||||
errf("failed to open meta device '%s': %s (%d)\n",
|
||||
args->meta_device, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (args->is_read) {
|
||||
ret = flush_device(meta_fd);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = get_device_size(args->meta_device, meta_fd, &dev_size);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
bm.size = DIV_ROUND_UP(dev_size, SCOUTFS_BLOCK_LG_SIZE);
|
||||
bm.bits = calloc(1, round_up(bm.size, BITS_PER_LONG) / 8);
|
||||
if (!bm.bits) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = block_setup(meta_fd, 128 * 1024 * 1024, 32 * 1024 * 1024) ?:
|
||||
check_supers() ?:
|
||||
get_ref_bits(&bm) ?:
|
||||
read_image(args, meta_fd, &bm);
|
||||
block_shutdown();
|
||||
} else {
|
||||
ret = write_image(args, meta_fd, &bm);
|
||||
}
|
||||
out:
|
||||
free(bm.bits);
|
||||
|
||||
if (meta_fd >= 0)
|
||||
close(meta_fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct image_args *args = state->input;
|
||||
int ret;
|
||||
|
||||
switch (key) {
|
||||
case 'h':
|
||||
args->show_header = true;
|
||||
break;
|
||||
case 'r':
|
||||
ret = parse_u64(arg, &args->ra_window);
|
||||
if (ret)
|
||||
argp_error(state, "readahead winddoe parse error");
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (!args->meta_device)
|
||||
args->meta_device = strdup_or_error(state, arg);
|
||||
else
|
||||
argp_error(state, "more than two device arguments given");
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->meta_device)
|
||||
argp_error(state, "no metadata device argument given");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Options shared by both image commands; terminated by an all-zero
 * entry.
 *
 * NOTE(review): the --readahead help text speaks of NR blocks, while
 * read_image() consumes the value SCOUTFS_BLOCK_LG_SIZE at a time per
 * block -- confirm which unit is intended.
 */
static struct argp_option options[] = {
	{
		.name = "show-header",
		.key = 'h',
		.arg = NULL,
		.flags = 0,
		.doc = "Print image header and exit without processing stream",
	},
	{
		.name = "readahead",
		.key = 'r',
		.arg = "NR",
		.flags = 0,
		.doc = "Maintain read-ahead window of NR blocks",
	},
	{ .name = NULL },
};
|
||||
|
||||
static struct argp read_image_argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"META-DEVICE",
|
||||
"Read metadata image stream from metadata device file"
|
||||
};
|
||||
|
||||
#define DEFAULT_RA_WINDOW (512 * 1024)
|
||||
|
||||
static int read_image_cmd(int argc, char **argv)
|
||||
{
|
||||
struct image_args image_args = {
|
||||
.is_read = true,
|
||||
.ra_window = DEFAULT_RA_WINDOW,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&read_image_argp, argc, argv, 0, NULL, &image_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_image(&image_args);
|
||||
}
|
||||
|
||||
static struct argp write_image_argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"META-DEVICE",
|
||||
"Write metadata image stream to metadata device file"
|
||||
};
|
||||
|
||||
static int write_image_cmd(int argc, char **argv)
|
||||
{
|
||||
struct image_args image_args = {
|
||||
.is_read = false,
|
||||
.ra_window = DEFAULT_RA_WINDOW,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&write_image_argp, argc, argv, 0, NULL, &image_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_image(&image_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) image_ctor(void)
|
||||
{
|
||||
cmd_register_argp("read-metadata-image", &read_image_argp, GROUP_CORE, read_image_cmd);
|
||||
cmd_register_argp("write-metadata-image", &write_image_argp, GROUP_CORE, write_image_cmd);
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_ITER_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_ITER_H_
|
||||
|
||||
/*
 * Callbacks can return a weird -errno that we'll never use to indicate
 * that iteration can stop and return 0 for success.
 */
#define ECHECK_ITER_DONE EL2HLT

/* Turn the "iteration done" marker into success; pass anything else through. */
static inline int xlate_iter_errno(int ret)
{
	if (ret == -ECHECK_ITER_DONE)
		return 0;

	return ret;
}
|
||||
|
||||
#endif
|
||||
@@ -1,98 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "key.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "btree.h"
|
||||
#include "debug.h"
|
||||
#include "extent.h"
|
||||
#include "iter.h"
|
||||
#include "sns.h"
|
||||
#include "log_trees.h"
|
||||
#include "super.h"
|
||||
|
||||
struct iter_args {
|
||||
extent_cb_t cb;
|
||||
void *cb_arg;
|
||||
};
|
||||
|
||||
/*
 * btree item callback for one log_trees item.  Calls the caller's extent
 * callback for the metadata blocks referenced by the item's allocator
 * lists, item btree, bloom block, and data allocator roots.
 *
 * Each structure is walked inside its own sns_push()/sns_pop() pair,
 * nested inside the pair for the item itself.
 *
 * Returns 0 on success or the first negative result of a walk.  A value
 * too short to hold a struct scoutfs_log_trees returns -EINVAL before
 * any of its fields are read.
 */
static int lt_meta_iter(struct scoutfs_key *key, void *val, u16 val_len, void *cb_arg)
{
	struct iter_args *ia = cb_arg;
	struct scoutfs_log_trees *trees = val;
	int ret;

	/* XXX longer values are still accepted without complaint */
	if (val_len < sizeof(struct scoutfs_log_trees))
		return -EINVAL;

	sns_push("log_trees", le64_to_cpu(trees->rid), le64_to_cpu(trees->nr));

	debug("lt rid 0x%16llx nr %llu", le64_to_cpu(trees->rid), le64_to_cpu(trees->nr));

	sns_push("meta_avail", 0, 0);
	ret = alloc_list_meta_iter(&trees->meta_avail, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	sns_push("meta_freed", 0, 0);
	ret = alloc_list_meta_iter(&trees->meta_freed, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	sns_push("item_root", 0, 0);
	ret = btree_meta_iter(&trees->item_root, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	/* the bloom block is a single block and only present when blkno is set */
	if (trees->bloom_ref.blkno) {
		sns_push("bloom_ref", 0, 0);
		ret = ia->cb(le64_to_cpu(trees->bloom_ref.blkno), 1, ia->cb_arg);
		sns_pop();
		if (ret < 0) {
			ret = xlate_iter_errno(ret);
			goto out;
		}
	}

	sns_push("data_avail", 0, 0);
	ret = alloc_root_meta_iter(&trees->data_avail, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	sns_push("data_freed", 0, 0);
	ret = alloc_root_meta_iter(&trees->data_freed, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	ret = 0;
out:
	/* matches the "log_trees" push */
	sns_pop();

	return ret;
}
|
||||
|
||||
/*
|
||||
* Call the callers callback with the extent of all the metadata block references contained
|
||||
* in log btrees. We walk the logs_root btree items and walk all the metadata structures
|
||||
* they reference.
|
||||
*/
|
||||
int log_trees_meta_iter(extent_cb_t cb, void *cb_arg)
|
||||
{
|
||||
struct scoutfs_super_block *super = global_super;
|
||||
struct iter_args ia = { .cb = cb, .cb_arg = cb_arg };
|
||||
|
||||
return btree_item_iter(&super->logs_root, lt_meta_iter, &ia);
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_LOG_TREES_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_LOG_TREES_H_
|
||||
|
||||
#include "extent.h"
|
||||
|
||||
int log_trees_meta_iter(extent_cb_t cb, void *cb_arg);
|
||||
|
||||
#endif
|
||||
@@ -1,367 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "bitmap.h"
|
||||
#include "key.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "btree.h"
|
||||
#include "debug.h"
|
||||
#include "extent.h"
|
||||
#include "sns.h"
|
||||
#include "log_trees.h"
|
||||
#include "meta.h"
|
||||
#include "problem.h"
|
||||
#include "super.h"
|
||||
|
||||
static struct meta_data {
|
||||
struct extent_root meta_refed;
|
||||
struct extent_root meta_free;
|
||||
struct {
|
||||
u64 ref_blocks;
|
||||
u64 free_extents;
|
||||
u64 free_blocks;
|
||||
} stats;
|
||||
} global_mdat;
|
||||
|
||||
/*
 * Return true when blkno is at or after SCOUTFS_META_DEV_START_BLKNO
 * and before the total_meta_blocks count recorded in the global super.
 */
bool valid_meta_blkno(u64 blkno)
{
	const u64 limit = le64_to_cpu(global_super->total_meta_blocks);

	if (blkno < SCOUTFS_META_DEV_START_BLKNO)
		return false;

	return blkno < limit;
}
|
||||
|
||||
/*
 * Check that a [start, start + len) extent is non-empty, doesn't wrap,
 * and sits entirely between SCOUTFS_META_DEV_START_BLKNO and the
 * super's total_meta_blocks.  An invalid extent is recorded as a
 * PB_META_EXTENT_INVALID problem before false is returned.
 */
static bool valid_meta_extent(u64 start, u64 len)
{
	const u64 limit = le64_to_cpu(global_super->total_meta_blocks);
	const u64 end = start + len;	/* may wrap, caught below */
	bool ok = true;

	if (len == 0 || len > limit)
		ok = false;
	else if (start < SCOUTFS_META_DEV_START_BLKNO || start >= limit)
		ok = false;
	else if (end > limit || end <= start)
		ok = false;

	debug("start %llu len %llu valid %u", start, len, !!ok);

	if (!ok)
		problem(PB_META_EXTENT_INVALID, "start %llu len %llu", start, len);

	return ok;
}
|
||||
|
||||
/*
|
||||
* Track references to individual metadata blocks. This uses the extent
|
||||
* callback type but is only ever called for single block references.
|
||||
* Any reference to a block that has already been referenced is
|
||||
* considered invalid and is ignored. Later repair will resolve
|
||||
* duplicate references.
|
||||
*/
|
||||
static int insert_meta_ref(u64 start, u64 len, void *arg)
|
||||
{
|
||||
struct meta_data *mdat = &global_mdat;
|
||||
struct extent_root *root = arg;
|
||||
int ret = 0;
|
||||
|
||||
/* this is tracking single metadata block references */
|
||||
if (len != 1) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (valid_meta_blkno(start)) {
|
||||
ret = extent_insert_new(root, start, len);
|
||||
if (ret == 0)
|
||||
mdat->stats.ref_blocks++;
|
||||
else if (ret == -EEXIST)
|
||||
problem(PB_META_REF_OVERLAPS_EXISTING, "blkno %llu", start);
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Extent callback that records one free metadata extent in the
 * extent_root passed as arg and accounts for it in the free stats.
 *
 * Extents that fail valid_meta_extent() (which records the problem
 * itself) are skipped and 0 is returned.  An extent that overlaps one
 * already recorded is reported as PB_META_FREE_OVERLAPS_EXISTING.
 *
 * NOTE(review): the -EEXIST from extent_insert_new() is still returned
 * after the problem is recorded -- confirm that callers treat it as
 * non-fatal.
 */
static int insert_meta_free(u64 start, u64 len, void *arg)
{
	struct meta_data *mdat = &global_mdat;
	struct extent_root *root = arg;
	int ret;

	if (!valid_meta_extent(start, len))
		return 0;

	ret = extent_insert_new(root, start, len);
	if (ret == 0) {
		mdat->stats.free_extents++;
		/* an extent contributes all of its blocks, not just one */
		mdat->stats.free_blocks += len;
	} else if (ret == -EEXIST) {
		problem(PB_META_FREE_OVERLAPS_EXISTING,
			"start %llu len %llu", start, len);
	}

	return ret;
}
|
||||
|
||||
/*
|
||||
* Walk all metadata references in the system. This walk doesn't need
|
||||
* to read metadata that doesn't contain any metadata references so it
|
||||
* can skip the bulk of metadata blocks. This gives us the set of
|
||||
* referenced metadata blocks which we can then use to repair metadata
|
||||
* allocator structures.
|
||||
*/
|
||||
static int get_meta_refs(void)
|
||||
{
|
||||
struct meta_data *mdat = &global_mdat;
|
||||
struct scoutfs_super_block *super = global_super;
|
||||
int ret;
|
||||
|
||||
extent_root_init(&mdat->meta_refed);
|
||||
|
||||
/* XXX record reserved blocks around super as referenced */
|
||||
|
||||
sns_push("meta_alloc", 0, 0);
|
||||
ret = alloc_root_meta_iter(&super->meta_alloc[0], insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("meta_alloc", 1, 0);
|
||||
ret = alloc_root_meta_iter(&super->meta_alloc[1], insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("data_alloc", 1, 0);
|
||||
ret = alloc_root_meta_iter(&super->data_alloc, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_avail", 0, 0);
|
||||
ret = alloc_list_meta_iter(&super->server_meta_avail[0],
|
||||
insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_avail", 1, 0);
|
||||
ret = alloc_list_meta_iter(&super->server_meta_avail[1],
|
||||
insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_freed", 0, 0);
|
||||
ret = alloc_list_meta_iter(&super->server_meta_freed[0],
|
||||
insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_freed", 1, 0);
|
||||
ret = alloc_list_meta_iter(&super->server_meta_freed[1],
|
||||
insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("fs_root", 0, 0);
|
||||
ret = btree_meta_iter(&super->fs_root, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("logs_root", 0, 0);
|
||||
ret = btree_meta_iter(&super->logs_root, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("log_merge", 0, 0);
|
||||
ret = btree_meta_iter(&super->log_merge, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("mounted_clients", 0, 0);
|
||||
ret = btree_meta_iter(&super->mounted_clients, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("srch_root", 0, 0);
|
||||
ret = btree_meta_iter(&super->srch_root, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = log_trees_meta_iter(insert_meta_ref, &mdat->meta_refed);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
printf("found %llu referenced metadata blocks\n", mdat->stats.ref_blocks);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_meta_free(void)
|
||||
{
|
||||
struct meta_data *mdat = &global_mdat;
|
||||
struct scoutfs_super_block *super = global_super;
|
||||
int ret;
|
||||
|
||||
extent_root_init(&mdat->meta_free);
|
||||
|
||||
sns_push("meta_alloc", 0, 0);
|
||||
ret = alloc_root_extent_iter(&super->meta_alloc[0], insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("meta_alloc", 1, 0);
|
||||
ret = alloc_root_extent_iter(&super->meta_alloc[1], insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_avail", 0, 0);
|
||||
ret = alloc_list_extent_iter(&super->server_meta_avail[0],
|
||||
insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_avail", 1, 0);
|
||||
ret = alloc_list_extent_iter(&super->server_meta_avail[1],
|
||||
insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_freed", 0, 0);
|
||||
ret = alloc_list_extent_iter(&super->server_meta_freed[0],
|
||||
insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_freed", 1, 0);
|
||||
ret = alloc_list_extent_iter(&super->server_meta_freed[1],
|
||||
insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
printf("found %llu free metadata blocks in %llu extents\n",
|
||||
mdat->stats.free_blocks, mdat->stats.free_extents);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* All the space between referenced blocks must be recorded in the free
|
||||
* extents. The free extent walk didn't check that the extents
|
||||
* overlapped with references, we do that here. Remember that metadata
|
||||
* block references were merged into extents here, the refed extents
|
||||
* aren't necessarily all a single block.
|
||||
*/
|
||||
static int compare_refs_and_free(void)
|
||||
{
|
||||
struct meta_data *mdat = &global_mdat;
|
||||
struct extent_node *ref;
|
||||
struct extent_node *free;
|
||||
struct extent_node *next;
|
||||
struct extent_node *prev;
|
||||
u64 expect;
|
||||
u64 start;
|
||||
u64 end;
|
||||
|
||||
expect = 0;
|
||||
ref = extent_first(&mdat->meta_refed);
|
||||
free = extent_first(&mdat->meta_free);
|
||||
while (ref || free) {
|
||||
|
||||
printf("exp %llu ref %llu.%llu free %llu.%llu\n",
|
||||
expect, ref ? ref->start : 0, ref ? ref->len : 0,
|
||||
free ? free->start : 0, free ? free->len : 0);
|
||||
|
||||
/* referenced marked free, remove ref from free and continue from same point */
|
||||
if (ref && free && extents_overlap(ref->start, ref->len, free->start, free->len)) {
|
||||
printf("ref extent %llu.%llu overlaps free %llu %llu\n",
|
||||
ref->start, ref->len, free->start, free->len);
|
||||
|
||||
start = max(ref->start, free->start);
|
||||
end = min(ref->start + ref->len, free->start + free->len);
|
||||
|
||||
prev = extent_prev(free);
|
||||
|
||||
extent_remove(&mdat->meta_free, start, end - start);
|
||||
|
||||
if (prev)
|
||||
free = extent_next(prev);
|
||||
else
|
||||
free = extent_first(&mdat->meta_free);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* see which extent starts earlier */
|
||||
if (!free || (ref && ref->start <= free->start))
|
||||
next = ref;
|
||||
else
|
||||
next = free;
|
||||
|
||||
/* untracked region before next extent */
|
||||
if (expect < next->start) {
|
||||
printf("missing free extent %llu.%llu\n", expect, next->start - expect);
|
||||
expect = next->start;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/* didn't overlap, advance past next extent */
|
||||
expect = next->start + next->len;
|
||||
if (next == ref)
|
||||
ref = extent_next(ref);
|
||||
else
|
||||
free = extent_next(free);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Check the metadata allocators: collect the set of referenced
 * metadata blocks, collect the set of free extents stored in free
 * btree items and alloc list blocks, then compare the two.
 *
 * Returns 0 on success or the first negative error from a stage.
 */
int check_meta_alloc(void)
{
	int ret;

	ret = get_meta_refs();
	if (ret >= 0)
		ret = get_meta_free();
	if (ret >= 0)
		ret = compare_refs_and_free();

	return ret < 0 ? ret : 0;
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_META_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_META_H_
|
||||
|
||||
bool valid_meta_blkno(u64 blkno);
|
||||
|
||||
int check_meta_alloc(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "padding.h"
|
||||
|
||||
/*
 * Return true if all sz bytes starting at data are zero.  A zero
 * length region is trivially all zeros.
 *
 * The region is compared against a small static block of zeros one
 * chunk at a time.  The cursor is a byte pointer because arithmetic on
 * a void pointer isn't standard C.
 */
bool padding_is_zeros(const void *data, size_t sz)
{
	static const char zeros[32];
	const unsigned char *p = data;
	size_t chunk;

	while (sz > 0) {
		chunk = sz < sizeof(zeros) ? sz : sizeof(zeros);

		if (memcmp(p, zeros, chunk) != 0)
			return false;

		p += chunk;
		sz -= chunk;
	}

	return true;
}
|
||||
@@ -1,6 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_PADDING_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_PADDING_H_
|
||||
|
||||
bool padding_is_zeros(const void *data, size_t sz);
|
||||
|
||||
#endif
|
||||
@@ -1,23 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "problem.h"
|
||||
|
||||
#if 0
|
||||
#define PROB_STR(pb) [pb] = #pb
|
||||
static char *prob_strs[] = {
|
||||
PROB_STR(PB_META_EXTENT_INVALID),
|
||||
PROB_STR(PB_META_EXTENT_OVERLAPS_EXISTING),
|
||||
};
|
||||
#endif
|
||||
|
||||
static struct problem_data {
|
||||
uint64_t counts[PB__NR];
|
||||
} global_pdat;
|
||||
|
||||
/*
 * Count one more occurrence of the given problem type in the global
 * per-problem counters.
 */
void problem_record(prob_t pb)
{
	global_pdat.counts[pb]++;
}
|
||||
@@ -1,23 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_PROBLEM_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_PROBLEM_H_
|
||||
|
||||
#include "debug.h"
|
||||
#include "sns.h"
|
||||
|
||||
typedef enum {
|
||||
PB_META_EXTENT_INVALID,
|
||||
PB_META_REF_OVERLAPS_EXISTING,
|
||||
PB_META_FREE_OVERLAPS_EXISTING,
|
||||
PB_BTREE_BLOCK_BAD_LEVEL,
|
||||
PB__NR,
|
||||
} prob_t;
|
||||
|
||||
#define problem(pb, fmt, ...) \
|
||||
do { \
|
||||
debug("problem found: "#pb": %s: "fmt, sns_str(), __VA_ARGS__); \
|
||||
problem_record(pb); \
|
||||
} while (0)
|
||||
|
||||
void problem_record(prob_t pb);
|
||||
|
||||
#endif
|
||||
@@ -1,118 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sns.h"
|
||||
|
||||
/*
|
||||
* This "str num stack" is used to describe our location in metadata at
|
||||
* any given time.
|
||||
*
|
||||
* As we descend into structures we push a string on describing them,
|
||||
* perhaps with associated numbers. Pushing and popping is very cheap
|
||||
* and only rarely do we format the stack into a string, as an arbitrary
|
||||
* example:
|
||||
* super.fs_root.btree_parent:1231.btree_leaf:3231
|
||||
*/
|
||||
|
||||
#define SNS_MAX_DEPTH 1000
/*
 * Worst case formatted bytes per entry: a '.' separator, the string,
 * and two ':'-prefixed u64s of up to 16 hex digits each.  The first
 * entry has no separator, which leaves the byte needed for the
 * terminating null.
 */
#define SNS_STR_SIZE (SNS_MAX_DEPTH * (1 + SNS_MAX_STR_LEN + 2 * (1 + 16)))
|
||||
|
||||
static struct sns_data {
|
||||
unsigned int depth;
|
||||
|
||||
struct sns_entry {
|
||||
char *str;
|
||||
size_t len;
|
||||
u64 a;
|
||||
u64 b;
|
||||
} ents[SNS_MAX_DEPTH];
|
||||
|
||||
char str[SNS_STR_SIZE];
|
||||
|
||||
} global_lsdat;
|
||||
|
||||
/*
 * Push a location entry onto the stack.  Only the str pointer is
 * stored, the string isn't copied.  A push onto a stack that already
 * holds SNS_MAX_DEPTH entries is silently dropped.
 */
void _sns_push(char *str, size_t len, u64 a, u64 b)
{
	struct sns_entry *ent;

	if (global_lsdat.depth >= SNS_MAX_DEPTH)
		return;

	ent = &global_lsdat.ents[global_lsdat.depth++];
	ent->str = str;
	ent->len = len;
	ent->a = a;
	ent->b = b;
}
|
||||
|
||||
void sns_pop(void)
|
||||
{
|
||||
struct sns_data *lsdat = &global_lsdat;
|
||||
|
||||
if (lsdat->depth > 0)
|
||||
lsdat->depth--;
|
||||
}
|
||||
|
||||
/*
 * Copy len bytes of str to pos, without adding a terminating null, and
 * return the position just past the copied bytes.
 */
static char *append_str(char *pos, char *str, size_t len)
{
	char *end = pos + len;

	memcpy(pos, str, len);

	return end;
}
|
||||
|
||||
/*
|
||||
* This is not called for x = 0 so we don't need to emit an initial 0.
|
||||
* We could by using do {} while instead of while {}.
|
||||
*/
|
||||
static char *append_u64x(char *pos, u64 x)
|
||||
{
|
||||
static char hex[] = "0123456789abcdef";
|
||||
|
||||
while (x) {
|
||||
*pos++ = hex[x & 0xf];
|
||||
x >>= 4;
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
/* Store the single character c at pos and return the next position. */
static char *append_char(char *pos, char c)
{
	pos[0] = c;

	return pos + 1;
}
|
||||
|
||||
/*
|
||||
* Return a pointer to a null terminated string that describes the
|
||||
* current location stack. The string buffer is global.
|
||||
*/
|
||||
char *sns_str(void)
|
||||
{
|
||||
struct sns_data *lsdat = &global_lsdat;
|
||||
struct sns_entry *ent;
|
||||
char *pos;
|
||||
int i;
|
||||
|
||||
pos = lsdat->str;
|
||||
for (i = 0; i < lsdat->depth; i++) {
|
||||
ent = &lsdat->ents[i];
|
||||
|
||||
if (i)
|
||||
pos = append_char(pos, '.');
|
||||
|
||||
pos = append_str(pos, ent->str, ent->len);
|
||||
|
||||
if (ent->a) {
|
||||
pos = append_char(pos, ':');
|
||||
pos = append_u64x(pos, ent->a);
|
||||
}
|
||||
|
||||
if (ent->b) {
|
||||
pos = append_char(pos, ':');
|
||||
pos = append_u64x(pos, ent->b);
|
||||
}
|
||||
}
|
||||
|
||||
*pos = '\0';
|
||||
|
||||
return lsdat->str;
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_SNS_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_SNS_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "sparse.h"
|
||||
|
||||
#define SNS_MAX_STR_LEN 20
|
||||
|
||||
#define sns_push(str, a, b) \
|
||||
do { \
|
||||
build_assert(sizeof(str) - 1 <= SNS_MAX_STR_LEN); \
|
||||
_sns_push((str), sizeof(str) - 1, a, b); \
|
||||
} while (0)
|
||||
|
||||
void _sns_push(char *str, size_t len, u64 a, u64 b);
|
||||
void sns_pop(void);
|
||||
char *sns_str(void);
|
||||
|
||||
#endif
|
||||
@@ -1,57 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
|
||||
#include "block.h"
|
||||
#include "super.h"
|
||||
|
||||
/*
|
||||
* After we check the super blocks we provide a global buffer to track
|
||||
* the current super block. It is referenced to get static information
|
||||
* about the system and is also modified and written as part of
|
||||
* transactions.
|
||||
*/
|
||||
struct scoutfs_super_block *global_super;
|
||||
|
||||
/*
|
||||
* After checking the supers we save a copy of it in a global buffer that's used by
|
||||
* other modules to track the current super. It can be modified and written during commits.
|
||||
*/
|
||||
int check_supers(void)
|
||||
{
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct block *blk = NULL;
|
||||
int ret;
|
||||
|
||||
global_super = malloc(sizeof(struct scoutfs_super_block));
|
||||
if (!global_super) {
|
||||
printf("error allocating super block buffer\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = block_get(&blk, SCOUTFS_SUPER_BLKNO, BF_SM);
|
||||
if (ret < 0) {
|
||||
printf("error reading super block\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
super = block_buf(blk);
|
||||
|
||||
memcpy(global_super, super, sizeof(struct scoutfs_super_block));
|
||||
ret = 0;
|
||||
out:
|
||||
block_put(&blk);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void super_shutdown(void)
|
||||
{
|
||||
free(global_super);
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_SUPER_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_SUPER_H_
|
||||
|
||||
extern struct scoutfs_super_block *global_super;
|
||||
|
||||
int check_supers(void);
|
||||
void super_shutdown(void);
|
||||
|
||||
#endif
|
||||
@@ -156,16 +156,6 @@ static inline void list_move_tail(struct list_head *list,
|
||||
list_add_tail(list, head);
|
||||
}
|
||||
|
||||
/**
 * list_is_head - check whether an entry is the list head itself
 * @list: the entry to test
 * @head: the head of the list
 *
 * Returns non-zero when @list and @head are the same node.
 */
static inline int list_is_head(const struct list_head *list, const struct list_head *head)
{
	return head == list;
}
|
||||
|
||||
/**
|
||||
* list_empty - tests whether a list is empty
|
||||
* @head: the list to test.
|
||||
@@ -252,15 +242,6 @@ static inline void list_splice_init(struct list_head *list,
|
||||
for (pos = (head)->next, n = pos->next; pos != (head); \
|
||||
pos = n, n = pos->next)
|
||||
|
||||
/**
 * list_entry_is_head - test if the entry points to the head of the list
 * @pos:	the type * to cursor
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * @pos is parenthesized so that an expression argument, not only a
 * plain identifier, expands correctly.
 */
#define list_entry_is_head(pos, head, member)				\
	(&(pos)->member == (head))
|
||||
|
||||
/**
|
||||
* list_for_each_entry - iterate over list of given type
|
||||
* @pos: the type * to use as a loop counter.
|
||||
@@ -326,28 +307,4 @@ static inline void list_splice_init(struct list_head *list,
|
||||
#define list_next_entry(pos, member) \
|
||||
list_entry((pos)->member.next, typeof(*(pos)), member)
|
||||
|
||||
/**
|
||||
* list_prev_entry - get the prev element in list
|
||||
* @pos: the type * to cursor
|
||||
* @member: the name of the list_head within the struct.
|
||||
*/
|
||||
#define list_prev_entry(pos, member) \
|
||||
list_entry((pos)->member.prev, typeof(*(pos)), member)
|
||||
|
||||
/**
|
||||
* list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
|
||||
* @pos: the type * to use as a loop cursor.
|
||||
* @n: another type * to use as temporary storage
|
||||
* @head: the head for your list.
|
||||
* @member: the name of the list_head within the struct.
|
||||
*
|
||||
* Iterate backwards over list of given type, safe against removal
|
||||
* of list entry.
|
||||
*/
|
||||
#define list_for_each_entry_safe_reverse(pos, n, head, member) \
|
||||
for (pos = list_last_entry(head, typeof(*pos), member), \
|
||||
n = list_prev_entry(pos, member); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = n, n = list_prev_entry(n, member))
|
||||
|
||||
#endif
|
||||
|
||||
24
utils/src/mode_types.c
Normal file
24
utils/src/mode_types.c
Normal file
@@ -0,0 +1,24 @@
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "mode_types.h"
|
||||
|
||||
/*
 * Map the file type bits of a mode to the matching SCOUTFS_DT_ value.
 * File type values that aren't listed in the table map to its zero
 * initialized slots.
 */
unsigned int mode_to_type(mode_t mode)
{
#define S_SHIFT 12
	/*
	 * The index ranges over every value of the S_IFMT field, from 0
	 * through S_IFMT >> S_SHIFT inclusive, so the table needs one more
	 * slot than the largest index.
	 */
	static const unsigned char mode_types[(S_IFMT >> S_SHIFT) + 1] = {
		[S_IFIFO >> S_SHIFT]	= SCOUTFS_DT_FIFO,
		[S_IFCHR >> S_SHIFT]	= SCOUTFS_DT_CHR,
		[S_IFDIR >> S_SHIFT]	= SCOUTFS_DT_DIR,
		[S_IFBLK >> S_SHIFT]	= SCOUTFS_DT_BLK,
		[S_IFREG >> S_SHIFT]	= SCOUTFS_DT_REG,
		[S_IFLNK >> S_SHIFT]	= SCOUTFS_DT_LNK,
		[S_IFSOCK >> S_SHIFT]	= SCOUTFS_DT_SOCK,
	};

	return mode_types[(mode & S_IFMT) >> S_SHIFT];
#undef S_SHIFT
}
|
||||
6
utils/src/mode_types.h
Normal file
6
utils/src/mode_types.h
Normal file
@@ -0,0 +1,6 @@
|
||||
#ifndef _MODE_TYPES_H_
|
||||
#define _MODE_TYPES_H_
|
||||
|
||||
unsigned int mode_to_type(mode_t mode);
|
||||
|
||||
#endif
|
||||
46
utils/src/name_hash.h
Normal file
46
utils/src/name_hash.h
Normal file
@@ -0,0 +1,46 @@
|
||||
#ifndef _SCOUTFS_NAME_HASH_H_
|
||||
#define _SCOUTFS_NAME_HASH_H_
|
||||
|
||||
#include "hash.h"
|
||||
|
||||
/*
 * Test a bit number as though an array of bytes is a large len-bit
 * big-endian value.  nr 0 is the LSB of the final byte, nr (len - 1) is
 * the MSB of the first byte.  Returns non-zero if the bit is set.
 *
 * This is inline, like the other helpers in this header, so that files
 * which include the header without using it don't carry an unused
 * static function.
 */
static inline int test_be_bytes_bit(int nr, const char *bytes, int len)
{
	return bytes[(len - 1 - nr) >> 3] & (1 << (nr & 7));
}
|
||||
|
||||
/*
|
||||
* Generate a 32bit "fingerprint" of the name by extracting 32 evenly
|
||||
* distributed bits from the name. The intent is to have the sort order
|
||||
* of the fingerprints reflect the memcmp() sort order of the names
|
||||
* while mapping large names down to small fs keys.
|
||||
*
|
||||
* Names that are smaller than 32bits are biased towards the high bits
|
||||
* of the fingerprint so that most significant bits of the fingerprints
|
||||
* consistently reflect the initial characters of the names.
|
||||
*/
|
||||
static inline u32 dirent_name_fingerprint(const char *name, unsigned int name_len)
|
||||
{
|
||||
int name_bits = name_len * 8;
|
||||
int skip = max(name_bits / 32, 1);
|
||||
u32 fp = 0;
|
||||
int f;
|
||||
int n;
|
||||
|
||||
for (f = 31, n = name_bits - 1; f >= 0 && n >= 0; f--, n -= skip)
|
||||
fp |= !!test_be_bytes_bit(n, name, name_bits) << f;
|
||||
|
||||
return fp;
|
||||
}
|
||||
|
||||
static inline u64 dirent_name_hash(const char *name, unsigned int name_len)
|
||||
{
|
||||
return scoutfs_hash32(name, name_len) |
|
||||
((u64)dirent_name_fingerprint(name, name_len) << 32);
|
||||
}
|
||||
|
||||
#endif
|
||||
1871
utils/src/parallel_restore.c
Normal file
1871
utils/src/parallel_restore.c
Normal file
File diff suppressed because it is too large
Load Diff
103
utils/src/parallel_restore.h
Normal file
103
utils/src/parallel_restore.h
Normal file
@@ -0,0 +1,103 @@
|
||||
#ifndef _SCOUTFS_PARALLEL_RESTORE_H_
|
||||
#define _SCOUTFS_PARALLEL_RESTORE_H_
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
struct scoutfs_parallel_restore_progress {
|
||||
struct scoutfs_btree_root fs_items;
|
||||
struct scoutfs_btree_root root_items;
|
||||
struct scoutfs_srch_file sfl;
|
||||
struct scoutfs_block_ref bloom_ref;
|
||||
__le64 inode_count;
|
||||
__le64 max_ino;
|
||||
};
|
||||
|
||||
struct scoutfs_parallel_restore_slice {
|
||||
__le64 fsid;
|
||||
__le64 meta_start;
|
||||
__le64 meta_len;
|
||||
};
|
||||
|
||||
struct scoutfs_parallel_restore_entry {
|
||||
u64 dir_ino;
|
||||
u64 pos;
|
||||
u64 ino;
|
||||
mode_t mode;
|
||||
char *name;
|
||||
unsigned int name_len;
|
||||
};
|
||||
|
||||
struct scoutfs_parallel_restore_xattr {
|
||||
u64 ino;
|
||||
u64 pos;
|
||||
char *name;
|
||||
unsigned int name_len;
|
||||
void *value;
|
||||
unsigned int value_len;
|
||||
};
|
||||
|
||||
struct scoutfs_parallel_restore_inode {
|
||||
/* all inodes */
|
||||
u64 ino;
|
||||
u64 nr_xattrs;
|
||||
u32 uid;
|
||||
u32 gid;
|
||||
u32 mode;
|
||||
u32 rdev;
|
||||
struct timespec atime;
|
||||
struct timespec ctime;
|
||||
struct timespec mtime;
|
||||
struct timespec crtime;
|
||||
|
||||
/* regular files */
|
||||
u64 data_version;
|
||||
u64 size;
|
||||
bool offline;
|
||||
|
||||
/* only used for directories */
|
||||
u64 nr_subdirs;
|
||||
u64 total_entry_name_bytes;
|
||||
|
||||
/* only used for symlinks */
|
||||
char *target;
|
||||
unsigned int target_len; /* not including null terminator */
|
||||
};
|
||||
|
||||
typedef __typeof__(EINVAL) spr_err_t;
|
||||
|
||||
struct scoutfs_parallel_restore_writer;
|
||||
|
||||
spr_err_t scoutfs_parallel_restore_create_writer(struct scoutfs_parallel_restore_writer **wrip);
|
||||
void scoutfs_parallel_restore_destroy_writer(struct scoutfs_parallel_restore_writer **wrip);
|
||||
|
||||
spr_err_t scoutfs_parallel_restore_init_slices(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_parallel_restore_slice *slices,
|
||||
int nr);
|
||||
spr_err_t scoutfs_parallel_restore_add_slice(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_parallel_restore_slice *slice);
|
||||
spr_err_t scoutfs_parallel_restore_get_slice(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_parallel_restore_slice *slice);
|
||||
|
||||
spr_err_t scoutfs_parallel_restore_add_inode(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_parallel_restore_inode *inode);
|
||||
spr_err_t scoutfs_parallel_restore_add_entry(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_parallel_restore_entry *entry);
|
||||
spr_err_t scoutfs_parallel_restore_add_xattr(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_parallel_restore_xattr *xattr);
|
||||
|
||||
spr_err_t scoutfs_parallel_restore_get_progress(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_parallel_restore_progress *prog);
|
||||
spr_err_t scoutfs_parallel_restore_add_progress(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_parallel_restore_progress *prog);
|
||||
|
||||
spr_err_t scoutfs_parallel_restore_write_buf(struct scoutfs_parallel_restore_writer *wri,
|
||||
void *buf, size_t len, off_t *off_ret,
|
||||
size_t *count_ret);
|
||||
|
||||
spr_err_t scoutfs_parallel_restore_import_super(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_super_block *super);
|
||||
spr_err_t scoutfs_parallel_restore_export_super(struct scoutfs_parallel_restore_writer *wri,
|
||||
struct scoutfs_super_block *super);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -609,8 +609,6 @@ static int print_alloc_list_block(int fd, char *str, struct scoutfs_block_ref *r
|
||||
u64 blkno;
|
||||
u64 start;
|
||||
u64 len;
|
||||
u64 st;
|
||||
u64 nr;
|
||||
int wid;
|
||||
int ret;
|
||||
int i;
|
||||
@@ -629,37 +627,27 @@ static int print_alloc_list_block(int fd, char *str, struct scoutfs_block_ref *r
|
||||
AL_REF_A(&lblk->next), le32_to_cpu(lblk->start),
|
||||
le32_to_cpu(lblk->nr));
|
||||
|
||||
st = le32_to_cpu(lblk->start);
|
||||
nr = le32_to_cpu(lblk->nr);
|
||||
if (st >= SCOUTFS_ALLOC_LIST_MAX_BLOCKS ||
|
||||
nr > SCOUTFS_ALLOC_LIST_MAX_BLOCKS ||
|
||||
(st + nr) > SCOUTFS_ALLOC_LIST_MAX_BLOCKS) {
|
||||
printf(" (invalid start and nr fields)\n");
|
||||
goto out;
|
||||
}
|
||||
if (lblk->nr) {
|
||||
wid = printf(" exts: ");
|
||||
start = 0;
|
||||
len = 0;
|
||||
for (i = 0; i < le32_to_cpu(lblk->nr); i++) {
|
||||
if (len == 0)
|
||||
start = le64_to_cpu(lblk->blknos[i]);
|
||||
len++;
|
||||
|
||||
if (lblk->nr == 0)
|
||||
goto out;
|
||||
if (i == (le32_to_cpu(lblk->nr) - 1) ||
|
||||
start + len != le64_to_cpu(lblk->blknos[i + 1])) {
|
||||
if (wid >= 72)
|
||||
wid = printf("\n ");
|
||||
|
||||
wid = printf(" exts: ");
|
||||
start = 0;
|
||||
len = 0;
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (len == 0)
|
||||
start = le64_to_cpu(lblk->blknos[st + i]);
|
||||
len++;
|
||||
|
||||
if (i == (nr - 1) || (start + len) != le64_to_cpu(lblk->blknos[st + i + 1])) {
|
||||
if (wid >= 72)
|
||||
wid = printf("\n ");
|
||||
|
||||
wid += printf("%llu,%llu ", start, len);
|
||||
len = 0;
|
||||
wid += printf("%llu,%llu ", start, len);
|
||||
len = 0;
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
out:
|
||||
next = lblk->next;
|
||||
free(lblk);
|
||||
return print_alloc_list_block(fd, str, &next);
|
||||
|
||||
@@ -44,3 +44,37 @@ int srch_decode_entry(void *buf, struct scoutfs_srch_entry *sre,
|
||||
|
||||
return tot;
|
||||
}
|
||||
|
||||
/*
 * Transform val with the shift and sign-extend xor below, store all
 * eight bytes of the result little-endian at buf, and return the
 * number of low bytes needed to hold its highest set bit (0 when the
 * transformed value is zero).
 */
static int encode_u64(__le64 *buf, u64 val)
{
	u64 enc = (val << 1) ^ ((s64)val >> 63); /* shift sign extend */

	put_unaligned_le64(enc, buf);

	return (fls64(enc) + 7) >> 3;
}
|
||||
|
||||
int srch_encode_entry(void *buf, struct scoutfs_srch_entry *sre, struct scoutfs_srch_entry *prev)
|
||||
{
|
||||
u64 diffs[] = {
|
||||
le64_to_cpu(sre->hash) - le64_to_cpu(prev->hash),
|
||||
le64_to_cpu(sre->ino) - le64_to_cpu(prev->ino),
|
||||
le64_to_cpu(sre->id) - le64_to_cpu(prev->id),
|
||||
};
|
||||
u16 lengths = 0;
|
||||
int bytes;
|
||||
int tot = 2;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < array_size(diffs); i++) {
|
||||
bytes = encode_u64(buf + tot, diffs[i]);
|
||||
lengths |= bytes << (i << 2);
|
||||
tot += bytes;
|
||||
}
|
||||
|
||||
put_unaligned_le16(lengths, buf);
|
||||
|
||||
return tot;
|
||||
}
|
||||
|
||||
@@ -3,5 +3,6 @@
|
||||
|
||||
int srch_decode_entry(void *buf, struct scoutfs_srch_entry *sre,
|
||||
struct scoutfs_srch_entry *prev);
|
||||
int srch_encode_entry(void *buf, struct scoutfs_srch_entry *sre, struct scoutfs_srch_entry *prev);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -69,6 +69,8 @@ do { \
|
||||
#define container_of(ptr, type, memb) \
|
||||
((type *)((void *)(ptr) - offsetof(type, memb)))
|
||||
|
||||
#define NSEC_PER_SEC 1000000000
|
||||
|
||||
#define BITS_PER_LONG (sizeof(long) * 8)
|
||||
#define U8_MAX ((u8)~0ULL)
|
||||
#define U16_MAX ((u16)~0ULL)
|
||||
@@ -81,6 +83,7 @@ do { \
|
||||
\
|
||||
(_x == 0 ? 0 : 64 - __builtin_clzll(_x)); \
|
||||
})
|
||||
#define fls64(x) flsll(x)
|
||||
|
||||
#define ilog2(x) \
|
||||
({ \
|
||||
@@ -98,6 +101,16 @@ emit_get_unaligned_le(16)
|
||||
emit_get_unaligned_le(32)
|
||||
emit_get_unaligned_le(64)
|
||||
|
||||
#define emit_put_unaligned_le(nr) \
|
||||
static inline void put_unaligned_le##nr(u##nr val, void *buf) \
|
||||
{ \
|
||||
__le##nr x = cpu_to_le##nr(val); \
|
||||
memcpy(buf, &x, sizeof(x)); \
|
||||
}
|
||||
emit_put_unaligned_le(16)
|
||||
emit_put_unaligned_le(32)
|
||||
emit_put_unaligned_le(64)
|
||||
|
||||
/*
|
||||
* return -1,0,+1 based on the memcmp comparison of the minimum of their
|
||||
* two lengths. If their min shared bytes are equal but the lengths
|
||||
|
||||
Reference in New Issue
Block a user