From 9cf87ee571f09344ad406541f72a15fbc3d37fd1 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Sat, 26 Mar 2016 10:58:06 -0700 Subject: [PATCH] scoutfs: add basic file page cache read and write Add basic file data support by implementing the address space file and page read and write methods. This passes basic read/write tests but is only the seed of a final implementation. Signed-off-by: Zach Brown --- kmod/src/Makefile | 4 +- kmod/src/filerw.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++ kmod/src/filerw.h | 7 ++ kmod/src/format.h | 17 +++- kmod/src/inode.c | 20 ++++- 5 files changed, 260 insertions(+), 6 deletions(-) create mode 100644 kmod/src/filerw.c create mode 100644 kmod/src/filerw.h diff --git a/kmod/src/Makefile b/kmod/src/Makefile index dae6c279..21058481 100644 --- a/kmod/src/Makefile +++ b/kmod/src/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o -scoutfs-y += block.o bloom.o chunk.o crc.o dir.o inode.o manifest.o msg.o \ - ring.o segment.o skip.o super.o +scoutfs-y += block.o bloom.o chunk.o crc.o dir.o filerw.o inode.o manifest.o \ + msg.o ring.o segment.o skip.o super.o diff --git a/kmod/src/filerw.c b/kmod/src/filerw.c new file mode 100644 index 00000000..2b914d87 --- /dev/null +++ b/kmod/src/filerw.c @@ -0,0 +1,218 @@ +/* +* Copyright (C) 2016 Versity Software, Inc. All rights reserved. +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public +* License v2 as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +*/ +#include +#include +#include + +#include "format.h" +#include "segment.h" +#include "inode.h" +#include "key.h" +#include "filerw.h" + +/* + * File data is stored in items just like everything else. 
This is very
+ * easy to implement but incurs a copying overhead. We'll see how
+ * expensive that gets.
+ *
+ * By making the max item size a bit less than the block size we can
+ * still have room for the block header which gets us file data
+ * checksums. File item key offsets are multiples of this max block
+ * size though items can be smaller if the data is sparse. This lets us
+ * do lookups for specific keys and take advantage of the bloom filters.
+ *
+ * This is a minimal first pass and will need more work. It'll need to
+ * worry about enospc in writepage and cluster access for a start.
+ */
+
+/*
+ * Track the intersection of the logical region of a file with a page
+ * and file data item.
+ */
+struct data_region {
+	u64 item_key;			/* file offset / SCOUTFS_MAX_ITEM_LEN */
+	unsigned int page_off;		/* byte offset of region in the page */
+	unsigned short len;		/* bytes inside both page and item */
+	unsigned short item_off;	/* byte offset of region in the item */
+};
+
+/*
+ * Map the file offset to its intersection with the page and item region.
+ * Returns false if the byte position is outside the page, which is what
+ * ends for_each_data_region() once pos walks past the end of the page.
+ *
+ * do_div() leaves the quotient in pos and returns the remainder, so
+ * item_key ends up as the index of the item that contains pos and
+ * item_off as the byte offset of pos within that item.
+ *
+ * NOTE(review): the comment at the top of the file describes key
+ * offsets as multiples of the max item size; the code here stores the
+ * item index instead -- confirm which is intended.
+ */
+static bool map_data_region(struct data_region *dr, u64 pos, struct page *page)
+{
+	if (pos >> PAGE_SHIFT != page->index)
+		return false;
+
+	dr->page_off = pos & ~PAGE_MASK;
+
+	dr->item_off = do_div(pos, SCOUTFS_MAX_ITEM_LEN);
+	dr->item_key = pos;
+
+	/* the region stops at whichever of the item or page ends first */
+	dr->len = min(SCOUTFS_MAX_ITEM_LEN - dr->item_off,
+		      PAGE_SIZE - dr->page_off);
+
+	return true;
+}
+
+/*
+ * Iterate pos over the start of each data region in the page, filling
+ * dr for each.  Every macro argument is parenthesized so that callers
+ * can safely pass expressions.
+ */
+#define for_each_data_region(dr, page, pos)				\
+	for ((pos) = (u64)(page)->index << PAGE_SHIFT;			\
+	     map_data_region((dr), (pos), (page)); (pos) += (dr)->len)
+
+/*
+ * Copy the contents of file data items into the page. If we don't
+ * find an item then we zero that region of the page.
+ *
+ * XXX i_size?
+ * XXX async? 
+ *
+ * A region with no item is a hole in the file: it is zero filled and
+ * is not an error.
+ *
+ * Returns 0 after marking the page uptodate, or the error returned by
+ * the item read.  The page is unlocked before returning either way.
+ */
+static int scoutfs_readpage(struct file *file, struct page *page)
+{
+	/*
+	 * Find the inode through the page, like writepage does, so that
+	 * we don't dereference file.  Some callers of ->readpage (e.g.
+	 * read_mapping_page()) can pass a NULL file.
+	 */
+	struct inode *inode = page->mapping->host;
+	struct super_block *sb = inode->i_sb;
+	DECLARE_SCOUTFS_ITEM_REF(ref);
+	struct scoutfs_key key;
+	struct data_region dr;
+	int ret = 0;
+	void *addr;
+	u64 pos;
+
+	for_each_data_region(&dr, page, pos) {
+		scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_DATA_KEY,
+				dr.item_key);
+
+		ret = scoutfs_read_item(sb, &key, &ref);
+		if (ret == -ENOENT) {
+			/*
+			 * Clear ret so that a hole in the last region
+			 * of the page doesn't fail the whole read with
+			 * -ENOENT and leave the page !uptodate.
+			 */
+			ret = 0;
+			addr = kmap_atomic(page);
+			memset(addr + dr.page_off, 0, dr.len);
+			kunmap_atomic(addr);
+			continue;
+		}
+		if (ret)
+			break;
+
+		addr = kmap_atomic(page);
+		memcpy(addr + dr.page_off, ref.val + dr.item_off, dr.len);
+		kunmap_atomic(addr);
+
+		/* drop the item ref once copied, as writepage does */
+		scoutfs_put_ref(&ref);
+	}
+
+	if (!ret)
+		SetPageUptodate(page);
+	unlock_page(page);
+	return ret;
+}
+
+/*
+ * Copy the contents of the page into file items. Data integrity syncs
+ * will later write the dirty segment to the device.
+ *
+ * XXX zeroing regions of data items?
+ * XXX wbc counters?
+ * XXX reserve space so dirty item doesn't get enospc -- our "delalloc"? 
+ *
+ * Returns 0, or the error from dirtying an item, which is also
+ * recorded on the page and on the inode's mapping.  Writeback is ended
+ * and the page is unlocked before returning either way.
+ */
+static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct super_block *sb = inode->i_sb;
+	DECLARE_SCOUTFS_ITEM_REF(ref);
+	struct scoutfs_key key;
+	struct data_region dr;
+	void *addr;
+	u64 pos;
+	int ret = 0;
+
+	set_page_writeback(page);
+
+	for_each_data_region(&dr, page, pos) {
+		scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_DATA_KEY,
+				dr.item_key);
+
+		/* items are always dirtied at the full max item length */
+		ret = scoutfs_dirty_item(sb, &key, SCOUTFS_MAX_ITEM_LEN, &ref);
+		if (ret)
+			break;
+
+		addr = kmap_atomic(page);
+		memcpy(ref.val + dr.item_off, addr + dr.page_off, dr.len);
+		kunmap_atomic(addr);
+
+		scoutfs_put_ref(&ref);
+	}
+
+	/* NOTE(review): looks redundant with the put in the loop -- confirm */
+	scoutfs_put_ref(&ref);
+
+	if (ret) {
+		SetPageError(page);
+		mapping_set_error(&inode->i_data, ret);
+	}
+
+	end_page_writeback(page);
+	unlock_page(page);
+
+	return ret;
+}
+
+/*
+ * Get the page cache page that covers pos from
+ * grab_cache_page_write_begin() and hand it to the caller through
+ * pagep.  Returns 0, or -ENOMEM if no page could be obtained.
+ *
+ * XXX NOTE(review): a write that covers only part of a page that isn't
+ * uptodate doesn't read or zero the rest of the page here, yet
+ * write_end marks the whole page uptodate.  This probably needs to
+ * read the existing items into the page first -- confirm.
+ */
+static int scoutfs_write_begin(struct file *file, struct address_space *mapping,
+			       loff_t pos, unsigned len, unsigned flags,
+			       struct page **pagep, void **fsdata)
+{
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	struct page *page;
+
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+
+	*pagep = page;
+	return 0;
+}
+
+/*
+ * Finish a write of len bytes at pos of which only copied bytes were
+ * actually copied into the page.  Extends i_size if the copied bytes
+ * went past it, marks the page uptodate and dirty, and then unlocks
+ * and releases the page.  Returns the number of bytes copied.
+ */
+static int scoutfs_write_end(struct file *file, struct address_space *mapping,
+			     loff_t pos, unsigned len, unsigned copied,
+			     struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	unsigned off;
+
+	off = pos & (PAGE_CACHE_SIZE - 1);
+
+	/*
+	 * Zero the stale part of the page if we did a short copy.
+	 * zero_user_segment() takes start and end offsets within the
+	 * page, so the end of the write is off + len, not len.
+	 */
+	if (copied < len)
+		zero_user_segment(page, off + copied, off + len);
+
+	if (pos + copied > inode->i_size)
+		i_size_write(inode, pos + copied);
+
+	if (!PageUptodate(page))
+		SetPageUptodate(page);
+	set_page_dirty(page);
+	unlock_page(page);
+	page_cache_release(page);
+
+	return copied;
+}
+
+const struct address_space_operations scoutfs_file_aops = {
+	.readpage = scoutfs_readpage,
+	.writepage = scoutfs_writepage,
+	.write_begin = scoutfs_write_begin, 
+ .write_end = scoutfs_write_end, +}; + +/* + * Regular files are read and written with the kernel's generic file + * read and write helpers. + */ +const struct file_operations scoutfs_file_fops = { + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, +}; diff --git a/kmod/src/filerw.h b/kmod/src/filerw.h new file mode 100644 index 00000000..2d9d478e --- /dev/null +++ b/kmod/src/filerw.h @@ -0,0 +1,7 @@ +#ifndef _SCOUTFS_FILERW_H_ +#define _SCOUTFS_FILERW_H_ + +extern const struct address_space_operations scoutfs_file_aops; +extern const struct file_operations scoutfs_file_fops; + +#endif diff --git a/kmod/src/format.h b/kmod/src/format.h index 989592e3..8808eb04 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -100,8 +100,14 @@ struct scoutfs_key { #define SCOUTFS_ROOT_INO 1 -#define SCOUTFS_INODE_KEY 128 -#define SCOUTFS_DIRENT_KEY 192 +/* + * Currently we sort keys by the numeric value of the types, but that + * isn't necessary. We could have an arbitrary sort order. So we don't + * have to stress about cleverly allocating the types. + */ +#define SCOUTFS_INODE_KEY 1 +#define SCOUTFS_DIRENT_KEY 2 +#define SCOUTFS_DATA_KEY 3 struct scoutfs_ring_map_block { struct scoutfs_block_header hdr; @@ -203,6 +209,13 @@ struct scoutfs_item { __le32 skip_next[0]; } __packed; +/* + * The max item length caps file data items so that they fit in checksummed + * 4k blocks with a bit of expansion room. + */ +#define SCOUTFS_MAX_ITEM_LEN \ + (SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_block_header) - 32) + struct scoutfs_timespec { __le64 sec; __le32 nsec; diff --git a/kmod/src/inode.c b/kmod/src/inode.c index a59cac5d..703ac676 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -21,6 +21,7 @@ #include "inode.h" #include "segment.h" #include "dir.h" +#include "filerw.h" /* * XXX @@ -68,10 +69,25 @@ void scoutfs_destroy_inode(struct inode *inode) static void set_inode_ops(struct inode *inode) { switch (inode->i_mode & S_IFMT) { + /* + * I guess we add a reg.c for regular files? Or pagecache.c? 
+ * I guess that makes more sense. + * + * - page dirtying makes sure there's a dirty item + * - sync writes back page cache pages + * - writepage copies to dirty item + * - crc calculated after copying + * - pages can be pretty large + * - tail items can be partial? + * - tracing all over the place + * - maybe just less than 4k is the answer? + * - so allocation pulls the value back + * - probably leave some overhead for header growth + */ case S_IFREG: -// inode->i_mapping->a_ops = &scoutfs_file_aops; + inode->i_mapping->a_ops = &scoutfs_file_aops; // inode->i_op = &scoutfs_file_iops; -// inode->i_fop = &scoutfs_file_fops; + inode->i_fop = &scoutfs_file_fops; break; case S_IFDIR: inode->i_op = &scoutfs_dir_iops;