scoutfs: add basic file page cache read and write

Add basic file data support by implementing the address space page and
file read and write methods.  This passes basic read/write tests but is
only the seed of a final implementation.

Signed-off-by: Zach Brown <zab@versity.com>
Zach Brown
2016-03-26 10:58:06 -07:00
parent 867d717d2b
commit 9cf87ee571
5 changed files with 260 additions and 6 deletions

kmod/src/Makefile

@@ -1,4 +1,4 @@
 obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o
 
-scoutfs-y += block.o bloom.o chunk.o crc.o dir.o inode.o manifest.o msg.o \
-	     ring.o segment.o skip.o super.o
+scoutfs-y += block.o bloom.o chunk.o crc.o dir.o filerw.o inode.o manifest.o \
+	     msg.o ring.o segment.o skip.o super.o

kmod/src/filerw.c Normal file (218 lines added)

@@ -0,0 +1,218 @@
/*
 * Copyright (C) 2016 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include "format.h"
#include "segment.h"
#include "inode.h"
#include "key.h"
#include "filerw.h"
/*
 * File data is stored in items just like everything else.  This is
 * very easy to implement but incurs a copying overhead.  We'll see how
 * expensive that gets.
 *
 * By making the max item size a bit less than the block size we still
 * have room for the block header, which gets us file data checksums.
 * File item key offsets are multiples of this max item size, though
 * items can be smaller if the data is sparse.  This lets us do lookups
 * for specific keys and take advantage of the bloom filters.
 *
 * This is a minimal first pass and will need more work.  It'll need to
 * worry about enospc in writepage and cluster access for a start.
 */

/*
 * Track the intersection of the logical region of a file with a page
 * and file data item.
 */
struct data_region {
	u64 item_key;
	unsigned int page_off;
	unsigned short len;
	unsigned short item_off;
};

/*
* Map the file offset to its intersection with the page and item region.
* Returns false if the byte position is outside the page.
*/
static bool map_data_region(struct data_region *dr, u64 pos, struct page *page)
{
if (pos >> PAGE_SHIFT != page->index)
return false;
dr->page_off = pos & ~PAGE_MASK;
dr->item_off = do_div(pos, SCOUTFS_MAX_ITEM_LEN);
dr->item_key = pos;
dr->len = min(SCOUTFS_MAX_ITEM_LEN - dr->item_off,
PAGE_SIZE - dr->page_off);
return true;
}
#define for_each_data_region(dr, page, pos)			\
	for (pos = (u64)page->index << PAGE_SHIFT;		\
	     map_data_region(dr, pos, page); pos += (dr)->len)

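/*
 * A worked example (a sketch, assuming for illustration that
 * SCOUTFS_MAX_ITEM_LEN works out to 4040 and PAGE_SIZE is 4096): the
 * page at index 1 covers bytes [4096, 8192).  The first region maps to
 * page_off 0, item_key 1, item_off 56, len 3984; the second maps to
 * page_off 3984, item_key 2, item_off 0, len 112, which ends the page.
 */
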
/*
 * Copy the contents of file data items into the page.  If we don't
 * find an item then we zero that region of the page.
 *
 * XXX i_size?
 * XXX async?
 */
static int scoutfs_readpage(struct file *file, struct page *page)
{
	struct inode *inode = file->f_mapping->host;
	struct super_block *sb = inode->i_sb;
	DECLARE_SCOUTFS_ITEM_REF(ref);
	struct scoutfs_key key;
	struct data_region dr;
	int ret = 0;
	void *addr;
	u64 pos;

	for_each_data_region(&dr, page, pos) {
		scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_DATA_KEY,
				dr.item_key);

		ret = scoutfs_read_item(sb, &key, &ref);
		if (ret == -ENOENT) {
			/* sparse regions read as zeros */
			addr = kmap_atomic(page);
			memset(addr + dr.page_off, 0, dr.len);
			kunmap_atomic(addr);
			ret = 0;
			continue;
		}
		if (ret)
			break;

		addr = kmap_atomic(page);
		memcpy(addr + dr.page_off, ref.val + dr.item_off, dr.len);
		kunmap_atomic(addr);

		/* drop the item ref now that its value has been copied */
		scoutfs_put_ref(&ref);
	}

	if (!ret)
		SetPageUptodate(page);
	unlock_page(page);
	return ret;
}

/*
 * Copy the contents of the page into file items.  Data integrity syncs
 * will later write the dirty segment to the device.
 *
 * XXX zeroing regions of data items?
 * XXX wbc counters?
 * XXX reserve space so dirty item doesn't get enospc -- our "delalloc"?
 */
static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct super_block *sb = inode->i_sb;
	DECLARE_SCOUTFS_ITEM_REF(ref);
	struct scoutfs_key key;
	struct data_region dr;
	void *addr;
	u64 pos;
	int ret;

	set_page_writeback(page);

	for_each_data_region(&dr, page, pos) {
		scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_DATA_KEY,
				dr.item_key);

		ret = scoutfs_dirty_item(sb, &key, SCOUTFS_MAX_ITEM_LEN, &ref);
		if (ret)
			break;

		addr = kmap_atomic(page);
		memcpy(ref.val + dr.item_off, addr + dr.page_off, dr.len);
		kunmap_atomic(addr);
		scoutfs_put_ref(&ref);
	}
	scoutfs_put_ref(&ref);

	if (ret) {
		SetPageError(page);
		mapping_set_error(&inode->i_data, ret);
	}
	end_page_writeback(page);
	unlock_page(page);
	return ret;
}

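/*
 * Lock and return the page cache page that will be copied into.  This
 * minimal pass doesn't read existing items into the page around the
 * copy; write_end zeroes the stale tail of a short copy instead.
 */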
static int scoutfs_write_begin(struct file *file, struct address_space *mapping,
			       loff_t pos, unsigned len, unsigned flags,
			       struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	struct page *page;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	*pagep = page;
	return 0;
}

static int scoutfs_write_end(struct file *file, struct address_space *mapping,
			     loff_t pos, unsigned len, unsigned copied,
			     struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned off;

	off = pos & (PAGE_CACHE_SIZE - 1);

	/* zero the stale tail of the copied region if we did a short copy */
	if (copied < len)
		zero_user_segment(page, off + copied, off + len);

	if (pos + copied > inode->i_size)
		i_size_write(inode, pos + copied);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	set_page_dirty(page);
	unlock_page(page);
	page_cache_release(page);
	return copied;
}

const struct address_space_operations scoutfs_file_aops = {
	.readpage	= scoutfs_readpage,
	.writepage	= scoutfs_writepage,
	.write_begin	= scoutfs_write_begin,
	.write_end	= scoutfs_write_end,
};

const struct file_operations scoutfs_file_fops = {
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= generic_file_aio_write,
};
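
For context, the basic read/write test the commit message mentions can be
sketched from userspace roughly like this (the mount point and file name
are hypothetical; note that the read is served from the page cache unless
it is dropped first, so forcing readpage takes a remount or drop_caches):

#include <assert.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[8192], out[8192];
	int fd;

	memset(buf, 'a', sizeof(buf));

	/* write two pages through write_begin/write_end */
	fd = open("/mnt/scoutfs/testfile", O_RDWR | O_CREAT | O_TRUNC, 0644);
	assert(fd >= 0);
	assert(write(fd, buf, sizeof(buf)) == (ssize_t)sizeof(buf));

	/* fsync pushes the dirty pages through writepage */
	assert(fsync(fd) == 0);

	/* read back and verify */
	assert(pread(fd, out, sizeof(out), 0) == (ssize_t)sizeof(out));
	assert(memcmp(buf, out, sizeof(out)) == 0);

	assert(close(fd) == 0);
	return 0;
}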

kmod/src/filerw.h Normal file (7 lines added)

@@ -0,0 +1,7 @@
#ifndef _SCOUTFS_FILERW_H_
#define _SCOUTFS_FILERW_H_

extern const struct address_space_operations scoutfs_file_aops;
extern const struct file_operations scoutfs_file_fops;

#endif

kmod/src/format.h

@@ -100,8 +100,14 @@ struct scoutfs_key {
 #define SCOUTFS_ROOT_INO 1
 
-#define SCOUTFS_INODE_KEY 128
-#define SCOUTFS_DIRENT_KEY 192
+/*
+ * Currently we sort keys by the numeric value of the types, but that
+ * isn't necessary.  We could have an arbitrary sort order.  So we
+ * don't have to stress about cleverly allocating the types.
+ */
+#define SCOUTFS_INODE_KEY 1
+#define SCOUTFS_DIRENT_KEY 2
+#define SCOUTFS_DATA_KEY 3
 
 struct scoutfs_ring_map_block {
 	struct scoutfs_block_header hdr;
@@ -203,6 +209,13 @@ struct scoutfs_item {
 	__le32 skip_next[0];
 } __packed;
 
+/*
+ * The max item size caps file data item lengths so that items fit in
+ * checksummed 4k blocks with a bit of expansion room.
+ */
+#define SCOUTFS_MAX_ITEM_LEN \
+	(SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_block_header) - 32)
+
 struct scoutfs_timespec {
 	__le64 sec;
 	__le32 nsec;
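
To make the key math concrete, here is a minimal userspace sketch of how
file byte offsets split into (item key, item offset) regions; the 16-byte
header size is a hypothetical stand-in for
sizeof(struct scoutfs_block_header):

#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE	4096
#define HDR_SIZE	16	/* stand-in for the real block header size */
#define MAX_ITEM_LEN	(BLOCK_SIZE - HDR_SIZE - 32)

int main(void)
{
	uint64_t pos = 0;

	/* walk the first 16k of a file, one item intersection at a time */
	while (pos < 16384) {
		uint64_t key = pos / MAX_ITEM_LEN;   /* file data item key */
		unsigned off = pos % MAX_ITEM_LEN;   /* offset within item */
		unsigned len = MAX_ITEM_LEN - off;   /* bytes left in item */

		printf("pos %llu -> key %llu off %u len %u\n",
		       (unsigned long long)pos, (unsigned long long)key,
		       off, len);
		pos += len;
	}
	return 0;
}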

kmod/src/inode.c

@@ -21,6 +21,7 @@
 #include "inode.h"
 #include "segment.h"
 #include "dir.h"
+#include "filerw.h"
 
 /*
  * XXX
@@ -68,10 +69,25 @@ void scoutfs_destroy_inode(struct inode *inode)
 static void set_inode_ops(struct inode *inode)
 {
 	switch (inode->i_mode & S_IFMT) {
+	/*
+	 * I guess we add a reg.c for regular files?  Or pagecache.c?
+	 * I guess that makes more sense.
+	 *
+	 * - page dirtying makes sure there's a dirty item
+	 * - sync writes back page cache pages
+	 * - writepage copies to dirty item
+	 * - crc calculated after copying
+	 * - pages can be pretty large
+	 * - tail items can be partial?
+	 * - tracing all over the place
+	 * - maybe just less than 4k is the answer?
+	 * - so allocation pulls the value back
+	 * - probably leave some overhead for header growth
+	 */
 	case S_IFREG:
-		// inode->i_mapping->a_ops = &scoutfs_file_aops;
+		inode->i_mapping->a_ops = &scoutfs_file_aops;
 		// inode->i_op = &scoutfs_file_iops;
-		// inode->i_fop = &scoutfs_file_fops;
+		inode->i_fop = &scoutfs_file_fops;
 		break;
 	case S_IFDIR:
 		inode->i_op = &scoutfs_dir_iops;