mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-05 03:44:05 +00:00
scoutfs: add basic file page cache read and write
Add basic file data support by implementing the address space file and page read and write methods. This passes basic read/write tests but is only the seed of a final implementation. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o
|
||||
|
||||
scoutfs-y += block.o bloom.o chunk.o crc.o dir.o inode.o manifest.o msg.o \
|
||||
ring.o segment.o skip.o super.o
|
||||
scoutfs-y += block.o bloom.o chunk.o crc.o dir.o filerw.o inode.o manifest.o \
|
||||
msg.o ring.o segment.o skip.o super.o
|
||||
|
||||
218
kmod/src/filerw.c
Normal file
218
kmod/src/filerw.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* Copyright (C) 2016 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "segment.h"
|
||||
#include "inode.h"
|
||||
#include "key.h"
|
||||
#include "filerw.h"
|
||||
|
||||
/*
|
||||
* File data is stored in items just like everything else. This is very
|
||||
* easy to implement but incurs a copying overhead. We'll see how
|
||||
* expensive that gets.
|
||||
*
|
||||
* By making the max item size a bit less than the block size we can
|
||||
* still have room for the block header which gets us file data
|
||||
* checksums. File item key offsets are multiples of this max block
|
||||
* size though items can be smaller if the data is sparse. This lets us
|
||||
* do lookups for specific keys and take advantage of the bloom filters.
|
||||
*
|
||||
* This is a minimal first pass and will need more work. It'll need to
|
||||
* worry about enospc in writepage and cluster access for a start.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Track the intersection of the logical region of a file with a page
|
||||
* and file data item.
|
||||
*/
|
||||
struct data_region {
|
||||
u64 item_key;
|
||||
unsigned int page_off;
|
||||
unsigned short len;
|
||||
unsigned short item_off;
|
||||
};
|
||||
|
||||
/*
|
||||
* Map the file offset to its intersection with the page and item region.
|
||||
* Returns false if the byte position is outside the page.
|
||||
*/
|
||||
static bool map_data_region(struct data_region *dr, u64 pos, struct page *page)
|
||||
{
|
||||
if (pos >> PAGE_SHIFT != page->index)
|
||||
return false;
|
||||
|
||||
dr->page_off = pos & ~PAGE_MASK;
|
||||
|
||||
dr->item_off = do_div(pos, SCOUTFS_MAX_ITEM_LEN);
|
||||
dr->item_key = pos;
|
||||
|
||||
dr->len = min(SCOUTFS_MAX_ITEM_LEN - dr->item_off,
|
||||
PAGE_SIZE - dr->page_off);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Walk every data region that intersects @page, in file order.
 *
 * @dr:   pointer to a struct data_region that is filled for each step
 * @page: the page being walked
 * @pos:  u64 lvalue used as the cursor; holds the file position of the
 *        current region inside the loop body
 *
 * The walk ends when @pos advances past the page and map_data_region()
 * returns false.  The arguments are evaluated more than once so they
 * must not have side effects.
 */
#define for_each_data_region(dr, page, pos)				\
	for ((pos) = (u64)(page)->index << PAGE_SHIFT;			\
	     map_data_region((dr), (pos), (page)); (pos) += (dr)->len)
|
||||
|
||||
/*
|
||||
* Copy the contents of file data items into the page. If we don't
|
||||
* find an item then we zero that region of the page.
|
||||
*
|
||||
* XXX i_size?
|
||||
* XXX async?
|
||||
*/
|
||||
static int scoutfs_readpage(struct file *file, struct page *page)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
DECLARE_SCOUTFS_ITEM_REF(ref);
|
||||
struct scoutfs_key key;
|
||||
struct data_region dr;
|
||||
int ret = 0;
|
||||
void *addr;
|
||||
u64 pos;
|
||||
|
||||
for_each_data_region(&dr, page, pos) {
|
||||
scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_DATA_KEY,
|
||||
dr.item_key);
|
||||
|
||||
ret = scoutfs_read_item(sb, &key, &ref);
|
||||
if (ret == -ENOENT) {
|
||||
addr = kmap_atomic(page);
|
||||
memset(addr + dr.page_off, 0, dr.len);
|
||||
kunmap_atomic(addr);
|
||||
continue;
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
addr = kmap_atomic(page);
|
||||
memcpy(addr + dr.page_off, ref.val + dr.item_off, dr.len);
|
||||
kunmap_atomic(addr);
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
SetPageUptodate(page);
|
||||
unlock_page(page);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the contents of the page into file items. Data integrity syncs
|
||||
* will later write the dirty segment to the device.
|
||||
*
|
||||
* XXX zeroing regions of data items?
|
||||
* XXX wbc counters?
|
||||
* XXX reserve space so dirty item doesn't get enospc -- our "delalloc"?
|
||||
*/
|
||||
static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
|
||||
{
|
||||
struct inode *inode = page->mapping->host;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
DECLARE_SCOUTFS_ITEM_REF(ref);
|
||||
struct scoutfs_key key;
|
||||
struct data_region dr;
|
||||
void *addr;
|
||||
u64 pos;
|
||||
int ret;
|
||||
|
||||
set_page_writeback(page);
|
||||
|
||||
for_each_data_region(&dr, page, pos) {
|
||||
scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_DATA_KEY,
|
||||
dr.item_key);
|
||||
|
||||
ret = scoutfs_dirty_item(sb, &key, SCOUTFS_MAX_ITEM_LEN, &ref);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
addr = kmap_atomic(page);
|
||||
memcpy(ref.val + dr.item_off, addr + dr.page_off, dr.len);
|
||||
kunmap_atomic(addr);
|
||||
|
||||
scoutfs_put_ref(&ref);
|
||||
|
||||
}
|
||||
|
||||
scoutfs_put_ref(&ref);
|
||||
|
||||
if (ret) {
|
||||
SetPageError(page);
|
||||
mapping_set_error(&inode->i_data, ret);
|
||||
}
|
||||
|
||||
end_page_writeback(page);
|
||||
unlock_page(page);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scoutfs_write_begin(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata)
|
||||
{
|
||||
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
|
||||
struct page *page;
|
||||
|
||||
page = grab_cache_page_write_begin(mapping, index, flags);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
*pagep = page;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int scoutfs_write_end(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned off;
|
||||
|
||||
off = pos & (PAGE_CACHE_SIZE - 1);
|
||||
|
||||
/* zero the stale part of the page if we did a short copy */
|
||||
if (copied < len)
|
||||
zero_user_segment(page, off + copied, len);
|
||||
|
||||
if (pos + copied > inode->i_size)
|
||||
i_size_write(inode, pos + copied);
|
||||
|
||||
if (!PageUptodate(page))
|
||||
SetPageUptodate(page);
|
||||
set_page_dirty(page);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
|
||||
return copied;
|
||||
}
|
||||
|
||||
const struct address_space_operations scoutfs_file_aops = {
|
||||
.readpage = scoutfs_readpage,
|
||||
.writepage = scoutfs_writepage,
|
||||
.write_begin = scoutfs_write_begin,
|
||||
.write_end = scoutfs_write_end,
|
||||
};
|
||||
|
||||
const struct file_operations scoutfs_file_fops = {
|
||||
.read = do_sync_read,
|
||||
.write = do_sync_write,
|
||||
.aio_read = generic_file_aio_read,
|
||||
.aio_write = generic_file_aio_write,
|
||||
};
|
||||
7
kmod/src/filerw.h
Normal file
7
kmod/src/filerw.h
Normal file
@@ -0,0 +1,7 @@
|
||||
#ifndef _SCOUTFS_FILERW_H_
|
||||
#define _SCOUTFS_FILERW_H_
|
||||
|
||||
extern const struct address_space_operations scoutfs_file_aops;
|
||||
extern const struct file_operations scoutfs_file_fops;
|
||||
|
||||
#endif
|
||||
@@ -100,8 +100,14 @@ struct scoutfs_key {
|
||||
|
||||
#define SCOUTFS_ROOT_INO 1
|
||||
|
||||
#define SCOUTFS_INODE_KEY 128
|
||||
#define SCOUTFS_DIRENT_KEY 192
|
||||
/*
|
||||
* Currently we sort keys by the numeric value of the types, but that
|
||||
* isn't necessary. We could have an arbitrary sort order. So we don't
|
||||
* have to stress about cleverly allocating the types.
|
||||
*/
|
||||
#define SCOUTFS_INODE_KEY 1
|
||||
#define SCOUTFS_DIRENT_KEY 2
|
||||
#define SCOUTFS_DATA_KEY 3
|
||||
|
||||
struct scoutfs_ring_map_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
@@ -203,6 +209,13 @@ struct scoutfs_item {
|
||||
__le32 skip_next[0];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* The max item length caps the length of file data items so that they fit in checksummed
|
||||
* 4k blocks with a bit of expansion room.
|
||||
*/
|
||||
#define SCOUTFS_MAX_ITEM_LEN \
|
||||
(SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_block_header) - 32)
|
||||
|
||||
struct scoutfs_timespec {
|
||||
__le64 sec;
|
||||
__le32 nsec;
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "inode.h"
|
||||
#include "segment.h"
|
||||
#include "dir.h"
|
||||
#include "filerw.h"
|
||||
|
||||
/*
|
||||
* XXX
|
||||
@@ -68,10 +69,25 @@ void scoutfs_destroy_inode(struct inode *inode)
|
||||
static void set_inode_ops(struct inode *inode)
|
||||
{
|
||||
switch (inode->i_mode & S_IFMT) {
|
||||
/*
|
||||
* I guess we add a reg.c for regular files? Or pagecache.c?
|
||||
* I guess that makes more sense.
|
||||
*
|
||||
* - page dirtying makes sure there's a dirty item
|
||||
* - sync writes back page cache pages
|
||||
* - writepage copies to dirty item
|
||||
* - crc calculated after copying
|
||||
* - pages can be pretty large
|
||||
* - tail items can be partial?
|
||||
* - tracing all over the place
|
||||
* - maybe just less than 4k is the answer?
|
||||
* - so allocation pulls the value back
|
||||
* - probably leave some overhead for header growth
|
||||
*/
|
||||
case S_IFREG:
|
||||
// inode->i_mapping->a_ops = &scoutfs_file_aops;
|
||||
inode->i_mapping->a_ops = &scoutfs_file_aops;
|
||||
// inode->i_op = &scoutfs_file_iops;
|
||||
// inode->i_fop = &scoutfs_file_fops;
|
||||
inode->i_fop = &scoutfs_file_fops;
|
||||
break;
|
||||
case S_IFDIR:
|
||||
inode->i_op = &scoutfs_dir_iops;
|
||||
|
||||
Reference in New Issue
Block a user