diff --git a/kmod/src/data.c b/kmod/src/data.c
index 7903e8d7..7181960f 100644
--- a/kmod/src/data.c
+++ b/kmod/src/data.c
@@ -1515,6 +1515,117 @@ out:
 	return ret;
 }
 
+/*
+ * Punch holes in offline extents.  This is a very specific tool that
+ * only does one job: it converts extents from offline to sparse.  It
+ * returns -EINVAL if it encounters an extent that has a block mapping.
+ * Extents in the region that aren't offline are left alone.  It ignores
+ * i_size completely; it does not test it, and does not update it.
+ *
+ * iblock and last are an inclusive range of small blocks.  data_version
+ * is not used here; the ioctl caller compares it with the inode before
+ * calling.
+ *
+ * The caller has the inode locked in the vfs and performed basic sanity
+ * checks.  We manage transactions and the extent_sem which is ordered
+ * inside the transaction.  Extents are modified in batches under each
+ * transaction hold so an error can leave partial progress behind.
+ */
+int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
+			       struct scoutfs_lock *lock)
+{
+	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
+	struct super_block *sb = inode->i_sb;
+	struct data_ext_args args = {
+		.ino = scoutfs_ino(inode),
+		.inode = inode,
+		.lock = lock,
+	};
+	struct scoutfs_extent ext;
+	LIST_HEAD(ind_locks);
+	int ret;
+	int i;
+
+	if (WARN_ON_ONCE(iblock > last)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* idiomatic to call start,last with 0,~0, clamp last to last possible */
+	last = min(last, SCOUTFS_BLOCK_SM_MAX);
+
+	ret = 0;
+	while (iblock <= last) {
+		ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false);
+		if (ret < 0)
+			break;
+
+		/* the hold succeeded, it has to be dropped if dirtying fails */
+		ret = scoutfs_dirty_inode_item(inode, lock);
+		if (ret < 0) {
+			scoutfs_release_trans(sb);
+			scoutfs_inode_index_unlock(sb, &ind_locks);
+			break;
+		}
+
+		down_write(&si->extent_sem);
+
+		/* bound the number of extents modified under each hold */
+		for (i = 0; i < 32 && (iblock <= last); i++) {
+			ret = scoutfs_ext_next(sb, &data_ext_ops, &args, iblock, 1, &ext);
+			if (ret < 0) {
+				/* ext is only examined on success */
+				if (ret == -ENOENT) {
+					iblock = last + 1;
+					ret = 0;
+				}
+				break;
+			}
+
+			/* done once the next extent starts past the region */
+			if (ext.start > last) {
+				iblock = last + 1;
+				ret = 0;
+				break;
+			}
+
+			if (ext.map) {
+				ret = -EINVAL;
+				break;
+			}
+
+			if (ext.flags & SEF_OFFLINE) {
+				/* only punch the part of the extent inside the region */
+				if (iblock > ext.start) {
+					ext.len -= iblock - ext.start;
+					ext.start = iblock;
+				}
+				ext.len = min(ext.len, last - ext.start + 1);
+				ext.flags &= ~SEF_OFFLINE;
+
+				ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
+						      ext.start, ext.len, ext.map, ext.flags);
+				if (ret < 0)
+					break;
+			}
+
+			iblock = ext.start + ext.len;
+		}
+
+		up_write(&si->extent_sem);
+
+		scoutfs_update_inode_item(inode, lock, &ind_locks);
+		scoutfs_release_trans(sb);
+		scoutfs_inode_index_unlock(sb, &ind_locks);
+
+		if (ret < 0)
+			break;
+	}
+
+out:
+	return ret;
+}
+
 /*
  * This copies to userspace :/
  */
diff --git a/kmod/src/data.h b/kmod/src/data.h
index a34854eb..8b2f5f60 100644
--- a/kmod/src/data.h
+++ b/kmod/src/data.h
@@ -57,6 +57,8 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
 int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
 			     u64 byte_len, struct inode *to, u64 to_off, bool to_stage,
 			     u64 data_version);
+int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
+			       struct scoutfs_lock *lock);
 
 int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
 			    u8 sef, u8 op, struct scoutfs_data_wait *ow,
diff --git a/kmod/src/ioctl.c b/kmod/src/ioctl.c
index fea7aae3..064321ed 100644
--- a/kmod/src/ioctl.c
+++ b/kmod/src/ioctl.c
@@ -1668,6 +1668,85 @@ out:
 	return ret;
 }
 
+/*
+ * Convert unmapped offline extents in the caller's byte region into
+ * sparse extents.  The arguments are validated, the inode is locked, and
+ * the caller's data_version is compared with the inode before the
+ * extents are modified by scoutfs_data_punch_offline().
+ */
+static long scoutfs_ioc_punch_offline(struct file *file, unsigned long arg)
+{
+	struct inode *inode = file_inode(file);
+	struct super_block *sb = inode->i_sb;
+	struct scoutfs_ioctl_punch_offline __user *upo = (void __user *)arg;
+	struct scoutfs_ioctl_punch_offline po;
+	struct scoutfs_lock *lock = NULL;
+	u64 iblock;
+	u64 last;
+	u64 tmp;
+	int ret;
+
+	if (copy_from_user(&po, upo, sizeof(po)))
+		return -EFAULT;
+
+	if (po.len == 0)
+		return 0;
+
+	if (check_add_overflow(po.offset, po.len - 1, &tmp) ||
+	    (po.offset & SCOUTFS_BLOCK_SM_MASK) ||
+	    (po.len & SCOUTFS_BLOCK_SM_MASK))
+		return -EOVERFLOW;
+
+	if (po.flags)
+		return -EINVAL;
+
+	ret = mnt_want_write_file(file);
+	if (ret < 0)
+		return ret;
+
+	inode_lock(inode);
+
+	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
+				 SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
+	if (ret)
+		goto out;
+
+	if (!S_ISREG(inode->i_mode)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!(file->f_mode & FMODE_WRITE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = inode_permission(KC_VFS_INIT_NS inode, MAY_WRITE);
+	if (ret < 0)
+		goto out;
+
+	if (scoutfs_inode_data_version(inode) != po.data_version) {
+		ret = -ESTALE;
+		goto out;
+	}
+
+	ret = scoutfs_inode_check_retention(inode);
+	if (ret)
+		goto out;
+
+	iblock = po.offset >> SCOUTFS_BLOCK_SM_SHIFT;
+	last = (po.offset + po.len - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
+
+	ret = scoutfs_data_punch_offline(inode, iblock, last, po.data_version, lock);
+
+out:
+	scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
+	inode_unlock(inode);
+	mnt_drop_write_file(file);
+
+	return ret;
+}
+
 long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	switch (cmd) {
@@ -1717,6 +1796,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return scoutfs_ioc_mod_quota_rule(file, arg, false);
 	case SCOUTFS_IOC_READ_XATTR_INDEX:
 		return scoutfs_ioc_read_xattr_index(file, arg);
+	case SCOUTFS_IOC_PUNCH_OFFLINE:
+		return scoutfs_ioc_punch_offline(file, arg);
 	}
 
 	return -ENOTTY;
diff --git a/kmod/src/ioctl.h b/kmod/src/ioctl.h
index 9f79839e..401a8d02 100644
--- a/kmod/src/ioctl.h
+++ b/kmod/src/ioctl.h
@@ -843,4 +843,36 @@ struct scoutfs_ioctl_read_xattr_index {
 #define SCOUTFS_IOC_READ_XATTR_INDEX \
 	_IOR(SCOUTFS_IOCTL_MAGIC, 23, struct scoutfs_ioctl_read_xattr_index)
 
+/*
+ * This is a limited and specific version of hole punching.  It's an
+ * archive layer operation that only converts unmapped offline extents
+ * into sparse extents.  It is intended to be used when restoring sparse
+ * files after the initial creation set the entire file size offline.
+ *
+ * The offset and len fields are in units of bytes and must be aligned
+ * to the small (4KiB) block size.  EOVERFLOW is returned if they aren't
+ * aligned or if the end of the region wraps.  A len of 0 succeeds
+ * without doing anything.  All offline extents covered by the region
+ * are converted into sparse online extents.  Extents that straddle the
+ * boundaries of the region are only converted inside the region.  Any
+ * existing sparse extents in the region are ignored.
+ *
+ * The flags field must be 0 or EINVAL is returned.
+ *
+ * The data_version must match the inode or ESTALE is returned.  The
+ * data_version is not modified by this operation.
+ *
+ * EINVAL is returned if any mapped extents are found in the region.  If
+ * an error is returned then partial progress may have been made.
+ */
+struct scoutfs_ioctl_punch_offline {
+	__u64 offset;
+	__u64 len;
+	__u64 data_version;
+	__u64 flags;
+};
+
+#define SCOUTFS_IOC_PUNCH_OFFLINE \
+	_IOW(SCOUTFS_IOCTL_MAGIC, 24, struct scoutfs_ioctl_punch_offline)
+
 #endif