diff --git a/kmod/src/data.c b/kmod/src/data.c
index cee53511..ce31e642 100644
--- a/kmod/src/data.c
+++ b/kmod/src/data.c
@@ -732,9 +732,9 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock,
 	if (ext.len)
 		trace_scoutfs_data_get_block_intersection(sb, &ext);
 
-	/* fail read and write if it's offline and we're not staging */
-	if ((ext.flags & SEF_OFFLINE) && !si->staging) {
-		ret = -EINVAL;
+	/* non-staging callers should have waited on offline blocks */
+	if (WARN_ON_ONCE((ext.flags & SEF_OFFLINE) && !si->staging)) {
+		ret = -EIO;
 		goto out;
 	}
 
@@ -780,14 +780,28 @@ out:
 /*
  * This is almost never used.  We can't block on a cluster lock while
  * holding the page lock because lock invalidation gets the page lock
- * while blocking locks.  If we can't use an existing lock then we drop
- * the page lock and try again.
+ * while blocking locks.  If a non blocking lock attempt fails we unlock
+ * the page and block acquiring the lock.  We unlocked the page so it
+ * could have been truncated away, or whatever, so we return
+ * AOP_TRUNCATED_PAGE to have the caller try again.
+ *
+ * A similar process happens if we try to read from an offline extent
+ * that a caller hasn't already waited for.  Instead of blocking
+ * acquiring the lock we block waiting for the offline extent.  The page
+ * lock protects the page from release while we're checking and
+ * reading the extent.
+ *
+ * We can return errors from locking and checking offline extents.  The
+ * page is unlocked if we return an error.
  */
 static int scoutfs_readpage(struct file *file, struct page *page)
 {
 	struct inode *inode = file->f_inode;
+	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *inode_lock = NULL;
+	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
+	DECLARE_DATA_WAIT(dw);
 	int flags;
 	int ret;
 
@@ -809,27 +823,77 @@ static int scoutfs_readpage(struct file *file, struct page *page)
 		return ret;
 	}
 
+	if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
+		ret = scoutfs_data_wait_check(inode, page_offset(page),
+					      PAGE_CACHE_SIZE, SEF_OFFLINE,
+					      SCOUTFS_IOC_DWO_READ, &dw,
+					      inode_lock);
+		if (ret != 0) {
+			unlock_page(page);
+			scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
+			scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
+		}
+		if (ret > 0) {
+			ret = scoutfs_data_wait(inode, &dw);
+			if (ret == 0)
+				ret = AOP_TRUNCATED_PAGE;
+		}
+		if (ret != 0)
+			return ret;
+	}
+
 	ret = mpage_readpage(page, scoutfs_get_block);
+
 	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
+	scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
+
 	return ret;
 }
 
+/*
+ * Opportunistic read-ahead, which may throw pages away.  Pages over
+ * offline extents that the caller didn't wait for are dropped rather
+ * than waited on; stagers should stage in enormous chunks so read-ahead
+ * can ramp up within each staged region.  The offline check is cheap
+ * for inodes with no offline extents.  On error the unread pages are
+ * left on the list for the caller to release.
+ */
 static int scoutfs_readpages(struct file *file, struct address_space *mapping,
 			     struct list_head *pages, unsigned nr_pages)
 {
 	struct inode *inode = file->f_inode;
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *inode_lock = NULL;
+	struct page *page;
+	struct page *tmp;
 	int ret;
 
 	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
 				 SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
 	if (ret)
-		return ret;
+		goto out;
+
+	list_for_each_entry_safe(page, tmp, pages, lru) {
+		ret = scoutfs_data_wait_check(inode, page_offset(page),
+					      PAGE_CACHE_SIZE, SEF_OFFLINE,
+					      SCOUTFS_IOC_DWO_READ, NULL,
+					      inode_lock);
+		if (ret < 0)
+			goto out;
+		if (ret > 0) {
+			list_del(&page->lru);
+			page_cache_release(page);
+			if (--nr_pages == 0) {
+				ret = 0;
+				goto out;
+			}
+		}
+	}
 
 	ret = mpage_readpages(mapping, pages, nr_pages, scoutfs_get_block);
-
+out:
 	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
+	BUG_ON(ret >= 0 && !list_empty(pages));
 	return ret;
 }
 
@@ -1249,6 +1313,239 @@ out:
 	return ret;
 }
 
+/*
+ * Link a waiter into the tree, ordered by ino, then iblock, and finally
+ * by the waiter's address so tasks on the same block can all be stored.
+ */
+static void insert_offline_waiting(struct rb_root *root,
+				   struct scoutfs_data_wait *ins)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *above = NULL;
+	struct scoutfs_data_wait *cur;
+	int cmp;
+
+	while (*link) {
+		above = *link;
+		cur = rb_entry(above, struct scoutfs_data_wait, node);
+
+		cmp = scoutfs_cmp_u64s(ins->ino, cur->ino);
+		if (cmp == 0)
+			cmp = scoutfs_cmp_u64s(ins->iblock, cur->iblock);
+		if (cmp == 0)
+			cmp = scoutfs_cmp(ins, cur);
+
+		link = cmp < 0 ? &above->rb_left : &above->rb_right;
+	}
+
+	rb_link_node(&ins->node, above, link);
+	rb_insert_color(&ins->node, root);
+}
+
+/* Return the first waiter at or after ino,iblock, or NULL if none. */
+static struct scoutfs_data_wait *next_data_wait(struct rb_root *root, u64 ino,
+						u64 iblock)
+{
+	struct rb_node *pos = root->rb_node;
+	struct scoutfs_data_wait *found = NULL;
+	struct scoutfs_data_wait *cur;
+	int cmp;
+
+	while (pos) {
+		cur = rb_entry(pos, struct scoutfs_data_wait, node);
+
+		cmp = scoutfs_cmp_u64s(ino, cur->ino);
+		if (cmp == 0)
+			cmp = scoutfs_cmp_u64s(iblock, cur->iblock);
+		/* keep going left on equal keys to reach the first task */
+		if (cmp > 0) {
+			pos = pos->rb_right;
+		} else {
+			found = cur;
+			pos = pos->rb_left;
+		}
+	}
+
+	return found;
+}
+
+/* Return the waiter that follows dw in tree order, NULL after the last. */
+static struct scoutfs_data_wait *dw_next(struct scoutfs_data_wait *dw)
+{
+	struct rb_node *after = rb_next(&dw->node);
+
+	return after ? rb_entry(after, struct scoutfs_data_wait, node) : NULL;
+}
+
+/*
+ * Check if we should wait by looking for extents whose flags match.
+ * Returns 0 if no extent matched (or len is 0) and -errno on errors.
+ *
+ * The caller must have locked the extents before calling, both across
+ * mounts and within this mount.
+ *
+ * Returns 1 if any file extents in the caller's region matched.  If the
+ * wait struct is provided then it is initialized to be woken when the
+ * extents change after the caller unlocks after the check.  The caller
+ * must come through _data_wait() to clean up the wait struct if we set
+ * it up.
+ */
+int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
+			    u8 sef, u8 op, struct scoutfs_data_wait *dw,
+			    struct scoutfs_lock *lock)
+{
+	struct super_block *sb = inode->i_sb;
+	DECLARE_DATA_WAIT_ROOT(sb, rt);
+	DECLARE_DATA_WAITQ(inode, wq);
+	struct scoutfs_extent ext = {0,};
+	u64 iblock;
+	u64 last_block;
+	u64 on;
+	u64 off;
+	int ret = 0;
+
+	if (WARN_ON_ONCE(sef & SEF_UNKNOWN) ||
+	    WARN_ON_ONCE(op & SCOUTFS_IOC_DWO_UNKNOWN) ||
+	    WARN_ON_ONCE(dw && !RB_EMPTY_NODE(&dw->node)) ||
+	    WARN_ON_ONCE(pos + len < pos)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (len == 0)	/* empty region; last_block math assumes len > 0 */
+		goto out;
+	if ((sef & SEF_OFFLINE)) {
+		scoutfs_inode_get_onoff(inode, &on, &off);
+		if (off == 0)	/* cheap exit: no offline blocks counted */
+			goto out;
+	}
+
+	iblock = pos >> SCOUTFS_BLOCK_SHIFT;
+	last_block = (pos + len - 1) >> SCOUTFS_BLOCK_SHIFT;
+
+	while(iblock <= last_block) {
+		scoutfs_extent_init(&ext, SCOUTFS_FILE_EXTENT_TYPE,
+				    scoutfs_ino(inode), iblock, 1, 0, 0);
+		ret = scoutfs_extent_next(sb, data_extent_io, &ext, lock);
+		if (ret < 0) {
+			if (ret == -ENOENT)
+				ret = 0;
+			break;
+		}
+
+		if (ext.start > last_block)
+			break;
+
+		if (sef & ext.flags) {
+			if (dw) {
+				dw->chg = atomic64_read(&wq->changed);
+				dw->ino = scoutfs_ino(inode);
+				dw->iblock = max(iblock, ext.start);
+				dw->op = op;
+
+				spin_lock(&rt->lock);
+				insert_offline_waiting(&rt->root, dw);
+				spin_unlock(&rt->lock);
+			}
+
+			ret = 1;
+			break;
+		}
+
+		iblock = ext.start + ext.len;
+	}
+
+out:
+	trace_scoutfs_data_wait_check(sb, scoutfs_ino(inode), pos, len,
+				      sef, op, ext.start, ext.len, ext.flags,
+				      ret);
+	return ret;
+}
+
+bool scoutfs_data_wait_found(struct scoutfs_data_wait *dw)
+{
+	return !RB_EMPTY_NODE(&dw->node); /* dw is linked in the tree */
+}
+
+/* Check each non-empty iovec segment's region, stopping on a match or error. */
+int scoutfs_data_wait_check_iov(struct inode *inode, const struct iovec *iov,
+				unsigned long nr_segs, loff_t pos, u8 sef,
+				u8 op, struct scoutfs_data_wait *dw,
+				struct scoutfs_lock *lock)
+{
+	const struct iovec *seg;
+	int ret = 0;
+
+	for (seg = iov; seg < iov + nr_segs; seg++) {
+		if (seg->iov_len == 0)
+			continue;
+
+		ret = scoutfs_data_wait_check(inode, pos, seg->iov_len, sef,
+					      op, dw, lock);
+		if (ret)
+			return ret;
+		pos += seg->iov_len;
+	}
+
+	return ret;
+}
+
+int scoutfs_data_wait(struct inode *inode, struct scoutfs_data_wait *dw)
+{
+	DECLARE_DATA_WAIT_ROOT(inode->i_sb, rt);
+	DECLARE_DATA_WAITQ(inode, wq);
+	int ret;
+	/* sleep until the change count differs from dw->chg, or a signal */
+	ret = wait_event_interruptible(wq->waitq,
+					atomic64_read(&wq->changed) != dw->chg);
+	/* always unlink dw, even when interrupted, so it can be used again */
+	spin_lock(&rt->lock);
+	rb_erase(&dw->node, &rt->root);
+	RB_CLEAR_NODE(&dw->node);
+	spin_unlock(&rt->lock);
+
+	return ret;
+}
+
+void scoutfs_data_wait_changed(struct inode *inode)
+{
+	DECLARE_DATA_WAITQ(inode, wq);
+	/* bump the count before waking so waiters see their condition met */
+	atomic64_inc(&wq->changed);
+	wake_up(&wq->waitq);
+}
+
+int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
+			 struct scoutfs_ioctl_data_waiting_entry *dwe,
+			 unsigned int nr)
+{
+	DECLARE_DATA_WAIT_ROOT(sb, rt);
+	struct scoutfs_data_wait *dw;
+	int ret = 0;
+
+	spin_lock(&rt->lock);
+	/* fill up to nr entries at or after ino,iblock; returns the count */
+	dw = next_data_wait(&rt->root, ino, iblock);
+	while (dw && ret < nr) {
+		/* one entry per ino,iblock, seeded from its first waiter */
+		dwe->ino = dw->ino;
+		dwe->iblock = dw->iblock;
+		dwe->op = dw->op;
+		/* fold in the ops of other tasks waiting on the same block */
+		while ((dw = dw_next(dw)) &&
+		       (dw->ino == dwe->ino && dw->iblock == dwe->iblock)) {
+			dwe->op |= dw->op;
+		}
+
+		dwe++;
+		ret++;
+	}
+
+	spin_unlock(&rt->lock);
+
+	return ret;
+}
+
 const struct address_space_operations scoutfs_file_aops = {
 	.readpage		= scoutfs_readpage,
 	.readpages		= scoutfs_readpages,
diff --git a/kmod/src/data.h b/kmod/src/data.h
index bd9f84fa..d45114da 100644
--- a/kmod/src/data.h
+++ b/kmod/src/data.h
@@ -1,6 +1,41 @@
 #ifndef _SCOUTFS_FILERW_H_
 #define _SCOUTFS_FILERW_H_
 
+struct scoutfs_lock;
+struct scoutfs_ioctl_data_waiting_entry;
+
+struct scoutfs_data_wait_root {
+	spinlock_t lock;	/* held around inserts, erases and walks of root */
+	struct rb_root root;	/* scoutfs_data_wait nodes of waiting tasks */
+};
+
+#define DECLARE_DATA_WAIT_ROOT(sb, nm) \
+	struct scoutfs_data_wait_root *nm = &SCOUTFS_SB(sb)->data_wait_root
+
+struct scoutfs_data_waitq {
+	atomic64_t changed;		/* bumped by scoutfs_data_wait_changed() */
+	wait_queue_head_t waitq;	/* scoutfs_data_wait() sleeps on this */
+};
+
+#define DECLARE_DATA_WAITQ(in, nm) \
+	struct scoutfs_data_waitq *nm = &SCOUTFS_I(in)->data_waitq
+
+/*
+ * Tasks can wait for data extents; each waiter sits in data_wait_root.
+ */
+struct scoutfs_data_wait {
+	struct rb_node node;	/* sorted by ino, iblock, then address */
+	u64 chg;		/* data_waitq.changed sampled at check time */
+	u64 ino;
+	u64 iblock;		/* first block found matching in the region */
+	u8 op;			/* SCOUTFS_IOC_DWO_* passed by the checker */
+};
+
+#define DECLARE_DATA_WAIT(nm)						\
+	struct scoutfs_data_wait nm = {					\
+		.node.__rb_parent_color = (unsigned long)(&nm.node),	\
+	}
+
 extern const struct address_space_operations scoutfs_file_aops;
 extern const struct file_operations scoutfs_file_fops;
 
@@ -11,6 +46,21 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			u64 start, u64 len);
 long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len);
 
+/* data waits: check a region for matching extents, then sleep on changes */
+int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
+			    u8 sef, u8 op, struct scoutfs_data_wait *dw,
+			    struct scoutfs_lock *lock);
+int scoutfs_data_wait_check_iov(struct inode *inode, const struct iovec *iov,
+				unsigned long nr_segs, loff_t pos, u8 sef,
+				u8 op, struct scoutfs_data_wait *dw,
+				struct scoutfs_lock *lock);
+bool scoutfs_data_wait_found(struct scoutfs_data_wait *dw);
+int scoutfs_data_wait(struct inode *inode, struct scoutfs_data_wait *dw);
+void scoutfs_data_wait_changed(struct inode *inode);
+int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
+			 struct scoutfs_ioctl_data_waiting_entry *dwe,
+			 unsigned int nr);
+
 int scoutfs_data_setup(struct super_block *sb);
 void scoutfs_data_destroy(struct super_block *sb);
 
diff --git a/kmod/src/file.c b/kmod/src/file.c
index f78e5721..765e5f1e 100644
--- a/kmod/src/file.c
+++ b/kmod/src/file.c
@@ -39,15 +39,40 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *inode_lock = NULL;
 	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
+	DECLARE_DATA_WAIT(dw);
 	int ret;
 
+retry:
 	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
 				 SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
-	if (ret == 0) {
-		scoutfs_per_task_add(&si->pt_data_lock, &pt_ent, inode_lock);
-		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
-		scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
-		scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
+	if (ret)
+		goto out;
+
+	if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
+		/* protect checked extents from stage/release */
+		mutex_lock(&inode->i_mutex);
+		atomic_inc(&inode->i_dio_count);
+		mutex_unlock(&inode->i_mutex);
+
+		ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
+						  SEF_OFFLINE,
+						  SCOUTFS_IOC_DWO_READ,
+						  &dw, inode_lock);
+		if (ret != 0)
+			goto out;
+	}
+
+	ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+
+out:
+	if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent))
+		inode_dio_done(inode);
+	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
+
+	if (scoutfs_data_wait_found(&dw)) {
+		ret = scoutfs_data_wait(inode, &dw);
+		if (ret == 0)
+			goto retry;
 	}
 
 	return ret;
@@ -62,11 +87,13 @@ ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *inode_lock = NULL;
 	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
+	DECLARE_DATA_WAIT(dw);
 	int ret;
 
 	if (iocb->ki_left == 0) /* Does this even happen? */
 		return 0;
 
+retry:
 	mutex_lock(&inode->i_mutex);
 	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
 				 SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
@@ -77,16 +104,31 @@ ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (ret)
 		goto out;
 
-	scoutfs_per_task_add(&si->pt_data_lock, &pt_ent, inode_lock);
+	if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
+		/* data_version is per inode, whole file must be online */
+		ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode),
+					      SEF_OFFLINE,
+					      SCOUTFS_IOC_DWO_WRITE,
+					      &dw, inode_lock);
+		if (ret != 0)
+			goto out;
+	}
 
 	/* XXX: remove SUID bit */
 
 	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+
 out:
 	scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
 	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_WRITE);
 	mutex_unlock(&inode->i_mutex);
 
+	if (scoutfs_data_wait_found(&dw)) {
+		ret = scoutfs_data_wait(inode, &dw);
+		if (ret == 0)
+			goto retry;
+	}
+
 	if (ret > 0 || ret == -EIOCBQUEUED) {
 		ssize_t err;
 
diff --git a/kmod/src/format.h b/kmod/src/format.h
index 936b0717..9fcbc082 100644
--- a/kmod/src/format.h
+++ b/kmod/src/format.h
@@ -390,8 +390,9 @@ struct scoutfs_file_extent {
 	__u8 flags;
 } __packed;
 
-#define SEF_OFFLINE	0x1
-#define SEF_UNWRITTEN	0x2
+#define SEF_OFFLINE	(1 << 0)
+#define SEF_UNWRITTEN	(1 << 1)
+#define SEF_UNKNOWN	(U8_MAX << 2)	/* every bit above the defined flags */
 
 /*
  * The first xattr part item has a header that describes the xattr.  The
diff --git a/kmod/src/inode.c b/kmod/src/inode.c
index b9e6fcf9..3bdc756a 100644
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -70,6 +70,8 @@ static void scoutfs_inode_ctor(void *obj)
 	seqcount_init(&ci->seqcount);
 	ci->staging = false;
 	scoutfs_per_task_init(&ci->pt_data_lock);
+	atomic64_set(&ci->data_waitq.changed, 0);
+	init_waitqueue_head(&ci->data_waitq.waitq);
 	init_rwsem(&ci->xattr_rwsem);
 	RB_CLEAR_NODE(&ci->writeback_node);
 	spin_lock_init(&ci->ino_alloc.lock);
@@ -340,6 +342,9 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
 	if (ret)
 		return ret;
 
+	if (new_size != i_size_read(inode))
+		scoutfs_inode_inc_data_version(inode);
+
 	truncate_setsize(inode, new_size);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	if (truncate)
@@ -394,11 +399,22 @@ int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock)
 	return ret ? ret : err;
 }
 
+/*
+ * If we're changing the file size then the contents of the file are
+ * changing and we increment the data_version.  This would prevent
+ * staging because the data_version is per-inode today, not per-extent.
+ * So if there are any offline extents within the new size then we need
+ * to stage them before we truncate.  And this is called with the
+ * i_mutex held which would prevent staging so we release it and
+ * re-acquire it.  Ideally we'd fix this so that we can acquire the lock
+ * instead of the caller.
+ */
 int scoutfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *lock = NULL;
+	DECLARE_DATA_WAIT(dw);
 	LIST_HEAD(ind_locks);
 	bool truncate = false;
 	u64 attr_size;
@@ -406,6 +422,7 @@ int scoutfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	trace_scoutfs_setattr(dentry, attr);
 
+retry:
 	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
 				 SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
 	if (ret)
@@ -427,6 +444,28 @@ int scoutfs_setattr(struct dentry *dentry, struct iattr *attr)
 		if (ret)
 			goto out;
 
+		/* data_version is per inode, all must be online */
+		if (attr_size > 0 && attr_size != i_size_read(inode)) {
+			ret = scoutfs_data_wait_check(inode, 0, attr_size,
+						SEF_OFFLINE,
+						SCOUTFS_IOC_DWO_CHANGE_SIZE,
+						&dw, lock);
+			if (ret < 0)
+				goto out;
+			if (scoutfs_data_wait_found(&dw)) {
+				scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
+
+				/* XXX callee locks instead? */
+				mutex_unlock(&inode->i_mutex);
+				ret = scoutfs_data_wait(inode, &dw);
+				mutex_lock(&inode->i_mutex);
+
+				if (ret == 0)
+					goto retry;
+				goto out;
+			}
+		}
+
 		/* truncating to current size truncates extents past size */
 		truncate = i_size_read(inode) >= attr_size;
 
@@ -532,6 +571,10 @@ void scoutfs_inode_add_onoff(struct inode *inode, s64 on, s64 off)
 		write_seqcount_end(&si->seqcount);
 		preempt_enable();
 	}
+
+	/* any time offline extents decreased we try and wake waiters */
+	if (inode && off < 0)
+		scoutfs_data_wait_changed(inode);
 }
 
 static u64 read_seqcount_u64(struct inode *inode, u64 *val)
diff --git a/kmod/src/inode.h b/kmod/src/inode.h
index 7ae34de8..0ccd0184 100644
--- a/kmod/src/inode.h
+++ b/kmod/src/inode.h
@@ -6,6 +6,7 @@
 #include "per_task.h"
 #include "count.h"
 #include "format.h"
+#include "data.h"
 
 struct scoutfs_lock;
 
@@ -48,8 +49,10 @@ struct scoutfs_inode_info {
 	seqcount_t seqcount;
 	bool staging;			/* holder of i_mutex is staging */
 	struct scoutfs_per_task pt_data_lock;
+	struct scoutfs_data_waitq data_waitq;
 	struct rw_semaphore xattr_rwsem;
 	struct rb_node writeback_node;
+
 	struct inode inode;
 };
 
diff --git a/kmod/src/ioctl.c b/kmod/src/ioctl.c
index 2c1fa74e..738173e9 100644
--- a/kmod/src/ioctl.c
+++ b/kmod/src/ioctl.c
@@ -541,6 +541,56 @@ static long scoutfs_ioc_item_cache_keys(struct file *file, unsigned long arg)
 	return ret ?: total;
 }
 
+static bool inc_wrapped(u64 *ino, u64 *iblock)
+{
+	return (++(*iblock) == 0) && (++(*ino) == 0); /* carry into ino */
+}
+
+static long scoutfs_ioc_data_waiting(struct file *file, unsigned long arg)
+{
+	struct super_block *sb = file_inode(file)->i_sb;
+	struct scoutfs_ioctl_data_waiting idw;
+	struct scoutfs_ioctl_data_waiting_entry __user *udwe;
+	struct scoutfs_ioctl_data_waiting_entry dwe[16];
+	unsigned int nr;
+	int total;
+	int ret;
+
+	if (copy_from_user(&idw, (void __user *)arg, sizeof(idw)))
+		return -EFAULT;
+
+	if (idw.flags & SCOUTFS_IOC_DATA_WAITING_FLAGS_UNKNOWN)
+		return -EINVAL;
+	/* copy waiters out in batches of up to ARRAY_SIZE(dwe) entries */
+	udwe = (void __user *)(long)idw.ents_ptr;
+	total = 0;
+	ret = 0;
+	while (idw.ents_nr && !inc_wrapped(&idw.after_ino, &idw.after_iblock)) {
+		nr = min_t(size_t, idw.ents_nr, ARRAY_SIZE(dwe));
+
+		ret = scoutfs_data_waiting(sb, idw.after_ino, idw.after_iblock,
+					   dwe, nr);
+		BUG_ON(ret > nr); /* stack overflow \o/ */
+		if (ret <= 0)
+			break;
+
+		if (copy_to_user(udwe, dwe, ret * sizeof(dwe[0]))) {
+			ret = -EFAULT;
+			break;
+		}
+		/* the next pass searches from just past the last entry copied */
+		idw.after_ino = dwe[ret - 1].ino;
+		idw.after_iblock = dwe[ret - 1].iblock;
+
+		udwe += ret;
+		idw.ents_nr -= ret;
+		total += ret;
+		ret = 0;
+	}
+	/* an error is returned in place of any count already copied out */
+	return ret ?: total;
+}
+
 long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	switch (cmd) {
@@ -556,6 +606,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return scoutfs_ioc_stat_more(file, arg);
 	case SCOUTFS_IOC_ITEM_CACHE_KEYS:
 		return scoutfs_ioc_item_cache_keys(file, arg);
+	case SCOUTFS_IOC_DATA_WAITING:
+		return scoutfs_ioc_data_waiting(file, arg);
 	}
 
 	return -ENOTTY;
diff --git a/kmod/src/ioctl.h b/kmod/src/ioctl.h
index 915a130b..1b592522 100644
--- a/kmod/src/ioctl.h
+++ b/kmod/src/ioctl.h
@@ -229,4 +229,28 @@ enum {
 #define SCOUTFS_IOC_ITEM_CACHE_KEYS _IOW(SCOUTFS_IOCTL_MAGIC, 8, \
 					 struct scoutfs_ioctl_item_cache_keys)
 
+struct scoutfs_ioctl_data_waiting_entry {
+	__u64 ino;	/* inode with at least one waiting task */
+	__u64 iblock;	/* block that the tasks are waiting on */
+	__u8 op;	/* SCOUTFS_IOC_DWO_* bits of every waiter on the block */
+} __packed;
+
+#define SCOUTFS_IOC_DWO_READ		(1 << 0)
+#define SCOUTFS_IOC_DWO_WRITE		(1 << 1)
+#define SCOUTFS_IOC_DWO_CHANGE_SIZE	(1 << 2)
+#define SCOUTFS_IOC_DWO_UNKNOWN		(U8_MAX << 3)	/* undefined op bits */
+
+struct scoutfs_ioctl_data_waiting {
+	__u64 flags;		/* no flags are defined, must be 0 */
+	__u64 after_ino;	/* results start after this ino,iblock */
+	__u64 after_iblock;
+	__u64 ents_ptr;		/* user array of data_waiting_entry structs */
+	__u16 ents_nr;		/* number of entries ents_ptr can hold */
+} __packed;
+
+#define SCOUTFS_IOC_DATA_WAITING_FLAGS_UNKNOWN		(U64_MAX << 0)
+
+#define SCOUTFS_IOC_DATA_WAITING _IOW(SCOUTFS_IOCTL_MAGIC, 9, \
+				      struct scoutfs_ioctl_data_waiting)
+
 #endif
diff --git a/kmod/src/lock.c b/kmod/src/lock.c
index d268dd82..6697ec66 100644
--- a/kmod/src/lock.c
+++ b/kmod/src/lock.c
@@ -32,6 +32,7 @@
 #include "triggers.h"
 #include "tseq.h"
 #include "client.h"
+#include "data.h"
 
 /*
  * scoutfs uses a lock service to manage item cache consistency between
@@ -126,8 +127,10 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
 	inode = scoutfs_ilookup(sb, ino);
 	if (inode) {
 		scoutfs_inc_counter(sb, lock_invalidate_inode);
-		if (S_ISREG(inode->i_mode))
+		if (S_ISREG(inode->i_mode)) {
 			truncate_inode_pages(inode->i_mapping, 0);
+			scoutfs_data_wait_changed(inode);
+		}
 		iput(inode);
 	}
 }
diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h
index 5e85305e..08d4572b 100644
--- a/kmod/src/scoutfs_trace.h
+++ b/kmod/src/scoutfs_trace.h
@@ -514,6 +514,45 @@ TRACE_EVENT(scoutfs_data_truncate_items,
 		  __entry->iblock, __entry->last, __entry->offline)
 );
 
+TRACE_EVENT(scoutfs_data_wait_check, /* recorded as the check returns */
+	TP_PROTO(struct super_block *sb, __u64 ino, __u64 pos, __u64 len,
+		 __u8 sef, __u8 op, __u64 ext_start, __u64 ext_len,
+		 __u8 ext_flags, int ret),
+
+	TP_ARGS(sb, ino, pos, len, sef, op, ext_start, ext_len, ext_flags, ret),
+
+	TP_STRUCT__entry(
+		__field(__u64, fsid)
+		__field(__u64, ino)
+		__field(__u64, pos)
+		__field(__u64, len)
+		__field(__u8, sef)
+		__field(__u8, op)
+		__field(__u64, ext_start)
+		__field(__u64, ext_len)
+		__field(__u8, ext_flags)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->fsid = FSID_ARG(sb);
+		__entry->ino = ino;
+		__entry->pos = pos;
+		__entry->len = len;
+		__entry->sef = sef;
+		__entry->op = op;
+		__entry->ext_start = ext_start;
+		__entry->ext_len = ext_len;
+		__entry->ext_flags = ext_flags;
+		__entry->ret = ret;
+	),
+
+	TP_printk(FSID_FMT" ino %llu pos %llu len %llu sef 0x%x op 0x%x ext_start %llu ext_len %llu ext_flags 0x%x ret %d",
+			__entry->fsid, __entry->ino, __entry->pos, __entry->len,
+			__entry->sef, __entry->op, __entry->ext_start,
+			__entry->ext_len, __entry->ext_flags, __entry->ret)
+);
+
 TRACE_EVENT(scoutfs_sync_fs,
 	TP_PROTO(struct super_block *sb, int wait),
 
diff --git a/kmod/src/super.c b/kmod/src/super.c
index 02f38934..ff39a6fb 100644
--- a/kmod/src/super.c
+++ b/kmod/src/super.c
@@ -339,6 +339,8 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	spin_lock_init(&sbi->next_ino_lock);
 	init_waitqueue_head(&sbi->trans_hold_wq);
+	spin_lock_init(&sbi->data_wait_root.lock);
+	sbi->data_wait_root.root = RB_ROOT;
 	spin_lock_init(&sbi->trans_write_lock);
 	INIT_DELAYED_WORK(&sbi->trans_write_work, scoutfs_trans_write_func);
 	init_waitqueue_head(&sbi->trans_write_wq);
diff --git a/kmod/src/super.h b/kmod/src/super.h
index e24f4d9a..6dd03ac6 100644
--- a/kmod/src/super.h
+++ b/kmod/src/super.h
@@ -6,6 +6,7 @@
 
 #include "format.h"
 #include "options.h"
+#include "data.h"
 
 struct scoutfs_counters;
 struct scoutfs_triggers;
@@ -49,6 +50,9 @@ struct scoutfs_sb_info {
 	wait_queue_head_t trans_hold_wq;
 	struct task_struct *trans_task;
 
+	/* tracks tasks waiting for data extents */
+	struct scoutfs_data_wait_root data_wait_root;
+
 	spinlock_t trans_write_lock;
 	u64 trans_write_count;
 	u64 trans_seq;