[lustre-devel] [PATCH 26/29] lustre: fallocate: Add punch mode to fallocate

James Simmons jsimmons at infradead.org
Sun Apr 25 13:08:33 PDT 2021


From: Arshad Hussain <arshad.hussain at aeoncomputing.com>

This patch adds fallocate(2) punch operation
(FALLOCATE_FL_PUNCH_HOLE) mode support for ldiskfs backend
OSD and for OSC/OST

WC-bug-id: https://jira.whamcloud.com/browse/LU-14160
Lustre-commit: cb037f305c64cd512 ("LU-14160 fallocate: Add punch mode to fallocate")
Signed-off-by: Arshad Hussain <arshad.hussain at aeoncomputing.com>
Signed-off-by: Mikhail Pershin <mpershin at whamcloud.com>
Reviewed-on: https://review.whamcloud.com/40877
Reviewed-by: Andreas Dilger <adilger at whamcloud.com>
Reviewed-by: Bobi Jam <bobijam at hotmail.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 fs/lustre/include/cl_object.h |  1 -
 fs/lustre/llite/file.c        | 43 ++++++++++++++-------------
 fs/lustre/mdc/mdc_dev.c       |  3 ++
 fs/lustre/osc/osc_io.c        | 68 +++++++++++++++++++++++++------------------
 fs/lustre/osc/osc_request.c   |  7 -----
 5 files changed, 65 insertions(+), 57 deletions(-)

diff --git a/fs/lustre/include/cl_object.h b/fs/lustre/include/cl_object.h
index 3926aac..b69c04a 100644
--- a/fs/lustre/include/cl_object.h
+++ b/fs/lustre/include/cl_object.h
@@ -1876,7 +1876,6 @@ struct cl_io {
 			/* The following are used for fallocate(2) */
 			int			sa_falloc_mode;
 			loff_t			sa_falloc_offset;
-			loff_t			sa_falloc_len;
 			loff_t			sa_falloc_end;
 		} ci_setattr;
 		struct cl_data_version_io {
diff --git a/fs/lustre/llite/file.c b/fs/lustre/llite/file.c
index 15072bb1..c8f250c 100644
--- a/fs/lustre/llite/file.c
+++ b/fs/lustre/llite/file.c
@@ -4998,7 +4998,7 @@ int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len)
 	struct lu_env *env;
 	struct cl_io *io;
 	u16 refcheck;
-	int rc; loff_t sa_falloc_end;
+	int rc;
 	loff_t size = i_size_read(inode);
 
 	env = cl_env_get(&refcheck);
@@ -5011,34 +5011,32 @@ int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len)
 	io->u.ci_setattr.sa_parent_fid = lu_object_fid(&io->ci_obj->co_lu);
 	io->u.ci_setattr.sa_falloc_mode = mode;
 	io->u.ci_setattr.sa_falloc_offset = offset;
-	io->u.ci_setattr.sa_falloc_len = len;
-	io->u.ci_setattr.sa_falloc_end = io->u.ci_setattr.sa_falloc_offset +
-					 io->u.ci_setattr.sa_falloc_len;
+	io->u.ci_setattr.sa_falloc_end = offset + len;
 	io->u.ci_setattr.sa_subtype = CL_SETATTR_FALLOCATE;
-	sa_falloc_end = io->u.ci_setattr.sa_falloc_end;
-	if (sa_falloc_end > size) {
+	if (io->u.ci_setattr.sa_falloc_end > size) {
+		loff_t newsize = io->u.ci_setattr.sa_falloc_end;
+
 		/* Check new size against VFS/VM file size limit and rlimit */
-		rc = inode_newsize_ok(inode, sa_falloc_end);
+		rc = inode_newsize_ok(inode, newsize);
 		if (rc)
 			goto out;
-		if (sa_falloc_end > ll_file_maxbytes(inode)) {
+		if (newsize > ll_file_maxbytes(inode)) {
 			CDEBUG(D_INODE, "file size too large %llu > %llu\n",
-			       (unsigned long long)(sa_falloc_end),
+			       (unsigned long long)newsize,
 			       ll_file_maxbytes(inode));
 			rc = -EFBIG;
 			goto out;
 		}
 	}
 
-again:
-	if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0)
-		rc = cl_io_loop(env, io);
-	else
-		rc = io->ci_result;
-
-	cl_io_fini(env, io);
-	if (unlikely(io->ci_need_restart))
-		goto again;
+	do {
+		rc = cl_io_init(env, io, CIT_SETATTR, io->ci_obj);
+		if (rc)
+			rc = io->ci_result;
+		else
+			rc = cl_io_loop(env, io);
+		cl_io_fini(env, io);
+	} while (unlikely(io->ci_need_restart));
 
 out:
 	cl_env_put(env, &refcheck);
@@ -5050,6 +5048,9 @@ long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	int rc;
 
+	if (offset < 0 || len <= 0)
+		return -EINVAL;
+
 	/*
 	 * Encrypted inodes can't handle collapse range or zero range or insert
 	 * range since we would need to re-encrypt blocks with a different IV or
@@ -5062,10 +5063,10 @@ long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
 		return -EOPNOTSUPP;
 
 	/*
-	 * Only mode == 0 (which is standard prealloc) is supported now.
-	 * Punch is not supported yet.
+	 * mode == 0 (which is standard prealloc) and PUNCH is supported.
+	 * Rest of mode options are not supported yet.
 	 */
-	if (mode & ~FALLOC_FL_KEEP_SIZE)
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 		return -EOPNOTSUPP;
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FALLOCATE, 1);
diff --git a/fs/lustre/mdc/mdc_dev.c b/fs/lustre/mdc/mdc_dev.c
index 7807f9e..70f8987 100644
--- a/fs/lustre/mdc/mdc_dev.c
+++ b/fs/lustre/mdc/mdc_dev.c
@@ -1046,6 +1046,9 @@ static int mdc_io_setattr_start(const struct lu_env *env,
 			return rc;
 	}
 
+	if (cl_io_is_fallocate(io))
+		return -EOPNOTSUPP;
+
 	if (oio->oi_lockless == 0) {
 		cl_object_attr_lock(obj);
 		rc = cl_object_attr_get(env, obj, attr);
diff --git a/fs/lustre/osc/osc_io.c b/fs/lustre/osc/osc_io.c
index 9d783e0..3be3cfe 100644
--- a/fs/lustre/osc/osc_io.c
+++ b/fs/lustre/osc/osc_io.c
@@ -530,6 +530,29 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
 			     trunc_check_cb, (void *)&size);
 }
 
+/**
+ * Flush affected pages prior punch.
+ * We shouldn't discard them locally first because that could be data loss
+ * if server doesn't support fallocate punch, we also need these data to be
+ * flushed first to prevent re-ordering with the punch
+ */
+static int osc_punch_start(const struct lu_env *env, struct cl_io *io,
+			   struct cl_object *obj)
+{
+	struct osc_object *osc = cl2osc(obj);
+	pgoff_t pg_start = cl_index(obj, io->u.ci_setattr.sa_falloc_offset);
+	pgoff_t pg_end = cl_index(obj, io->u.ci_setattr.sa_falloc_end - 1);
+	int rc;
+
+	rc = osc_cache_writeback_range(env, osc, pg_start, pg_end, 1, 0);
+	if (rc < 0)
+		return rc;
+
+	osc_page_gang_lookup(env, io, osc, pg_start, pg_end, osc_discard_cb,
+			     osc);
+	return 0;
+}
+
 static int osc_io_setattr_start(const struct lu_env *env,
 				const struct cl_io_slice *slice)
 {
@@ -543,19 +566,17 @@ static int osc_io_setattr_start(const struct lu_env *env,
 	unsigned int ia_avalid = io->u.ci_setattr.sa_avalid;
 	enum op_xvalid ia_xvalid = io->u.ci_setattr.sa_xvalid;
 	u64 size = io->u.ci_setattr.sa_attr.lvb_size;
-	u64 end = OBD_OBJECT_EOF;
-	bool io_is_falloc = false;
+	bool io_is_falloc = cl_io_is_fallocate(io);
 	int result = 0;
 
 	/* truncate cache dirty pages first */
-	if (cl_io_is_trunc(io)) {
+	if (cl_io_is_trunc(io))
 		result = osc_cache_truncate_start(env, cl2osc(obj), size,
 						  &oio->oi_trunc);
-	} else if (cl_io_is_fallocate(io)) {
-		io_is_falloc = true;
-		size = io->u.ci_setattr.sa_falloc_offset;
-		end = io->u.ci_setattr.sa_falloc_end;
-	}
+	/* flush local pages prior punching them on server */
+	if (io_is_falloc &&
+	    io->u.ci_setattr.sa_falloc_mode & FALLOC_FL_PUNCH_HOLE)
+		result = osc_punch_start(env, io, obj);
 
 	if (result == 0 && oio->oi_lockless == 0) {
 		cl_object_attr_lock(obj);
@@ -565,14 +586,8 @@ static int osc_io_setattr_start(const struct lu_env *env,
 			unsigned int cl_valid = 0;
 
 			if (ia_avalid & ATTR_SIZE) {
-				if (io_is_falloc) {
-					attr->cat_size =
-						io->u.ci_setattr.sa_attr.lvb_size;
-					attr->cat_kms = attr->cat_size;
-				} else {
-					attr->cat_size = size;
-					attr->cat_kms = size;
-				}
+				attr->cat_size = size;
+				attr->cat_kms = size;
 				cl_valid = CAT_SIZE | CAT_KMS;
 			}
 			if (ia_avalid & ATTR_MTIME_SET) {
@@ -612,17 +627,8 @@ static int osc_io_setattr_start(const struct lu_env *env,
 			oa->o_valid |= OBD_MD_FLMTIME;
 			oa->o_mtime = attr->cat_mtime;
 		}
-		if (ia_avalid & ATTR_SIZE) {
-			if (io_is_falloc) {
-				oa->o_size = size;
-				oa->o_blocks = end;
-				oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-			} else {
-				oa->o_size = size;
-				oa->o_blocks = OBD_OBJECT_EOF;
-				oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-			}
 
+		if (ia_avalid & ATTR_SIZE || io_is_falloc) {
 			if (oio->oi_lockless) {
 				oa->o_flags = OBD_FL_SRVLOCK;
 				oa->o_valid |= OBD_MD_FLFLAGS;
@@ -646,10 +652,16 @@ static int osc_io_setattr_start(const struct lu_env *env,
 		if (io_is_falloc) {
 			int falloc_mode = io->u.ci_setattr.sa_falloc_mode;
 
+			oa->o_size = io->u.ci_setattr.sa_falloc_offset;
+			oa->o_blocks = io->u.ci_setattr.sa_falloc_end;
+			oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
 			result = osc_fallocate_base(osc_export(cl2osc(obj)),
 						    oa, osc_async_upcall,
 						    cbargs, falloc_mode);
 		} else if (ia_avalid & ATTR_SIZE) {
+			oa->o_size = size;
+			oa->o_blocks = OBD_OBJECT_EOF;
+			oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
 			result = osc_punch_send(osc_export(cl2osc(obj)),
 						oa, osc_async_upcall, cbargs);
 		} else {
@@ -682,11 +694,11 @@ void osc_io_setattr_end(const struct lu_env *env,
 	if (result == 0) {
 		if (oio->oi_lockless) {
 			/* lockless truncate */
-			struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev);
+			struct osc_device *osc = lu2osc_dev(obj->co_lu.lo_dev);
 
 			LASSERT(cl_io_is_trunc(io) || cl_io_is_fallocate(io));
 			/* XXX: Need a lock. */
-			osd->od_stats.os_lockless_truncates++;
+			osc->od_stats.os_lockless_truncates++;
 		}
 	}
 
diff --git a/fs/lustre/osc/osc_request.c b/fs/lustre/osc/osc_request.c
index 973c504..376afb9 100644
--- a/fs/lustre/osc/osc_request.c
+++ b/fs/lustre/osc/osc_request.c
@@ -454,14 +454,7 @@ int osc_fallocate_base(struct obd_export *exp, struct obdo *oa,
 	struct obd_import *imp = class_exp2cliimp(exp);
 	int rc;
 
-	/*
-	 * Only mode == 0 (which is standard prealloc) is supported now.
-	 * Punch is not supported yet.
-	 */
-	if (mode & ~FALLOC_FL_KEEP_SIZE)
-		return -EOPNOTSUPP;
 	oa->o_falloc_mode = mode;
-
 	req = ptlrpc_request_alloc(class_exp2cliimp(exp),
 				   &RQF_OST_FALLOCATE);
 	if (!req)
-- 
1.8.3.1



More information about the lustre-devel mailing list