[lustre-devel] [PATCH 179/622] lustre: osc: limit chunk number of write submit

James Simmons jsimmons at infradead.org
Thu Feb 27 13:10:47 PST 2020


From: Bobi Jam <bobijam at whamcloud.com>

Don't queue too many pages in an extent for a write RPC; the chunk
limit must be honored in the write submit path as well (refer to
LU-8135 for more details).

WC-bug-id: https://jira.whamcloud.com/browse/LU-10239
Lustre-commit: 93ef6e7863b4 ("LU-10239 osc: limit chunk number of write submit")
Signed-off-by: Bobi Jam <bobijam at whamcloud.com>
Reviewed-on: https://review.whamcloud.com/30627
Reviewed-by: Andreas Dilger <adilger at whamcloud.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong at gmail.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 fs/lustre/osc/osc_cache.c    | 30 ------------------------------
 fs/lustre/osc/osc_internal.h | 30 ++++++++++++++++++++++++++++++
 fs/lustre/osc/osc_io.c       | 27 +++++++++++++++++++++++++--
 3 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/fs/lustre/osc/osc_cache.c b/fs/lustre/osc/osc_cache.c
index 47aee99..1ff258c 100644
--- a/fs/lustre/osc/osc_cache.c
+++ b/fs/lustre/osc/osc_cache.c
@@ -1937,36 +1937,6 @@ static int try_to_add_extent_for_io(struct client_obd *cli,
 	return 1;
 }
 
-static inline unsigned int osc_max_write_chunks(const struct client_obd *cli)
-{
-	/*
-	 * LU-8135:
-	 *
-	 * The maximum size of a single transaction is about 64MB in ZFS.
-	 * #define DMU_MAX_ACCESS (64 * 1024 * 1024)
-	 *
-	 * Since ZFS is a copy-on-write file system, a single dirty page in
-	 * a chunk will result in the rewrite of the whole chunk, therefore
-	 * an RPC shouldn't be allowed to contain too many chunks otherwise
-	 * it will make transaction size much bigger than 64MB, especially
-	 * with big block size for ZFS.
-	 *
-	 * This piece of code is to make sure that OSC won't send write RPCs
-	 * with too many chunks. The maximum chunk size that an RPC can cover
-	 * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally
-	 * OST should tell the client what the biggest transaction size is,
-	 * but it's good enough for now.
-	 *
-	 * This limitation doesn't apply to ldiskfs, which allows as many
-	 * chunks in one RPC as we want. However, it won't have any benefits
-	 * to have too many discontiguous pages in one RPC.
-	 *
-	 * An osc_extent won't cover over a RPC size, so the chunks in an
-	 * osc_extent won't bigger than PTLRPC_MAX_BRW_SIZE >> chunkbits.
-	 */
-	return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits;
-}
-
 /**
  * In order to prevent multiple ptlrpcd from breaking contiguous extents,
  * get_write_extent() takes all appropriate extents in atomic.
diff --git a/fs/lustre/osc/osc_internal.h b/fs/lustre/osc/osc_internal.h
index 3ba209f..2cb737b 100644
--- a/fs/lustre/osc/osc_internal.h
+++ b/fs/lustre/osc/osc_internal.h
@@ -162,6 +162,36 @@ unsigned long osc_cache_shrink_count(struct shrinker *sk,
 unsigned long osc_cache_shrink_scan(struct shrinker *sk,
 				    struct shrink_control *sc);
 
+static inline unsigned int osc_max_write_chunks(const struct client_obd *cli)
+{
+	/*
+	 * LU-8135:
+	 *
+	 * The maximum size of a single transaction is about 64MB in ZFS.
+	 * #define DMU_MAX_ACCESS (64 * 1024 * 1024)
+	 *
+	 * Since ZFS is a copy-on-write file system, a single dirty page in
+	 * a chunk will result in the rewrite of the whole chunk, therefore
+	 * an RPC shouldn't be allowed to contain too many chunks otherwise
+	 * it will make transaction size much bigger than 64MB, especially
+	 * with big block size for ZFS.
+	 *
+	 * This piece of code is to make sure that OSC won't send write RPCs
+	 * with too many chunks. The maximum chunk size that an RPC can cover
+	 * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally
+	 * OST should tell the client what the biggest transaction size is,
+	 * but it's good enough for now.
+	 *
+	 * This limitation doesn't apply to ldiskfs, which allows as many
+	 * chunks in one RPC as we want. However, it won't have any benefits
+	 * to have too many discontiguous pages in one RPC.
+	 *
+	 * An osc_extent won't cover more than an RPC size, so the chunks in an
+	 * osc_extent won't be bigger than PTLRPC_MAX_BRW_SIZE >> chunkbits.
+	 */
+	return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits;
+}
+
 static inline void osc_set_io_portal(struct ptlrpc_request *req)
 {
 	struct obd_import *imp = req->rq_import;
diff --git a/fs/lustre/osc/osc_io.c b/fs/lustre/osc/osc_io.c
index 1485962..56f30cb 100644
--- a/fs/lustre/osc/osc_io.c
+++ b/fs/lustre/osc/osc_io.c
@@ -122,6 +122,9 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
 	int result = 0;
 	int brw_flags;
 	unsigned int max_pages;
+	unsigned int ppc_bits; /* pages per chunk bits */
+	unsigned int ppc;
+	bool sync_queue = false;
 
 	LASSERT(qin->pl_nr > 0);
 
@@ -130,6 +133,8 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
 	osc = cl2osc(ios->cis_obj);
 	cli = osc_cli(osc);
 	max_pages = cli->cl_max_pages_per_rpc;
+	ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
+	ppc = 1 << ppc_bits;
 
 	brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0;
 	brw_flags |= crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
@@ -186,12 +191,30 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
 		else /* async IO */
 			cl_page_list_del(env, qin, page);
 
-		if (++queued == max_pages) {
-			queued = 0;
+		queued++;
+		if (queued == max_pages) {
+			sync_queue = true;
+		} else if (crt == CRT_WRITE) {
+			unsigned int chunks;
+			unsigned int next_chunks;
+
+			chunks = (queued + ppc - 1) >> ppc_bits;
+			/* chunk count if another page is added */
+			next_chunks = (queued + ppc) >> ppc_bits;
+
+			/* next page will exceed write chunk limit */
+			if (chunks == osc_max_write_chunks(cli) &&
+			    next_chunks > chunks)
+				sync_queue = true;
+		}
+
+		if (sync_queue) {
 			result = osc_queue_sync_pages(env, io, osc, &list,
 						      brw_flags);
 			if (result < 0)
 				break;
+			queued = 0;
+			sync_queue = false;
 		}
 	}
 
-- 
1.8.3.1



More information about the lustre-devel mailing list