[lustre-devel] [PATCH 06/15] lnet: don't retry allocating router buffers

James Simmons jsimmons at infradead.org
Mon Nov 8 07:07:34 PST 2021


From: Andreas Dilger <adilger at whamcloud.com>

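Don't loop indefinitely trying to allocate router buffer pools if
the number of requested buffers is too large for the system.
Allocate the buffer pages with __GFP_NORETRY so that an oversized
request fails promptly and is reported to the caller, and include
the CPT and error code in the related error messages.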

WC-bug-id: https://jira.whamcloud.com/browse/LU-2084
Lustre-commit: 3038917f12a53b059 ("LU-2084 lnet: don't retry allocating router buffers")
Signed-off-by: Andreas Dilger <adilger at whamcloud.com>
Reviewed-on: https://review.whamcloud.com/45174
Reviewed-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-by: Chris Horn <chris.horn at hpe.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 net/lnet/lnet/router.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/lnet/lnet/router.c b/net/lnet/lnet/router.c
index 6cfcead..7ce33eb 100644
--- a/net/lnet/lnet/router.c
+++ b/net/lnet/lnet/router.c
@@ -1245,18 +1245,19 @@ bool lnet_router_checker_active(void)
 	int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]);
 	struct page *page;
 	struct lnet_rtrbuf *rb;
-	int i;
+	int i, node;
 
 	rb = kzalloc_cpt(sz, GFP_NOFS, cpt);
 	if (!rb)
 		return NULL;
 
+	node = cfs_cpt_spread_node(lnet_cpt_table(), cpt);
 	rb->rb_pool = rbp;
 
 	for (i = 0; i < npages; i++) {
-		page = alloc_pages_node(
-				cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				GFP_KERNEL | __GFP_ZERO, 0);
+		page = alloc_pages_node(node,
+					GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY,
+					0);
 		if (!page) {
 			while (--i >= 0)
 				__free_page(rb->rb_kiov[i].bv_page);
@@ -1344,8 +1345,8 @@ bool lnet_router_checker_active(void)
 	while (num_rb-- > 0) {
 		rb = lnet_new_rtrbuf(rbp, cpt);
 		if (!rb) {
-			CERROR("Failed to allocate %d route bufs of %d pages\n",
-			       nbufs, npages);
+			CERROR("lnet: error allocating %ux%u page router buffers on CPT %u: rc = %d\n",
+			       nbufs, npages, cpt, -ENOMEM);
 
 			lnet_net_lock(cpt);
 			rbp->rbp_req_nbuffers = old_req_nbufs;
@@ -1496,8 +1497,11 @@ bool lnet_router_checker_active(void)
 	} else if (!strcmp(forwarding, "enabled")) {
 		/* explicitly enabled */
 	} else {
-		LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
-		return -EINVAL;
+		rc = -EINVAL;
+		LCONSOLE_ERROR_MSG(0x10b,
+				   "lnet: forwarding='%s' not set to either 'enabled' or 'disabled': rc = %d\n",
+				   forwarding, rc);
+		return rc;
 	}
 
 	nrb_tiny = lnet_nrb_tiny_calculate();
@@ -1516,9 +1520,11 @@ bool lnet_router_checker_active(void)
 						LNET_NRBPOOLS *
 						sizeof(*the_lnet.ln_rtrpools[0]));
 	if (!the_lnet.ln_rtrpools) {
+		rc = -ENOMEM;
 		LCONSOLE_ERROR_MSG(0x10c,
-				   "Failed to initialize router buffe pool\n");
-		return -ENOMEM;
+			"lnet: error allocating router buffer pool: rc = %d\n",
+			rc);
+		return rc;
 	}
 
 	cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-- 
1.8.3.1



More information about the lustre-devel mailing list