[lustre-devel] [PATCH 06/15] lnet: don't retry allocating router buffers
James Simmons
jsimmons at infradead.org
Mon Nov 8 07:07:34 PST 2021
From: Andreas Dilger <adilger at whamcloud.com>
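Don't loop indefinitely trying to allocate router buffer pools if
the number of requested buffers is too large for the system. Pass
__GFP_NORETRY to alloc_pages_node() so that an oversized request
fails and is reported instead of being retried, look up the
allocation node once per router buffer rather than once per page,
and include the error code in the related error messages.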
WC-bug-id: https://jira.whamcloud.com/browse/LU-2084
Lustre-commit: 3038917f12a53b059 ("LU-2084 lnet: don't retry allocating router buffers")
Signed-off-by: Andreas Dilger <adilger at whamcloud.com>
Reviewed-on: https://review.whamcloud.com/45174
Reviewed-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-by: Chris Horn <chris.horn at hpe.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
net/lnet/lnet/router.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/net/lnet/lnet/router.c b/net/lnet/lnet/router.c
index 6cfcead..7ce33eb 100644
--- a/net/lnet/lnet/router.c
+++ b/net/lnet/lnet/router.c
@@ -1245,18 +1245,19 @@ bool lnet_router_checker_active(void)
int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]);
struct page *page;
struct lnet_rtrbuf *rb;
- int i;
+ int i, node;
rb = kzalloc_cpt(sz, GFP_NOFS, cpt);
if (!rb)
return NULL;
+ node = cfs_cpt_spread_node(lnet_cpt_table(), cpt);
rb->rb_pool = rbp;
for (i = 0; i < npages; i++) {
- page = alloc_pages_node(
- cfs_cpt_spread_node(lnet_cpt_table(), cpt),
- GFP_KERNEL | __GFP_ZERO, 0);
+ page = alloc_pages_node(node,
+ GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY,
+ 0);
if (!page) {
while (--i >= 0)
__free_page(rb->rb_kiov[i].bv_page);
@@ -1344,8 +1345,8 @@ bool lnet_router_checker_active(void)
while (num_rb-- > 0) {
rb = lnet_new_rtrbuf(rbp, cpt);
if (!rb) {
- CERROR("Failed to allocate %d route bufs of %d pages\n",
- nbufs, npages);
+ CERROR("lnet: error allocating %ux%u page router buffers on CPT %u: rc = %d\n",
+ nbufs, npages, cpt, -ENOMEM);
lnet_net_lock(cpt);
rbp->rbp_req_nbuffers = old_req_nbufs;
@@ -1496,8 +1497,11 @@ bool lnet_router_checker_active(void)
} else if (!strcmp(forwarding, "enabled")) {
/* explicitly enabled */
} else {
- LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
- return -EINVAL;
+ rc = -EINVAL;
+ LCONSOLE_ERROR_MSG(0x10b,
+ "lnet: forwarding='%s' not set to either 'enabled' or 'disabled': rc = %d\n",
+ forwarding, rc);
+ return rc;
}
nrb_tiny = lnet_nrb_tiny_calculate();
@@ -1516,9 +1520,11 @@ bool lnet_router_checker_active(void)
LNET_NRBPOOLS *
sizeof(*the_lnet.ln_rtrpools[0]));
if (!the_lnet.ln_rtrpools) {
+ rc = -ENOMEM;
LCONSOLE_ERROR_MSG(0x10c,
- "Failed to initialize router buffe pool\n");
- return -ENOMEM;
+ "lnet: error allocating router buffer pool: rc = %d\n",
+ rc);
+ return rc;
}
cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
--
1.8.3.1
More information about the lustre-devel mailing list