[lustre-devel] [PATCH 04/24] lustre: lnet: automatic sizing of router pinger buffers

NeilBrown neilb at suse.com
Sun Oct 7 16:19:37 PDT 2018


From: Olaf Weber <olaf at sgi.com>

The router pinger uses fixed-size buffers to receive the data
returned by a ping. When a router has more than 16 interfaces
(including loopback) this means the data for some interfaces
is dropped.

Detect this situation, and track the number of remote NIs in
the lnet_rc_data_t structure.  lnet_create_rc_data_locked()
becomes lnet_update_rc_data_locked(), and modified to replace
an existing ping buffer if one is present. It is now also
called by lnet_ping_router_locked() when the existing ping
buffer is too small.

WC-bug-id: https://jira.whamcloud.com/browse/LU-9480
Signed-off-by: Olaf Weber <olaf at sgi.com>
Reviewed-on: https://review.whamcloud.com/25774
Reviewed-by: Olaf Weber <olaf.weber at hpe.com>
Reviewed-by: Amir Shehata <amir.shehata at intel.com>
Tested-by: Amir Shehata <amir.shehata at intel.com>
Signed-off-by: NeilBrown <neilb at suse.com>
---
 .../staging/lustre/include/linux/lnet/lib-types.h  |    4 -
 drivers/staging/lustre/lnet/lnet/router.c          |   90 +++++++++++++-------
 2 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index ab8c6d66cdbf..d1d17ededd06 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -411,8 +411,6 @@ struct lnet_ping_buffer {
 
 
 /* router checker data, per router */
-#define LNET_MAX_RTR_NIS   LNET_INTERFACES_MIN
-#define LNET_RTR_PINGINFO_SIZE	LNET_PING_INFO_SIZE(LNET_MAX_RTR_NIS)
 struct lnet_rc_data {
 	/* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */
 	struct list_head	rcd_list;
@@ -422,6 +420,8 @@ struct lnet_rc_data {
 	struct lnet_peer_ni	*rcd_gateway;
 	/* ping buffer */
 	struct lnet_ping_buffer	*rcd_pingbuffer;
+	/* desired size of buffer */
+	int			rcd_nnis;
 };
 
 struct lnet_peer_ni {
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index e97957ce9252..86cce27e10d8 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -678,8 +678,11 @@ lnet_parse_rc_info(struct lnet_rc_data *rcd)
 
 	/* Determine the number of NIs for which there is data. */
 	nnis = pbuf->pb_info.pi_nnis;
-	if (pbuf->pb_nnis < nnis)
+	if (pbuf->pb_nnis < nnis) {
+		if (rcd->rcd_nnis < nnis)
+			rcd->rcd_nnis = nnis;
 		nnis = pbuf->pb_nnis;
+	}
 
 	list_for_each_entry(rte, &gw->lpni_routes, lr_gwlist) {
 		int down = 0;
@@ -911,28 +914,47 @@ lnet_destroy_rc_data(struct lnet_rc_data *rcd)
 }
 
 static struct lnet_rc_data *
-lnet_create_rc_data_locked(struct lnet_peer_ni *gateway)
+lnet_update_rc_data_locked(struct lnet_peer_ni *gateway)
 {
-	struct lnet_rc_data *rcd = NULL;
-	struct lnet_ping_buffer *pbuf;
+	struct lnet_handle_md mdh;
+	struct lnet_rc_data *rcd;
+	struct lnet_ping_buffer *pbuf = NULL;
 	struct lnet_md md;
+	int nnis = LNET_INTERFACES_MIN;
 	int rc;
 	int i;
 
+	rcd = gateway->lpni_rcd;
+	if (rcd) {
+		nnis = rcd->rcd_nnis;
+		mdh = rcd->rcd_mdh;
+		LNetInvalidateMDHandle(&rcd->rcd_mdh);
+		pbuf = rcd->rcd_pingbuffer;
+		rcd->rcd_pingbuffer = NULL;
+	} else {
+		LNetInvalidateMDHandle(&mdh);
+	}
+
 	lnet_net_unlock(gateway->lpni_cpt);
 
-	rcd = kzalloc(sizeof(*rcd), GFP_NOFS);
-	if (!rcd)
-		goto out;
+	if (rcd) {
+		LNetMDUnlink(mdh);
+		lnet_ping_buffer_decref(pbuf);
+	} else {
+		rcd = kzalloc(sizeof(*rcd), GFP_NOFS);
+		if (!rcd)
+			goto out;
 
-	LNetInvalidateMDHandle(&rcd->rcd_mdh);
-	INIT_LIST_HEAD(&rcd->rcd_list);
+		LNetInvalidateMDHandle(&rcd->rcd_mdh);
+		INIT_LIST_HEAD(&rcd->rcd_list);
+		rcd->rcd_nnis = nnis;
+	}
 
-	pbuf = lnet_ping_buffer_alloc(LNET_MAX_RTR_NIS, GFP_NOFS);
+	pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
 	if (!pbuf)
 		goto out;
 
-	for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
+	for (i = 0; i < nnis; i++) {
 		pbuf->pb_info.pi_ni[i].ns_nid = LNET_NID_ANY;
 		pbuf->pb_info.pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
 	}
@@ -940,7 +962,7 @@ lnet_create_rc_data_locked(struct lnet_peer_ni *gateway)
 
 	md.start = &pbuf->pb_info;
 	md.user_ptr = rcd;
-	md.length = LNET_RTR_PINGINFO_SIZE;
+	md.length = LNET_PING_INFO_SIZE(nnis);
 	md.threshold = LNET_MD_THRESH_INF;
 	md.options = LNET_MD_TRUNCATE;
 	md.eq_handle = the_lnet.ln_rc_eqh;
@@ -949,33 +971,37 @@ lnet_create_rc_data_locked(struct lnet_peer_ni *gateway)
 	rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh);
 	if (rc < 0) {
 		CERROR("Can't bind MD: %d\n", rc);
-		goto out;
+		goto out_ping_buffer_decref;
 	}
 	LASSERT(!rc);
 
 	lnet_net_lock(gateway->lpni_cpt);
-	/* router table changed or someone has created rcd for this gateway */
-	if (!lnet_isrouter(gateway) || gateway->lpni_rcd) {
-		lnet_net_unlock(gateway->lpni_cpt);
-		goto out;
+	/* Check if this is still a router. */
+	if (!lnet_isrouter(gateway))
+		goto out_unlock;
+	/* Check if someone else installed router data. */
+	if (gateway->lpni_rcd && gateway->lpni_rcd != rcd)
+		goto out_unlock;
+
+	/* Install and/or update the router data. */
+	if (!gateway->lpni_rcd) {
+		lnet_peer_ni_addref_locked(gateway);
+		rcd->rcd_gateway = gateway;
+		gateway->lpni_rcd = rcd;
 	}
-
-	lnet_peer_ni_addref_locked(gateway);
-	rcd->rcd_gateway = gateway;
-	gateway->lpni_rcd = rcd;
 	gateway->lpni_ping_notsent = 0;
 
 	return rcd;
 
- out:
-	if (rcd) {
-		if (!LNetMDHandleIsInvalid(rcd->rcd_mdh)) {
-			rc = LNetMDUnlink(rcd->rcd_mdh);
-			LASSERT(!rc);
-		}
+out_unlock:
+	lnet_net_unlock(gateway->lpni_cpt);
+	rc = LNetMDUnlink(mdh);
+	LASSERT(!rc);
+out_ping_buffer_decref:
+	lnet_ping_buffer_decref(pbuf);
+out:
+	if (rcd && rcd != gateway->lpni_rcd)
 		lnet_destroy_rc_data(rcd);
-	}
-
 	lnet_net_lock(gateway->lpni_cpt);
 	return gateway->lpni_rcd;
 }
@@ -1018,9 +1044,9 @@ lnet_ping_router_locked(struct lnet_peer_ni *rtr)
 		return;
 	}
 
-	rcd = rtr->lpni_rcd ?
-	      rtr->lpni_rcd : lnet_create_rc_data_locked(rtr);
-
+	rcd = rtr->lpni_rcd;
+	if (!rcd || rcd->rcd_nnis > rcd->rcd_pingbuffer->pb_nnis)
+		rcd = lnet_update_rc_data_locked(rtr);
 	if (!rcd)
 		return;
 




More information about the lustre-devel mailing list