[lustre-devel] [PATCH 03/13] lnet: Local NI must be on same net as next-hop

James Simmons jsimmons at infradead.org
Sat May 15 06:06:00 PDT 2021


From: Chris Horn <chris.horn at hpe.com>

When sending to a remote peer, we need to restrict our selection of a
local NI to the NIs on the same peer net as the next-hop.

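The code currently selects a local NI on the peer net specified by the
lr_lnet field of the lnet_route returned by lnet_find_route_locked().
However, lnet_find_route_locked() may select a next-hop peer NI on any
local peer net - not just lr_lnet - so the chosen local NI can end up
on a different net than the next-hop. Select the local NI from the
peer net of the next-hop peer NI (gwni->lpni_peer_net) instead.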

A redundant assignment to sd->sd_msg->msg_src_nid_param is also
removed. That variable is always set appropriately in
lnet_select_pathway().

HPE-bug-id: LUS-9095
WC-bug-id: https://jira.whamcloud.com/browse/LU-13781
Lustre-commit: 031c087f3847777c ("LU-13781 lnet: Local NI must be on same net as next-hop")
Signed-off-by: Chris Horn <chris.horn at hpe.com>
Reviewed-on: https://review.whamcloud.com/39352
Reviewed-by: Neil Brown <neilb at suse.de>
Reviewed-by: James Simmons <jsimmons at infradead.org>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 net/lnet/lnet/lib-move.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/net/lnet/lnet/lib-move.c b/net/lnet/lnet/lib-move.c
index 6d0637c..3ae0209 100644
--- a/net/lnet/lnet/lib-move.c
+++ b/net/lnet/lnet/lib-move.c
@@ -1907,7 +1907,6 @@ struct lnet_ni *
 			     struct lnet_peer **gw_peer)
 {
 	int rc;
-	u32 local_lnet;
 	struct lnet_peer *gw;
 	struct lnet_peer *lp;
 	struct lnet_peer_net *lpn;
@@ -1936,10 +1935,8 @@ struct lnet_ni *
 		if (gwni) {
 			gw = gwni->lpni_peer_net->lpn_peer;
 			lnet_peer_ni_decref_locked(gwni);
-			if (gw->lp_rtr_refcount) {
-				local_lnet = LNET_NIDNET(sd->sd_rtr_nid);
+			if (gw->lp_rtr_refcount)
 				route_found = true;
-			}
 		} else {
 			CWARN("No peer NI for gateway %s. Attempting to find an alternative route.\n",
 			       libcfs_nid2str(sd->sd_rtr_nid));
@@ -2054,31 +2051,26 @@ struct lnet_ni *
 
 		gw = best_route->lr_gateway;
 		LASSERT(gw == gwni->lpni_peer_net->lpn_peer);
-		local_lnet = best_route->lr_lnet;
 	}
 
 	/* Discover this gateway if it hasn't already been discovered.
 	 * This means we might delay the message until discovery has
 	 * completed
 	 */
-	sd->sd_msg->msg_src_nid_param = sd->sd_src_nid;
 	rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt);
 	if (rc)
 		return rc;
 
 	if (!sd->sd_best_ni) {
-		struct lnet_peer_net *lpeer;
-
-		lpeer = lnet_peer_get_net_locked(gw, local_lnet);
-		sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw, lpeer,
+		lpn = gwni->lpni_peer_net;
+		sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw, lpn,
 							       sd->sd_md_cpt);
-	}
-
-	if (!sd->sd_best_ni) {
-		CERROR("Internal Error. Expected local ni on %s but non found :%s\n",
-		       libcfs_net2str(local_lnet),
-		       libcfs_nid2str(sd->sd_src_nid));
-		return -EFAULT;
+		if (!sd->sd_best_ni) {
+			CERROR("Internal Error. Expected local ni on %s but non found :%s\n",
+			       libcfs_net2str(lpn->lpn_net_id),
+			       libcfs_nid2str(sd->sd_src_nid));
+			return -EFAULT;
+		}
 	}
 
 	*gw_lpni = gwni;
-- 
1.8.3.1



More information about the lustre-devel mailing list