[lustre-devel] [PATCH 582/622] lnet: Fix source specified route selection

James Simmons jsimmons at infradead.org
Thu Feb 27 13:17:30 PST 2020


From: Chris Horn <hornc at cray.com>

If lnet_send() is called with a specific src_nid but with
rtr_nid == LNET_NID_ANY, and the message needs to be routed, then we
need to ensure that the lnet_peer_ni of our next hop is on the same
network as the lnet_ni associated with the src_nid. Otherwise we
may end up choosing an lnet_peer_ni that cannot be reached from
the specified source.

WC-bug-id: https://jira.whamcloud.com/browse/LU-12919
Lustre-commit: f0aa632d4255 ("LU-12919 lnet: Fix source specified route selection")
Signed-off-by: Chris Horn <hornc at cray.com>
Reviewed-on: https://review.whamcloud.com/36622
Reviewed-by: Alexandr Boyko <c17825 at cray.com>
Reviewed-by: Amir Shehata <ashehata at whamcloud.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 net/lnet/lnet/lib-move.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/net/lnet/lnet/lib-move.c b/net/lnet/lnet/lib-move.c
index 269b2d5..ca292a6 100644
--- a/net/lnet/lnet/lib-move.c
+++ b/net/lnet/lnet/lib-move.c
@@ -1290,7 +1290,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 }
 
 static struct lnet_route *
-lnet_find_route_locked(struct lnet_remotenet *rnet,
+lnet_find_route_locked(struct lnet_remotenet *rnet, u32 src_net,
 		       struct lnet_route **prev_route,
 		       struct lnet_peer_ni **gwni)
 {
@@ -1299,6 +1299,8 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 	struct lnet_route *last_route;
 	struct lnet_route *route;
 	int rc;
+	u32 restrict_net;
+	u32 any_net = LNET_NIDNET(LNET_NID_ANY);
 
 	best_route = NULL;
 	last_route = NULL;
@@ -1306,14 +1308,23 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 		if (!lnet_is_route_alive(route))
 			continue;
 
+		/* If the src_net is specified then we need to find an lpni
+		 * on that network
+		 */
+		restrict_net = src_net == any_net ? route->lr_lnet : src_net;
 		if (!best_route) {
-			best_route = route;
-			last_route = route;
-			best_gw_ni = lnet_find_best_lpni_on_net(NULL,
-								LNET_NID_ANY,
-								route->lr_gateway,
-								route->lr_lnet);
-			LASSERT(best_gw_ni);
+			lpni = lnet_find_best_lpni_on_net(NULL, LNET_NID_ANY,
+							  route->lr_gateway,
+							  restrict_net);
+			if (lpni) {
+				best_route = route;
+				last_route = route;
+				best_gw_ni = lpni;
+			} else {
+				CERROR("Gateway %s does not have a peer NI on net %s\n",
+				       libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+				       libcfs_net2str(restrict_net));
+			}
 			continue;
 		}
 
@@ -1327,8 +1338,13 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 
 		lpni = lnet_find_best_lpni_on_net(NULL, LNET_NID_ANY,
 						  route->lr_gateway,
-						  route->lr_lnet);
-		LASSERT(lpni);
+						  restrict_net);
+		if (!lpni) {
+			CERROR("Gateway %s does not have a peer NI on net %s\n",
+			       libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+			       libcfs_net2str(restrict_net));
+			continue;
+		}
 
 		if (rc == 1) {
 			best_route = route;
@@ -1868,8 +1884,9 @@ struct lnet_ni *
 			return -EHOSTUNREACH;
 		}
 
-		best_route = lnet_find_route_locked(best_rnet, &last_route,
-						    &gwni);
+		best_route = lnet_find_route_locked(best_rnet,
+						    LNET_NIDNET(src_nid),
+						    &last_route, &gwni);
 		if (!best_route) {
 			CERROR("no route to %s from %s\n",
 			       libcfs_nid2str(dst_nid),
-- 
1.8.3.1



More information about the lustre-devel mailing list