[lustre-devel] [PATCH 07/34] lnet: change lnet_peer to reference the net, rather than ni.

NeilBrown neilb at suse.com
Thu Sep 6 17:49:31 PDT 2018


As a net will soon have multiple ni, a peer should identify
just the net.
In the various places where we need the ni, we now use rxni or txni
from the message.

This is part of
    8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
       LU-7734 lnet: Multi-Rail local NI split

Signed-off-by: NeilBrown <neilb at suse.com>
---
 .../staging/lustre/include/linux/lnet/lib-lnet.h   |    3 +
 .../staging/lustre/include/linux/lnet/lib-types.h  |    5 +-
 drivers/staging/lustre/lnet/lnet/api-ni.c          |   13 +++++
 drivers/staging/lustre/lnet/lnet/lib-move.c        |   49 +++++++++++---------
 drivers/staging/lustre/lnet/lnet/lib-ptl.c         |    2 -
 drivers/staging/lustre/lnet/lnet/net_fault.c       |    3 +
 drivers/staging/lustre/lnet/lnet/peer.c            |   26 ++++-------
 drivers/staging/lustre/lnet/lnet/router.c          |   14 +++---
 drivers/staging/lustre/lnet/lnet/router_proc.c     |    2 -
 9 files changed, 67 insertions(+), 50 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index 4440b87299c4..34509e52bac7 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -435,6 +435,7 @@ int lnet_dyn_add_ni(lnet_pid_t requested_pid,
 		    struct lnet_ioctl_config_data *conf);
 int lnet_dyn_del_ni(__u32 net);
 int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
+struct lnet_net *lnet_get_net_locked(__u32 net_id);
 
 int lnet_islocalnid(lnet_nid_t nid);
 int lnet_islocalnet(__u32 net);
@@ -617,7 +618,7 @@ int lnet_sock_connect(struct socket **sockp, int *fatal,
 void libcfs_sock_release(struct socket *sock);
 
 int lnet_peers_start_down(void);
-int lnet_peer_buffer_credits(struct lnet_ni *ni);
+int lnet_peer_buffer_credits(struct lnet_net *net);
 
 int lnet_router_checker_start(void);
 void lnet_router_checker_stop(void);
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index 16a493529a46..255c6c4bbb89 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -396,7 +396,8 @@ struct lnet_peer {
 	time64_t		 lp_last_query;	/* when lp_ni was queried
 						 * last time
 						 */
-	struct lnet_ni		*lp_ni;		/* interface peer is on */
+	/* network peer is on */
+	struct lnet_net		*lp_net;
 	lnet_nid_t		 lp_nid;	/* peer's NID */
 	int			 lp_refcount;	/* # refs */
 	int			 lp_cpt;	/* CPT this peer attached on */
@@ -427,7 +428,7 @@ struct lnet_peer_table {
  * lnet_ni::ni_peertimeout has been set to a positive value
  */
 #define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \
-					 (lp)->lp_ni->ni_net->net_tunables.lct_peer_timeout > 0)
+					 (lp)->lp_net->net_tunables.lct_peer_timeout > 0)
 
 struct lnet_route {
 	struct list_head	 lr_list;	/* chain on net */
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 05687278334a..c21aef32cdde 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -680,6 +680,19 @@ lnet_net2ni(__u32 net)
 }
 EXPORT_SYMBOL(lnet_net2ni);
 
+/* Return the entry on the_lnet.ln_nets whose net_id matches, else NULL. */
+struct lnet_net *
+lnet_get_net_locked(__u32 net_id)
+{
+	struct lnet_net *cur;
+
+	list_for_each_entry(cur, &the_lnet.ln_nets, net_list) {
+		if (net_id == cur->net_id)
+			return cur;
+	}
+	return NULL;
+}
+
 static unsigned int
 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
 {
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index b2a52ddcefcb..b8b15f56a275 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -525,7 +525,7 @@ lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
 		return 0;
 
 	deadline = lp->lp_last_alive +
-		lp->lp_ni->ni_net->net_tunables.lct_peer_timeout;
+		lp->lp_net->net_tunables.lct_peer_timeout;
 	alive = deadline > now;
 
 	/* Update obsolete lp_alive except for routers assumed to be dead
@@ -544,7 +544,7 @@ lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
  *     may drop the lnet_net_lock
  */
 static int
-lnet_peer_alive_locked(struct lnet_peer *lp)
+lnet_peer_alive_locked(struct lnet_ni *ni, struct lnet_peer *lp)
 {
 	time64_t now = ktime_get_seconds();
 
@@ -570,13 +570,13 @@ lnet_peer_alive_locked(struct lnet_peer *lp)
 				      libcfs_nid2str(lp->lp_nid),
 				      now, next_query,
 				      lnet_queryinterval,
-				      lp->lp_ni->ni_net->net_tunables.lct_peer_timeout);
+				      lp->lp_net->net_tunables.lct_peer_timeout);
 			return 0;
 		}
 	}
 
 	/* query NI for latest aliveness news */
-	lnet_ni_query_locked(lp->lp_ni, lp);
+	lnet_ni_query_locked(ni, lp);
 
 	if (lnet_peer_is_alive(lp, now))
 		return 1;
@@ -600,7 +600,7 @@ static int
 lnet_post_send_locked(struct lnet_msg *msg, int do_send)
 {
 	struct lnet_peer *lp = msg->msg_txpeer;
-	struct lnet_ni *ni = lp->lp_ni;
+	struct lnet_ni *ni = msg->msg_txni;
 	int cpt = msg->msg_tx_cpt;
 	struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
 
@@ -611,7 +611,7 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send)
 
 	/* NB 'lp' is always the next hop */
 	if (!(msg->msg_target.pid & LNET_PID_USERFLAG) &&
-	    !lnet_peer_alive_locked(lp)) {
+	    !lnet_peer_alive_locked(ni, lp)) {
 		the_lnet.ln_counters[cpt]->drop_count++;
 		the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
 		lnet_net_unlock(cpt);
@@ -770,7 +770,7 @@ lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
 		int cpt = msg->msg_rx_cpt;
 
 		lnet_net_unlock(cpt);
-		lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
+		lnet_ni_recv(msg->msg_rxni, msg->msg_private, msg, 1,
 			     0, msg->msg_len, msg->msg_len);
 		lnet_net_lock(cpt);
 	}
@@ -785,7 +785,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg)
 	struct lnet_ni	*txni = msg->msg_txni;
 
 	if (msg->msg_txcredit) {
-		struct lnet_ni *ni = txpeer->lp_ni;
+		struct lnet_ni *ni = msg->msg_txni;
 		struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
 
 		/* give back NI txcredits */
@@ -800,7 +800,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg)
 					  struct lnet_msg, msg_list);
 			list_del(&msg2->msg_list);
 
-			LASSERT(msg2->msg_txpeer->lp_ni == ni);
+			LASSERT(msg2->msg_txni == ni);
 			LASSERT(msg2->msg_tx_delayed);
 
 			(void)lnet_post_send_locked(msg2, 1);
@@ -869,7 +869,7 @@ lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
 
 	while(!list_empty(&drop)) {
 		msg = list_first_entry(&drop, struct lnet_msg, msg_list);
-		lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
+		lnet_ni_recv(msg->msg_rxni, msg->msg_private, NULL,
 			     0, 0, 0, msg->msg_hdr.payload_length);
 		list_del_init(&msg->msg_list);
 		lnet_finalize(NULL, msg, -ECANCELED);
@@ -1007,7 +1007,7 @@ lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
 }
 
 static struct lnet_peer *
-lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
+lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target,
 		       lnet_nid_t rtr_nid)
 {
 	struct lnet_remotenet *rnet;
@@ -1035,7 +1035,7 @@ lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
 		if (!lnet_is_route_alive(route))
 			continue;
 
-		if (ni && lp->lp_ni != ni)
+		if (net && lp->lp_net != net)
 			continue;
 
 		if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
@@ -1164,10 +1164,12 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
 			/* ENOMEM or shutting down */
 			return rc;
 		}
-		LASSERT(lp->lp_ni == src_ni);
+		LASSERT(lp->lp_net == src_ni->ni_net);
 	} else {
 		/* sending to a remote network */
-		lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
+		lp = lnet_find_route_locked(src_ni != NULL ?
+					    src_ni->ni_net : NULL,
+					    dst_nid, rtr_nid);
 		if (!lp) {
 			if (src_ni)
 				lnet_ni_decref_locked(src_ni, cpt);
@@ -1203,10 +1205,11 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
 		       lnet_msgtyp2str(msg->msg_type), msg->msg_len);
 
 		if (!src_ni) {
-			src_ni = lp->lp_ni;
+			src_ni = lnet_get_next_ni_locked(lp->lp_net, NULL);
+			LASSERT(src_ni != NULL);
 			src_nid = src_ni->ni_nid;
 		} else {
-			LASSERT(src_ni == lp->lp_ni);
+			LASSERT(src_ni->ni_net == lp->lp_net);
 			lnet_ni_decref_locked(src_ni, cpt);
 		}
 
@@ -1918,7 +1921,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
 		 * called lnet_drop_message(), so I just hang onto msg as well
 		 * until that's done
 		 */
-		lnet_drop_message(msg->msg_rxpeer->lp_ni,
+		lnet_drop_message(msg->msg_rxni,
 				  msg->msg_rxpeer->lp_cpt,
 				  msg->msg_private, msg->msg_len);
 		/*
@@ -1926,7 +1929,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
 		 * but we still should give error code so lnet_msg_decommit()
 		 * can skip counters operations and other checks.
 		 */
-		lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
+		lnet_finalize(msg->msg_rxni, msg, -ENOENT);
 	}
 }
 
@@ -1959,7 +1962,7 @@ lnet_recv_delayed_msg_list(struct list_head *head)
 		       msg->msg_hdr.msg.put.offset,
 		       msg->msg_hdr.payload_length);
 
-		lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
+		lnet_recv_put(msg->msg_rxni, msg);
 	}
 }
 
@@ -2384,8 +2387,12 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
 
 			LASSERT(shortest);
 			hops = shortest_hops;
-			if (srcnidp)
-				*srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
+			if (srcnidp) {
+				ni = lnet_get_next_ni_locked(
+					shortest->lr_gateway->lp_net,
+					NULL);
+				*srcnidp = ni->ni_nid;
+			}
 			if (orderp)
 				*orderp = order;
 			lnet_net_unlock(cpt);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
index fc47379c5938..4c5737083422 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
@@ -946,7 +946,7 @@ lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
 		/* grab all messages which are on the NI passed in */
 		list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
 					 msg_list) {
-			if (msg->msg_rxpeer->lp_ni == ni)
+			if (msg->msg_txni == ni || msg->msg_rxni == ni)
 				list_move(&msg->msg_list, &zombies);
 		}
 	} else {
diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c
index 41d6131ee15a..6c53ae1811e5 100644
--- a/drivers/staging/lustre/lnet/lnet/net_fault.c
+++ b/drivers/staging/lustre/lnet/lnet/net_fault.c
@@ -601,8 +601,9 @@ delayed_msg_process(struct list_head *msg_list, bool drop)
 
 		msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
 		LASSERT(msg->msg_rxpeer);
+		LASSERT(msg->msg_rxni != NULL);
 
-		ni = msg->msg_rxpeer->lp_ni;
+		ni = msg->msg_rxni;
 		cpt = msg->msg_rx_cpt;
 
 		list_del_init(&msg->msg_list);
diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
index b76ac3e051d9..ed29124ebded 100644
--- a/drivers/staging/lustre/lnet/lnet/peer.c
+++ b/drivers/staging/lustre/lnet/lnet/peer.c
@@ -112,7 +112,7 @@ lnet_peer_table_cleanup_locked(struct lnet_ni *ni,
 	for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
 		list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
 					 lp_hashlist) {
-			if (ni && ni != lp->lp_ni)
+			if (ni && ni->ni_net != lp->lp_net)
 				continue;
 			list_del_init(&lp->lp_hashlist);
 			/* Lose hash table's ref */
@@ -154,7 +154,7 @@ lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni,
 	for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
 		list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
 					 lp_hashlist) {
-			if (ni != lp->lp_ni)
+			if (ni->ni_net != lp->lp_net)
 				continue;
 
 			if (!lp->lp_rtr_refcount)
@@ -230,8 +230,7 @@ lnet_destroy_peer_locked(struct lnet_peer *lp)
 	LASSERT(ptable->pt_number > 0);
 	ptable->pt_number--;
 
-	lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
-	lp->lp_ni = NULL;
+	lp->lp_net = NULL;
 
 	list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
 	LASSERT(ptable->pt_zombies > 0);
@@ -336,16 +335,11 @@ lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt)
 		goto out;
 	}
 
-	lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
-	if (!lp->lp_ni) {
-		rc = -EHOSTUNREACH;
-		goto out;
-	}
-
-	lp->lp_txcredits = lp->lp_ni->ni_net->net_tunables.lct_peer_tx_credits;
-	lp->lp_mintxcredits = lp->lp_ni->ni_net->net_tunables.lct_peer_tx_credits;
-	lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
-	lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
+	lp->lp_net = lnet_get_net_locked(LNET_NIDNET(lp->lp_nid));
+	lp->lp_txcredits =
+		lp->lp_mintxcredits = lp->lp_net->net_tunables.lct_peer_tx_credits;
+	lp->lp_rtrcredits =
+		lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_net);
 
 	list_add_tail(&lp->lp_hashlist,
 		      &ptable->pt_hash[lnet_nid2peerhash(nid)]);
@@ -383,7 +377,7 @@ lnet_debug_peer(lnet_nid_t nid)
 
 	CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
 	       libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
-	       aliveness, lp->lp_ni->ni_net->net_tunables.lct_peer_tx_credits,
+	       aliveness, lp->lp_net->net_tunables.lct_peer_tx_credits,
 	       lp->lp_rtrcredits, lp->lp_minrtrcredits,
 	       lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
 
@@ -439,7 +433,7 @@ lnet_get_peer_info(__u32 peer_index, __u64 *nid,
 			*nid = lp->lp_nid;
 			*refcount = lp->lp_refcount;
 			*ni_peer_tx_credits =
-				lp->lp_ni->ni_net->net_tunables.lct_peer_tx_credits;
+				lp->lp_net->net_tunables.lct_peer_tx_credits;
 			*peer_tx_credits = lp->lp_txcredits;
 			*peer_rtr_credits = lp->lp_rtrcredits;
 			*peer_min_rtr_credits = lp->lp_mintxcredits;
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 135dfe793b0b..72b8ca2b0fc6 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -55,10 +55,8 @@ module_param(auto_down, int, 0444);
 MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
 
 int
-lnet_peer_buffer_credits(struct lnet_ni *ni)
+lnet_peer_buffer_credits(struct lnet_net *net)
 {
-	struct lnet_net *net = ni->ni_net;
-
 	/* NI option overrides LNet default */
 	if (net->net_tunables.lct_peer_rtr_credits > 0)
 		return net->net_tunables.lct_peer_rtr_credits;
@@ -373,7 +371,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
 		lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
 		lnet_add_route_to_rnet(rnet2, route);
 
-		ni = route->lr_gateway->lp_ni;
+		ni = lnet_get_next_ni_locked(route->lr_gateway->lp_net, NULL);
 		lnet_net_unlock(LNET_LOCK_EX);
 
 		/* XXX Assume alive */
@@ -428,8 +426,8 @@ lnet_check_routes(void)
 					continue;
 				}
 
-				if (route->lr_gateway->lp_ni ==
-				    route2->lr_gateway->lp_ni)
+				if (route->lr_gateway->lp_net ==
+				    route2->lr_gateway->lp_net)
 					continue;
 
 				nid1 = route->lr_gateway->lp_nid;
@@ -952,6 +950,7 @@ lnet_ping_router_locked(struct lnet_peer *rtr)
 	struct lnet_rc_data *rcd = NULL;
 	time64_t now = ktime_get_seconds();
 	time64_t secs;
+	struct lnet_ni  *ni;
 
 	lnet_peer_addref_locked(rtr);
 
@@ -960,7 +959,8 @@ lnet_ping_router_locked(struct lnet_peer *rtr)
 		lnet_notify_locked(rtr, 1, 0, now);
 
 	/* Run any outstanding notifications */
-	lnet_ni_notify_locked(rtr->lp_ni, rtr);
+	ni = lnet_get_next_ni_locked(rtr->lp_net, NULL);
+	lnet_ni_notify_locked(ni, rtr);
 
 	if (!lnet_isrouter(rtr) ||
 	    the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c
index 2a366e9a8627..52714b898aac 100644
--- a/drivers/staging/lustre/lnet/lnet/router_proc.c
+++ b/drivers/staging/lustre/lnet/lnet/router_proc.c
@@ -489,7 +489,7 @@ static int proc_lnet_peers(struct ctl_table *table, int write,
 			int nrefs = peer->lp_refcount;
 			time64_t lastalive = -1;
 			char *aliveness = "NA";
-			int maxcr = peer->lp_ni->ni_net->net_tunables.lct_peer_tx_credits;
+			int maxcr = peer->lp_net->net_tunables.lct_peer_tx_credits;
 			int txcr = peer->lp_txcredits;
 			int mintxcr = peer->lp_mintxcredits;
 			int rtrcr = peer->lp_rtrcredits;




More information about the lustre-devel mailing list