[lustre-devel] [PATCH 07/34] lnet: change lnet_peer to reference the net, rather than ni.
NeilBrown
neilb at suse.com
Thu Sep 6 17:49:31 PDT 2018
As a net will soon have multiple ni, a peer should identify
just the net.
In the various places where we need the ni, we now use rxni or txni
from the message.
This is part of
8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
LU-7734 lnet: Multi-Rail local NI split
Signed-off-by: NeilBrown <neilb at suse.com>
---
.../staging/lustre/include/linux/lnet/lib-lnet.h | 3 +
.../staging/lustre/include/linux/lnet/lib-types.h | 5 +-
drivers/staging/lustre/lnet/lnet/api-ni.c | 13 +++++
drivers/staging/lustre/lnet/lnet/lib-move.c | 49 +++++++++++---------
drivers/staging/lustre/lnet/lnet/lib-ptl.c | 2 -
drivers/staging/lustre/lnet/lnet/net_fault.c | 3 +
drivers/staging/lustre/lnet/lnet/peer.c | 26 ++++-------
drivers/staging/lustre/lnet/lnet/router.c | 14 +++---
drivers/staging/lustre/lnet/lnet/router_proc.c | 2 -
9 files changed, 67 insertions(+), 50 deletions(-)
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index 4440b87299c4..34509e52bac7 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -435,6 +435,7 @@ int lnet_dyn_add_ni(lnet_pid_t requested_pid,
struct lnet_ioctl_config_data *conf);
int lnet_dyn_del_ni(__u32 net);
int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
+struct lnet_net *lnet_get_net_locked(__u32 net_id);
int lnet_islocalnid(lnet_nid_t nid);
int lnet_islocalnet(__u32 net);
@@ -617,7 +618,7 @@ int lnet_sock_connect(struct socket **sockp, int *fatal,
void libcfs_sock_release(struct socket *sock);
int lnet_peers_start_down(void);
-int lnet_peer_buffer_credits(struct lnet_ni *ni);
+int lnet_peer_buffer_credits(struct lnet_net *net);
int lnet_router_checker_start(void);
void lnet_router_checker_stop(void);
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index 16a493529a46..255c6c4bbb89 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -396,7 +396,8 @@ struct lnet_peer {
time64_t lp_last_query; /* when lp_ni was queried
* last time
*/
- struct lnet_ni *lp_ni; /* interface peer is on */
+ /* network peer is on */
+ struct lnet_net *lp_net;
lnet_nid_t lp_nid; /* peer's NID */
int lp_refcount; /* # refs */
int lp_cpt; /* CPT this peer attached on */
@@ -427,7 +428,7 @@ struct lnet_peer_table {
* lnet_ni::ni_peertimeout has been set to a positive value
*/
#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \
- (lp)->lp_ni->ni_net->net_tunables.lct_peer_timeout > 0)
+ (lp)->lp_net->net_tunables.lct_peer_timeout > 0)
struct lnet_route {
struct list_head lr_list; /* chain on net */
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 05687278334a..c21aef32cdde 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -680,6 +680,19 @@ lnet_net2ni(__u32 net)
}
EXPORT_SYMBOL(lnet_net2ni);
+struct lnet_net *
+lnet_get_net_locked(__u32 net_id)
+{
+ struct lnet_net *net;
+
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ if (net->net_id == net_id)
+ return net;
+ }
+
+ return NULL;
+}
+
static unsigned int
lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
{
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index b2a52ddcefcb..b8b15f56a275 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -525,7 +525,7 @@ lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
return 0;
deadline = lp->lp_last_alive +
- lp->lp_ni->ni_net->net_tunables.lct_peer_timeout;
+ lp->lp_net->net_tunables.lct_peer_timeout;
alive = deadline > now;
/* Update obsolete lp_alive except for routers assumed to be dead
@@ -544,7 +544,7 @@ lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
* may drop the lnet_net_lock
*/
static int
-lnet_peer_alive_locked(struct lnet_peer *lp)
+lnet_peer_alive_locked(struct lnet_ni *ni, struct lnet_peer *lp)
{
time64_t now = ktime_get_seconds();
@@ -570,13 +570,13 @@ lnet_peer_alive_locked(struct lnet_peer *lp)
libcfs_nid2str(lp->lp_nid),
now, next_query,
lnet_queryinterval,
- lp->lp_ni->ni_net->net_tunables.lct_peer_timeout);
+ lp->lp_net->net_tunables.lct_peer_timeout);
return 0;
}
}
/* query NI for latest aliveness news */
- lnet_ni_query_locked(lp->lp_ni, lp);
+ lnet_ni_query_locked(ni, lp);
if (lnet_peer_is_alive(lp, now))
return 1;
@@ -600,7 +600,7 @@ static int
lnet_post_send_locked(struct lnet_msg *msg, int do_send)
{
struct lnet_peer *lp = msg->msg_txpeer;
- struct lnet_ni *ni = lp->lp_ni;
+ struct lnet_ni *ni = msg->msg_txni;
int cpt = msg->msg_tx_cpt;
struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
@@ -611,7 +611,7 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send)
/* NB 'lp' is always the next hop */
if (!(msg->msg_target.pid & LNET_PID_USERFLAG) &&
- !lnet_peer_alive_locked(lp)) {
+ !lnet_peer_alive_locked(ni, lp)) {
the_lnet.ln_counters[cpt]->drop_count++;
the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
lnet_net_unlock(cpt);
@@ -770,7 +770,7 @@ lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
int cpt = msg->msg_rx_cpt;
lnet_net_unlock(cpt);
- lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
+ lnet_ni_recv(msg->msg_rxni, msg->msg_private, msg, 1,
0, msg->msg_len, msg->msg_len);
lnet_net_lock(cpt);
}
@@ -785,7 +785,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg)
struct lnet_ni *txni = msg->msg_txni;
if (msg->msg_txcredit) {
- struct lnet_ni *ni = txpeer->lp_ni;
+ struct lnet_ni *ni = msg->msg_txni;
struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
/* give back NI txcredits */
@@ -800,7 +800,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg)
struct lnet_msg, msg_list);
list_del(&msg2->msg_list);
- LASSERT(msg2->msg_txpeer->lp_ni == ni);
+ LASSERT(msg2->msg_txni == ni);
LASSERT(msg2->msg_tx_delayed);
(void)lnet_post_send_locked(msg2, 1);
@@ -869,7 +869,7 @@ lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
while(!list_empty(&drop)) {
msg = list_first_entry(&drop, struct lnet_msg, msg_list);
- lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
+ lnet_ni_recv(msg->msg_rxni, msg->msg_private, NULL,
0, 0, 0, msg->msg_hdr.payload_length);
list_del_init(&msg->msg_list);
lnet_finalize(NULL, msg, -ECANCELED);
@@ -1007,7 +1007,7 @@ lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
}
static struct lnet_peer *
-lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
+lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target,
lnet_nid_t rtr_nid)
{
struct lnet_remotenet *rnet;
@@ -1035,7 +1035,7 @@ lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
if (!lnet_is_route_alive(route))
continue;
- if (ni && lp->lp_ni != ni)
+ if (net && lp->lp_net != net)
continue;
if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
@@ -1164,10 +1164,12 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
/* ENOMEM or shutting down */
return rc;
}
- LASSERT(lp->lp_ni == src_ni);
+ LASSERT(lp->lp_net == src_ni->ni_net);
} else {
/* sending to a remote network */
- lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
+ lp = lnet_find_route_locked(src_ni != NULL ?
+ src_ni->ni_net : NULL,
+ dst_nid, rtr_nid);
if (!lp) {
if (src_ni)
lnet_ni_decref_locked(src_ni, cpt);
@@ -1203,10 +1205,11 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
lnet_msgtyp2str(msg->msg_type), msg->msg_len);
if (!src_ni) {
- src_ni = lp->lp_ni;
+ src_ni = lnet_get_next_ni_locked(lp->lp_net, NULL);
+ LASSERT(src_ni != NULL);
src_nid = src_ni->ni_nid;
} else {
- LASSERT(src_ni == lp->lp_ni);
+ LASSERT(src_ni->ni_net == lp->lp_net);
lnet_ni_decref_locked(src_ni, cpt);
}
@@ -1918,7 +1921,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
* called lnet_drop_message(), so I just hang onto msg as well
* until that's done
*/
- lnet_drop_message(msg->msg_rxpeer->lp_ni,
+ lnet_drop_message(msg->msg_rxni,
msg->msg_rxpeer->lp_cpt,
msg->msg_private, msg->msg_len);
/*
@@ -1926,7 +1929,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
* but we still should give error code so lnet_msg_decommit()
* can skip counters operations and other checks.
*/
- lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
+ lnet_finalize(msg->msg_rxni, msg, -ENOENT);
}
}
@@ -1959,7 +1962,7 @@ lnet_recv_delayed_msg_list(struct list_head *head)
msg->msg_hdr.msg.put.offset,
msg->msg_hdr.payload_length);
- lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
+ lnet_recv_put(msg->msg_rxni, msg);
}
}
@@ -2384,8 +2387,12 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
LASSERT(shortest);
hops = shortest_hops;
- if (srcnidp)
- *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
+ if (srcnidp) {
+ ni = lnet_get_next_ni_locked(
+ shortest->lr_gateway->lp_net,
+ NULL);
+ *srcnidp = ni->ni_nid;
+ }
if (orderp)
*orderp = order;
lnet_net_unlock(cpt);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
index fc47379c5938..4c5737083422 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
@@ -946,7 +946,7 @@ lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
/* grab all messages which are on the NI passed in */
list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
msg_list) {
- if (msg->msg_rxpeer->lp_ni == ni)
+ if (msg->msg_txni == ni || msg->msg_rxni == ni)
list_move(&msg->msg_list, &zombies);
}
} else {
diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c
index 41d6131ee15a..6c53ae1811e5 100644
--- a/drivers/staging/lustre/lnet/lnet/net_fault.c
+++ b/drivers/staging/lustre/lnet/lnet/net_fault.c
@@ -601,8 +601,9 @@ delayed_msg_process(struct list_head *msg_list, bool drop)
msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
LASSERT(msg->msg_rxpeer);
+ LASSERT(msg->msg_rxni != NULL);
- ni = msg->msg_rxpeer->lp_ni;
+ ni = msg->msg_rxni;
cpt = msg->msg_rx_cpt;
list_del_init(&msg->msg_list);
diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
index b76ac3e051d9..ed29124ebded 100644
--- a/drivers/staging/lustre/lnet/lnet/peer.c
+++ b/drivers/staging/lustre/lnet/lnet/peer.c
@@ -112,7 +112,7 @@ lnet_peer_table_cleanup_locked(struct lnet_ni *ni,
for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
lp_hashlist) {
- if (ni && ni != lp->lp_ni)
+ if (ni && ni->ni_net != lp->lp_net)
continue;
list_del_init(&lp->lp_hashlist);
/* Lose hash table's ref */
@@ -154,7 +154,7 @@ lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni,
for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
lp_hashlist) {
- if (ni != lp->lp_ni)
+ if (ni->ni_net != lp->lp_net)
continue;
if (!lp->lp_rtr_refcount)
@@ -230,8 +230,7 @@ lnet_destroy_peer_locked(struct lnet_peer *lp)
LASSERT(ptable->pt_number > 0);
ptable->pt_number--;
- lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
- lp->lp_ni = NULL;
+ lp->lp_net = NULL;
list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
LASSERT(ptable->pt_zombies > 0);
@@ -336,16 +335,11 @@ lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt)
goto out;
}
- lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
- if (!lp->lp_ni) {
- rc = -EHOSTUNREACH;
- goto out;
- }
-
- lp->lp_txcredits = lp->lp_ni->ni_net->net_tunables.lct_peer_tx_credits;
- lp->lp_mintxcredits = lp->lp_ni->ni_net->net_tunables.lct_peer_tx_credits;
- lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
- lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
+ lp->lp_net = lnet_get_net_locked(LNET_NIDNET(lp->lp_nid));
+ lp->lp_txcredits =
+ lp->lp_mintxcredits = lp->lp_net->net_tunables.lct_peer_tx_credits;
+ lp->lp_rtrcredits =
+ lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_net);
list_add_tail(&lp->lp_hashlist,
&ptable->pt_hash[lnet_nid2peerhash(nid)]);
@@ -383,7 +377,7 @@ lnet_debug_peer(lnet_nid_t nid)
CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
- aliveness, lp->lp_ni->ni_net->net_tunables.lct_peer_tx_credits,
+ aliveness, lp->lp_net->net_tunables.lct_peer_tx_credits,
lp->lp_rtrcredits, lp->lp_minrtrcredits,
lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
@@ -439,7 +433,7 @@ lnet_get_peer_info(__u32 peer_index, __u64 *nid,
*nid = lp->lp_nid;
*refcount = lp->lp_refcount;
*ni_peer_tx_credits =
- lp->lp_ni->ni_net->net_tunables.lct_peer_tx_credits;
+ lp->lp_net->net_tunables.lct_peer_tx_credits;
*peer_tx_credits = lp->lp_txcredits;
*peer_rtr_credits = lp->lp_rtrcredits;
*peer_min_rtr_credits = lp->lp_mintxcredits;
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 135dfe793b0b..72b8ca2b0fc6 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -55,10 +55,8 @@ module_param(auto_down, int, 0444);
MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
int
-lnet_peer_buffer_credits(struct lnet_ni *ni)
+lnet_peer_buffer_credits(struct lnet_net *net)
{
- struct lnet_net *net = ni->ni_net;
-
/* NI option overrides LNet default */
if (net->net_tunables.lct_peer_rtr_credits > 0)
return net->net_tunables.lct_peer_rtr_credits;
@@ -373,7 +371,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
lnet_add_route_to_rnet(rnet2, route);
- ni = route->lr_gateway->lp_ni;
+ ni = lnet_get_next_ni_locked(route->lr_gateway->lp_net, NULL);
lnet_net_unlock(LNET_LOCK_EX);
/* XXX Assume alive */
@@ -428,8 +426,8 @@ lnet_check_routes(void)
continue;
}
- if (route->lr_gateway->lp_ni ==
- route2->lr_gateway->lp_ni)
+ if (route->lr_gateway->lp_net ==
+ route2->lr_gateway->lp_net)
continue;
nid1 = route->lr_gateway->lp_nid;
@@ -952,6 +950,7 @@ lnet_ping_router_locked(struct lnet_peer *rtr)
struct lnet_rc_data *rcd = NULL;
time64_t now = ktime_get_seconds();
time64_t secs;
+ struct lnet_ni *ni;
lnet_peer_addref_locked(rtr);
@@ -960,7 +959,8 @@ lnet_ping_router_locked(struct lnet_peer *rtr)
lnet_notify_locked(rtr, 1, 0, now);
/* Run any outstanding notifications */
- lnet_ni_notify_locked(rtr->lp_ni, rtr);
+ ni = lnet_get_next_ni_locked(rtr->lp_net, NULL);
+ lnet_ni_notify_locked(ni, rtr);
if (!lnet_isrouter(rtr) ||
the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c
index 2a366e9a8627..52714b898aac 100644
--- a/drivers/staging/lustre/lnet/lnet/router_proc.c
+++ b/drivers/staging/lustre/lnet/lnet/router_proc.c
@@ -489,7 +489,7 @@ static int proc_lnet_peers(struct ctl_table *table, int write,
int nrefs = peer->lp_refcount;
time64_t lastalive = -1;
char *aliveness = "NA";
- int maxcr = peer->lp_ni->ni_net->net_tunables.lct_peer_tx_credits;
+ int maxcr = peer->lp_net->net_tunables.lct_peer_tx_credits;
int txcr = peer->lp_txcredits;
int mintxcr = peer->lp_mintxcredits;
int rtrcr = peer->lp_rtrcredits;
More information about the lustre-devel
mailing list