[lustre-devel] [PATCH 12/34] lnet: split lnet_startup_lndni
NeilBrown
neilb at suse.com
Thu Sep 6 17:49:31 PDT 2018
Split into
lnet_startup_lndnet
which starts all nis in a net, and
lnet_startup_lndni
which starts an individual ni.
lnet_startup_lndni() returns 0 on success, or -ve error.
lnet_startup_lndnis() returned the count of interfaces started.
The new lnet_startup_lndnet() returns the count of started interfaces, or -ve error.
This requires adding lnet_shutdown_lndnet() to handle errors
in lnet_dyn_add_ni(), which now uses the new lnet_startup_lndnet().
We now drop the ln_lnd_mutex near the end of lnet_startup_lndnet(),
and re-claim it for each lnet_startup_lndni().
This is part of
8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
LU-7734 lnet: Multi-Rail local NI split
Signed-off-by: NeilBrown <neilb at suse.com>
---
drivers/staging/lustre/lnet/lnet/api-ni.c | 142 +++++++++++++++++++++++------
1 file changed, 111 insertions(+), 31 deletions(-)
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 53ecfd700db3..8afddf11b5e2 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -1239,32 +1239,61 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
lnet_net_unlock(LNET_LOCK_EX);
}
+static void
+lnet_shutdown_lndnet(struct lnet_net *net) /* shut down every NI on 'net', then free 'net' */
+{
+ struct lnet_ni *ni;
+
+ lnet_net_lock(LNET_LOCK_EX);
+
+ list_del_init(&net->net_list); /* unlink 'net' from the list it is on */
+
+ while (!list_empty(&net->net_ni_list)) {
+ ni = list_entry(net->net_ni_list.next,
+ struct lnet_ni, ni_netlist);
+ lnet_net_unlock(LNET_LOCK_EX); /* lock is not held across the per-NI shutdown */
+ lnet_shutdown_lndni(ni); /* NOTE(review): must unlink ni or this loop spins - confirm */
+ lnet_net_lock(LNET_LOCK_EX);
+ }
+
+ /*
+ * Decrement the ref count on the LND only when the entire network
+ * goes away: once, after all of its NIs have been shut down.
+ */
+ net->net_lnd->lnd_refcount--;
+
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ lnet_net_free(net); /* 'net' must not be used after this call */
+}
+
static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun);
+
+static int
+lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
{
- int rc = -EINVAL;
- int lnd_type;
- struct lnet_lnd *lnd;
- struct lnet_tx_queue *tq;
- int i;
- u32 seed;
+ struct lnet_ni *ni;
+ __u32 lnd_type;
+ struct lnet_lnd *lnd;
+ int rc;
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+ lnd_type = LNET_NETTYP(net->net_id);
LASSERT(libcfs_isknown_lnd(lnd_type));
/* Make sure this new NI is unique. */
lnet_net_lock(LNET_LOCK_EX);
- rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nets);
+ rc = lnet_net_unique(net->net_id, &the_lnet.ln_nets);
lnet_net_unlock(LNET_LOCK_EX);
if (!rc) {
if (lnd_type == LOLND) {
- lnet_ni_free(ni);
+ lnet_net_free(net);
return 0;
}
CERROR("Net %s is not unique\n",
- libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+ libcfs_net2str(net->net_id));
rc = -EEXIST;
goto failed0;
}
@@ -1291,8 +1320,32 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
lnet_net_lock(LNET_LOCK_EX);
lnd->lnd_refcount++;
lnet_net_unlock(LNET_LOCK_EX);
+ net->net_lnd = lnd;
+ mutex_unlock(&the_lnet.ln_lnd_mutex);
- ni->ni_net->net_lnd = lnd;
+ ni = list_first_entry(&net->net_ni_list, struct lnet_ni, ni_netlist);
+
+ rc = lnet_startup_lndni(ni, tun);
+ if (rc < 0)
+ return rc;
+ return 1;
+
+failed0:
+ lnet_net_free(net);
+
+ return rc;
+}
+
+static int
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
+{
+ int rc = -EINVAL;
+ struct lnet_tx_queue *tq;
+ int i;
+ struct lnet_net *net = ni->ni_net;
+ u32 seed;
+
+ mutex_lock(&the_lnet.ln_lnd_mutex);
if (tun) {
memcpy(&ni->ni_lnd_tunables, tun,
@@ -1300,15 +1353,15 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
ni->ni_lnd_tunables_set = true;
}
- rc = lnd->lnd_startup(ni);
+ rc = net->net_lnd->lnd_startup(ni);
mutex_unlock(&the_lnet.ln_lnd_mutex);
if (rc) {
LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
- rc, libcfs_lnd2str(lnd->lnd_type));
+ rc, libcfs_lnd2str(net->net_lnd->lnd_type));
lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount--;
+ net->net_lnd->lnd_refcount--;
lnet_net_unlock(LNET_LOCK_EX);
goto failed0;
}
@@ -1324,7 +1377,7 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
lnet_net_unlock(LNET_LOCK_EX);
- if (lnd->lnd_type == LOLND) {
+ if (net->net_lnd->lnd_type == LOLND) {
lnet_ni_addref(ni);
LASSERT(!the_lnet.ln_loni);
the_lnet.ln_loni = ni;
@@ -1338,7 +1391,7 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
if (!ni->ni_net->net_tunables.lct_peer_tx_credits ||
!ni->ni_net->net_tunables.lct_max_tx_credits) {
LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
- libcfs_lnd2str(lnd->lnd_type),
+ libcfs_lnd2str(net->net_lnd->lnd_type),
!ni->ni_net->net_tunables.lct_peer_tx_credits ?
"" : "per-peer ");
/*
@@ -1375,21 +1428,22 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
}
static int
-lnet_startup_lndnis(struct list_head *nilist)
+lnet_startup_lndnets(struct list_head *netlist)
{
- struct lnet_ni *ni;
+ struct lnet_net *net;
int rc;
int ni_count = 0;
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, struct lnet_ni, ni_netlist);
- list_del(&ni->ni_netlist);
- rc = lnet_startup_lndni(ni, NULL);
+ while (!list_empty(netlist)) {
+ net = list_entry(netlist->next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
+
+ rc = lnet_startup_lndnet(net, NULL);
if (rc < 0)
goto failed;
- ni_count++;
+ ni_count += rc;
}
return ni_count;
@@ -1552,7 +1606,7 @@ LNetNIInit(lnet_pid_t requested_pid)
goto err_empty_list;
}
- ni_count = lnet_startup_lndnis(&net_head);
+ ni_count = lnet_startup_lndnets(&net_head);
if (ni_count < 0) {
rc = ni_count;
goto err_empty_list;
@@ -1831,10 +1885,11 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
struct lnet_ping_info *pinfo;
struct lnet_handle_md md_handle;
struct lnet_net *net;
- struct lnet_ni *ni;
struct list_head net_head;
struct lnet_remotenet *rnet;
int rc;
+ int num_acceptor_nets;
+ __u32 net_type;
struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
INIT_LIST_HEAD(&net_head);
@@ -1876,22 +1931,47 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
goto failed0;
list_del_init(&net->net_list);
+
if (lnd_tunables)
memcpy(&net->net_tunables,
&lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
- ni = list_first_entry(&net->net_ni_list, struct lnet_ni, ni_netlist);
- rc = lnet_startup_lndni(ni, (lnd_tunables ?
+ /*
+ * before starting this network get a count of the current TCP
+ * networks which require the acceptor thread running. If that
+ * count is == 0 before we start up this network, then we'd want to
+ * start up the acceptor thread after starting up this network
+ */
+ num_acceptor_nets = lnet_count_acceptor_nets();
+
+ /*
+ * lnet_startup_lndnet() can deallocate 'net' even if it returns
+ * success, because we ended up adding interfaces to an existing
+ * network. So grab the net_type now
+ */
+ net_type = LNET_NETTYP(net->net_id);
+
+ rc = lnet_startup_lndnet(net, (lnd_tunables ?
&lnd_tunables->lt_tun : NULL));
if (rc < 0)
goto failed1;
- if (ni->ni_net->net_lnd->lnd_accept) {
+ /*
+ * Start the acceptor thread if this is the first network
+ * being added that requires the thread.
+ */
+ if (net_type == SOCKLND && num_acceptor_nets == 0) {
rc = lnet_acceptor_start();
if (rc < 0) {
- /* shutdown the ni that we just started */
+ /* shutdown the net that we just started */
CERROR("Failed to start up acceptor thread\n");
- lnet_shutdown_lndni(ni);
+ /*
+ * Note that if we needed to start the acceptor
+ * thread, then 'net' must have been the first TCP
+ * network, therefore was unique, and therefore
+ * wasn't deallocated by lnet_startup_lndnet()
+ */
+ lnet_shutdown_lndnet(net);
goto failed1;
}
}
More information about the lustre-devel
mailing list