[lustre-devel] [PATCH 27/34] lnet: make it possible to add a new interface to a network
Doug Oucharek
doucharek at cray.com
Tue Sep 11 21:38:18 PDT 2018
Reviewed-by: Doug Oucharek <dougso at me.com>
Doug
On 9/6/18, 5:55 PM, "NeilBrown" <neilb at suse.com> wrote:
lnet_startup_lndnet() is enhanced to cope if the net already
exists.
This is part of
8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
LU-7734 lnet: Multi-Rail local NI split
Signed-off-by: NeilBrown <neilb at suse.com>
---
.../staging/lustre/include/linux/lnet/lib-lnet.h | 3 +
drivers/staging/lustre/lnet/lnet/api-ni.c | 69 +++++++++++++++-----
drivers/staging/lustre/lnet/lnet/config.c | 12 ++-
3 files changed, 61 insertions(+), 23 deletions(-)
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index 6401d9a37b23..905213fc16c7 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -630,7 +630,8 @@ void lnet_swap_pinginfo(struct lnet_ping_info *info);
int lnet_parse_ip2nets(char **networksp, char *ip2nets);
int lnet_parse_routes(char *route_str, int *im_a_router);
int lnet_parse_networks(struct list_head *nilist, char *networks);
-bool lnet_net_unique(__u32 net, struct list_head *nilist);
+bool lnet_net_unique(__u32 net_id, struct list_head *nilist,
+ struct lnet_net **net);
int lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt);
struct lnet_peer *lnet_find_peer_locked(struct lnet_peer_table *ptable,
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 0dfd3004f735..042ab0d9e318 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -1298,14 +1298,9 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
goto failed0;
}
- lnet_net_lock(LNET_LOCK_EX);
- /* refcount for ln_nis */
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_net->net_list, &the_lnet.ln_nets);
- lnet_net_unlock(LNET_LOCK_EX);
-
ni->ni_state = LNET_NI_STATE_ACTIVE;
+ /* We keep a reference on the loopback net through the loopback NI */
if (net->net_lnd->lnd_type == LOLND) {
lnet_ni_addref(ni);
LASSERT(!the_lnet.ln_loni);
@@ -1360,6 +1355,7 @@ static int
lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
{
struct lnet_ni *ni;
+ struct lnet_net *net_l = NULL;
struct list_head local_ni_list;
int rc;
int ni_count = 0;
@@ -1368,8 +1364,14 @@ lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
INIT_LIST_HEAD(&local_ni_list);
- /* Make sure this new NI is unique. */
- if (lnet_net_unique(net->net_id, &the_lnet.ln_nets)) {
+ /*
+ * make sure that this net is unique. If it isn't then
+ * we are adding interfaces to an already existing network, and
+ * 'net' is just a convenient way to pass in the list.
+ * if it is unique we need to find the LND and load it if
+ * necessary.
+ */
+ if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
lnd_type = LNET_NETTYP(net->net_id);
LASSERT(libcfs_isknown_lnd(lnd_type));
@@ -1400,23 +1402,41 @@ lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
net->net_lnd = lnd;
mutex_unlock(&the_lnet.ln_lnd_mutex);
- } else {
- if (lnd_type == LOLND) {
- lnet_net_free(net);
- return 0;
- }
- CERROR("Net %s is not unique\n",
- libcfs_net2str(net->net_id));
- rc = -EEXIST;
- goto failed0;
+ net_l = net;
}
+ /*
+ * net_l: if the network being added is unique then net_l
+ * will point to that network
+ * if the network being added is not unique then
+ * net_l points to the existing network.
+ *
+ * When we enter the loop below, we'll pick NIs off the
+ * network being added and start them up, then add them to
+ * a local ni list. Once we've successfully started all
+ * the NIs then we join the local NI list (of started up
+ * networks) with the net_l->net_ni_list, which should
+ * point to the correct network to add the new ni list to
+ *
+ * If any of the new NIs fail to start up, then we want to
+ * iterate through the local ni list, which should include
+ * any NIs which were successfully started up, and shut
+ * them down.
+ *
+ * After that we want to delete the network being added,
+ * to avoid a memory leak.
+ */
+
while (!list_empty(&net->net_ni_added)) {
ni = list_entry(net->net_ni_added.next, struct lnet_ni,
ni_netlist);
list_del_init(&ni->ni_netlist);
+ /* adjust the pointer to the parent network, just in case
+ * the net is a duplicate */
+ ni->ni_net = net_l;
+
rc = lnet_startup_lndni(ni, tun);
if (rc < 0)
@@ -1427,9 +1447,22 @@ lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
ni_count++;
}
+
lnet_net_lock(LNET_LOCK_EX);
- list_splice_tail(&local_ni_list, &net->net_ni_list);
+ list_splice_tail(&local_ni_list, &net_l->net_ni_list);
lnet_net_unlock(LNET_LOCK_EX);
+
+ /* if the network is not unique then we don't want to keep
+ * it around after we're done. Free it. Otherwise add that
+ * net to the global the_lnet.ln_nets */
+ if (net_l != net && net_l != NULL) {
+ lnet_net_free(net);
+ } else {
+ lnet_net_lock(LNET_LOCK_EX);
+ list_add_tail(&net->net_list, &the_lnet.ln_nets);
+ lnet_net_unlock(LNET_LOCK_EX);
+ }
+
return ni_count;
failed1:
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
index f886dcfc6d6e..fcae50676422 100644
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ b/drivers/staging/lustre/lnet/lnet/config.c
@@ -79,13 +79,17 @@ lnet_issep(char c)
}
bool
-lnet_net_unique(__u32 net, struct list_head *netlist)
+lnet_net_unique(__u32 net_id, struct list_head *netlist,
+ struct lnet_net **net)
{
- struct lnet_net *net_l;
+ struct lnet_net *net_l;
list_for_each_entry(net_l, netlist, net_list) {
- if (net_l->net_id == net)
+ if (net_l->net_id == net_id) {
+ if (net != NULL)
+ *net = net_l;
return false;
+ }
}
return true;
@@ -309,7 +313,7 @@ lnet_net_alloc(__u32 net_id, struct list_head *net_list)
{
struct lnet_net *net;
- if (!lnet_net_unique(net_id, net_list)) {
+ if (!lnet_net_unique(net_id, net_list, NULL)) {
CERROR("Duplicate net %s. Ignore\n",
libcfs_net2str(net_id));
return NULL;
More information about the lustre-devel
mailing list