[lustre-devel] [PATCH 07/15] lnet: socklnd: detect link state to set fatal error on ni

James Simmons jsimmons at infradead.org
Wed Jul 7 12:11:08 PDT 2021


From: Serguei Smirnov <ssmirnov at whamcloud.com>

To avoid selecting for sending an LNet NI whose underlying
ethernet link is down, add a mechanism for detecting link
events in socklnd. On link up/down events, find the corresponding
NI and toggle its ni_fatal_error_on flag, similar to what o2iblnd does.

WC-bug-id: https://jira.whamcloud.com/browse/LU-14742
Lustre-commit: fc2df80e96dc5db9f ("LU-14742 socklnd: detect link state to set fatal error on ni")
Signed-off-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-on: https://review.whamcloud.com/43952
Reviewed-by: Amir Shehata <ashehata at whamcloud.com>
Reviewed-by: James Simmons <jsimmons at infradead.org>
Reviewed-by: Chris Horn <chris.horn at hpe.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 net/lnet/klnds/socklnd/socklnd.c | 78 ++++++++++++++++++++++++++++++++++++++++
 net/lnet/klnds/socklnd/socklnd.h |  1 +
 2 files changed, 79 insertions(+)

diff --git a/net/lnet/klnds/socklnd/socklnd.c b/net/lnet/klnds/socklnd/socklnd.c
index eb8c736..e15f1c0 100644
--- a/net/lnet/klnds/socklnd/socklnd.c
+++ b/net/lnet/klnds/socklnd/socklnd.c
@@ -1843,6 +1843,78 @@ static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
 	}
 }
 
+/* Link state of @dev: 0 when it is not running, -1 when its driver has
+ * no ethtool get_link hook, otherwise whatever get_link() returns.
+ */
+static int ksocknal_get_link_status(struct net_device *dev)
+{
+	LASSERT(dev);
+
+	if (!netif_running(dev))
+		return 0;
+	if (!dev->ethtool_ops->get_link)
+		return -1;
+
+	return dev->ethtool_ops->get_link(dev);
+}
+
+/* Propagate a link state change on @dev to every socklnd net bound to it.
+ *
+ * Any @operstate other than IF_OPER_UP sets ni_fatal_error_on on the
+ * matching NIs.  For IF_OPER_UP the flag is set only if
+ * ksocknal_get_link_status() returns 0, and cleared otherwise.
+ * Always returns 0.
+ *
+ * NOTE(review): ksnd_nets is walked without taking a lock here; confirm
+ * the notifier context is serialized against net add/remove.
+ */
+static int
+ksocknal_handle_link_state_change(struct net_device *dev,
+				  unsigned char operstate)
+{
+	struct ksock_net *net;
+	int fatal;
+
+	if (!ksocknal_data.ksnd_nnets)
+		return 0;
+	/* nothing is unlinked in the loop, so no _safe iterator is needed */
+	list_for_each_entry(net, &ksocknal_data.ksnd_nets, ksnn_list) {
+		if (net->ksnn_interface.ksni_index != dev->ifindex)
+			continue;
+		fatal = operstate != IF_OPER_UP ||
+			ksocknal_get_link_status(dev) == 0;
+		atomic_set(&net->ksnn_ni->ni_fatal_error_on, fatal);
+	}
+	return 0;
+}
+
+
+/* Net device notifier event handler.
+ *
+ * Forwards NETDEV_UP, NETDEV_DOWN and NETDEV_CHANGE events, along with the
+ * device's current operstate, to ksocknal_handle_link_state_change().
+ * All other events are ignored.  Always returns NOTIFY_OK.
+ */
+static int ksocknal_device_event(struct notifier_block *unused,
+				 unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	unsigned char operstate = dev->operstate;
+
+	/* only UP, DOWN and CHANGE events are forwarded */
+	if (event != NETDEV_UP && event != NETDEV_DOWN &&
+	    event != NETDEV_CHANGE)
+		return NOTIFY_OK;
+
+	ksocknal_handle_link_state_change(dev, operstate);
+	return NOTIFY_OK;
+}
+
+/* notifier block whose callback is ksocknal_device_event() */
+static struct notifier_block ksocknal_notifier_block = {
+	.notifier_call = ksocknal_device_event,
+};
+
 static void
 ksocknal_base_shutdown(void)
 {
@@ -1852,6 +1924,9 @@ static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
 
 	LASSERT(!ksocknal_data.ksnd_nnets);
 
+	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL)
+		unregister_netdevice_notifier(&ksocknal_notifier_block);
+
 	switch (ksocknal_data.ksnd_init) {
 	default:
 		LASSERT(0);
@@ -2015,6 +2090,8 @@ static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
 		goto failed;
 	}
 
+	register_netdevice_notifier(&ksocknal_notifier_block);
+
 	/* flag everything initialised */
 	ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
 
@@ -2297,6 +2374,7 @@ static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
 	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
 				ntohl(((struct sockaddr_in *)&ksi->ksni_addr)->sin_addr.s_addr));
 	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
+	net->ksnn_ni = ni;
 	ksocknal_data.ksnd_nnets++;
 
 	return 0;
diff --git a/net/lnet/klnds/socklnd/socklnd.h b/net/lnet/klnds/socklnd/socklnd.h
index dac8559..357769a 100644
--- a/net/lnet/klnds/socklnd/socklnd.h
+++ b/net/lnet/klnds/socklnd/socklnd.h
@@ -175,6 +175,7 @@ struct ksock_net {
 	struct list_head	ksnn_list;		/* chain on global list */
 	atomic_t		ksnn_npeers;		/* # peers */
 	struct ksock_interface	ksnn_interface;		/* IP interface */
+	struct lnet_ni		*ksnn_ni;
 };
 
 /* When the ksock_net is shut down, this bias is added to
-- 
1.8.3.1



More information about the lustre-devel mailing list