[lustre-devel] [PATCH 19/42] lnet: Support checking for MD leaks.

James Simmons jsimmons at infradead.org
Mon Oct 5 17:05:58 PDT 2020


From: Mr NeilBrown <neilb at suse.de>

Since we dropped the refcounting on LNetEQ we no longer get
confirmation that all MDs for a given handler are gone by the
time they should be.

So add lnet_assert_handler_unused() which searches the per-cpt
containers and ensures there are no MDs for a given handler, and call
that at the same place where we used to call LNetEQFree().

WC-bug-id: https://jira.whamcloud.com/browse/LU-13005
Lustre-commit: b7278ecc699b5 ("LU-13005 lnet: Support checking for MD leaks.")
Signed-off-by: Mr NeilBrown <neilb at suse.de>
Reviewed-on: https://review.whamcloud.com/38059
Reviewed-by: Chris Horn <chris.horn at hpe.com>
Reviewed-by: James Simmons <jsimmons at infradead.org>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 fs/lustre/ptlrpc/events.c |  1 +
 include/linux/lnet/api.h  |  2 ++
 net/lnet/lnet/api-ni.c    |  3 +++
 net/lnet/lnet/lib-md.c    | 19 +++++++++++++++++++
 net/lnet/lnet/peer.c      |  1 +
 net/lnet/selftest/rpc.c   |  1 +
 6 files changed, 27 insertions(+)

diff --git a/fs/lustre/ptlrpc/events.c b/fs/lustre/ptlrpc/events.c
index eef40b3..0943612 100644
--- a/fs/lustre/ptlrpc/events.c
+++ b/fs/lustre/ptlrpc/events.c
@@ -517,6 +517,7 @@ static void ptlrpc_ni_fini(void)
 	percpu_ref_kill(&ptlrpc_pending);
 	wait_for_completion(&ptlrpc_done);
 
+	lnet_assert_handler_unused(ptlrpc_handler);
 	LNetNIFini();
 }
 
diff --git a/include/linux/lnet/api.h b/include/linux/lnet/api.h
index 95805de..064c92e 100644
--- a/include/linux/lnet/api.h
+++ b/include/linux/lnet/api.h
@@ -126,6 +126,8 @@ int LNetMDBind(const struct lnet_md *md_in,
 	       struct lnet_handle_md *md_handle_out);
 
 int LNetMDUnlink(struct lnet_handle_md md_in);
+
+void lnet_assert_handler_unused(lnet_handler_t handler);
 /** @} lnet_md */
 
 /** \defgroup lnet_data Data movement operations
diff --git a/net/lnet/lnet/api-ni.c b/net/lnet/lnet/api-ni.c
index c90ab2e..0f325ec 100644
--- a/net/lnet/lnet/api-ni.c
+++ b/net/lnet/lnet/api-ni.c
@@ -1231,6 +1231,7 @@ struct list_head **
 		the_lnet.ln_mt_zombie_rstqs = NULL;
 	}
 
+	lnet_assert_handler_unused(the_lnet.ln_mt_handler);
 	the_lnet.ln_mt_handler = NULL;
 
 	lnet_portals_destroy();
@@ -1795,6 +1796,7 @@ struct lnet_ping_buffer *
 	lnet_ping_md_unlink(the_lnet.ln_ping_target,
 			    &the_lnet.ln_ping_target_md);
 
+	lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
 	lnet_ping_target_destroy();
 }
 
@@ -1969,6 +1971,7 @@ static void lnet_push_target_fini(void)
 	the_lnet.ln_push_target_nnis = 0;
 
 	LNetClearLazyPortal(LNET_RESERVED_PORTAL);
+	lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
 	the_lnet.ln_push_target_handler = NULL;
 }
 
diff --git a/net/lnet/lnet/lib-md.c b/net/lnet/lnet/lib-md.c
index e2c3e90..203c794 100644
--- a/net/lnet/lnet/lib-md.c
+++ b/net/lnet/lnet/lib-md.c
@@ -262,6 +262,25 @@ int lnet_cpt_of_md(struct lnet_libmd *md, unsigned int offset)
 	list_add(&md->md_list, &container->rec_active);
 }
 
+void lnet_assert_handler_unused(lnet_handler_t handler)
+{
+	struct lnet_res_container *container;
+	int cpt;
+
+	if (!handler)
+		return;
+
+	cfs_percpt_for_each(container, cpt, the_lnet.ln_md_containers) {
+		struct lnet_libmd *md;
+
+		lnet_res_lock(cpt);
+		list_for_each_entry(md, &container->rec_active, md_list)
+			LASSERT(md->md_handler != handler);
+		lnet_res_unlock(cpt);
+	}
+}
+EXPORT_SYMBOL(lnet_assert_handler_unused);
+
 /* must be called with lnet_res_lock held */
 void
 lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_event *ev)
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index 5ca6f68..3889310 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -3473,6 +3473,7 @@ static int lnet_peer_discovery(void *arg)
 	}
 	lnet_net_unlock(LNET_LOCK_EX);
 
+	lnet_assert_handler_unused(the_lnet.ln_dc_handler);
 	the_lnet.ln_dc_handler = NULL;
 
 	the_lnet.ln_dc_state = LNET_DC_STATE_SHUTDOWN;
diff --git a/net/lnet/selftest/rpc.c b/net/lnet/selftest/rpc.c
index a72e485..d012930 100644
--- a/net/lnet/selftest/rpc.c
+++ b/net/lnet/selftest/rpc.c
@@ -1672,6 +1672,7 @@ struct srpc_client_rpc *
 		rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
 		rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
 		LASSERT(!rc);
+		lnet_assert_handler_unused(srpc_data.rpc_lnet_handler);
 		/* fall through */
 	case SRPC_STATE_NI_INIT:
 		LNetNIFini();
-- 
1.8.3.1



More information about the lustre-devel mailing list