[lustre-devel] [PATCH 10/29] lustre: mdc: set fid2path RPC interruptible

James Simmons jsimmons at infradead.org
Sun Apr 25 13:08:17 PDT 2021


From: Lai Siyao <lai.siyao at whamcloud.com>

Sometimes OI scrub can't fix the inconsistency in FID and name, and
server will return -EINPROGRESS for fid2path request. Upon such
failure, client will keep resending the request. Set such request
to be interruptible to avoid deadlock.

WC-bug-id: https://jira.whamcloud.com/browse/LU-14119
Lustre-commit: bf47526261067153 ("LU-14119 mdc: set fid2path RPC interruptible")
Signed-off-by: Lai Siyao <lai.siyao at whamcloud.com>
Reviewed-on: https://review.whamcloud.com/41219
Reviewed-by: Andreas Dilger <adilger at whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 fs/lustre/include/lustre_net.h |  4 +++-
 fs/lustre/mdc/mdc_request.c    |  7 +++++++
 fs/lustre/ptlrpc/client.c      | 35 +++++++++++++++++++++++++++++++----
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/fs/lustre/include/lustre_net.h b/fs/lustre/include/lustre_net.h
index 2b98468..abd16ea 100644
--- a/fs/lustre/include/lustre_net.h
+++ b/fs/lustre/include/lustre_net.h
@@ -445,6 +445,7 @@ struct ptlrpc_request_set {
 	set_producer_func	set_producer;
 	/** opaq argument passed to the producer callback */
 	void			*set_producer_arg;
+	unsigned int		set_allow_intr:1;
 };
 
 struct ptlrpc_bulk_desc;
@@ -825,7 +826,8 @@ struct ptlrpc_request {
 		rq_allow_replay:1,
 		/* bulk request, sent to server, but uncommitted */
 		rq_unstable:1,
-		rq_early_free_repbuf:1; /* free reply buffer in advance */
+		rq_early_free_repbuf:1, /* free reply buffer in advance */
+		rq_allow_intr:1;
 	/** @} */
 
 	/** server-side flags @{ */
diff --git a/fs/lustre/mdc/mdc_request.c b/fs/lustre/mdc/mdc_request.c
index ef27af6..6ac3a39 100644
--- a/fs/lustre/mdc/mdc_request.c
+++ b/fs/lustre/mdc/mdc_request.c
@@ -2293,6 +2293,13 @@ static int mdc_get_info_rpc(struct obd_export *exp,
 			     RCL_SERVER, vallen);
 	ptlrpc_request_set_replen(req);
 
+	/* if server failed to resolve FID, and OI scrub not able to fix it, it
+	 * will return -EINPROGRESS, ptlrpc_queue_wait() will keep retrying,
+	 * set request interruptible to avoid deadlock.
+	 */
+	if (KEY_IS(KEY_FID2PATH))
+		req->rq_allow_intr = 1;
+
 	rc = ptlrpc_queue_wait(req);
 	/* -EREMOTE means the get_info result is partial, and it needs to
 	 * continue on another MDT, see fid2path part in lmv_iocontrol
diff --git a/fs/lustre/ptlrpc/client.c b/fs/lustre/ptlrpc/client.c
index 04e8fec..3c57b69 100644
--- a/fs/lustre/ptlrpc/client.c
+++ b/fs/lustre/ptlrpc/client.c
@@ -1127,6 +1127,9 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
 	LASSERT(req->rq_import->imp_state != LUSTRE_IMP_IDLE);
 	LASSERT(list_empty(&req->rq_set_chain));
 
+	if (req->rq_allow_intr)
+		set->set_allow_intr = 1;
+
 	/* The set takes over the caller's request reference */
 	list_add_tail(&req->rq_set_chain, &set->set_requests);
 	req->rq_set = set;
@@ -1725,6 +1728,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 	list_for_each_entry_safe(req, next, &set->set_requests, rq_set_chain) {
 		struct obd_import *imp = req->rq_import;
 		int unregistered = 0;
+		int async = 1;
 		int rc = 0;
 
 		/*
@@ -1736,6 +1740,24 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 		 */
 		cond_resched();
 
+		/*
+		 * If the caller requires to allow to be interrupted by force
+		 * and it has really been interrupted, then move the request
+		 * to RQ_PHASE_INTERPRET phase in spite of what the current
+		 * phase is.
+		 */
+		if (unlikely(req->rq_allow_intr && req->rq_intr)) {
+			req->rq_status = -EINTR;
+			ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+
+			/*
+			 * Since it is interpreted and we have to wait for
+			 * the reply to be unlinked, then use sync mode.
+			 */
+			async = 0;
+			goto interpret;
+		}
+
 		if (req->rq_phase == RQ_PHASE_NEW &&
 		    ptlrpc_send_new_req(req)) {
 			force_timer_recalc = 1;
@@ -2067,13 +2089,13 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 		 * This moves to "unregistering" phase we need to wait for
 		 * reply unlink.
 		 */
-		if (!unregistered && !ptlrpc_unregister_reply(req, 1)) {
+		if (!unregistered && !ptlrpc_unregister_reply(req, async)) {
 			/* start async bulk unlink too */
 			ptlrpc_unregister_bulk(req, 1);
 			continue;
 		}
 
-		if (!ptlrpc_unregister_bulk(req, 1))
+		if (!ptlrpc_unregister_bulk(req, async))
 			continue;
 
 		/* When calling interpret receive should already be finished. */
@@ -2271,8 +2293,12 @@ static void ptlrpc_interrupted_set(struct ptlrpc_request_set *set)
 
 	CDEBUG(D_RPCTRACE, "INTERRUPTED SET %p\n", set);
 	list_for_each_entry(req, &set->set_requests, rq_set_chain) {
+		if (req->rq_intr)
+			continue;
+
 		if (req->rq_phase != RQ_PHASE_RPC &&
-		    req->rq_phase != RQ_PHASE_UNREG_RPC)
+		    req->rq_phase != RQ_PHASE_UNREG_RPC &&
+		    !req->rq_allow_intr)
 			continue;
 
 		spin_lock(&req->rq_lock);
@@ -2368,7 +2394,8 @@ int ptlrpc_set_wait(const struct lu_env *env, struct ptlrpc_request_set *set)
 		CDEBUG(D_RPCTRACE, "set %p going to sleep for %lld seconds\n",
 		       set, timeout);
 
-		if (timeout == 0 && !signal_pending(current)) {
+		if ((timeout == 0 && !signal_pending(current)) ||
+		    set->set_allow_intr) {
 			/*
 			 * No requests are in-flight (ether timed out
 			 * or delayed), so we can allow interrupts.
-- 
1.8.3.1



More information about the lustre-devel mailing list