[lustre-devel] [PATCH 04/14] lnet: Deprecate lnet_recovery_interval

James Simmons jsimmons at infradead.org
Mon May 3 17:10:06 PDT 2021


From: Chris Horn <chris.horn at hpe.com>

We no longer use a static recovery interval, so remove its remaining
uses and add a warning that it has been deprecated.

HPE-bug-id: LUS-9109
C-bug-id: https://jira.whamcloud.com/browse/LU-13569
Lustre-commit: 79ab0535622782c82 ("LU-13569 lnet: Deprecate lnet_recovery_interval")
Signed-off-by: Chris Horn <chris.horn at hpe.com>
Reviewed-on: https://review.whamcloud.com/39722
Reviewed-by: Neil Brown <neilb at suse.de>
Reviewed-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-by: James Simmons <jsimmons at infradead.org>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 net/lnet/lnet/api-ni.c   | 26 ++------------------------
 net/lnet/lnet/lib-move.c | 19 +++----------------
 2 files changed, 5 insertions(+), 40 deletions(-)

diff --git a/net/lnet/lnet/api-ni.c b/net/lnet/lnet/api-ni.c
index cc40040..d6a8c1b 100644
--- a/net/lnet/lnet/api-ni.c
+++ b/net/lnet/lnet/api-ni.c
@@ -110,7 +110,7 @@ static int recovery_interval_set(const char *val,
 		__param_check(name, p, int)
 module_param(lnet_recovery_interval, recovery_interval, 0644);
 MODULE_PARM_DESC(lnet_recovery_interval,
-		 "Interval to recover unhealthy interfaces in seconds");
+		 "DEPRECATED Interval to recover unhealthy interfaces in seconds");
 
 unsigned int lnet_recovery_limit;
 module_param(lnet_recovery_limit, uint, 0644);
@@ -253,29 +253,7 @@ static int lnet_discover(struct lnet_process_id id, u32 force,
 static int
 recovery_interval_set(const char *val, const struct kernel_param *kp)
 {
-	int rc;
-	unsigned int *interval = (unsigned int *)kp->arg;
-	unsigned long value;
-
-	rc = kstrtoul(val, 0, &value);
-	if (rc) {
-		CERROR("Invalid module parameter value for 'lnet_recovery_interval'\n");
-		return rc;
-	}
-
-	if (value < 1) {
-		CERROR("lnet_recovery_interval must be at least 1 second\n");
-		return -EINVAL;
-	}
-
-	/* The purpose of locking the api_mutex here is to ensure that
-	 * the correct value ends up stored properly.
-	 */
-	mutex_lock(&the_lnet.ln_api_mutex);
-
-	*interval = value;
-
-	mutex_unlock(&the_lnet.ln_api_mutex);
+	CWARN("'lnet_recovery_interval' has been deprecated\n");
 
 	return 0;
 }
diff --git a/net/lnet/lnet/lib-move.c b/net/lnet/lnet/lib-move.c
index 46c88d0..cb0943e 100644
--- a/net/lnet/lnet/lib-move.c
+++ b/net/lnet/lnet/lib-move.c
@@ -3480,9 +3480,7 @@ struct lnet_mt_event_info {
 static int
 lnet_monitor_thread(void *arg)
 {
-	time64_t recovery_timeout = 0;
 	time64_t rsp_timeout = 0;
-	int interval;
 	time64_t now;
 
 	wait_for_completion(&the_lnet.ln_started);
@@ -3509,11 +3507,8 @@ struct lnet_mt_event_info {
 			rsp_timeout = now + (lnet_transaction_timeout / 2);
 		}
 
-		if (now >= recovery_timeout) {
-			lnet_recover_local_nis();
-			lnet_recover_peer_nis();
-			recovery_timeout = now + lnet_recovery_interval;
-		}
+		lnet_recover_local_nis();
+		lnet_recover_peer_nis();
 
 		/* TODO do we need to check if we should sleep without
 		 * timeout?  Technically, an active system will always
@@ -3522,17 +3517,9 @@ struct lnet_mt_event_info {
 		 * if we wake up every 1 second? Although, we've seen
 		 * cases where we get a complaint that an idle thread
 		 * is waking up unnecessarily.
-		 *
-		 * Take into account the current net_count when you wake
-		 * up for alive router checking, since we need to check
-		 * possibly as many networks as we have configured.
 		 */
-		interval = min(lnet_recovery_interval,
-			       min((unsigned int)alive_router_check_interval /
-					lnet_current_net_count,
-				   lnet_transaction_timeout / 2));
 		wait_for_completion_interruptible_timeout(&the_lnet.ln_mt_wait_complete,
-							  interval * HZ);
+							  HZ);
 		/* Must re-init the completion before testing anything,
 		 * including ln_mt_state.
 		 */
-- 
1.8.3.1



More information about the lustre-devel mailing list