[lustre-devel] [PATCH 45/49] lnet: Recover peer NI w/exponential backoff interval
James Simmons
jsimmons at infradead.org
Wed Apr 14 21:02:37 PDT 2021
From: Chris Horn <chris.horn at hpe.com>
Perform LNet recovery pings of peer NIs with an exponential backoff
interval.
- The interval is equal to 2^(number of failed pings) seconds, up to a
maximum of 900 seconds (15 minutes).
- When a message is received, the count of failed pings for the
associated peer NI is reset to 0 so that recovery can happen more
quickly.
HPE-bug-id: LUS-9109
WC-bug-id: https://jira.whamcloud.com/browse/LU-13569
Lustre-commit: 917553c537a8860 ("LU-13569 lnet: Recover peer NI w/exponential backoff interval")
Signed-off-by: Chris Horn <chris.horn at hpe.com>
Reviewed-on: https://review.whamcloud.com/39720
Reviewed-by: Neil Brown <neilb at suse.de>
Reviewed-by: Alexander Boyko <alexander.boyko at hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
include/linux/lnet/lib-lnet.h | 22 ++++++++++++++++++++++
include/linux/lnet/lib-types.h | 6 ++++++
net/lnet/lnet/lib-move.c | 8 ++++++++
net/lnet/lnet/lib-msg.c | 6 +++++-
net/lnet/lnet/peer.c | 11 ++++++++++-
5 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/include/linux/lnet/lib-lnet.h b/include/linux/lnet/lib-lnet.h
index e30d0c4..8b369dd 100644
--- a/include/linux/lnet/lib-lnet.h
+++ b/include/linux/lnet/lib-lnet.h
@@ -910,6 +910,28 @@ int lnet_get_peer_ni_info(u32 peer_index, u64 *nid,
return false;
}
+#define LNET_RECOVERY_INTERVAL_MAX 900
+static inline unsigned int
+lnet_get_next_recovery_ping(unsigned int ping_count, time64_t now)
+{
+ unsigned int interval;
+
+ /* 2^9 = 512, 2^10 = 1024 */
+ if (ping_count > 9)
+ interval = LNET_RECOVERY_INTERVAL_MAX;
+ else
+ interval = 1 << ping_count;
+
+ return now + interval;
+}
+
+static inline void
+lnet_peer_ni_set_next_ping(struct lnet_peer_ni *lpni, time64_t now)
+{
+ lpni->lpni_next_ping =
+ lnet_get_next_recovery_ping(lpni->lpni_ping_count, now);
+}
+
/*
* A peer NI is alive if it satisfies the following two conditions:
* 1. peer NI health >= LNET_MAX_HEALTH_VALUE * router_sensitivity_percentage
diff --git a/include/linux/lnet/lib-types.h b/include/linux/lnet/lib-types.h
index cc451cf..af8f61e 100644
--- a/include/linux/lnet/lib-types.h
+++ b/include/linux/lnet/lib-types.h
@@ -573,6 +573,12 @@ struct lnet_peer_ni {
atomic_t lpni_healthv;
/* recovery ping mdh */
struct lnet_handle_md lpni_recovery_ping_mdh;
+ /* When to send the next recovery ping */
+ time64_t lpni_next_ping;
+ /* How many pings sent during current recovery period did not receive
+ * a reply. NB: reset whenever _any_ message arrives from this peer NI
+ */
+ unsigned int lpni_ping_count;
/* CPT this peer attached on */
int lpni_cpt;
/* state flags -- protected by lpni_lock */
diff --git a/net/lnet/lnet/lib-move.c b/net/lnet/lnet/lib-move.c
index bdcba54..ad1517d 100644
--- a/net/lnet/lnet/lib-move.c
+++ b/net/lnet/lnet/lib-move.c
@@ -3398,6 +3398,12 @@ struct lnet_mt_event_info {
}
spin_unlock(&lpni->lpni_lock);
+
+ if (now < lpni->lpni_next_ping) {
+ lnet_net_unlock(0);
+ continue;
+ }
+
lnet_net_unlock(0);
/* NOTE: we're racing with peer deletion from user space.
@@ -3446,6 +3452,8 @@ struct lnet_mt_event_info {
continue;
}
+ lpni->lpni_ping_count++;
+
lpni->lpni_recovery_ping_mdh = mdh;
lnet_peer_ni_add_to_recoveryq_locked(lpni,
&processed_list,
diff --git a/net/lnet/lnet/lib-msg.c b/net/lnet/lnet/lib-msg.c
index 2e8fea7..0a4a317 100644
--- a/net/lnet/lnet/lib-msg.c
+++ b/net/lnet/lnet/lib-msg.c
@@ -863,8 +863,11 @@
switch (hstatus) {
case LNET_MSG_STATUS_OK:
- /* increment the local ni health weather we successfully
+ /* increment the local ni health whether we successfully
* received or sent a message on it.
+ *
+ * Ping counts are reset to 0 as appropriate to allow for
+ * faster recovery.
*/
lnet_inc_healthv(&ni->ni_healthv, lnet_health_sensitivity);
/* It's possible msg_txpeer is NULL in the LOLND
@@ -875,6 +878,7 @@
* as indication that the router is fully healthy.
*/
if (lpni && msg->msg_rx_committed) {
+ lpni->lpni_ping_count = 0;
/* If we're receiving a message from the router or
* I'm a router, then set that lpni's health to
* maximum so we can commence communication
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index f9af5da..15fcb5e 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -4006,14 +4006,23 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
CDEBUG(D_NET, "lpni %s aged out last alive %lld\n",
libcfs_nid2str(lpni->lpni_nid),
lpni->lpni_last_alive);
+ /* Reset the ping count so that if this peer NI is added back to
+ * the recovery queue we will send the first ping right away.
+ */
+ lpni->lpni_ping_count = 0;
return;
}
/* This peer NI is going on the recovery queue, so take a ref on it */
lnet_peer_ni_addref_locked(lpni);
- CDEBUG(D_NET, "%s added to recovery queue. last alive: %lld health: %d\n",
+ lnet_peer_ni_set_next_ping(lpni, now);
+
+ CDEBUG(D_NET,
+ "%s added to recovery queue. ping count: %u next ping: %lld last alive: %lld health: %d\n",
libcfs_nid2str(lpni->lpni_nid),
+ lpni->lpni_ping_count,
+ lpni->lpni_next_ping,
lpni->lpni_last_alive,
atomic_read(&lpni->lpni_healthv));
--
1.8.3.1
More information about the lustre-devel
mailing list