[lustre-devel] [PATCH 04/15] lnet: Add health ping stats

James Simmons jsimmons at infradead.org
Wed Jul 7 12:11:05 PDT 2021


From: Chris Horn <chris.horn at hpe.com>

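Add the NI and peer NI ping count and next ping timestamp to the
detailed output of the lnetctl net and peer commands.

Also set the peer NI health value through
lnet_set_lpni_healthv_locked() rather than calling atomic_set()
on lpni_healthv directly.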

HPE-bug-id: LUS-9109
WC-bug-id: https://jira.whamcloud.com/browse/LU-13569
Lustre-commit: 4c7e4aa576296603 ("LU-13569 lnet: Add health ping stats")
Signed-off-by: Chris Horn <chris.horn at hpe.com>
Reviewed-on: https://review.whamcloud.com/40314
Reviewed-by: Alexander Boyko <alexander.boyko at hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 include/uapi/linux/lnet/lnet-dlc.h | 4 ++++
 net/lnet/lnet/api-ni.c             | 2 ++
 net/lnet/lnet/peer.c               | 7 +++++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/lnet/lnet-dlc.h b/include/uapi/linux/lnet/lnet-dlc.h
index b375d0a..c1c063f 100644
--- a/include/uapi/linux/lnet/lnet-dlc.h
+++ b/include/uapi/linux/lnet/lnet-dlc.h
@@ -191,6 +191,8 @@ struct lnet_ioctl_local_ni_hstats {
 	__u32 hlni_local_timeout;
 	__u32 hlni_local_error;
 	__s32 hlni_health_value;
+	__u32 hlni_ping_count;
+	__u64 hlni_next_ping;
 };
 
 struct lnet_ioctl_peer_ni_hstats {
@@ -199,6 +201,8 @@ struct lnet_ioctl_peer_ni_hstats {
 	__u32 hlpni_remote_error;
 	__u32 hlpni_network_timeout;
 	__s32 hlpni_health_value;
+	__u32 hlpni_ping_count;
+	__u64 hlpni_next_ping;
 };
 
 struct lnet_ioctl_element_msg_stats {
diff --git a/net/lnet/lnet/api-ni.c b/net/lnet/lnet/api-ni.c
index d6a8c1b..e52bb41 100644
--- a/net/lnet/lnet/api-ni.c
+++ b/net/lnet/lnet/api-ni.c
@@ -3634,6 +3634,8 @@ u32 lnet_get_dlc_seq_locked(void)
 		atomic_read(&ni->ni_hstats.hlt_local_error);
 	stats->hlni_health_value =
 		atomic_read(&ni->ni_healthv);
+	stats->hlni_ping_count = ni->ni_ping_count;
+	stats->hlni_next_ping = ni->ni_next_ping;
 
 unlock:
 	lnet_net_unlock(cpt);
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index 2fc784d..76b2d2f 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -3986,6 +3986,8 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
 			atomic_read(&lpni->lpni_hstats.hlt_remote_error);
 		lpni_hstats->hlpni_health_value =
 			atomic_read(&lpni->lpni_healthv);
+		lpni_hstats->hlpni_ping_count = lpni->lpni_ping_count;
+		lpni_hstats->hlpni_next_ping = lpni->lpni_next_ping;
 		if (copy_to_user(bulk, lpni_hstats, sizeof(*lpni_hstats)))
 			goto out_free_hstats;
 		bulk += sizeof(*lpni_hstats);
@@ -4081,7 +4083,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
 			lnet_net_unlock(LNET_LOCK_EX);
 			return;
 		}
-		atomic_set(&lpni->lpni_healthv, value);
+		lnet_set_lpni_healthv_locked(lpni, value);
 		lnet_peer_ni_add_to_recoveryq_locked(lpni,
 						     &the_lnet.ln_mt_peerNIRecovq, now);
 		lnet_peer_ni_decref_locked(lpni);
@@ -4102,7 +4104,8 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
 					    lpn_peer_nets) {
 				list_for_each_entry(lpni, &lpn->lpn_peer_nis,
 						    lpni_peer_nis) {
-					atomic_set(&lpni->lpni_healthv, value);
+					lnet_set_lpni_healthv_locked(lpni,
+								     value);
 					lnet_peer_ni_add_to_recoveryq_locked(lpni,
 									     &the_lnet.ln_mt_peerNIRecovq,
 									     now);
-- 
1.8.3.1



More information about the lustre-devel mailing list