[lustre-devel] [PATCH 04/15] lnet: socklnd: lock ksnc_tx_queue list processing
James Simmons
jsimmons at infradead.org
Mon Nov 8 07:07:32 PST 2021
From: Artem Blagodarenko <artem.blagodarenko at hpe.com>
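A GPF (general protection fault) occurred in ksocknal_find_timed_out_conn()
while processing the ksnc_tx_queue list.
Add locking to this list: hold the connection scheduler's kss_lock
(spin_lock_bh) while the list is examined.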
HPE-bug-id: LUS-10248
Fixes: 3f8b895465 ("lnet: handle socklnd tx failure")
WC-bug-id: https://jira.whamcloud.com/browse/LU-15076
Lustre-commit: 13c7c2e3c248c8cdb ("LU-15076 socklnd: lock ksnc_tx_queue list processing")
Signed-off-by: Artem Blagodarenko <artem.blagodarenko at hpe.com>
Reviewed-by: Chris Horn <hornc at cray.com>
Reviewed-by: Alexander Boyko <alexander.boyko at hpe.com>
Reviewed-on: https://review.whamcloud.com/45179
Reviewed-by: Chris Horn <chris.horn at hpe.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
net/lnet/klnds/socklnd/socklnd_cb.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/lnet/klnds/socklnd/socklnd_cb.c b/net/lnet/klnds/socklnd/socklnd_cb.c
index edc584a..b2a1267 100644
--- a/net/lnet/klnds/socklnd/socklnd_cb.c
+++ b/net/lnet/klnds/socklnd/socklnd_cb.c
@@ -2188,12 +2188,14 @@ void ksocknal_write_callback(struct ksock_conn *conn)
/* We're called with a shared lock on ksnd_global_lock */
struct ksock_conn *conn;
struct ksock_tx *tx;
+ struct ksock_sched *sched;
list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) {
int error;
/* Don't need the {get,put}connsock dance to deref ksnc_sock */
LASSERT(!conn->ksnc_closing);
+ sched = conn->ksnc_scheduler;
error = conn->ksnc_sock->sk->sk_err;
if (error) {
@@ -2234,6 +2236,7 @@ void ksocknal_write_callback(struct ksock_conn *conn)
return conn;
}
+ spin_lock_bh(&sched->kss_lock);
if ((!list_empty(&conn->ksnc_tx_queue) ||
conn->ksnc_sock->sk->sk_wmem_queued) &&
ktime_get_seconds() >= conn->ksnc_tx_deadline) {
@@ -2249,8 +2252,10 @@ void ksocknal_write_callback(struct ksock_conn *conn)
CNETERR("Timeout sending data to %s (%pISp) the network or that node may be down.\n",
libcfs_idstr(&peer_ni->ksnp_id),
&conn->ksnc_peeraddr);
+ spin_unlock_bh(&sched->kss_lock);
return conn;
}
+ spin_unlock_bh(&sched->kss_lock);
}
return NULL;
--
1.8.3.1
More information about the lustre-devel
mailing list