[lustre-devel] [PATCH 10/24] lnet: socklnd: decrement connection counters on close

James Simmons jsimmons at infradead.org
Thu Jan 13 17:37:49 PST 2022


From: Serguei Smirnov <ssmirnov at whamcloud.com>

To gracefully handle a potential race with delayed connection creation,
decrement the per-type connection counters as connections are being
closed.

Fixes: 511ace4a ("lnet: socklnd: add conns_per_peer parameter")
WC-bug-id: https://jira.whamcloud.com/browse/LU-15137
Lustre-commit: 7e26413aa85fdc931 ("LU-15137 socklnd: decrement connection counters on close")
Signed-off-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-on: https://review.whamcloud.com/45422
Reviewed-by: Amir Shehata <ashehata at whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage at whamcloud.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 net/lnet/klnds/socklnd/socklnd.c | 69 ++++++++++++++++++++++++++++++++++------
 1 file changed, 60 insertions(+), 9 deletions(-)

diff --git a/net/lnet/klnds/socklnd/socklnd.c b/net/lnet/klnds/socklnd/socklnd.c
index b014aa8..6d1f85c 100644
--- a/net/lnet/klnds/socklnd/socklnd.c
+++ b/net/lnet/klnds/socklnd/socklnd.c
@@ -422,7 +422,9 @@ struct ksock_peer_ni *
 	switch (type) {
 	case SOCKLND_CONN_CONTROL:
 		conn_cb->ksnr_ctrl_conn_count++;
-		/* there's a single control connection per peer */
+		/* there's a single control connection per peer,
+		 * two in case of loopback
+		 */
 		conn_cb->ksnr_connected |= BIT(type);
 		break;
 	case SOCKLND_CONN_BULK_IN:
@@ -449,6 +451,45 @@ struct ksock_peer_ni *
 }
 
 static void
+ksocknal_decr_conn_count(struct ksock_conn_cb *conn_cb,
+			 int type)
+{
+	conn_cb->ksnr_conn_count--;
+
+	/* update connected bit to match remaining conns of this type */
+	switch (type) {
+	case SOCKLND_CONN_CONTROL:
+		conn_cb->ksnr_ctrl_conn_count--;
+		/* there's a single control connection per peer,
+		 * two in case of loopback
+		 */
+		if (conn_cb->ksnr_ctrl_conn_count == 0)
+			conn_cb->ksnr_connected &= ~BIT(type);
+		break;
+	case SOCKLND_CONN_BULK_IN:
+		conn_cb->ksnr_blki_conn_count--;
+		if (conn_cb->ksnr_blki_conn_count < conn_cb->ksnr_max_conns)
+			conn_cb->ksnr_connected &= ~BIT(type);
+		break;
+	case SOCKLND_CONN_BULK_OUT:
+		conn_cb->ksnr_blko_conn_count--;
+		if (conn_cb->ksnr_blko_conn_count < conn_cb->ksnr_max_conns)
+			conn_cb->ksnr_connected &= ~BIT(type);
+		break;
+	case SOCKLND_CONN_ANY:
+		if (conn_cb->ksnr_conn_count < conn_cb->ksnr_max_conns)
+			conn_cb->ksnr_connected &= ~BIT(type);
+		break;
+	default:
+		LBUG();
+		break;
+	}
+
+	CDEBUG(D_NET, "Del conn type %d, ksnr_connected %x ksnr_max_conns %d\n",
+	       type, conn_cb->ksnr_connected, conn_cb->ksnr_max_conns);
+}
+
+static void
 ksocknal_associate_cb_conn_locked(struct ksock_conn_cb *conn_cb,
 				  struct ksock_conn *conn)
 {
@@ -1249,6 +1290,8 @@ struct ksock_peer_ni *
 	struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
 	struct ksock_conn_cb *conn_cb;
 	struct ksock_conn *conn2;
+	int conn_count;
+	int duplicate_count = 0;
 
 	LASSERT(!peer_ni->ksnp_error);
 	LASSERT(!conn->ksnc_closing);
@@ -1262,21 +1305,29 @@ struct ksock_peer_ni *
 		/* dissociate conn from cb... */
 		LASSERT(!conn_cb->ksnr_deleted);
 
+		conn_count = ksocknal_get_conn_count_by_type(conn_cb,
+							     conn->ksnc_type);
 		/* connected bit is set only if all connections
 		 * of the given type got created
 		 */
-		if (ksocknal_get_conn_count_by_type(conn_cb, conn->ksnc_type) ==
-		    conn_cb->ksnr_max_conns)
+		if (conn_count == conn_cb->ksnr_max_conns)
 			LASSERT((conn_cb->ksnr_connected &
 				BIT(conn->ksnc_type)) != 0);
 
-		list_for_each_entry(conn2, &peer_ni->ksnp_conns, ksnc_list) {
-			if (conn2->ksnc_conn_cb == conn_cb &&
-			    conn2->ksnc_type == conn->ksnc_type)
-				goto conn2_found;
+		if (conn_count == 1) {
+			list_for_each_entry(conn2, &peer_ni->ksnp_conns,
+					    ksnc_list) {
+				if (conn2->ksnc_conn_cb == conn_cb &&
+				    conn2->ksnc_type == conn->ksnc_type)
+					duplicate_count += 1;
+			}
+			if (duplicate_count > 0)
+				CERROR("Found %d duplicate conns type %d\n",
+				       duplicate_count,
+				       conn->ksnc_type);
 		}
-		conn_cb->ksnr_connected &= ~BIT(conn->ksnc_type);
-conn2_found:
+		ksocknal_decr_conn_count(conn_cb, conn->ksnc_type);
+
 		conn->ksnc_conn_cb = NULL;
 
 		/* drop conn's ref on route */
-- 
1.8.3.1



More information about the lustre-devel mailing list