[lustre-devel] [PATCH 46/50] lnet: o2iblnd: avoid memory copy for short msg

James Simmons jsimmons at infradead.org
Sun Mar 20 06:31:00 PDT 2022


From: Alexey Lyashkov <alexey.lyashkov at hpe.com>

Modern cards allow sending kernel memory data without mapping
or copying it into the preallocated buffer.
This reduces LNet selftest CPU consumption by 3% for messages
smaller than 4k.

HPE-bug-id: LUS-1796
WC-bug-id: https://jira.whamcloud.com/browse/LU-14008
Lustre-commit: bebd87cc6c9acc577 ("LU-14008 o2iblnd: avoid memory copy for short msg")
Signed-off-by: Alexey Lyashkov <alexey.lyashkov at hpe.com>
Reviewed-on: https://review.whamcloud.com/40262
Reviewed-by: Chris Horn <chris.horn at hpe.com>
Reviewed-by: Alexander Boyko <alexander.boyko at hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 net/lnet/klnds/o2iblnd/o2iblnd.c    |  3 +-
 net/lnet/klnds/o2iblnd/o2iblnd.h    |  3 ++
 net/lnet/klnds/o2iblnd/o2iblnd_cb.c | 63 ++++++++++++++++++++++++++++---------
 3 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/net/lnet/klnds/o2iblnd/o2iblnd.c b/net/lnet/klnds/o2iblnd/o2iblnd.c
index 9ce6082..8dce4179 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/net/lnet/klnds/o2iblnd/o2iblnd.c
@@ -628,10 +628,9 @@ static unsigned int kiblnd_send_wrs(struct kib_conn *conn)
 	 */
 	int ret;
 	int multiplier = 1 + conn->ibc_max_frags;
-	enum kib_dev_caps dev_caps = conn->ibc_hdev->ibh_dev->ibd_dev_caps;
 
 	/* FastReg needs two extra WRs for map and invalidate */
-	if (dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED)
+	if (IS_FAST_REG_DEV(conn->ibc_hdev->ibh_dev))
 		multiplier += 2;
 
 	/* account for a maximum of ibc_queue_depth in-flight transfers */
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd.h b/net/lnet/klnds/o2iblnd/o2iblnd.h
index 5a4b4f8..e798695 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/net/lnet/klnds/o2iblnd/o2iblnd.h
@@ -149,6 +149,9 @@ enum kib_dev_caps {
 	IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT	= BIT(1),
 };
 
+#define IS_FAST_REG_DEV(dev) \
+	((dev)->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED)
+
 struct kib_dev {
 	struct list_head	ibd_list;	/* chain on kib_devs */
 	struct list_head	ibd_fail_list;	/* chain on kib_failed_devs */
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd_cb.c b/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 983599f..a88939e7 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -42,8 +42,11 @@
 static void kiblnd_peer_alive(struct kib_peer_ni *peer_ni);
 static void kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active,
 				       int error);
-static void kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx,
-			       int type, int body_nob);
+static struct ib_rdma_wr *
+kiblnd_init_tx_msg_payload(struct lnet_ni *ni, struct kib_tx *tx,
+			   int type, int body_nob, int payload_nob);
+#define kiblnd_init_tx_msg(ni, tx, type, body) \
+	kiblnd_init_tx_msg_payload(ni, tx, type, body, 0)
 static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
 			    int resid, struct kib_rdma_desc *dstrd,
 			    u64 dstcookie);
@@ -572,7 +575,7 @@ static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
 	 * in trying to map the memory, because it'll just fail. So
 	 * preemptively fail with an appropriate message
 	 */
-	if ((dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED) &&
+	if (IS_FAST_REG_DEV(dev) &&
 	    !(dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT) &&
 	    tx->tx_gaps) {
 		CERROR("Using FastReg with no GAPS support, but tx has gaps\n");
@@ -1021,9 +1024,9 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
 	tx->tx_nsge++;
 }
 
-static void
-kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
-		   int body_nob)
+static struct ib_rdma_wr *
+kiblnd_init_tx_msg_payload(struct lnet_ni *ni, struct kib_tx *tx, int type,
+			   int body_nob, int payload)
 {
 	struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
 	int nob = offsetof(struct kib_msg, ibm_u) + body_nob;
@@ -1032,7 +1035,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
 	LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
 	LASSERT(nob <= IBLND_MSG_SIZE);
 
-	kiblnd_init_msg(tx->tx_msg, type, body_nob);
+	kiblnd_init_msg(tx->tx_msg, type, body_nob + payload);
 
 	*wrq = (struct ib_rdma_wr) {
 		.wr = {
@@ -1047,6 +1050,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
 	kiblnd_init_tx_sge(tx, tx->tx_msgaddr, nob);
 
 	tx->tx_nwrq++;
+	return wrq;
 }
 
 static int
@@ -1654,15 +1658,44 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
 	ibmsg = tx->tx_msg;
 	lnet_hdr_to_nid4(hdr, &ibmsg->ibm_u.immediate.ibim_hdr);
 
-	rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob,
-			    &from);
-	if (rc != payload_nob) {
-		kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
-		return -EFAULT;
-	}
+	if (payload_nob) {
+		struct ib_rdma_wr *wrq;
+		int i;
+
+		nob = offsetof(struct kib_immediate_msg, ibim_payload[0]);
+		wrq = kiblnd_init_tx_msg_payload(ni, tx, IBLND_MSG_IMMEDIATE,
+						 nob, payload_nob);
+
+		rd = tx->tx_rd;
+		rc = kiblnd_setup_rd_kiov(ni, tx, rd,
+					  payload_niov, payload_kiov,
+					  payload_offset, payload_nob);
+		if (rc != 0) {
+			CERROR("Can't setup IMMEDIATE src for %s: %d\n",
+			       libcfs_nidstr(&target->nid), rc);
+			kiblnd_tx_done(tx);
+			return -EIO;
+		}
 
-	nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
-	kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
+		/* lets generate a SGE chain */
+		for (i = 0; i < rd->rd_nfrags; i++) {
+			kiblnd_init_tx_sge(tx, rd->rd_frags[i].rf_addr,
+					   rd->rd_frags[i].rf_nob);
+			wrq->wr.num_sge++;
+		}
+	} else {
+		rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload,
+				    payload_nob, &from);
+		if (rc != payload_nob) {
+			kiblnd_pool_free_node(&tx->tx_pool->tpo_pool,
+					      &tx->tx_list);
+			return -EFAULT;
+		}
+
+		nob = offsetof(struct kib_immediate_msg,
+			       ibim_payload[payload_nob]);
+		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
+	}
 
 	/* finalise lntmsg on completion */
 	tx->tx_lntmsg[0] = lntmsg;
-- 
1.8.3.1



More information about the lustre-devel mailing list