[lustre-devel] [PATCH 08/19] lnet: o2iblnd: Fix logic for unaligned transfer
James Simmons
jsimmons at infradead.org
Sun Nov 28 15:27:43 PST 2021
From: Chris Horn <chris.horn at hpe.com>
A transfer may begin at a non-zero offset within its first page.
However, o2iblnd has two bugs in how it handles this case.
The first is that this use-case will require LNET_MAX_IOV + 1 local
RDMA fragments, but we do not specify the correct corresponding values
for the max page list to ib_alloc_fast_reg_page_list(),
ib_alloc_fast_reg_mr(), etc.
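The second issue is that the logic in kiblnd_setup_rd_kiov() attempts
to obtain one more scatterlist entry than is actually needed: it calls
sg_next() after filling each entry and treats a NULL result as an error,
even when the entry just filled was the last one required. This
causes the transfer to fail with -EFAULT.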
HPE-bug-id: LUS-10407
WC-bug-id: https://jira.whamcloud.com/browse/LU-15092
Lustre-commit: 23a2c92f203ff2f39 ("LU-15092 o2iblnd: Fix logic for unaligned transfer")
Signed-off-by: Chris Horn <chris.horn at hpe.com>
Reviewed-on: https://review.whamcloud.com/45216
Reviewed-by: James Simmons <jsimmons at infradead.org>
Reviewed-by: Andriy Skulysh <andriy.skulysh at hpe.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
net/lnet/klnds/o2iblnd/o2iblnd.c | 2 +-
net/lnet/klnds/o2iblnd/o2iblnd.h | 6 ++++--
net/lnet/klnds/o2iblnd/o2iblnd_cb.c | 15 +++++++++------
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd.c b/net/lnet/klnds/o2iblnd/o2iblnd.c
index 36d26b2..9cdc12a 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/net/lnet/klnds/o2iblnd/o2iblnd.c
@@ -1392,7 +1392,7 @@ static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps,
frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd,
fastreg_gaps ? IB_MR_TYPE_SG_GAPS :
IB_MR_TYPE_MEM_REG,
- LNET_MAX_IOV);
+ IBLND_MAX_RDMA_FRAGS);
if (IS_ERR(frd->frd_mr)) {
rc = PTR_ERR(frd->frd_mr);
CERROR("Failed to allocate ib_alloc_mr: %d\n", rc);
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd.h b/net/lnet/klnds/o2iblnd/o2iblnd.h
index 5066f7b..21f8981 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/net/lnet/klnds/o2iblnd/o2iblnd.h
@@ -112,8 +112,10 @@ struct kib_tunables {
#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */
+/* max size of queued messages (inc hdr) */
+#define IBLND_MSG_SIZE (4 << 10)
+/* max # of fragments supported. + 1 for unaligned case */
+#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_IOV + 1)
/************************/
/* derived constants... */
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd_cb.c b/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
index a053e7d..db13f41 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -662,6 +662,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
struct scatterlist *sg;
int fragnob;
int max_nkiov;
+ int sg_count = 0;
CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
@@ -682,6 +683,12 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
do {
LASSERT(nkiov > 0);
+ if (!sg) {
+ CERROR("lacking enough sg entries to map tx\n");
+ return -EFAULT;
+ }
+ sg_count++;
+
fragnob = min((int)(kiov->bv_len - offset), nob);
/* We're allowed to start at a non-aligned page offset in
@@ -700,10 +707,6 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
sg_set_page(sg, kiov->bv_page, fragnob,
kiov->bv_offset + offset);
sg = sg_next(sg);
- if (!sg) {
- CERROR("lacking enough sg entries to map tx\n");
- return -EFAULT;
- }
offset = 0;
kiov++;
@@ -711,7 +714,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
nob -= fragnob;
} while (nob > 0);
- return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
+ return kiblnd_map_tx(ni, tx, rd, sg_count);
}
static int
@@ -1008,7 +1011,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
int nob = offsetof(struct kib_msg, ibm_u) + body_nob;
LASSERT(tx->tx_nwrq >= 0);
- LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
+ LASSERT(tx->tx_nwrq <= IBLND_MAX_RDMA_FRAGS);
LASSERT(nob <= IBLND_MSG_SIZE);
kiblnd_init_msg(tx->tx_msg, type, body_nob);
--
1.8.3.1
More information about the lustre-devel
mailing list