[lustre-devel] [PATCH 07/24] lustre: lov: Cache stripe offset calculation
James Simmons
jsimmons at infradead.org
Thu Jan 13 17:37:46 PST 2022
From: Patrick Farrell <farr0186 at gmail.com>
Calculating the layout entry, stripe number and stripe-relative
offset of each page in a file is surprisingly expensive. Because
direct i/o has already been split up into stripes by the cl_io code,
recalculating these values for every page is unnecessary.
We cache the results of the calculation in the lov_io and reuse
them for subsequent pages of the same i/o.
This improves AIO/DIO page submission significantly,
improving performance by a bit over 10%.
Also remove lps_layout_gen, which isn't doing anything
useful. This suggests the possibility of removing
lov_page, but that's for another patch.
This patch reduces i/o time in ms/GiB by:
Write: 17 ms/GiB
Read: 22 ms/GiB
Resulting totals (with this patch applied):
Write: 119 ms/GiB
Read: 121 ms/GiB
mpirun -np 1 $IOR -w -r -t 64M -b 64G -o ./iorfile --posix.odirect
With previous patches in series:
write 7531 MiB/s
read 7179 MiB/s
Plus this patch:
write 8637 MiB/s
read 8488 MiB/s
WC-bug-id: https://jira.whamcloud.com/browse/LU-13799
Lustre-commit: 14db1faa0fbe813fe ("LU-13799 lov: Cache stripe offset calculation")
Signed-off-by: Patrick Farrell <farr0186 at gmail.com>
Reviewed-on: https://review.whamcloud.com/39445
Reviewed-by: Andreas Dilger <adilger at whamcloud.com>
Reviewed-by: Yingjin Qian <qian at ddn.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
fs/lustre/lov/lov_cl_internal.h | 9 +++++--
fs/lustre/lov/lov_io.c | 6 +++++
fs/lustre/lov/lov_page.c | 57 +++++++++++++++++++++++++++++++----------
3 files changed, 57 insertions(+), 15 deletions(-)
diff --git a/fs/lustre/lov/lov_cl_internal.h b/fs/lustre/lov/lov_cl_internal.h
index d48e2df3..42fd10a 100644
--- a/fs/lustre/lov/lov_cl_internal.h
+++ b/fs/lustre/lov/lov_cl_internal.h
@@ -453,8 +453,6 @@ struct lov_lock {
struct lov_page {
struct cl_page_slice lps_cl;
- /* the layout gen when this page was created */
- u32 lps_layout_gen;
};
/*
@@ -524,6 +522,7 @@ struct lov_io_sub {
/**
* IO state private for LOV.
*/
+#define LIS_CACHE_ENTRY_NONE -ENOENT
struct lov_io {
/** super-class */
struct cl_io_slice lis_cl;
@@ -590,6 +589,12 @@ struct lov_io {
* All sub-io's created in this lov_io.
*/
struct list_head lis_subios;
+ /* Cached results from stripe & offset calculations for page init */
+ int lis_cached_entry;
+ int lis_cached_stripe;
+ loff_t lis_cached_off;
+ loff_t lis_cached_suboff;
+ struct lov_io_sub *lis_cached_sub;
};
struct lov_session {
diff --git a/fs/lustre/lov/lov_io.c b/fs/lustre/lov/lov_io.c
index 8df13ee..904bafd 100644
--- a/fs/lustre/lov/lov_io.c
+++ b/fs/lustre/lov/lov_io.c
@@ -467,6 +467,7 @@ static int lov_io_slice_init(struct lov_io *lio, struct lov_object *obj,
io->ci_result = 0;
lio->lis_object = obj;
+ lio->lis_cached_entry = LIS_CACHE_ENTRY_NONE;
switch (io->ci_type) {
case CIT_READ:
@@ -1053,6 +1054,11 @@ static void lov_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
{
int rc;
+ /* Before ending each i/o, we must set lis_cached_entry to tell the
+ * next i/o not to use stale cached lis information.
+ */
+ cl2lov_io(env, ios)->lis_cached_entry = LIS_CACHE_ENTRY_NONE;
+
rc = lov_io_call(env, cl2lov_io(env, ios), lov_io_end_wrapper);
LASSERT(rc == 0);
}
diff --git a/fs/lustre/lov/lov_page.c b/fs/lustre/lov/lov_page.c
index fdc415b..16bd7cd 100644
--- a/fs/lustre/lov/lov_page.c
+++ b/fs/lustre/lov/lov_page.c
@@ -56,8 +56,7 @@ static int lov_comp_page_print(const struct lu_env *env,
struct lov_page *lp = cl2lov_page(slice);
return (*printer)(env, cookie,
- LUSTRE_LOV_NAME "-page@%p, gen: %u\n",
- lp, lp->lps_layout_gen);
+ LUSTRE_LOV_NAME"-page@%p\n", lp);
}
static const struct cl_page_operations lov_comp_page_ops = {
@@ -74,33 +73,65 @@ int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj,
struct cl_object *o;
struct lov_io_sub *sub;
struct lov_page *lpg = cl_object_page_slice(obj, page);
+ bool stripe_cached = false;
u64 offset;
u64 suboff;
- int stripe;
int entry;
+ int stripe;
int rc;
+ /* Direct i/o (CPT_TRANSIENT) is split strictly to stripes, so we can
+ * cache the stripe information. Buffered i/o is differently
+ * organized, and stripe calculation isn't a significant cost for
+ * buffered i/o, so we only cache this for direct i/o.
+ */
+ stripe_cached = lio->lis_cached_entry != LIS_CACHE_ENTRY_NONE &&
+ page->cp_type == CPT_TRANSIENT;
+
offset = cl_offset(obj, index);
- entry = lov_io_layout_at(lio, offset);
+
+ if (stripe_cached) {
+ entry = lio->lis_cached_entry;
+ stripe = lio->lis_cached_stripe;
+ /* Offset can never go backwards in an i/o, so this is valid */
+ suboff = lio->lis_cached_suboff + offset - lio->lis_cached_off;
+ } else {
+ entry = lov_io_layout_at(lio, offset);
+
+ stripe = lov_stripe_number(loo->lo_lsm, entry, offset);
+ rc = lov_stripe_offset(loo->lo_lsm, entry, offset, stripe,
+ &suboff);
+ LASSERT(rc == 0);
+ lio->lis_cached_entry = entry;
+ lio->lis_cached_stripe = stripe;
+ lio->lis_cached_off = offset;
+ lio->lis_cached_suboff = suboff;
+ }
+
if (entry < 0 || !lsm_entry_inited(loo->lo_lsm, entry)) {
/* non-existing layout component */
lov_page_init_empty(env, obj, page, index);
return 0;
}
- r0 = lov_r0(loo, entry);
- stripe = lov_stripe_number(loo->lo_lsm, entry, offset);
- LASSERT(stripe < r0->lo_nr);
- rc = lov_stripe_offset(loo->lo_lsm, entry, offset, stripe, &suboff);
- LASSERT(rc == 0);
+ CDEBUG(D_PAGE, "offset %llu, entry %d, stripe %d, suboff %llu\n",
+ offset, entry, stripe, suboff);
page->cp_lov_index = lov_comp_index(entry, stripe);
- lpg->lps_layout_gen = loo->lo_lsm->lsm_layout_gen;
cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_comp_page_ops);
- sub = lov_sub_get(env, lio, page->cp_lov_index);
- if (IS_ERR(sub))
- return PTR_ERR(sub);
+ if (!stripe_cached) {
+ sub = lov_sub_get(env, lio, page->cp_lov_index);
+ if (IS_ERR(sub))
+ return PTR_ERR(sub);
+ } else {
+ sub = lio->lis_cached_sub;
+ }
+
+ lio->lis_cached_sub = sub;
+
+ r0 = lov_r0(loo, entry);
+ LASSERT(stripe < r0->lo_nr);
subobj = lovsub2cl(r0->lo_sub[stripe]);
cl_object_for_each(o, subobj) {
--
1.8.3.1
More information about the lustre-devel
mailing list