[lustre-devel] [PATCH 25/41] lustre: lov: fixes bitfield in lod qos code

James Simmons jsimmons at infradead.org
Mon Apr 5 00:50:54 PST 2021


From: Rahul Deshmukh <rahul.deshmukh at seagate.com>

Updates to the bitfields in struct lod_qos are protected
by lq_rw_sem in most places, but an update can be lost
due to unprotected bitfield access from
lod_qos_thresholdrr_seq_write() and qos_prio_free_store().
This patch fixes it by replacing the bitfields with named bits
and atomic bitops.

Cray-bug-id: LUS-4651
WC-bug-id: https://jira.whamcloud.com/browse/LU-7853
Lustre-commit: 3bae39f0a5b98a2 ("LU-7853 lod: fixes bitfield in lod qos code")
Signed-off-by: Rahul Deshmukh <rahul.deshmukh at seagate.com>
Signed-off-by: Alexander Zarochentsev <c17826 at cray.com>
Reviewed-on: https://review.whamcloud.com/18812
Reviewed-by: Andreas Dilger <adilger at whamcloud.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev at hpe.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 fs/lustre/include/lu_object.h     | 17 +++++++++++------
 fs/lustre/lmv/lmv_obd.c           |  2 +-
 fs/lustre/lmv/lproc_lmv.c         |  6 +++---
 fs/lustre/obdclass/lu_tgt_descs.c | 38 ++++++++++++++++++++------------------
 4 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/fs/lustre/include/lu_object.h b/fs/lustre/include/lu_object.h
index 6c47f43..34610d4 100644
--- a/fs/lustre/include/lu_object.h
+++ b/fs/lustre/include/lu_object.h
@@ -1405,6 +1405,15 @@ struct lu_kmem_descr {
 extern u32 lu_context_tags_default;
 extern u32 lu_session_tags_default;
 
+/* bitflags used in rr / qos allocation */
+enum lq_flag {
+	LQ_DIRTY	= 0,	/* recalc qos data */
+	LQ_SAME_SPACE,		/* the OSTs all have approx.
+				 * the same space avail
+				 */
+	LQ_RESET,		/* zero current penalties */
+};
+
 /* round-robin QoS data for LOD/LMV */
 struct lu_qos_rr {
 	spinlock_t		 lqr_alloc;	/* protect allocation index */
@@ -1412,7 +1421,7 @@ struct lu_qos_rr {
 	u32			 lqr_offset_idx;/* aliasing for start_idx */
 	int			 lqr_start_count;/* reseed counter */
 	struct lu_tgt_pool	 lqr_pool;	/* round-robin optimized list */
-	unsigned long		 lqr_dirty:1;	/* recalc round-robin list */
+	unsigned long		 lqr_flags;
 };
 
 /* QoS data per MDS/OSS */
@@ -1482,11 +1491,7 @@ struct lu_qos {
 	unsigned int		 lq_prio_free;	 /* priority for free space */
 	unsigned int		 lq_threshold_rr;/* priority for rr */
 	struct lu_qos_rr	 lq_rr;		 /* round robin qos data */
-	unsigned long		 lq_dirty:1,	 /* recalc qos data */
-				 lq_same_space:1,/* the servers all have approx.
-						  * the same space avail
-						  */
-				 lq_reset:1;	 /* zero current penalties */
+	unsigned long		lq_flags;
 };
 
 struct lu_tgt_descs {
diff --git a/fs/lustre/lmv/lmv_obd.c b/fs/lustre/lmv/lmv_obd.c
index d845118..747786e 100644
--- a/fs/lustre/lmv/lmv_obd.c
+++ b/fs/lustre/lmv/lmv_obd.c
@@ -1303,7 +1303,7 @@ static int lmv_statfs_update(void *cookie, int rc)
 		tgt->ltd_statfs = *osfs;
 		tgt->ltd_statfs_age = ktime_get_seconds();
 		spin_unlock(&lmv->lmv_lock);
-		lmv->lmv_qos.lq_dirty = 1;
+		set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags);
 	}
 
 	return rc;
diff --git a/fs/lustre/lmv/lproc_lmv.c b/fs/lustre/lmv/lproc_lmv.c
index 59922b8..85963d2 100644
--- a/fs/lustre/lmv/lproc_lmv.c
+++ b/fs/lustre/lmv/lproc_lmv.c
@@ -133,8 +133,8 @@ static ssize_t qos_prio_free_store(struct kobject *kobj,
 		return -EINVAL;
 
 	lmv->lmv_qos.lq_prio_free = (val << 8) / 100;
-	lmv->lmv_qos.lq_dirty = 1;
-	lmv->lmv_qos.lq_reset = 1;
+	set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags);
+	set_bit(LQ_RESET, &lmv->lmv_qos.lq_flags);
 
 	return count;
 }
@@ -170,7 +170,7 @@ static ssize_t qos_threshold_rr_store(struct kobject *kobj,
 		return -EINVAL;
 
 	lmv->lmv_qos.lq_threshold_rr = (val << 8) / 100;
-	lmv->lmv_qos.lq_dirty = 1;
+	set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags);
 
 	return count;
 }
diff --git a/fs/lustre/obdclass/lu_tgt_descs.c b/fs/lustre/obdclass/lu_tgt_descs.c
index 469c935..a77ce20 100644
--- a/fs/lustre/obdclass/lu_tgt_descs.c
+++ b/fs/lustre/obdclass/lu_tgt_descs.c
@@ -80,7 +80,7 @@ u64 lu_prandom_u64_max(u64 ep_ro)
 void lu_qos_rr_init(struct lu_qos_rr *lqr)
 {
 	spin_lock_init(&lqr->lqr_alloc);
-	lqr->lqr_dirty = 1;
+	set_bit(LQ_DIRTY, &lqr->lqr_flags);
 }
 EXPORT_SYMBOL(lu_qos_rr_init);
 
@@ -158,9 +158,8 @@ int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *tgt)
 	 */
 	list_add_tail(&svr->lsq_svr_list, &tempsvr->lsq_svr_list);
 
-	qos->lq_dirty = 1;
-	qos->lq_rr.lqr_dirty = 1;
-
+	set_bit(LQ_DIRTY, &qos->lq_flags);
+	set_bit(LQ_DIRTY, &qos->lq_rr.lqr_flags);
 out:
 	up_write(&qos->lq_rw_sem);
 	return rc;
@@ -200,8 +199,8 @@ static int lu_qos_del_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd)
 		kfree(svr);
 	}
 
-	qos->lq_dirty = 1;
-	qos->lq_rr.lqr_dirty = 1;
+	set_bit(LQ_DIRTY, &qos->lq_flags);
+	set_bit(LQ_DIRTY, &qos->lq_rr.lqr_flags);
 out:
 	up_write(&qos->lq_rw_sem);
 	return rc;
@@ -273,8 +272,8 @@ int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt)
 	/* Set up allocation policy (QoS and RR) */
 	INIT_LIST_HEAD(&ltd->ltd_qos.lq_svr_list);
 	init_rwsem(&ltd->ltd_qos.lq_rw_sem);
-	ltd->ltd_qos.lq_dirty = 1;
-	ltd->ltd_qos.lq_reset = 1;
+	set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
+	set_bit(LQ_RESET, &ltd->ltd_qos.lq_flags);
 	/* Default priority is toward free space balance */
 	ltd->ltd_qos.lq_prio_free = 232;
 	/* Default threshold for rr (roughly 17%) */
@@ -416,7 +415,8 @@ void ltd_del_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
  */
 bool ltd_qos_is_usable(struct lu_tgt_descs *ltd)
 {
-	if (!ltd->ltd_qos.lq_dirty && ltd->ltd_qos.lq_same_space)
+	if (!test_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags) &&
+	    test_bit(LQ_SAME_SPACE, &ltd->ltd_qos.lq_flags))
 		return false;
 
 	if (ltd->ltd_lov_desc.ld_active_tgt_count < 2)
@@ -456,7 +456,7 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd)
 	time64_t now, age;
 	int rc;
 
-	if (!qos->lq_dirty) {
+	if (!test_bit(LQ_DIRTY, &qos->lq_flags)) {
 		rc = 0;
 		goto out;
 	}
@@ -531,7 +531,8 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd)
 		tgt->ltd_qos.ltq_penalty_per_obj >>= 1;
 
 		age = (now - tgt->ltd_qos.ltq_used) >> 3;
-		if (qos->lq_reset || age > 32 * desc->ld_qos_maxage)
+		if (test_bit(LQ_RESET, &qos->lq_flags) ||
+		    age > 32 * desc->ld_qos_maxage)
 			tgt->ltd_qos.ltq_penalty = 0;
 		else if (age > desc->ld_qos_maxage)
 			/* Decay tgt penalty. */
@@ -566,31 +567,32 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd)
 		svr->lsq_penalty_per_obj >>= 1;
 
 		age = (now - svr->lsq_used) >> 3;
-		if (qos->lq_reset || age > 32 * desc->ld_qos_maxage)
+		if (test_bit(LQ_RESET, &qos->lq_flags) ||
+		    age > 32 * desc->ld_qos_maxage)
 			svr->lsq_penalty = 0;
 		else if (age > desc->ld_qos_maxage)
 			/* Decay server penalty. */
 			svr->lsq_penalty >>= age / desc->ld_qos_maxage;
 	}
 
-	qos->lq_dirty = 0;
-	qos->lq_reset = 0;
+	clear_bit(LQ_DIRTY, &qos->lq_flags);
+	clear_bit(LQ_RESET, &qos->lq_flags);
 
 	/*
 	 * If each tgt has almost same free space, do rr allocation for better
 	 * creation performance
 	 */
-	qos->lq_same_space = 0;
+	clear_bit(LQ_SAME_SPACE, &qos->lq_flags);
 	if ((ba_max * (256 - qos->lq_threshold_rr)) >> 8 < ba_min &&
 	    (ia_max * (256 - qos->lq_threshold_rr)) >> 8 < ia_min) {
-		qos->lq_same_space = 1;
+		set_bit(LQ_SAME_SPACE, &qos->lq_flags);
 		/* Reset weights for the next time we enter qos mode */
-		qos->lq_reset = 1;
+		set_bit(LQ_RESET, &qos->lq_flags);
 	}
 	rc = 0;
 
 out:
-	if (!rc && qos->lq_same_space)
+	if (!rc && test_bit(LQ_SAME_SPACE, &qos->lq_flags))
 		return -EAGAIN;
 
 	return rc;
-- 
1.8.3.1



More information about the lustre-devel mailing list