[lustre-devel] [PATCH 3/6] RLQOS main data structure
Yan Li
yanli at ascar.io
Tue Mar 21 12:43:30 PDT 2017
Each client_obd maintains a qos data structure.
Signed-off-by: Yan Li <yanli at ascar.io>
---
lustre/include/obd.h | 8 +++
lustre/include/rlqos.h | 136 +++++++++++++++++++++++++++++++++++++++++++++++
lustre/obdclass/genops.c | 25 +++++++++
3 files changed, 169 insertions(+)
create mode 100644 lustre/include/rlqos.h
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index b4ee379..726493c 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -50,6 +50,9 @@
#include <lustre_intent.h>
#include <lvfs.h>
#include <lustre_quota.h>
+#ifdef ENABLE_RLQOS
+# include "rlqos.h"
+#endif
#define MAX_OBD_DEVICES 8192
@@ -331,6 +334,11 @@ struct client_obd {
void *cl_lru_work;
/* hash tables for osc_quota_info */
struct cfs_hash *cl_quota_hash[LL_MAXQUOTAS];
+
+#ifdef ENABLE_RLQOS
+ /* rate-limiting quality of service data */
+ struct qos_data_t qos;
+#endif
};
#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
diff --git a/lustre/include/rlqos.h b/lustre/include/rlqos.h
new file mode 100644
index 0000000..d8e012b
--- /dev/null
+++ b/lustre/include/rlqos.h
@@ -0,0 +1,136 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Storage Systems Research Center, Computer Science Department,
+ * University of California, Santa Cruz (www.ssrc.ucsc.edu) if you need
+ * additional information or have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013-2017, University of California, Santa Cruz, CA, USA.
+ * All rights reserved.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/rlqos.h
+ */
+
+#ifndef _RLQOS_H
+#define _RLQOS_H
+
+/* Kernel builds use the kernel headers; other builds fall back to liblustre */
+#ifdef __KERNEL__
+# include <linux/types.h>
+# include <linux/time.h>
+# include <asm/param.h>
+# include <libcfs/libcfs.h>
+# include <linux/delay.h>
+#else /* __KERNEL__ */
+# define HZ 100
+# define ONE_MILLION 1000000
+# include <liblustre.h>
+#endif
+
+/*
+ * Inverse of the EWMA smoothing factor alpha, i.e. alpha = 1/8.
+ * Presumably intended as the value of time_ewma.alpha_inv -- confirm.
+ */
+#define EWMA_ALPHA_INV (8)
+
+/**
+ * For tracking the exponentially-weighted moving average (EWMA) of a timeval.
+ * Floating-point division is not available in the kernel, so the average is
+ * kept in scaled form: ea = ewma * alpha_inv (equivalently ewma / alpha).
+ * Divide ea by alpha_inv to get the real EWMA, as qos_get_ewma_usec() does.
+ */
+struct time_ewma {
+ /* inverse of the smoothing factor alpha; the divisor applied to ea */
+ __u64 alpha_inv;
+ /* scaled average: ewma * alpha_inv */
+ __u64 ea;
+ /* NOTE(review): presumably the time of the most recent sample; the code
+  * that updates it is not part of this patch -- confirm */
+ struct timeval last_time;
+};
+/* We can't do floating-point division, so we are tracking
+ * ea = ewma * alpha_inv = ewma / alpha
+ */
+
+/*
+ * One rate-limiting QoS rule.
+ *
+ * NOTE(review): nothing in this patch reads or writes these fields, so the
+ * descriptions below are inferred from the field names only and must be
+ * confirmed against the rule parser and the code that applies the rules:
+ *  - *_lower / *_upper: presumably the bounds on the ack EWMA, the send EWMA
+ *    and the RTT ratio within which the rule applies;
+ *  - "100" suffix: presumably a value scaled by 100 (fixed point);
+ *  - m100, b100, tau: presumably the parameters of the action the rule takes;
+ *  - used_times: presumably a count of how often the rule has been used;
+ *  - *_avg: presumably averages of the values observed for this rule.
+ */
+struct qos_rule_t {
+ __u64 ack_ewma_lower;
+ __u64 ack_ewma_upper;
+ __u64 send_ewma_lower;
+ __u64 send_ewma_upper;
+ unsigned int rtt_ratio100_lower;
+ unsigned int rtt_ratio100_upper;
+ int m100;
+ int b100;
+ unsigned int tau;
+ int used_times;
+
+ __u64 ack_ewma_avg;
+ __u64 send_ewma_avg;
+ unsigned int rtt_ratio100_avg;
+};
+
+/*
+ * Per-client rate-limiting QoS state; struct client_obd embeds one instance
+ * as its "qos" member when ENABLE_RLQOS is defined.
+ */
+struct qos_data_t {
+ /* calc_throughput() requires this lock to be held by its caller; it must
+  * be initialised with spin_lock_init() before first use */
+ spinlock_t lock;
+ /* NOTE(review): presumably EWMAs of the gaps between received acks and
+  * between sent requests; the update code is not in this patch -- confirm */
+ struct time_ewma ack_ewma;
+ struct time_ewma sent_ewma;
+ /* NOTE(review): presumably the current RTT relative to smallest_rtt,
+  * scaled by 100 -- confirm */
+ int rtt_ratio100;
+ long smallest_rtt;
+ /* init_qos() sets this to cl_max_rpcs_in_flight * 100 */
+ int max_rpc_in_flight100;
+ /* NOTE(review): "mrif" presumably abbreviates max_rpc_in_flight; units of
+  * the minimum gap are not visible in this patch -- confirm */
+ struct timeval last_mrif_update_time;
+ int min_gap_between_updating_mrif;
+ /* NOTE(review): presumably the number of entries in rules -- confirm */
+ int rule_no;
+ /* Following fields are for calculating I/O bandwidth,
+ * 0 for read, 1 for write */
+ long last_req_sec[2]; /* second of last request we received */
+ __u64 tp_last_sec[2]; /* throughput of last sec */
+ __u64 sum_bytes_this_sec[2]; /* cumulative bytes transferred within this sec */
+ /* For throttling support */
+ unsigned int min_usec_between_rpcs;
+ struct timeval last_rpc_time;
+ /* NOTE(review): presumably filled in by parse_qos_rules(); allocation and
+  * ownership are not visible in this patch -- confirm */
+ struct qos_rule_t *rules;
+};
+
+/**
+ * Recover the real EWMA from the scaled value stored in \a ewma.
+ *
+ * \param ewma	tracker to read; its alpha_inv must be non-zero
+ *
+ * \retval	ewma->ea / ewma->alpha_inv (integer division); going by the
+ *		function name the unit is presumably microseconds
+ *
+ * NOTE(review): this is a plain 64-bit by 64-bit division; confirm it links
+ * on 32-bit kernels or switch to a math64 helper.
+ */
+static inline __u64 qos_get_ewma_usec(const struct time_ewma *ewma)
+{
+	__u64 scaled = ewma->ea;
+	__u64 divisor = ewma->alpha_inv;
+
+	return scaled / divisor;
+}
+
+/*
+ * NOTE(review): only declared here; the definition is not part of this patch.
+ * Presumably parses the rule text in buf into qos; the return convention is
+ * not visible here -- confirm.
+ */
+int parse_qos_rules(const char *buf, struct qos_data_t *qos);
+
+/**
+ * Account \a bytes_transferred to the per-second throughput counters.
+ *
+ * The lock of \a qos must be held by the caller.
+ *
+ * \param qos			QoS state to update
+ * \param op			0 for read, 1 for write; any other value is
+ *				ignored
+ * \param bytes_transferred	bytes moved by the request being accounted
+ *
+ * While the wall-clock second is unchanged the bytes are accumulated. When
+ * the second advances by exactly one, the accumulated sum becomes the
+ * throughput of the last second. When it advances by more than one, the last
+ * second saw no traffic and its throughput is 0. If the clock went backwards
+ * nothing is changed.
+ */
+static inline void calc_throughput(struct qos_data_t *qos, int op,
+				   int bytes_transferred)
+{
+	struct timeval now;
+	long elapsed;
+
+	if (op < 0 || op > 1)
+		return;
+
+	do_gettimeofday(&now);
+	elapsed = now.tv_sec - qos->last_req_sec[op];
+
+	/* Common case: another request within the same second */
+	if (likely(elapsed == 0)) {
+		qos->sum_bytes_this_sec[op] += bytes_transferred;
+		return;
+	}
+
+	/* Ignore cases when now.tv_sec < qos->last_req_sec */
+	if (elapsed < 0)
+		return;
+
+	/* A new second has started: publish the finished one and restart */
+	if (elapsed == 1)
+		qos->tp_last_sec[op] = qos->sum_bytes_this_sec[op];
+	else
+		qos->tp_last_sec[op] = 0;
+	qos->last_req_sec[op] = now.tv_sec;
+	qos->sum_bytes_this_sec[op] = bytes_transferred;
+}
+
+#endif /* _RLQOS_H */
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c
index a48f887..417c612 100644
--- a/lustre/obdclass/genops.c
+++ b/lustre/obdclass/genops.c
@@ -284,6 +284,28 @@ int class_unregister_type(const char *name)
} /* class_unregister_type */
EXPORT_SYMBOL(class_unregister_type);
+#ifdef ENABLE_RLQOS
+/**
+ * Reset an EWMA tracker to its initial state.
+ *
+ * \param ewma	tracker to initialise
+ *
+ * The scaled average and the last timestamp are cleared, and the divisor is
+ * set from EWMA_ALPHA_INV (rlqos.h) rather than a duplicated literal, so the
+ * header and this initialiser cannot drift apart.
+ */
+static void init_time_ewma(struct time_ewma *ewma)
+{
+	ewma->alpha_inv = EWMA_ALPHA_INV;
+	ewma->ea = 0;
+	ewma->last_time.tv_sec = 0;
+	ewma->last_time.tv_usec = 0;
+}
+
+/**
+ * Initialise the rate-limiting QoS state embedded in \a cli.
+ *
+ * \param cli	client whose qos member is set up
+ *
+ * NOTE(review): class_newdev() calls this on &result->u.cli for every new
+ * device. Whether u.cli is the valid member for that device type, and
+ * whether cl_loi_list_lock and cl_max_rpcs_in_flight are already set up at
+ * that point, is not visible in this patch -- confirm.
+ */
+static void init_qos(struct client_obd *cli)
+{
+	struct qos_data_t *qos = &cli->qos;
+
+	/* calc_throughput() requires qos->lock to be held, so the lock must
+	 * be initialised before any caller can take it */
+	spin_lock_init(&qos->lock);
+
+	init_time_ewma(&qos->ack_ewma);
+	init_time_ewma(&qos->sent_ewma);
+
+	/* max_rpc_in_flight100 is cl_max_rpcs_in_flight scaled by 100 */
+	spin_lock(&cli->cl_loi_list_lock);
+	qos->max_rpc_in_flight100 = cli->cl_max_rpcs_in_flight * 100;
+	spin_unlock(&cli->cl_loi_list_lock);
+}
+#endif
+
/**
* Create a new obd device.
*
@@ -349,6 +371,9 @@ struct obd_device *class_newdev(const char *type_name, const char *name)
result->obd_type = type;
strncpy(result->obd_name, name,
sizeof(result->obd_name) - 1);
+#ifdef ENABLE_RLQOS
+ init_qos(&result->u.cli);
+#endif
obd_devs[i] = result;
}
}
--
1.8.3.1
More information about the lustre-devel
mailing list