[lustre-devel] [PATCH 6/6] Adjust max_rpcs_in_flight according to metrics
Yan Li
yanli at ascar.io
Tue Mar 21 12:43:33 PDT 2017
Signed-off-by: Yan Li <yanli at ascar.io>
---
lustre/osc/osc_request.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 165 insertions(+)
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index c59c281..8efaf5a 100644
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -1613,6 +1613,156 @@ static void osc_release_ppga(struct brw_page **ppga, size_t count)
OBD_FREE(ppga, sizeof(*ppga) * count);
}
+
+#ifdef ENABLE_RLQOS
+/**
+ * Fold the interval between \a new_time and the previously recorded
+ * timestamp (te->last_time) into the accumulator te->ea, then store
+ * \a new_time as the new te->last_time.
+ *
+ * The first call, recognised by te->last_time.tv_sec == 0, only records
+ * the timestamp. A negative interval is replaced by its absolute value.
+ * An interval larger than 10 * 60 * ONE_MILLION resets te->ea to 0
+ * instead of being averaged in.
+ *
+ * te's lock should be acquired beforehand
+ *
+ * \param[in,out] te		accumulator to update (ea, alpha_inv, last_time)
+ * \param[in]     new_time	timestamp of the newest event
+ */
+static void time_ewma_add_extlock(struct time_ewma *te, struct timeval *new_time) {
+ __u64 old_ea = te->ea; /* kept only for the debug message below */
+ long timediff;
+
+ if (te->last_time.tv_sec != 0) { /* tv_sec == 0 means no sample recorded yet */
+ timediff = cfs_timeval_sub(new_time, &te->last_time, NULL);
+ if (timediff < 0) {
+ CDEBUG(D_INFO,
+ "(te: %p) negative timediff %ld detected, using abs value\n",
+ te, timediff);
+ timediff = -timediff;
+ }
+
+ /* Reset ea to 0 if a long gap (>10min) is detected; the gap itself
+ * is not added to the average */
+ if (timediff > 10 * 60 * ONE_MILLION) {
+ CWARN("(te: %p) Long gap detected\n", te);
+ te->ea = 0;
+ } else {
+ /* ewma = ewma * (1-alpha) + amount * alpha
+ * ea = ewma * alpha, alpha_inv = 1/alpha
+ *
+ * ea = ea / alpha_inv * (alpha_inv - 1) + timediff
+ *
+ * do_div() leaves the quotient in te->ea. The division runs
+ * before the multiplication, so the remainder of
+ * ea / alpha_inv is dropped on every update.
+ * NOTE(review): alpha_inv must be non-zero here; confirm it is
+ * validated where it is set.
+ */
+ do_div(te->ea, te->alpha_inv);
+ te->ea = te->ea * (te->alpha_inv - 1) + timediff;
+ if (te->ea > 1000000) { /* only log large accumulator values */
+ CDEBUG(D_INFO,
+ "(te: %p) old_ea = %llu, "
+ "old_time = %ld.%ld, "
+ "new_time = %ld.%ld, new ea = %llu\n",
+ te, old_ea,
+ te->last_time.tv_sec,
+ te->last_time.tv_usec,
+ new_time->tv_sec,
+ new_time->tv_usec, te->ea);
+ }
+ }
+ } else {
+ CDEBUG(D_INFO, "(te: %p) first call\n", te);
+ }
+ te->last_time = *new_time; /* recorded on every path, including the first call */
+}
+
+/**
+ * Update the RLQoS metrics of \a obd and, when a rule matches, derive a new
+ * max_rpcs_in_flight value and minimum gap between RPCs from that rule.
+ *
+ * With qos->lock held this function:
+ *  - feeds \a new_ack_time and \a new_sent_time into the ack/sent EWMAs;
+ *  - computes the round-trip time (now - \a new_sent_time) and its ratio to
+ *    the smallest round-trip time seen so far, scaled by 100
+ *    (qos->rtt_ratio100);
+ *  - passes \a op and \a bytes_transferred to calc_throughput();
+ *  - applies the first rule whose [lower, upper) ranges contain the current
+ *    ack EWMA, sent EWMA and rtt ratio.
+ *
+ * The new max_rpcs_in_flight is installed only after qos->lock has been
+ * dropped.
+ *
+ * \param[in] obd		client OBD device whose qos data is updated
+ * \param[in] new_ack_time	timestamp fed into the ack EWMA
+ * \param[in] new_sent_time	time the RPC was sent; also the rtt start
+ * \param[in] op		operation index handed to calc_throughput()
+ * \param[in] bytes_transferred	byte count handed to calc_throughput()
+ *
+ * \retval 0 on the normal path. LPROCFS_CLIMP_CHECK() may return from this
+ *	     function on its own; see that macro for the value it returns.
+ */
+static int qos_adjust(struct obd_device *obd, struct timeval *new_ack_time,
+		      struct timeval *new_sent_time, int op,
+		      int bytes_transferred)
+{
+	struct client_obd *cli = &obd->u.cli;
+	struct qos_data_t *qos = &cli->qos;
+	struct time_ewma *ack_ewma_p = &qos->ack_ewma;
+	struct time_ewma *sent_ewma_p = &qos->sent_ewma;
+	__u64 ack_ewma;
+	__u64 sent_ewma;
+	struct qos_rule_t *r;
+	int new_mrif = -1; /* -1 means no change needed */
+	int i;
+	struct timeval now;
+	long rtt;
+	int rtt_ratio100;
+	long usec_since_last_mrif_update;
+
+	spin_lock(&qos->lock);
+	time_ewma_add_extlock(ack_ewma_p, new_ack_time);
+	ack_ewma = qos_get_ewma_usec(ack_ewma_p);
+
+	time_ewma_add_extlock(sent_ewma_p, new_sent_time);
+	sent_ewma = qos_get_ewma_usec(sent_ewma_p);
+
+	/* calculate rtt */
+	do_gettimeofday(&now);
+	rtt = cfs_timeval_sub(&now, new_sent_time, NULL);
+	/* The wall clock can step backwards or the RPC can complete within
+	 * the clock resolution, giving rtt <= 0. Such a value must never be
+	 * stored in smallest_rtt: it is the divisor below, and a zero would
+	 * be a division by zero while a negative one would stick forever as
+	 * the "smallest" rtt. Treat those samples as the minimum measurable
+	 * rtt instead.
+	 */
+	if (rtt < 1)
+		rtt = 1;
+	if (0 == qos->smallest_rtt || rtt < qos->smallest_rtt)
+		qos->smallest_rtt = rtt;
+	rtt = rtt * 100;
+	rtt_ratio100 = rtt / qos->smallest_rtt;
+	qos->rtt_ratio100 = rtt_ratio100;
+
+	/* Calculate throughput */
+	calc_throughput(qos, op, bytes_transferred);
+
+	/* Adjust max_rpc_in_flight according to ack_ewma and send_ewma */
+	if (NULL == qos->rules)
+		goto out;
+	/* Without an import LPROCFS_CLIMP_CHECK() below may return from this
+	 * function, so there is nothing to adjust; skip rule matching.
+	 */
+	if (NULL == cli->cl_import)
+		goto out;
+
+	for (i = 0; i < qos->rule_no; ++i) {
+		r = &qos->rules[i];
+		/* Each range is half-open: lower <= value < upper */
+		if (ack_ewma < r->ack_ewma_lower ||
+		    ack_ewma >= r->ack_ewma_upper ||
+		    sent_ewma < r->send_ewma_lower ||
+		    sent_ewma >= r->send_ewma_upper ||
+		    rtt_ratio100 < r->rtt_ratio100_lower ||
+		    rtt_ratio100 >= r->rtt_ratio100_upper)
+			continue;
+
+		/* Per-rule statistics: hit counter and running averages of
+		 * the metrics that triggered the rule.
+		 */
+		r->used_times++;
+		r->ack_ewma_avg += ((__s64)ack_ewma -
+				    (__s64)r->ack_ewma_avg) / r->used_times;
+		r->send_ewma_avg += ((__s64)sent_ewma -
+				     (__s64)r->send_ewma_avg) / r->used_times;
+		r->rtt_ratio100_avg += (rtt_ratio100 -
+					(int)r->rtt_ratio100_avg) /
+				       r->used_times;
+
+		/* Rate-limit changes of max_rpc_in_flight */
+		usec_since_last_mrif_update =
+			cfs_timeval_sub(&now, &qos->last_mrif_update_time,
+					NULL);
+		if (usec_since_last_mrif_update > 0 &&
+		    usec_since_last_mrif_update >=
+		    qos->min_gap_between_updating_mrif) {
+			qos->last_mrif_update_time = now;
+			/* m100 is disabled when assigned negative values */
+			if (r->m100 >= 0) {
+				/* Must multiply m100 first, then div by 100
+				 * to avoid losing precision
+				 */
+				qos->max_rpc_in_flight100 *= r->m100;
+				qos->max_rpc_in_flight100 /= 100;
+			}
+			qos->max_rpc_in_flight100 += r->b100;
+			CDEBUG(D_INFO, "New max_rpc_in_flight100 = %d\n",
+			       qos->max_rpc_in_flight100);
+			if (qos->max_rpc_in_flight100 < 0) {
+				CDEBUG(D_INFO, "New max_rpc_in_flight100 is negative, reset it to 0\n");
+				qos->max_rpc_in_flight100 = 0;
+			}
+			if (qos->max_rpc_in_flight100 > OSC_MAX_RIF_MAX * 100) {
+				CDEBUG(D_INFO, "New max_rpc_in_flight100 is larger than %d, reset it to max allowed value\n",
+				       OSC_MAX_RIF_MAX * 100);
+				qos->max_rpc_in_flight100 =
+					OSC_MAX_RIF_MAX * 100;
+			}
+			new_mrif = qos->max_rpc_in_flight100 / 100;
+			if (new_mrif < 1) {
+				CDEBUG(D_INFO, "New max_rpc_in_flight is smaller than 1, reset it to 1\n");
+				new_mrif = 1;
+			}
+		}
+		/* Update min_usec_between_rpcs to tau */
+		qos->min_usec_between_rpcs = r->tau;
+		/* Only the first matching rule is applied. MRIF itself is
+		 * set after unlocking qos->lock to prevent deadlocking.
+		 */
+		break;
+	}
+out:
+	spin_unlock(&qos->lock);
+
+	if (-1 != new_mrif) { /* -1 means no change needed */
+		LPROCFS_CLIMP_CHECK(obd);
+		set_max_rpcs_in_flight(new_mrif, cli);
+		LPROCFS_CLIMP_EXIT(obd);
+	}
+	return 0;
+}
+#endif /* ENABLE_RLQOS */
+
static int brw_interpret(const struct lu_env *env,
struct ptlrpc_request *req, void *data, int rc)
{
@@ -1622,6 +1772,14 @@ static int brw_interpret(const struct lu_env *env,
struct client_obd *cli = aa->aa_cli;
ENTRY;
+#ifdef ENABLE_RLQOS
+ qos_adjust(req->rq_import->imp_obd,
+ &req->rq_arrival_time,
+ &aa->aa_oa->o_sent_time,
+ lustre_msg_get_opc(req->rq_reqmsg) - OST_READ,
+ req->rq_bulk->bd_nob_transferred);
+#endif
+
rc = osc_brw_fini_request(req, rc);
CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc);
/* When server return -EINPROGRESS, client should always retry
@@ -1874,6 +2032,10 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
list_splice_init(&rpc_list, &aa->aa_oaps);
INIT_LIST_HEAD(&aa->aa_exts);
list_splice_init(ext_list, &aa->aa_exts);
+#ifdef ENABLE_RLQOS
+ /* sent_time is used by RLQoS */
+ do_gettimeofday(&aa->aa_oa->o_sent_time);
+#endif
spin_lock(&cli->cl_loi_list_lock);
starting_offset >>= PAGE_SHIFT;
@@ -1897,6 +2059,9 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
cli->cl_w_in_flight);
OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_IO, cfs_fail_val);
+#ifdef ENABLE_RLQOS
+ qos_throttle(&cli->qos);
+#endif
ptlrpcd_add_req(req);
rc = 0;
EXIT;
--
1.8.3.1
More information about the lustre-devel
mailing list