git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - block/blk-throttle.c
drm/vmwgfx: Bump driver minor and date
[karo-tx-linux.git] / block / blk-throttle.c
index b78db2e5fdff1e158ea52c179313ff3eba282015..fc13dd0c6e3956a84913d9e71132c0f321a67280 100644 (file)
@@ -22,11 +22,11 @@ static int throtl_quantum = 32;
 #define DFL_THROTL_SLICE_HD (HZ / 10)
 #define DFL_THROTL_SLICE_SSD (HZ / 50)
 #define MAX_THROTL_SLICE (HZ)
-#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
-#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
 #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
-/* default latency target is 0, eg, guarantee IO latency by default */
-#define DFL_LATENCY_TARGET (0)
+#define MIN_THROTL_BPS (320 * 1024)
+#define MIN_THROTL_IOPS (10)
+#define DFL_LATENCY_TARGET (-1L)
+#define DFL_IDLE_THRESHOLD (0)
 
 #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
 
@@ -157,6 +157,7 @@ struct throtl_grp {
        unsigned long last_check_time;
 
        unsigned long latency_target; /* us */
+       unsigned long latency_target_conf; /* us */
        /* When did we start a new slice */
        unsigned long slice_start[2];
        unsigned long slice_end[2];
@@ -165,6 +166,7 @@ struct throtl_grp {
        unsigned long checked_last_finish_time; /* ns / 1024 */
        unsigned long avg_idletime; /* ns / 1024 */
        unsigned long idletime_threshold; /* us */
+       unsigned long idletime_threshold_conf; /* us */
 
        unsigned int bio_cnt; /* total bios */
        unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
@@ -201,8 +203,6 @@ struct throtl_data
        unsigned int limit_index;
        bool limit_valid[LIMIT_CNT];
 
-       unsigned long dft_idletime_threshold; /* us */
-
        unsigned long low_upgrade_time;
        unsigned long low_downgrade_time;
 
@@ -294,8 +294,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
 
        td = tg->td;
        ret = tg->bps[rw][td->limit_index];
-       if (ret == 0 && td->limit_index == LIMIT_LOW)
-               return tg->bps[rw][LIMIT_MAX];
+       if (ret == 0 && td->limit_index == LIMIT_LOW) {
+               /* intermediate node or iops isn't 0 */
+               if (!list_empty(&blkg->blkcg->css.children) ||
+                   tg->iops[rw][td->limit_index])
+                       return U64_MAX;
+               else
+                       return MIN_THROTL_BPS;
+       }
 
        if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
            tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
@@ -315,10 +321,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 
        if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
                return UINT_MAX;
+
        td = tg->td;
        ret = tg->iops[rw][td->limit_index];
-       if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
-               return tg->iops[rw][LIMIT_MAX];
+       if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
+               /* intermediate node or bps isn't 0 */
+               if (!list_empty(&blkg->blkcg->css.children) ||
+                   tg->bps[rw][td->limit_index])
+                       return UINT_MAX;
+               else
+                       return MIN_THROTL_IOPS;
+       }
 
        if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
            tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
@@ -482,6 +495,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
        /* LIMIT_LOW will have default value 0 */
 
        tg->latency_target = DFL_LATENCY_TARGET;
+       tg->latency_target_conf = DFL_LATENCY_TARGET;
+       tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+       tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
 
        return &tg->pd;
 }
@@ -510,8 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
        if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
                sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
        tg->td = td;
-
-       tg->idletime_threshold = td->dft_idletime_threshold;
 }
 
 /*
@@ -1349,7 +1363,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
        return 0;
 }
 
-static void tg_conf_updated(struct throtl_grp *tg)
+static void tg_conf_updated(struct throtl_grp *tg, bool global)
 {
        struct throtl_service_queue *sq = &tg->service_queue;
        struct cgroup_subsys_state *pos_css;
@@ -1367,8 +1381,26 @@ static void tg_conf_updated(struct throtl_grp *tg)
         * restrictions in the whole hierarchy and allows them to bypass
         * blk-throttle.
         */
-       blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
-               tg_update_has_rules(blkg_to_tg(blkg));
+       blkg_for_each_descendant_pre(blkg, pos_css,
+                       global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
+               struct throtl_grp *this_tg = blkg_to_tg(blkg);
+               struct throtl_grp *parent_tg;
+
+               tg_update_has_rules(this_tg);
+               /* ignore root/second level */
+               if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
+                   !blkg->parent->parent)
+                       continue;
+               parent_tg = blkg_to_tg(blkg->parent);
+               /*
+                * make sure all children has lower idle time threshold and
+                * higher latency target
+                */
+               this_tg->idletime_threshold = min(this_tg->idletime_threshold,
+                               parent_tg->idletime_threshold);
+               this_tg->latency_target = max(this_tg->latency_target,
+                               parent_tg->latency_target);
+       }
 
        /*
         * We're already holding queue_lock and know @tg is valid.  Let's
@@ -1413,7 +1445,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
        else
                *(unsigned int *)((void *)tg + of_cft(of)->private) = v;
 
-       tg_conf_updated(tg);
+       tg_conf_updated(tg, false);
        ret = 0;
 out_finish:
        blkg_conf_finish(&ctx);
@@ -1497,34 +1529,34 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
            tg->iops_conf[READ][off] == iops_dft &&
            tg->iops_conf[WRITE][off] == iops_dft &&
            (off != LIMIT_LOW ||
-            (tg->idletime_threshold == tg->td->dft_idletime_threshold &&
-             tg->latency_target == DFL_LATENCY_TARGET)))
+            (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
+             tg->latency_target_conf == DFL_LATENCY_TARGET)))
                return 0;
 
-       if (tg->bps_conf[READ][off] != bps_dft)
+       if (tg->bps_conf[READ][off] != U64_MAX)
                snprintf(bufs[0], sizeof(bufs[0]), "%llu",
                        tg->bps_conf[READ][off]);
-       if (tg->bps_conf[WRITE][off] != bps_dft)
+       if (tg->bps_conf[WRITE][off] != U64_MAX)
                snprintf(bufs[1], sizeof(bufs[1]), "%llu",
                        tg->bps_conf[WRITE][off]);
-       if (tg->iops_conf[READ][off] != iops_dft)
+       if (tg->iops_conf[READ][off] != UINT_MAX)
                snprintf(bufs[2], sizeof(bufs[2]), "%u",
                        tg->iops_conf[READ][off]);
-       if (tg->iops_conf[WRITE][off] != iops_dft)
+       if (tg->iops_conf[WRITE][off] != UINT_MAX)
                snprintf(bufs[3], sizeof(bufs[3]), "%u",
                        tg->iops_conf[WRITE][off]);
        if (off == LIMIT_LOW) {
-               if (tg->idletime_threshold == ULONG_MAX)
+               if (tg->idletime_threshold_conf == ULONG_MAX)
                        strcpy(idle_time, " idle=max");
                else
                        snprintf(idle_time, sizeof(idle_time), " idle=%lu",
-                               tg->idletime_threshold);
+                               tg->idletime_threshold_conf);
 
-               if (tg->latency_target == ULONG_MAX)
+               if (tg->latency_target_conf == ULONG_MAX)
                        strcpy(latency_time, " latency=max");
                else
                        snprintf(latency_time, sizeof(latency_time),
-                               " latency=%lu", tg->latency_target);
+                               " latency=%lu", tg->latency_target_conf);
        }
 
        seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
@@ -1563,8 +1595,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
        v[2] = tg->iops_conf[READ][index];
        v[3] = tg->iops_conf[WRITE][index];
 
-       idle_time = tg->idletime_threshold;
-       latency_time = tg->latency_target;
+       idle_time = tg->idletime_threshold_conf;
+       latency_time = tg->latency_target_conf;
        while (true) {
                char tok[27];   /* wiops=18446744073709551616 */
                char *p;
@@ -1623,17 +1655,33 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
                tg->iops_conf[READ][LIMIT_MAX]);
        tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
                tg->iops_conf[WRITE][LIMIT_MAX]);
+       tg->idletime_threshold_conf = idle_time;
+       tg->latency_target_conf = latency_time;
+
+       /* force user to configure all settings for low limit  */
+       if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
+             tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
+           tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
+           tg->latency_target_conf == DFL_LATENCY_TARGET) {
+               tg->bps[READ][LIMIT_LOW] = 0;
+               tg->bps[WRITE][LIMIT_LOW] = 0;
+               tg->iops[READ][LIMIT_LOW] = 0;
+               tg->iops[WRITE][LIMIT_LOW] = 0;
+               tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+               tg->latency_target = DFL_LATENCY_TARGET;
+       } else if (index == LIMIT_LOW) {
+               tg->idletime_threshold = tg->idletime_threshold_conf;
+               tg->latency_target = tg->latency_target_conf;
+       }
 
-       if (index == LIMIT_LOW) {
-               blk_throtl_update_limit_valid(tg->td);
-               if (tg->td->limit_valid[LIMIT_LOW])
+       blk_throtl_update_limit_valid(tg->td);
+       if (tg->td->limit_valid[LIMIT_LOW]) {
+               if (index == LIMIT_LOW)
                        tg->td->limit_index = LIMIT_LOW;
-               tg->idletime_threshold = (idle_time == ULONG_MAX) ?
-                       ULONG_MAX : idle_time;
-               tg->latency_target = (latency_time == ULONG_MAX) ?
-                       ULONG_MAX : latency_time;
-       }
-       tg_conf_updated(tg);
+       } else
+               tg->td->limit_index = LIMIT_MAX;
+       tg_conf_updated(tg, index == LIMIT_LOW &&
+               tg->td->limit_valid[LIMIT_LOW]);
        ret = 0;
 out_finish:
        blkg_conf_finish(&ctx);
@@ -1722,17 +1770,25 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
        /*
         * cgroup is idle if:
         * - single idle is too long, longer than a fixed value (in case user
-        *   configure a too big threshold) or 4 times of slice
+        *   configure a too big threshold) or 4 times of idletime threshold
         * - average think time is more than threshold
         * - IO latency is largely below threshold
         */
-       unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
-
-       time = min_t(unsigned long, MAX_IDLE_TIME, time);
-       return (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
-              tg->avg_idletime > tg->idletime_threshold ||
-              (tg->latency_target && tg->bio_cnt &&
+       unsigned long time;
+       bool ret;
+
+       time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
+       ret = tg->latency_target == DFL_LATENCY_TARGET ||
+             tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
+             (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+             tg->avg_idletime > tg->idletime_threshold ||
+             (tg->latency_target && tg->bio_cnt &&
                tg->bad_bio_cnt * 5 < tg->bio_cnt);
+       throtl_log(&tg->service_queue,
+               "avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
+               tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
+               tg->bio_cnt, ret, tg->td->scale);
+       return ret;
 }
 
 static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
@@ -1828,6 +1884,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
        struct cgroup_subsys_state *pos_css;
        struct blkcg_gq *blkg;
 
+       throtl_log(&td->service_queue, "upgrade to max");
        td->limit_index = LIMIT_MAX;
        td->low_upgrade_time = jiffies;
        td->scale = 0;
@@ -1850,6 +1907,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
 {
        td->scale /= 2;
 
+       throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
        if (td->scale) {
                td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
                return;
@@ -2023,6 +2081,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
                td->avg_buckets[i].valid = true;
                last_latency = td->avg_buckets[i].latency;
        }
+
+       for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
+               throtl_log(&td->service_queue,
+                       "Latency bucket %d: latency=%ld, valid=%d", i,
+                       td->avg_buckets[i].latency, td->avg_buckets[i].valid);
 }
 #else
 static inline void throtl_update_latency_buckets(struct throtl_data *td)
@@ -2354,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q)
 void blk_throtl_register_queue(struct request_queue *q)
 {
        struct throtl_data *td;
-       struct cgroup_subsys_state *pos_css;
-       struct blkcg_gq *blkg;
 
        td = q->td;
        BUG_ON(!td);
 
-       if (blk_queue_nonrot(q)) {
+       if (blk_queue_nonrot(q))
                td->throtl_slice = DFL_THROTL_SLICE_SSD;
-               td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD;
-       } else {
+       else
                td->throtl_slice = DFL_THROTL_SLICE_HD;
-               td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
-       }
 #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
        /* if no low limit, use previous default */
        td->throtl_slice = DFL_THROTL_SLICE_HD;
@@ -2375,18 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q)
        td->track_bio_latency = !q->mq_ops && !q->request_fn;
        if (!td->track_bio_latency)
                blk_stat_enable_accounting(q);
-
-       /*
-        * some tg are created before queue is fully initialized, eg, nonrot
-        * isn't initialized yet
-        */
-       rcu_read_lock();
-       blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
-               struct throtl_grp *tg = blkg_to_tg(blkg);
-
-               tg->idletime_threshold = td->dft_idletime_threshold;
-       }
-       rcu_read_unlock();
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW