blkcg: unify blkg's for blkcg policies
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ee55019066a19500c6df54addf90d91d2ace60fa..393eaa59913b2756b9f36ec780151f1f1baa27b0 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -17,6 +17,8 @@
 #include "blk.h"
 #include "cfq.h"
 
+static struct blkio_policy_type blkio_policy_cfq;
+
 /*
  * tunables
  */
@@ -206,11 +208,7 @@ struct cfq_group {
        unsigned long saved_workload_slice;
        enum wl_type_t saved_workload;
        enum wl_prio_t saved_serving_prio;
-       struct blkio_group blkg;
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-       struct hlist_node cfqd_node;
-       int ref;
-#endif
+
        /* number of requests that are on the dispatch list or inside driver */
        int dispatched;
        struct cfq_ttime ttime;
@@ -229,7 +227,7 @@ struct cfq_data {
        struct request_queue *queue;
        /* Root service tree for cfq_groups */
        struct cfq_rb_root grp_service_tree;
-       struct cfq_group root_group;
+       struct cfq_group *root_group;
 
        /*
         * The priority currently being served
@@ -302,13 +300,17 @@ struct cfq_data {
        struct cfq_queue oom_cfqq;
 
        unsigned long last_delayed_sync;
+};
 
-       /* List of cfq groups being managed on this device*/
-       struct hlist_head cfqg_list;
+static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg)
+{
+       return blkg_to_pdata(blkg, &blkio_policy_cfq);
+}
 
-       /* Number of groups which are on blkcg->blkg_list */
-       unsigned int nr_blkcg_linked_grps;
-};
+static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg)
+{
+       return pdata_to_blkg(cfqg, &blkio_policy_cfq);
+}
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
 
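The two helpers added here are all CFQ needs to hop between the shared blkio_group and its policy-private cfq_group now that the blkcg core owns both allocations. A minimal userspace sketch of that container/private-data round trip; the struct layout and names below are illustrative stand-ins, not the actual blk-cgroup.h definitions (the real helpers are presumably keyed by the policy's plid so several policies can share one blkg):

    #include <assert.h>
    #include <stddef.h>
    #include <stdlib.h>

    /* Illustrative stand-ins for the blk-cgroup.h types. */
    struct group;

    struct policy_data {                /* per-policy area hanging off a group */
        struct group *owner;            /* back-pointer to the common group */
        char pdata[];                   /* policy-private payload, e.g. a cfq_group */
    };

    struct group {                      /* plays the role of struct blkio_group */
        struct policy_data *pd;         /* the kernel keeps one slot per policy */
    };

    /* group -> policy-private data, cf. blkg_to_pdata() */
    static void *group_to_pdata(struct group *grp)
    {
        return grp ? grp->pd->pdata : NULL;
    }

    /* policy-private data -> owning group, cf. pdata_to_blkg() */
    static struct group *pdata_to_group(void *pdata)
    {
        struct policy_data *pd;

        if (!pdata)
            return NULL;
        pd = (struct policy_data *)((char *)pdata -
                                    offsetof(struct policy_data, pdata));
        return pd->owner;
    }

    struct toy_cfqg { int weight; };    /* stands in for struct cfq_group */

    int main(void)
    {
        struct group grp;
        struct toy_cfqg *cfqg;

        grp.pd = calloc(1, sizeof(*grp.pd) + sizeof(struct toy_cfqg));
        grp.pd->owner = &grp;

        cfqg = group_to_pdata(&grp);    /* like blkg_to_cfqg() */
        cfqg->weight = 500;

        assert(pdata_to_group(cfqg) == &grp);   /* like cfqg_to_blkg() */
        free(grp.pd);
        return 0;
    }
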
@@ -374,11 +376,11 @@ CFQ_CFQQ_FNS(wait_busy);
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
        blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
                        cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
-                       blkg_path(&(cfqq)->cfqg->blkg), ##args)
+                       blkg_path(cfqg_to_blkg((cfqq)->cfqg)), ##args)
 
 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)                         \
        blk_add_trace_msg((cfqd)->queue, "%s " fmt,                     \
-                               blkg_path(&(cfqg)->blkg), ##args)       \
+                       blkg_path(cfqg_to_blkg((cfqg))), ##args)        \
 
 #else
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
@@ -935,7 +937,8 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
        cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
        cfq_group_service_tree_del(st, cfqg);
        cfqg->saved_workload_slice = 0;
-       cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
+       cfq_blkiocg_update_dequeue_stats(cfqg_to_blkg(cfqg),
+                                        &blkio_policy_cfq, 1);
 }
 
 static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
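
From here on every stats helper grows a &blkio_policy_cfq argument: with a single blkg potentially carrying data for several policies, the callee has to be told whose counters to touch. A rough sketch of the idea, with toy types rather than the blkio stats API:

    #include <stdio.h>

    /* Illustrative: one shared group, separate counters per policy. */
    enum { POLICY_PROP, POLICY_THROTL, NR_POLICIES };

    struct policy { int plid; };

    struct group {
        unsigned long dequeues[NR_POLICIES];
    };

    /* cf. cfq_blkiocg_update_dequeue_stats(blkg, &blkio_policy_cfq, 1) */
    static void update_dequeue_stats(struct group *grp, const struct policy *pol,
                                     unsigned long n)
    {
        grp->dequeues[pol->plid] += n;  /* only this policy's counters move */
    }

    int main(void)
    {
        struct group grp = { { 0 } };
        struct policy prop = { .plid = POLICY_PROP };

        update_dequeue_stats(&grp, &prop, 1);
        printf("prop dequeues: %lu\n", grp.dequeues[POLICY_PROP]);
        return 0;
    }
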
@@ -1007,178 +1010,70 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
                     "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
                     used_sl, cfqq->slice_dispatch, charge,
                     iops_mode(cfqd), cfqq->nr_sectors);
-       cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
-                                         unaccounted_sl);
-       cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
-}
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
-{
-       if (blkg)
-               return container_of(blkg, struct cfq_group, blkg);
-       return NULL;
-}
-
-static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
-                                         unsigned int weight)
-{
-       struct cfq_group *cfqg = cfqg_of_blkg(blkg);
-       cfqg->new_weight = weight;
-       cfqg->needs_update = true;
-}
-
-static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
-                       struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
-{
-       struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
-       unsigned int major, minor;
-
-       /*
-        * Add group onto cgroup list. It might happen that bdi->dev is
-        * not initialized yet. Initialize this new group without major
-        * and minor info and this info will be filled in once a new thread
-        * comes for IO.
-        */
-       if (bdi->dev) {
-               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
-                                       (void *)cfqd, MKDEV(major, minor));
-       } else
-               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
-                                       (void *)cfqd, 0);
-
-       cfqd->nr_blkcg_linked_grps++;
-       cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
-
-       /* Add group on cfqd list */
-       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+       cfq_blkiocg_update_timeslice_used(cfqg_to_blkg(cfqg), &blkio_policy_cfq,
+                                         used_sl, unaccounted_sl);
+       cfq_blkiocg_set_start_empty_time(cfqg_to_blkg(cfqg), &blkio_policy_cfq);
 }
 
-/*
- * Should be called from sleepable context. No request queue lock as per
- * cpu stats are allocated dynamically and alloc_percpu needs to be called
- * from sleepable context.
+/**
+ * cfq_init_cfqg_base - initialize base part of a cfq_group
+ * @cfqg: cfq_group to initialize
+ *
+ * Initialize the base part which is used whether %CONFIG_CFQ_GROUP_IOSCHED
+ * is enabled or not.
  */
-static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+static void cfq_init_cfqg_base(struct cfq_group *cfqg)
 {
-       struct cfq_group *cfqg = NULL;
-       int i, j, ret;
        struct cfq_rb_root *st;
-
-       cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
-       if (!cfqg)
-               return NULL;
+       int i, j;
 
        for_each_cfqg_st(cfqg, i, j, st)
                *st = CFQ_RB_ROOT;
        RB_CLEAR_NODE(&cfqg->rb_node);
 
        cfqg->ttime.last_end_request = jiffies;
-
-       /*
-        * Take the initial reference that will be released on destroy
-        * This can be thought of a joint reference by cgroup and
-        * elevator which will be dropped by either elevator exit
-        * or cgroup deletion path depending on who is exiting first.
-        */
-       cfqg->ref = 1;
-
-       ret = blkio_alloc_blkg_stats(&cfqg->blkg);
-       if (ret) {
-               kfree(cfqg);
-               return NULL;
-       }
-
-       return cfqg;
 }
 
-static struct cfq_group *
-cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+static void cfq_update_blkio_group_weight(struct request_queue *q,
+                                         struct blkio_group *blkg,
+                                         unsigned int weight)
 {
-       struct cfq_group *cfqg = NULL;
-       void *key = cfqd;
-       struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
-       unsigned int major, minor;
+       struct cfq_group *cfqg = blkg_to_cfqg(blkg);
 
-       /*
-        * This is the common case when there are no blkio cgroups.
-        * Avoid lookup in this case
-        */
-       if (blkcg == &blkio_root_cgroup)
-               cfqg = &cfqd->root_group;
-       else
-               cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+       cfqg->new_weight = weight;
+       cfqg->needs_update = true;
+}
 
-       if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
-               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               cfqg->blkg.dev = MKDEV(major, minor);
-       }
+static void cfq_init_blkio_group(struct blkio_group *blkg)
+{
+       struct cfq_group *cfqg = blkg_to_cfqg(blkg);
 
-       return cfqg;
+       cfq_init_cfqg_base(cfqg);
+       cfqg->weight = blkg->blkcg->weight;
 }
 
 /*
  * Search for the cfq group current task belongs to. request_queue lock must
  * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+                                               struct blkio_cgroup *blkcg)
 {
-       struct blkio_cgroup *blkcg;
-       struct cfq_group *cfqg = NULL, *__cfqg = NULL;
        struct request_queue *q = cfqd->queue;
+       struct cfq_group *cfqg = NULL;
 
-       rcu_read_lock();
-       blkcg = task_blkio_cgroup(current);
-       cfqg = cfq_find_cfqg(cfqd, blkcg);
-       if (cfqg) {
-               rcu_read_unlock();
-               return cfqg;
-       }
-
-       /*
-        * Need to allocate a group. Allocation of group also needs allocation
-        * of per cpu stats which in-turn takes a mutex() and can block. Hence
-        * we need to drop rcu lock and queue_lock before we call alloc.
-        *
-        * Not taking any queue reference here and assuming that queue is
-        * around by the time we return. CFQ queue allocation code does
-        * the same. It might be racy though.
-        */
-
-       rcu_read_unlock();
-       spin_unlock_irq(q->queue_lock);
-
-       cfqg = cfq_alloc_cfqg(cfqd);
-
-       spin_lock_irq(q->queue_lock);
-
-       rcu_read_lock();
-       blkcg = task_blkio_cgroup(current);
-
-       /*
-        * If some other thread already allocated the group while we were
-        * not holding queue lock, free up the group
-        */
-       __cfqg = cfq_find_cfqg(cfqd, blkcg);
+       /* avoid lookup for the common case where there's no blkio cgroup */
+       if (blkcg == &blkio_root_cgroup) {
+               cfqg = cfqd->root_group;
+       } else {
+               struct blkio_group *blkg;
 
-       if (__cfqg) {
-               kfree(cfqg);
-               rcu_read_unlock();
-               return __cfqg;
+               blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
+               if (!IS_ERR(blkg))
+                       cfqg = blkg_to_cfqg(blkg);
        }
 
-       if (!cfqg)
-               cfqg = &cfqd->root_group;
-
-       cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
-       rcu_read_unlock();
-       return cfqg;
-}
-
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
-       cfqg->ref++;
        return cfqg;
 }
 
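The allocation dance that used to live here (drop queue_lock, allocate, retake the lock, re-do the lookup) is gone: cfq_lookup_create_cfqg() simply asks the blkcg core, and blkg_lookup_create() either finds the existing group for this cgroup/queue pair or creates one and runs the policy's init hook, all with queue_lock held by the caller. A rough userspace sketch of that lookup-or-create-under-a-lock shape, with a plain mutex standing in for queue_lock and hypothetical names throughout:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct cgroup { int id; };          /* hypothetical stand-in */

    struct group {
        struct cgroup *cg;
        struct group *next;
        int weight;                     /* policy-private part, inlined here */
    };

    static struct group *groups;        /* per-queue list in the real code */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;  /* plays queue_lock */

    static void init_group(struct group *grp)   /* cf. cfq_init_blkio_group() */
    {
        grp->weight = 500;
    }

    /* cf. blkg_lookup_create(): find this cgroup's group or create it, under lock. */
    static struct group *lookup_create(struct cgroup *cg)
    {
        struct group *grp;

        pthread_mutex_lock(&lock);
        for (grp = groups; grp; grp = grp->next)
            if (grp->cg == cg)
                goto out;

        grp = calloc(1, sizeof(*grp));
        if (grp) {
            grp->cg = cg;
            init_group(grp);            /* policy init hook runs on creation */
            grp->next = groups;
            groups = grp;
        }
    out:
        pthread_mutex_unlock(&lock);
        return grp;
    }

    int main(void)
    {
        struct cgroup cg = { .id = 1 };
        struct group *a = lookup_create(&cg);
        struct group *b = lookup_create(&cg);

        printf("same group: %d, weight %d\n", a == b, a->weight);
        return 0;
    }
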
@@ -1186,94 +1081,18 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 {
        /* Currently, all async queues are mapped to root group */
        if (!cfq_cfqq_sync(cfqq))
-               cfqg = &cfqq->cfqd->root_group;
+               cfqg = cfqq->cfqd->root_group;
 
        cfqq->cfqg = cfqg;
        /* cfqq reference on cfqg */
-       cfqq->cfqg->ref++;
-}
-
-static void cfq_put_cfqg(struct cfq_group *cfqg)
-{
-       struct cfq_rb_root *st;
-       int i, j;
-
-       BUG_ON(cfqg->ref <= 0);
-       cfqg->ref--;
-       if (cfqg->ref)
-               return;
-       for_each_cfqg_st(cfqg, i, j, st)
-               BUG_ON(!RB_EMPTY_ROOT(&st->rb));
-       free_percpu(cfqg->blkg.stats_cpu);
-       kfree(cfqg);
-}
-
-static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
-       /* Something wrong if we are trying to remove same group twice */
-       BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
-
-       hlist_del_init(&cfqg->cfqd_node);
-
-       BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
-       cfqd->nr_blkcg_linked_grps--;
-
-       /*
-        * Put the reference taken at the time of creation so that when all
-        * queues are gone, group can be destroyed.
-        */
-       cfq_put_cfqg(cfqg);
-}
-
-static void cfq_release_cfq_groups(struct cfq_data *cfqd)
-{
-       struct hlist_node *pos, *n;
-       struct cfq_group *cfqg;
-
-       hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
-               /*
-                * If cgroup removal path got to blk_group first and removed
-                * it from cgroup list, then it will take care of destroying
-                * cfqg also.
-                */
-               if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg))
-                       cfq_destroy_cfqg(cfqd, cfqg);
-       }
-}
-
-/*
- * Blk cgroup controller notification saying that blkio_group object is being
- * delinked as associated cgroup object is going away. That also means that
- * no new IO will come in this group. So get rid of this group as soon as
- * any pending IO in the group is finished.
- *
- * This function is called under rcu_read_lock(). key is the rcu protected
- * pointer. That means "key" is a valid cfq_data pointer as long as we are rcu
- * read lock.
- *
- * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
- * it should not be NULL as even if elevator was exiting, cgroup deltion
- * path got to it first.
- */
-static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
-{
-       unsigned long  flags;
-       struct cfq_data *cfqd = key;
-
-       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-       cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
-       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+       blkg_get(cfqg_to_blkg(cfqg));
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
-{
-       return &cfqd->root_group;
-}
-
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+                                               struct blkio_cgroup *blkcg)
 {
-       return cfqg;
+       return cfqd->root_group;
 }
 
 static inline void
@@ -1281,9 +1100,6 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
        cfqq->cfqg = cfqg;
 }
 
-static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
-static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
-
 #endif /* GROUP_IOSCHED */
 
 /*
@@ -1550,12 +1366,14 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
 {
        elv_rb_del(&cfqq->sort_list, rq);
        cfqq->queued[rq_is_sync(rq)]--;
-       cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
-                                       rq_data_dir(rq), rq_is_sync(rq));
+       cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                          &blkio_policy_cfq, rq_data_dir(rq),
+                                          rq_is_sync(rq));
        cfq_add_rq_rb(rq);
-       cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
-                       &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq),
-                       rq_is_sync(rq));
+       cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                       &blkio_policy_cfq,
+                                       cfqg_to_blkg(cfqq->cfqd->serving_group),
+                                       rq_data_dir(rq), rq_is_sync(rq));
 }
 
 static struct request *
@@ -1611,8 +1429,9 @@ static void cfq_remove_request(struct request *rq)
        cfq_del_rq_rb(rq);
 
        cfqq->cfqd->rq_queued--;
-       cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
-                                       rq_data_dir(rq), rq_is_sync(rq));
+       cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                          &blkio_policy_cfq, rq_data_dir(rq),
+                                          rq_is_sync(rq));
        if (rq->cmd_flags & REQ_PRIO) {
                WARN_ON(!cfqq->prio_pending);
                cfqq->prio_pending--;
@@ -1647,8 +1466,9 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
 static void cfq_bio_merged(struct request_queue *q, struct request *req,
                                struct bio *bio)
 {
-       cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg,
-                                       bio_data_dir(bio), cfq_bio_sync(bio));
+       cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(req)),
+                                          &blkio_policy_cfq, bio_data_dir(bio),
+                                          cfq_bio_sync(bio));
 }
 
 static void
@@ -1670,8 +1490,9 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
        if (cfqq->next_rq == next)
                cfqq->next_rq = rq;
        cfq_remove_request(next);
-       cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
-                                       rq_data_dir(next), rq_is_sync(next));
+       cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                          &blkio_policy_cfq, rq_data_dir(next),
+                                          rq_is_sync(next));
 
        cfqq = RQ_CFQQ(next);
        /*
@@ -1699,18 +1520,11 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 
        /*
         * Lookup the cfqq that this bio will be queued with and allow
-        * merge only if rq is queued there.  This function can be called
-        * from plug merge without queue_lock.  In such cases, ioc of @rq
-        * and %current are guaranteed to be equal.  Avoid lookup which
-        * requires queue_lock by using @rq's cic.
+        * merge only if rq is queued there.
         */
-       if (current->io_context == RQ_CIC(rq)->icq.ioc) {
-               cic = RQ_CIC(rq);
-       } else {
-               cic = cfq_cic_lookup(cfqd, current->io_context);
-               if (!cic)
-                       return false;
-       }
+       cic = cfq_cic_lookup(cfqd, current->io_context);
+       if (!cic)
+               return false;
 
        cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
        return cfqq == RQ_CFQQ(rq);
@@ -1719,7 +1533,8 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        del_timer(&cfqd->idle_slice_timer);
-       cfq_blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
+       cfq_blkiocg_update_idle_time_stats(cfqg_to_blkg(cfqq->cfqg),
+                                          &blkio_policy_cfq);
 }
 
 static void __cfq_set_active_queue(struct cfq_data *cfqd,
@@ -1728,7 +1543,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
        if (cfqq) {
                cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
                                cfqd->serving_prio, cfqd->serving_type);
-               cfq_blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg);
+               cfq_blkiocg_update_avg_queue_size_stats(cfqg_to_blkg(cfqq->cfqg),
+                                                       &blkio_policy_cfq);
                cfqq->slice_start = 0;
                cfqq->dispatch_start = jiffies;
                cfqq->allocated_slice = 0;
@@ -1794,7 +1610,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                cfqd->active_queue = NULL;
 
        if (cfqd->active_cic) {
-               put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
+               put_io_context(cfqd->active_cic->icq.ioc);
                cfqd->active_cic = NULL;
        }
 }
@@ -2076,7 +1892,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
                sl = cfqd->cfq_slice_idle;
 
        mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
-       cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
+       cfq_blkiocg_update_set_idle_time_stats(cfqg_to_blkg(cfqq->cfqg),
+                                              &blkio_policy_cfq);
        cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl,
                        group_idle ? 1 : 0);
 }
@@ -2099,8 +1916,9 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
        cfqq->nr_sectors += blk_rq_sectors(rq);
-       cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
-                                       rq_data_dir(rq), rq_is_sync(rq));
+       cfq_blkiocg_update_dispatch_stats(cfqg_to_blkg(cfqq->cfqg),
+                                         &blkio_policy_cfq, blk_rq_bytes(rq),
+                                         rq_data_dir(rq), rq_is_sync(rq));
 }
 
 /*
@@ -2682,7 +2500,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        kmem_cache_free(cfq_pool, cfqq);
-       cfq_put_cfqg(cfqg);
+       blkg_put(cfqg_to_blkg(cfqg));
 }
 
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
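
cfqg->ref is gone as well: anything that used to pin a cfq_group (the cfqq link earlier, rq->elv.priv[1] further down) now pins the owning blkg via blkg_get()/blkg_put(), and the group is presumably freed by the core once the last reference drops. A toy sketch of that ownership pattern, with a hand-rolled counter rather than the kernel's blkg refcounting:

    #include <assert.h>
    #include <stdlib.h>

    struct group {                      /* stands in for a blkg */
        int ref;
        int weight;                     /* policy-private payload */
    };

    static void group_get(struct group *grp)     /* cf. blkg_get() */
    {
        grp->ref++;
    }

    static void group_put(struct group *grp)     /* cf. blkg_put() */
    {
        assert(grp->ref > 0);
        if (--grp->ref == 0)
            free(grp);
    }

    struct queue { struct group *grp; };         /* stands in for cfq_queue */

    int main(void)
    {
        struct group *grp = calloc(1, sizeof(*grp));
        struct queue q;

        grp->ref = 1;                   /* creation reference held by the core */

        q.grp = grp;
        group_get(q.grp);               /* queue pins the group, cf. cfq_link_cfqq_cfqg() */

        group_put(q.grp);               /* queue dies, cf. cfq_put_queue() */
        group_put(grp);                 /* creator drops its reference, group is freed */
        return 0;
    }
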
@@ -2855,12 +2673,18 @@ static struct cfq_queue *
 cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
                     struct io_context *ioc, gfp_t gfp_mask)
 {
+       struct blkio_cgroup *blkcg;
        struct cfq_queue *cfqq, *new_cfqq = NULL;
        struct cfq_io_cq *cic;
        struct cfq_group *cfqg;
 
 retry:
-       cfqg = cfq_get_cfqg(cfqd);
+       rcu_read_lock();
+
+       blkcg = task_blkio_cgroup(current);
+
+       cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
+
        cic = cfq_cic_lookup(cfqd, ioc);
        /* cic always exists here */
        cfqq = cic_to_cfqq(cic, is_sync);
@@ -2875,6 +2699,7 @@ retry:
                        cfqq = new_cfqq;
                        new_cfqq = NULL;
                } else if (gfp_mask & __GFP_WAIT) {
+                       rcu_read_unlock();
                        spin_unlock_irq(cfqd->queue->queue_lock);
                        new_cfqq = kmem_cache_alloc_node(cfq_pool,
                                        gfp_mask | __GFP_ZERO,
@@ -2900,6 +2725,7 @@ retry:
        if (new_cfqq)
                kmem_cache_free(cfq_pool, new_cfqq);
 
+       rcu_read_unlock();
        return cfqq;
 }
 
@@ -3117,17 +2943,18 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
  */
 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
+       enum wl_type_t old_type = cfqq_type(cfqd->active_queue);
+
        cfq_log_cfqq(cfqd, cfqq, "preempt");
+       cfq_slice_expired(cfqd, 1);
 
        /*
         * workload type is changed, don't save slice, otherwise preempt
         * doesn't happen
         */
-       if (cfqq_type(cfqd->active_queue) != cfqq_type(cfqq))
+       if (old_type != cfqq_type(cfqq))
                cfqq->cfqg->saved_workload_slice = 0;
 
-       cfq_slice_expired(cfqd, 1);
-
        /*
         * Put the new queue at the front of the of the current list,
         * so we know that it will be selected next.
@@ -3179,7 +3006,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                                __blk_run_queue(cfqd->queue);
                        } else {
                                cfq_blkiocg_update_idle_time_stats(
-                                               &cfqq->cfqg->blkg);
+                                               cfqg_to_blkg(cfqq->cfqg),
+                                               &blkio_policy_cfq);
                                cfq_mark_cfqq_must_dispatch(cfqq);
                        }
                }
@@ -3206,9 +3034,10 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
        rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
        list_add_tail(&rq->queuelist, &cfqq->fifo);
        cfq_add_rq_rb(rq);
-       cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
-                       &cfqd->serving_group->blkg, rq_data_dir(rq),
-                       rq_is_sync(rq));
+       cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                       &blkio_policy_cfq,
+                                       cfqg_to_blkg(cfqd->serving_group),
+                                       rq_data_dir(rq), rq_is_sync(rq));
        cfq_rq_enqueued(cfqd, cfqq, rq);
 }
 
@@ -3304,9 +3133,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        cfqd->rq_in_driver--;
        cfqq->dispatched--;
        (RQ_CFQG(rq))->dispatched--;
-       cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg,
-                       rq_start_time_ns(rq), rq_io_start_time_ns(rq),
-                       rq_data_dir(rq), rq_is_sync(rq));
+       cfq_blkiocg_update_completion_stats(cfqg_to_blkg(cfqq->cfqg),
+                       &blkio_policy_cfq, rq_start_time_ns(rq),
+                       rq_io_start_time_ns(rq), rq_data_dir(rq),
+                       rq_is_sync(rq));
 
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
@@ -3425,7 +3255,7 @@ static void cfq_put_request(struct request *rq)
                cfqq->allocated[rw]--;
 
                /* Put down rq reference on cfqg */
-               cfq_put_cfqg(RQ_CFQG(rq));
+               blkg_put(cfqg_to_blkg(RQ_CFQG(rq)));
                rq->elv.priv[0] = NULL;
                rq->elv.priv[1] = NULL;
 
@@ -3476,20 +3306,20 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
        const int rw = rq_data_dir(rq);
        const bool is_sync = rq_is_sync(rq);
        struct cfq_queue *cfqq;
+       unsigned int changed;
 
        might_sleep_if(gfp_mask & __GFP_WAIT);
 
        spin_lock_irq(q->queue_lock);
 
        /* handle changed notifications */
-       if (unlikely(cic->icq.changed)) {
-               if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
-                       changed_ioprio(cic);
+       changed = icq_get_changed(&cic->icq);
+       if (unlikely(changed & ICQ_IOPRIO_CHANGED))
+               changed_ioprio(cic);
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-               if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
-                       changed_cgroup(cic);
+       if (unlikely(changed & ICQ_CGROUP_CHANGED))
+               changed_cgroup(cic);
 #endif
-       }
 
 new_queue:
        cfqq = cic_to_cfqq(cic, is_sync);
@@ -3520,8 +3350,9 @@ new_queue:
        cfqq->allocated[rw]++;
 
        cfqq->ref++;
+       blkg_get(cfqg_to_blkg(cfqq->cfqg));
        rq->elv.priv[0] = cfqq;
-       rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
+       rq->elv.priv[1] = cfqq->cfqg;
        spin_unlock_irq(q->queue_lock);
        return 0;
 }
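
The changed-notification handling is also simplified: icq_get_changed() hands back the set of pending change bits in one call, clearing them as it goes, and the caller just tests the returned mask. One way to implement that fetch-and-clear contract, sketched in userspace with C11 atomics (the flag names here are made up, and the kernel helper may well clear the bits individually):

    #include <stdatomic.h>
    #include <stdio.h>

    #define IOPRIO_CHANGED  (1u << 0)   /* hypothetical flag names */
    #define CGROUP_CHANGED  (1u << 1)

    static atomic_uint changed_flags;

    /* cf. icq_get_changed(): return and clear all pending change bits at once. */
    static unsigned int get_changed(void)
    {
        return atomic_exchange(&changed_flags, 0);
    }

    int main(void)
    {
        unsigned int changed;

        atomic_fetch_or(&changed_flags, IOPRIO_CHANGED | CGROUP_CHANGED);

        changed = get_changed();
        if (changed & IOPRIO_CHANGED)
            printf("reapply io priority\n");
        if (changed & CGROUP_CHANGED)
            printf("drop cached cgroup mapping\n");
        return 0;
    }
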
@@ -3628,17 +3459,18 @@ static void cfq_exit_queue(struct elevator_queue *e)
                __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
        cfq_put_async_queues(cfqd);
-       cfq_release_cfq_groups(cfqd);
 
+       spin_unlock_irq(q->queue_lock);
+
+#ifdef CONFIG_BLK_CGROUP
        /*
         * If there are groups which we could not unlink from blkcg list,
         * wait for a rcu period for them to be freed.
         */
-       if (cfqd->nr_blkcg_linked_grps)
-               wait = true;
-
+       spin_lock_irq(q->queue_lock);
+       wait = q->nr_blkgs;
        spin_unlock_irq(q->queue_lock);
-
+#endif
        cfq_shutdown_timer_wq(cfqd);
 
        /*
@@ -3655,62 +3487,54 @@ static void cfq_exit_queue(struct elevator_queue *e)
        if (wait)
                synchronize_rcu();
 
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-       /* Free up per cpu stats for root group */
-       free_percpu(cfqd->root_group.blkg.stats_cpu);
+#ifndef CONFIG_CFQ_GROUP_IOSCHED
+       kfree(cfqd->root_group);
 #endif
+       update_root_blkg_pd(q, BLKIO_POLICY_PROP);
        kfree(cfqd);
 }
 
-static void *cfq_init_queue(struct request_queue *q)
+static int cfq_init_queue(struct request_queue *q)
 {
        struct cfq_data *cfqd;
-       int i, j;
-       struct cfq_group *cfqg;
-       struct cfq_rb_root *st;
+       struct blkio_group *blkg __maybe_unused;
+       int i;
 
        cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
        if (!cfqd)
-               return NULL;
+               return -ENOMEM;
+
+       cfqd->queue = q;
+       q->elevator->elevator_data = cfqd;
 
        /* Init root service tree */
        cfqd->grp_service_tree = CFQ_RB_ROOT;
 
-       /* Init root group */
-       cfqg = &cfqd->root_group;
-       for_each_cfqg_st(cfqg, i, j, st)
-               *st = CFQ_RB_ROOT;
-       RB_CLEAR_NODE(&cfqg->rb_node);
-
-       /* Give preference to root group over other groups */
-       cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
-
+       /* Init root group and prefer root group over other groups by default */
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-       /*
-        * Set root group reference to 2. One reference will be dropped when
-        * all groups on cfqd->cfqg_list are being deleted during queue exit.
-        * Other reference will remain there as we don't want to delete this
-        * group as it is statically allocated and gets destroyed when
-        * throtl_data goes away.
-        */
-       cfqg->ref = 2;
+       rcu_read_lock();
+       spin_lock_irq(q->queue_lock);
+
+       blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP,
+                                 true);
+       if (!IS_ERR(blkg))
+               cfqd->root_group = blkg_to_cfqg(blkg);
 
-       if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
-               kfree(cfqg);
+       spin_unlock_irq(q->queue_lock);
+       rcu_read_unlock();
+#else
+       cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group),
+                                       GFP_KERNEL, cfqd->queue->node);
+       if (cfqd->root_group)
+               cfq_init_cfqg_base(cfqd->root_group);
+#endif
+       if (!cfqd->root_group) {
                kfree(cfqd);
-               return NULL;
+               return -ENOMEM;
        }
 
-       rcu_read_lock();
-
-       cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
-                                       (void *)cfqd, 0);
-       rcu_read_unlock();
-       cfqd->nr_blkcg_linked_grps++;
+       cfqd->root_group->weight = 2*BLKIO_WEIGHT_DEFAULT;
 
-       /* Add group on cfqd->cfqg_list */
-       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-#endif
        /*
         * Not strictly needed (since RB_ROOT just clears the node and we
         * zeroed cfqd on alloc), but better be safe in case someone decides
@@ -3722,13 +3546,17 @@ static void *cfq_init_queue(struct request_queue *q)
        /*
         * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
         * Grab a permanent reference to it, so that the normal code flow
-        * will not attempt to free it.
+        * will not attempt to free it.  oom_cfqq is linked to root_group
+        * but shouldn't hold a reference as it'll never be unlinked.  Lose
+        * the reference from linking right away.
         */
        cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
        cfqd->oom_cfqq.ref++;
-       cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
 
-       cfqd->queue = q;
+       spin_lock_irq(q->queue_lock);
+       cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group);
+       blkg_put(cfqg_to_blkg(cfqd->root_group));
+       spin_unlock_irq(q->queue_lock);
 
        init_timer(&cfqd->idle_slice_timer);
        cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
@@ -3753,7 +3581,7 @@ static void *cfq_init_queue(struct request_queue *q)
         * second, in order to have larger depth for async operations.
         */
        cfqd->last_delayed_sync = jiffies - HZ;
-       return cfqd;
+       return 0;
 }
 
 /*
@@ -3879,13 +3707,12 @@ static struct elevator_type iosched_cfq = {
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 static struct blkio_policy_type blkio_policy_cfq = {
        .ops = {
-               .blkio_unlink_group_fn =        cfq_unlink_blkio_group,
+               .blkio_init_group_fn =          cfq_init_blkio_group,
                .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
        },
        .plid = BLKIO_POLICY_PROP,
+       .pdata_size = sizeof(struct cfq_group),
 };
-#else
-static struct blkio_policy_type blkio_policy_cfq;
 #endif
 
 static int __init cfq_init(void)
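
The policy descriptor now carries .pdata_size and an init callback instead of an unlink hook, so the blkcg core can size and allocate the per-blkg cfq_group area itself and hand it to CFQ for initialization. A compact sketch of that core-allocates/policy-initializes split, using toy types rather than the blkio_policy API:

    #include <stdio.h>
    #include <stdlib.h>

    struct group;                       /* shared object owned by the core */

    struct policy_type {                /* cf. struct blkio_policy_type */
        void (*init_group_fn)(struct group *grp);
        size_t pdata_size;              /* size of the policy-private area */
    };

    struct group {
        const struct policy_type *pol;
        char pdata[];                   /* sized by pol->pdata_size */
    };

    /* Core side: allocate group plus private area, then let the policy init it. */
    static struct group *core_alloc_group(const struct policy_type *pol)
    {
        struct group *grp = calloc(1, sizeof(*grp) + pol->pdata_size);

        if (grp) {
            grp->pol = pol;
            pol->init_group_fn(grp);
        }
        return grp;
    }

    /* Policy side, cf. cfq_init_blkio_group(). */
    struct toy_cfqg { int weight; };

    static void toy_init_group(struct group *grp)
    {
        ((struct toy_cfqg *)grp->pdata)->weight = 500;
    }

    static const struct policy_type toy_policy = {
        .init_group_fn = toy_init_group,
        .pdata_size    = sizeof(struct toy_cfqg),
    };

    int main(void)
    {
        struct group *grp = core_alloc_group(&toy_policy);

        printf("weight %d\n", ((struct toy_cfqg *)grp->pdata)->weight);
        free(grp);
        return 0;
    }
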
@@ -3916,14 +3743,17 @@ static int __init cfq_init(void)
                return ret;
        }
 
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
        blkio_policy_register(&blkio_policy_cfq);
-
+#endif
        return 0;
 }
 
 static void __exit cfq_exit(void)
 {
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
        blkio_policy_unregister(&blkio_policy_cfq);
+#endif
        elv_unregister(&iosched_cfq);
        kmem_cache_destroy(cfq_pool);
 }