Merge tag 'pwm_pxa_for_v3.14' of https://git.kernel.org/pub/scm/linux/kernel/git...

[karo-tx-linux.git] / block / blk-core.c
diff --git a/block/blk-core.c b/block/blk-core.c

index 0a00e4ecf87cae37a3d310c8dfd32869ac24c161..853f92749202cbfe5b252d557f667b7f970ac872 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -16,6 +16,7 @@
  #include <linux/backing-dev.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
  #include <linux/highmem.h>
  #include <linux/mm.h>
  #include <linux/kernel_stat.h>
@@ -37,6 +38,7 @@
  
  #include "blk.h"
  #include "blk-cgroup.h"
+#include "blk-mq.h"
  
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -48,7 +50,7 @@ DEFINE_IDA(blk_queue_ida);
  /*
   * For the allocated request tables
   */
-static struct kmem_cache *request_cachep;
+struct kmem_cache *request_cachep;
  
  /*
   * For queue allocation
@@ -60,42 +62,6 @@ struct kmem_cache *blk_requestq_cachep;
   */
  static struct workqueue_struct *kblockd_workqueue;
  
-static void drive_stat_acct(struct request *rq, int new_io)
-{
-       struct hd_struct *part;
-       int rw = rq_data_dir(rq);
-       int cpu;
-
-       if (!blk_do_io_stat(rq))
-               return;
-
-       cpu = part_stat_lock();
-
-       if (!new_io) {
-               part = rq->part;
-               part_stat_inc(cpu, part, merges[rw]);
-       } else {
-               part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
-               if (!hd_struct_try_get(part)) {
-                       /*
-                        * The partition is already being removed,
-                        * the request will be accounted on the disk only
-                        *
-                        * We take a reference on disk->part0 although that
-                        * partition will never be deleted, so we can treat
-                        * it as any other partition.
-                        */
-                       part = &rq->rq_disk->part0;
-                       hd_struct_get(part);
-               }
-               part_round_stats(cpu, part);
-               part_inc_in_flight(part, rw);
-               rq->part = part;
-       }
-
-       part_stat_unlock();
-}
-
  void blk_queue_congestion_threshold(struct request_queue *q)
  {
         int nr;
@@ -145,7 +111,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
         rq->cmd = rq->__cmd;
         rq->cmd_len = BLK_MAX_CDB;
         rq->tag = -1;
-       rq->ref_count = 1;
         rq->start_time = jiffies;
         set_start_time_ns(rq);
         rq->part = NULL;
@@ -166,7 +131,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
         bio_advance(bio, nbytes);
  
         /* don't actually finish bio if it's part of flush sequence */
-       if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+       if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
                 bio_endio(bio, error);
  }
  
@@ -174,9 +139,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
  {
         int bit;
  
-       printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
+       printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
                 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
-               rq->cmd_flags);
+               (unsigned long long) rq->cmd_flags);
  
         printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
                (unsigned long long)blk_rq_pos(rq),
@@ -281,7 +246,16 @@ EXPORT_SYMBOL(blk_stop_queue);
  void blk_sync_queue(struct request_queue *q)
  {
         del_timer_sync(&q->timeout);
-       cancel_delayed_work_sync(&q->delay_work);
+
+       if (q->mq_ops) {
+               struct blk_mq_hw_ctx *hctx;
+               int i;
+
+               queue_for_each_hw_ctx(q, hctx, i)
+                       cancel_delayed_work_sync(&hctx->delayed_work);
+       } else {
+               cancel_delayed_work_sync(&q->delay_work);
+       }
  }
  EXPORT_SYMBOL(blk_sync_queue);
  
@@ -533,8 +507,13 @@ void blk_cleanup_queue(struct request_queue *q)
          * Drain all requests queued before DYING marking. Set DEAD flag to
          * prevent that q->request_fn() gets invoked after draining finished.
          */
-       spin_lock_irq(lock);
-       __blk_drain_queue(q, true);
+       if (q->mq_ops) {
+               blk_mq_drain_queue(q);
+               spin_lock_irq(lock);
+       } else {
+               spin_lock_irq(lock);
+               __blk_drain_queue(q, true);
+       }
         queue_flag_set(QUEUE_FLAG_DEAD, q);
         spin_unlock_irq(lock);
  
@@ -595,9 +574,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
         if (!q)
                 return NULL;
  
+       if (percpu_counter_init(&q->mq_usage_counter, 0))
+               goto fail_q;
+
         q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
         if (q->id < 0)
-               goto fail_q;
+               goto fail_c;
  
         q->backing_dev_info.ra_pages =
                         (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -644,13 +626,19 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
         q->bypass_depth = 1;
         __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
  
+       init_waitqueue_head(&q->mq_freeze_wq);
+
         if (blkcg_init_queue(q))
-               goto fail_id;
+               goto fail_bdi;
  
         return q;
  
+fail_bdi:
+       bdi_destroy(&q->backing_dev_info);
  fail_id:
         ida_simple_remove(&blk_queue_ida, q->id);
+fail_c:
+       percpu_counter_destroy(&q->mq_usage_counter);
  fail_q:
         kmem_cache_free(blk_requestq_cachep, q);
         return NULL;
@@ -705,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
         if (!uninit_q)
                 return NULL;
  
+       uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
+       if (!uninit_q->flush_rq)
+               goto out_cleanup_queue;
+
         q = blk_init_allocated_queue(uninit_q, rfn, lock);
         if (!q)
-               blk_cleanup_queue(uninit_q);
-
+               goto out_free_flush_rq;
         return q;
+
+out_free_flush_rq:
+       kfree(uninit_q->flush_rq);
+out_cleanup_queue:
+       blk_cleanup_queue(uninit_q);
+       return NULL;
  }
  EXPORT_SYMBOL(blk_init_queue_node);
  
@@ -739,9 +736,17 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
  
         q->sg_reserved_size = INT_MAX;
  
+       /* Protect q->elevator from elevator_change */
+       mutex_lock(&q->sysfs_lock);
+
         /* init elevator */
-       if (elevator_init(q, NULL))
+       if (elevator_init(q, NULL)) {
+               mutex_unlock(&q->sysfs_lock);
                 return NULL;
+       }
+
+       mutex_unlock(&q->sysfs_lock);
+
         return q;
  }
  EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1109,7 +1114,8 @@ retry:
         goto retry;
  }
  
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q, int rw,
+               gfp_t gfp_mask)
  {
         struct request *rq;
  
@@ -1126,6 +1132,14 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
  
         return rq;
  }
+
+struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+{
+       /* Dispatch on whether the queue has mq_ops set. */
+       if (q->mq_ops)
+               return blk_mq_alloc_request(q, rw, gfp_mask);
+       return blk_old_get_request(q, rw, gfp_mask);
+}
  EXPORT_SYMBOL(blk_get_request);
  
  /**
@@ -1211,7 +1225,7 @@ EXPORT_SYMBOL(blk_requeue_request);
  static void add_acct_request(struct request_queue *q, struct request *rq,
                              int where)
  {
-       drive_stat_acct(rq, 1);
+       blk_account_io_start(rq, true);
         __elv_add_request(q, rq, where);
  }
  
@@ -1272,8 +1286,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
  {
         if (unlikely(!q))
                 return;
-       if (unlikely(--req->ref_count))
+
+       if (q->mq_ops) {
+               blk_mq_free_request(req);
                 return;
+       }
  
         blk_pm_put_request(req);
  
@@ -1302,12 +1319,17 @@ EXPORT_SYMBOL_GPL(__blk_put_request);
  
  void blk_put_request(struct request *req)
  {
-       unsigned long flags;
         struct request_queue *q = req->q;
  
-       spin_lock_irqsave(q->queue_lock, flags);
-       __blk_put_request(q, req);
-       spin_unlock_irqrestore(q->queue_lock, flags);
+       if (q->mq_ops)
+               blk_mq_free_request(req);
+       else {
+               unsigned long flags;
+
+               spin_lock_irqsave(q->queue_lock, flags);
+               __blk_put_request(q, req);
+               spin_unlock_irqrestore(q->queue_lock, flags);
+       }
  }
  EXPORT_SYMBOL(blk_put_request);
  
@@ -1333,7 +1355,7 @@ void blk_add_request_payload(struct request *rq, struct page *page,
         bio->bi_io_vec->bv_offset = 0;
         bio->bi_io_vec->bv_len = len;
  
-       bio->bi_size = len;
+       bio->bi_iter.bi_size = len;
         bio->bi_vcnt = 1;
         bio->bi_phys_segments = 1;
  
@@ -1343,8 +1365,8 @@ void blk_add_request_payload(struct request *rq, struct page *page,
  }
  EXPORT_SYMBOL_GPL(blk_add_request_payload);
  
-static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
-                                  struct bio *bio)
+bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
+                           struct bio *bio)
  {
         const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
  
@@ -1358,15 +1380,15 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
  
         req->biotail->bi_next = bio;
         req->biotail = bio;
-       req->__data_len += bio->bi_size;
+       req->__data_len += bio->bi_iter.bi_size;
         req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
  
-       drive_stat_acct(req, 0);
+       blk_account_io_start(req, false);
         return true;
  }
  
-static bool bio_attempt_front_merge(struct request_queue *q,
-                                   struct request *req, struct bio *bio)
+bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
+                            struct bio *bio)
  {
         const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
  
@@ -1387,16 +1409,16 @@ static bool bio_attempt_front_merge(struct request_queue *q,
          * not touch req->buffer either...
          */
         req->buffer = bio_data(bio);
-       req->__sector = bio->bi_sector;
-       req->__data_len += bio->bi_size;
+       req->__sector = bio->bi_iter.bi_sector;
+       req->__data_len += bio->bi_iter.bi_size;
         req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
  
-       drive_stat_acct(req, 0);
+       blk_account_io_start(req, false);
         return true;
  }
  
  /**
- * attempt_plug_merge - try to merge with %current's plugged list
+ * blk_attempt_plug_merge - try to merge with %current's plugged list
   * @q: request_queue new bio is being queued at
   * @bio: new bio being queued
   * @request_count: out parameter for number of traversed plugged requests
@@ -1412,19 +1434,28 @@ static bool bio_attempt_front_merge(struct request_queue *q,
   * reliable access to the elevator outside queue lock.  Only check basic
   * merging parameters without querying the elevator.
   */
-static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
-                              unsigned int *request_count)
+bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
+                           unsigned int *request_count)
  {
         struct blk_plug *plug;
         struct request *rq;
         bool ret = false;
+       struct list_head *plug_list;
+
+       if (blk_queue_nomerges(q))
+               goto out;
  
         plug = current->plug;
         if (!plug)
                 goto out;
         *request_count = 0;
  
-       list_for_each_entry_reverse(rq, &plug->list, queuelist) {
+       if (q->mq_ops)
+               plug_list = &plug->mq_list;
+       else
+               plug_list = &plug->list;
+
+       list_for_each_entry_reverse(rq, plug_list, queuelist) {
                 int el_ret;
  
                 if (rq->q == q)
@@ -1457,7 +1488,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
                 req->cmd_flags |= REQ_FAILFAST_MASK;
  
         req->errors = 0;
-       req->__sector = bio->bi_sector;
+       req->__sector = bio->bi_iter.bi_sector;
         req->ioprio = bio_prio(bio);
         blk_rq_bio_prep(req->q, req, bio);
  }
@@ -1492,7 +1523,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
          * Check if we can merge with the plugged list before grabbing
          * any locks.
          */
-       if (attempt_plug_merge(q, bio, &request_count))
+       if (blk_attempt_plug_merge(q, bio, &request_count))
                 return;
  
         spin_lock_irq(q->queue_lock);
@@ -1560,7 +1591,7 @@ get_rq:
                         }
                 }
                 list_add_tail(&req->queuelist, &plug->list);
-               drive_stat_acct(req, 1);
+               blk_account_io_start(req, true);
         } else {
                 spin_lock_irq(q->queue_lock);
                 add_acct_request(q, req, where);
@@ -1581,12 +1612,12 @@ static inline void blk_partition_remap(struct bio *bio)
         if (bio_sectors(bio) && bdev != bdev->bd_contains) {
                 struct hd_struct *p = bdev->bd_part;
  
-               bio->bi_sector += p->start_sect;
+               bio->bi_iter.bi_sector += p->start_sect;
                 bio->bi_bdev = bdev->bd_contains;
  
                 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
                                       bdev->bd_dev,
-                                     bio->bi_sector - p->start_sect);
+                                     bio->bi_iter.bi_sector - p->start_sect);
         }
  }
  
@@ -1652,7 +1683,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
         /* Test device or partition size, when known. */
         maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
         if (maxsector) {
-               sector_t sector = bio->bi_sector;
+               sector_t sector = bio->bi_iter.bi_sector;
  
                 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
                         /*
@@ -1688,7 +1719,7 @@ generic_make_request_checks(struct bio *bio)
                        "generic_make_request: Trying to access "
                         "nonexistent block-device %s (%Lu)\n",
                         bdevname(bio->bi_bdev, b),
-                       (long long) bio->bi_sector);
+                       (long long) bio->bi_iter.bi_sector);
                 goto end_io;
         }
  
@@ -1702,9 +1733,9 @@ generic_make_request_checks(struct bio *bio)
         }
  
         part = bio->bi_bdev->bd_part;
-       if (should_fail_request(part, bio->bi_size) ||
+       if (should_fail_request(part, bio->bi_iter.bi_size) ||
             should_fail_request(&part_to_disk(part)->part0,
-                               bio->bi_size))
+                               bio->bi_iter.bi_size))
                 goto end_io;
  
         /*
@@ -1863,7 +1894,7 @@ void submit_bio(int rw, struct bio *bio)
                 if (rw & WRITE) {
                         count_vm_events(PGPGOUT, count);
                 } else {
-                       task_io_account_read(bio->bi_size);
+                       task_io_account_read(bio->bi_iter.bi_size);
                         count_vm_events(PGPGIN, count);
                 }
  
@@ -1872,7 +1903,7 @@ void submit_bio(int rw, struct bio *bio)
                         printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
                         current->comm, task_pid_nr(current),
                                 (rw & WRITE) ? "WRITE" : "READ",
-                               (unsigned long long)bio->bi_sector,
+                               (unsigned long long)bio->bi_iter.bi_sector,
                                 bdevname(bio->bi_bdev, b),
                                 count);
                 }
@@ -2005,7 +2036,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
         for (bio = rq->bio; bio; bio = bio->bi_next) {
                 if ((bio->bi_rw & ff) != ff)
                         break;
-               bytes += bio->bi_size;
+               bytes += bio->bi_iter.bi_size;
         }
  
         /* this could lead to infinite loop */
@@ -2014,7 +2045,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
  }
  EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
  
-static void blk_account_io_completion(struct request *req, unsigned int bytes)
+void blk_account_io_completion(struct request *req, unsigned int bytes)
  {
         if (blk_do_io_stat(req)) {
                 const int rw = rq_data_dir(req);
@@ -2028,7 +2059,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
         }
  }
  
-static void blk_account_io_done(struct request *req)
+void blk_account_io_done(struct request *req)
  {
         /*
          * Account IO completion.  flush_rq isn't accounted as a
@@ -2076,6 +2107,42 @@ static inline struct request *blk_pm_peek_request(struct request_queue *q,
  }
  #endif
  
+void blk_account_io_start(struct request *rq, bool new_io)
+{
+       const int rw = rq_data_dir(rq);
+       struct hd_struct *part;
+       int cpu;
+
+       if (!blk_do_io_stat(rq))
+               return;
+
+       cpu = part_stat_lock();
+
+       if (new_io) {
+               /* New request: look up the partition and pin it. */
+               part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+               if (!hd_struct_try_get(part)) {
+                       /*
+                        * The partition is already being removed, so
+                        * account the request on the whole disk only.
+                        * part0 is never deleted, but taking a reference
+                        * lets it be treated like any other partition.
+                        */
+                       part = &rq->rq_disk->part0;
+                       hd_struct_get(part);
+               }
+               part_round_stats(cpu, part);
+               part_inc_in_flight(part, rw);
+               rq->part = part;
+       } else {
+               /* Merge into an existing request: count it on rq->part. */
+               part = rq->part;
+               part_stat_inc(cpu, part, merges[rw]);
+       }
+
+       part_stat_unlock();
+}
+
  /**
   * blk_peek_request - peek at the top of a request queue
   * @q: request queue to peek at
@@ -2227,6 +2294,7 @@ void blk_start_request(struct request *req)
         if (unlikely(blk_bidi_rq(req)))
                 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
  
+       BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
         blk_add_timer(req);
  }
  EXPORT_SYMBOL(blk_start_request);
@@ -2339,9 +2407,9 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
         total_bytes = 0;
         while (req->bio) {
                 struct bio *bio = req->bio;
-               unsigned bio_bytes = min(bio->bi_size, nr_bytes);
+               unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
  
-               if (bio_bytes == bio->bi_size)
+               if (bio_bytes == bio->bi_iter.bi_size)
                         req->bio = bio->bi_next;
  
                 req_bio_endio(req, bio, bio_bytes, error);
@@ -2451,7 +2519,6 @@ static void blk_finish_request(struct request *req, int error)
         if (req->cmd_flags & REQ_DONTPREP)
                 blk_unprep_request(req);
  
-
         blk_account_io_done(req);
  
         if (req->end_io)
@@ -2690,7 +2757,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                 rq->nr_phys_segments = bio_phys_segments(q, bio);
                 rq->buffer = bio_data(bio);
         }
-       rq->__data_len = bio->bi_size;
+       rq->__data_len = bio->bi_iter.bi_size;
         rq->bio = rq->biotail = bio;
  
         if (bio->bi_bdev)
@@ -2708,10 +2775,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
  void rq_flush_dcache_pages(struct request *rq)
  {
         struct req_iterator iter;
-       struct bio_vec *bvec;
+       struct bio_vec bvec;
  
         rq_for_each_segment(bvec, rq, iter)
-               flush_dcache_page(bvec->bv_page);
+               flush_dcache_page(bvec.bv_page);
  }
  EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
  #endif
@@ -2873,6 +2940,7 @@ void blk_start_plug(struct blk_plug *plug)
  
         plug->magic = PLUG_MAGIC;
         INIT_LIST_HEAD(&plug->list);
+       INIT_LIST_HEAD(&plug->mq_list);
         INIT_LIST_HEAD(&plug->cb_list);
  
         /*
@@ -2970,6 +3038,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
         BUG_ON(plug->magic != PLUG_MAGIC);
  
         flush_plug_callbacks(plug, from_schedule);
+
+       if (!list_empty(&plug->mq_list))
+               blk_mq_flush_plug_list(plug, from_schedule);
+
         if (list_empty(&plug->list))
                 return;