]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - block/blk-core.c
Merge tag 'pwm_pxa_for_v3.14' of https://git.kernel.org/pub/scm/linux/kernel/git...
[karo-tx-linux.git] / block / blk-core.c
index 0a00e4ecf87cae37a3d310c8dfd32869ac24c161..853f92749202cbfe5b252d557f667b7f970ac872 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
@@ -37,6 +38,7 @@
 
 #include "blk.h"
 #include "blk-cgroup.h"
+#include "blk-mq.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -48,7 +50,7 @@ DEFINE_IDA(blk_queue_ida);
 /*
  * For the allocated request tables
  */
-static struct kmem_cache *request_cachep;
+struct kmem_cache *request_cachep = NULL;
 
 /*
  * For queue allocation
@@ -60,42 +62,6 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;
 
-static void drive_stat_acct(struct request *rq, int new_io)
-{
-       struct hd_struct *part;
-       int rw = rq_data_dir(rq);
-       int cpu;
-
-       if (!blk_do_io_stat(rq))
-               return;
-
-       cpu = part_stat_lock();
-
-       if (!new_io) {
-               part = rq->part;
-               part_stat_inc(cpu, part, merges[rw]);
-       } else {
-               part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
-               if (!hd_struct_try_get(part)) {
-                       /*
-                        * The partition is already being removed,
-                        * the request will be accounted on the disk only
-                        *
-                        * We take a reference on disk->part0 although that
-                        * partition will never be deleted, so we can treat
-                        * it as any other partition.
-                        */
-                       part = &rq->rq_disk->part0;
-                       hd_struct_get(part);
-               }
-               part_round_stats(cpu, part);
-               part_inc_in_flight(part, rw);
-               rq->part = part;
-       }
-
-       part_stat_unlock();
-}
-
 void blk_queue_congestion_threshold(struct request_queue *q)
 {
        int nr;
@@ -145,7 +111,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
        rq->cmd = rq->__cmd;
        rq->cmd_len = BLK_MAX_CDB;
        rq->tag = -1;
-       rq->ref_count = 1;
        rq->start_time = jiffies;
        set_start_time_ns(rq);
        rq->part = NULL;
@@ -166,7 +131,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
        bio_advance(bio, nbytes);
 
        /* don't actually finish bio if it's part of flush sequence */
-       if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+       if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
                bio_endio(bio, error);
 }
 
@@ -174,9 +139,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 {
        int bit;
 
-       printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
+       printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
                rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
-               rq->cmd_flags);
+               (unsigned long long) rq->cmd_flags);
 
        printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
               (unsigned long long)blk_rq_pos(rq),
@@ -281,7 +246,16 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
        del_timer_sync(&q->timeout);
-       cancel_delayed_work_sync(&q->delay_work);
+
+       if (q->mq_ops) {
+               struct blk_mq_hw_ctx *hctx;
+               int i;
+
+               queue_for_each_hw_ctx(q, hctx, i)
+                       cancel_delayed_work_sync(&hctx->delayed_work);
+       } else {
+               cancel_delayed_work_sync(&q->delay_work);
+       }
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -533,8 +507,13 @@ void blk_cleanup_queue(struct request_queue *q)
         * Drain all requests queued before DYING marking. Set DEAD flag to
         * prevent that q->request_fn() gets invoked after draining finished.
         */
-       spin_lock_irq(lock);
-       __blk_drain_queue(q, true);
+       if (q->mq_ops) {
+               blk_mq_drain_queue(q);
+               spin_lock_irq(lock);
+       } else {
+               spin_lock_irq(lock);
+               __blk_drain_queue(q, true);
+       }
        queue_flag_set(QUEUE_FLAG_DEAD, q);
        spin_unlock_irq(lock);
 
@@ -595,9 +574,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        if (!q)
                return NULL;
 
+       if (percpu_counter_init(&q->mq_usage_counter, 0))
+               goto fail_q;
+
        q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
        if (q->id < 0)
-               goto fail_q;
+               goto fail_c;
 
        q->backing_dev_info.ra_pages =
                        (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -644,13 +626,19 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        q->bypass_depth = 1;
        __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
 
+       init_waitqueue_head(&q->mq_freeze_wq);
+
        if (blkcg_init_queue(q))
-               goto fail_id;
+               goto fail_bdi;
 
        return q;
 
+fail_bdi:
+       bdi_destroy(&q->backing_dev_info);
 fail_id:
        ida_simple_remove(&blk_queue_ida, q->id);
+fail_c:
+       percpu_counter_destroy(&q->mq_usage_counter);
 fail_q:
        kmem_cache_free(blk_requestq_cachep, q);
        return NULL;
@@ -705,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
        if (!uninit_q)
                return NULL;
 
+       uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
+       if (!uninit_q->flush_rq)
+               goto out_cleanup_queue;
+
        q = blk_init_allocated_queue(uninit_q, rfn, lock);
        if (!q)
-               blk_cleanup_queue(uninit_q);
-
+               goto out_free_flush_rq;
        return q;
+
+out_free_flush_rq:
+       kfree(uninit_q->flush_rq);
+out_cleanup_queue:
+       blk_cleanup_queue(uninit_q);
+       return NULL;
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
@@ -739,9 +736,17 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 
        q->sg_reserved_size = INT_MAX;
 
+       /* Protect q->elevator from elevator_change */
+       mutex_lock(&q->sysfs_lock);
+
        /* init elevator */
-       if (elevator_init(q, NULL))
+       if (elevator_init(q, NULL)) {
+               mutex_unlock(&q->sysfs_lock);
                return NULL;
+       }
+
+       mutex_unlock(&q->sysfs_lock);
+
        return q;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1109,7 +1114,8 @@ retry:
        goto retry;
 }
 
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q, int rw,
+               gfp_t gfp_mask)
 {
        struct request *rq;
 
@@ -1126,6 +1132,14 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 
        return rq;
 }
+
+struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+{
+       if (q->mq_ops)
+               return blk_mq_alloc_request(q, rw, gfp_mask);
+       else
+               return blk_old_get_request(q, rw, gfp_mask);
+}
 EXPORT_SYMBOL(blk_get_request);
 
 /**
@@ -1211,7 +1225,7 @@ EXPORT_SYMBOL(blk_requeue_request);
 static void add_acct_request(struct request_queue *q, struct request *rq,
                             int where)
 {
-       drive_stat_acct(rq, 1);
+       blk_account_io_start(rq, true);
        __elv_add_request(q, rq, where);
 }
 
@@ -1272,8 +1286,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 {
        if (unlikely(!q))
                return;
-       if (unlikely(--req->ref_count))
+
+       if (q->mq_ops) {
+               blk_mq_free_request(req);
                return;
+       }
 
        blk_pm_put_request(req);
 
@@ -1302,12 +1319,17 @@ EXPORT_SYMBOL_GPL(__blk_put_request);
 
 void blk_put_request(struct request *req)
 {
-       unsigned long flags;
        struct request_queue *q = req->q;
 
-       spin_lock_irqsave(q->queue_lock, flags);
-       __blk_put_request(q, req);
-       spin_unlock_irqrestore(q->queue_lock, flags);
+       if (q->mq_ops)
+               blk_mq_free_request(req);
+       else {
+               unsigned long flags;
+
+               spin_lock_irqsave(q->queue_lock, flags);
+               __blk_put_request(q, req);
+               spin_unlock_irqrestore(q->queue_lock, flags);
+       }
 }
 EXPORT_SYMBOL(blk_put_request);
 
@@ -1333,7 +1355,7 @@ void blk_add_request_payload(struct request *rq, struct page *page,
        bio->bi_io_vec->bv_offset = 0;
        bio->bi_io_vec->bv_len = len;
 
-       bio->bi_size = len;
+       bio->bi_iter.bi_size = len;
        bio->bi_vcnt = 1;
        bio->bi_phys_segments = 1;
 
@@ -1343,8 +1365,8 @@ void blk_add_request_payload(struct request *rq, struct page *page,
 }
 EXPORT_SYMBOL_GPL(blk_add_request_payload);
 
-static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
-                                  struct bio *bio)
+bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
+                           struct bio *bio)
 {
        const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 
@@ -1358,15 +1380,15 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 
        req->biotail->bi_next = bio;
        req->biotail = bio;
-       req->__data_len += bio->bi_size;
+       req->__data_len += bio->bi_iter.bi_size;
        req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
-       drive_stat_acct(req, 0);
+       blk_account_io_start(req, false);
        return true;
 }
 
-static bool bio_attempt_front_merge(struct request_queue *q,
-                                   struct request *req, struct bio *bio)
+bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
+                            struct bio *bio)
 {
        const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 
@@ -1387,16 +1409,16 @@ static bool bio_attempt_front_merge(struct request_queue *q,
         * not touch req->buffer either...
         */
        req->buffer = bio_data(bio);
-       req->__sector = bio->bi_sector;
-       req->__data_len += bio->bi_size;
+       req->__sector = bio->bi_iter.bi_sector;
+       req->__data_len += bio->bi_iter.bi_size;
        req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
-       drive_stat_acct(req, 0);
+       blk_account_io_start(req, false);
        return true;
 }
 
 /**
- * attempt_plug_merge - try to merge with %current's plugged list
+ * blk_attempt_plug_merge - try to merge with %current's plugged list
  * @q: request_queue new bio is being queued at
  * @bio: new bio being queued
  * @request_count: out parameter for number of traversed plugged requests
@@ -1412,19 +1434,28 @@ static bool bio_attempt_front_merge(struct request_queue *q,
  * reliable access to the elevator outside queue lock.  Only check basic
  * merging parameters without querying the elevator.
  */
-static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
-                              unsigned int *request_count)
+bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
+                           unsigned int *request_count)
 {
        struct blk_plug *plug;
        struct request *rq;
        bool ret = false;
+       struct list_head *plug_list;
+
+       if (blk_queue_nomerges(q))
+               goto out;
 
        plug = current->plug;
        if (!plug)
                goto out;
        *request_count = 0;
 
-       list_for_each_entry_reverse(rq, &plug->list, queuelist) {
+       if (q->mq_ops)
+               plug_list = &plug->mq_list;
+       else
+               plug_list = &plug->list;
+
+       list_for_each_entry_reverse(rq, plug_list, queuelist) {
                int el_ret;
 
                if (rq->q == q)
@@ -1457,7 +1488,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
                req->cmd_flags |= REQ_FAILFAST_MASK;
 
        req->errors = 0;
-       req->__sector = bio->bi_sector;
+       req->__sector = bio->bi_iter.bi_sector;
        req->ioprio = bio_prio(bio);
        blk_rq_bio_prep(req->q, req, bio);
 }
@@ -1492,7 +1523,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
         * Check if we can merge with the plugged list before grabbing
         * any locks.
         */
-       if (attempt_plug_merge(q, bio, &request_count))
+       if (blk_attempt_plug_merge(q, bio, &request_count))
                return;
 
        spin_lock_irq(q->queue_lock);
@@ -1560,7 +1591,7 @@ get_rq:
                        }
                }
                list_add_tail(&req->queuelist, &plug->list);
-               drive_stat_acct(req, 1);
+               blk_account_io_start(req, true);
        } else {
                spin_lock_irq(q->queue_lock);
                add_acct_request(q, req, where);
@@ -1581,12 +1612,12 @@ static inline void blk_partition_remap(struct bio *bio)
        if (bio_sectors(bio) && bdev != bdev->bd_contains) {
                struct hd_struct *p = bdev->bd_part;
 
-               bio->bi_sector += p->start_sect;
+               bio->bi_iter.bi_sector += p->start_sect;
                bio->bi_bdev = bdev->bd_contains;
 
                trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
                                      bdev->bd_dev,
-                                     bio->bi_sector - p->start_sect);
+                                     bio->bi_iter.bi_sector - p->start_sect);
        }
 }
 
@@ -1652,7 +1683,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
        /* Test device or partition size, when known. */
        maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
        if (maxsector) {
-               sector_t sector = bio->bi_sector;
+               sector_t sector = bio->bi_iter.bi_sector;
 
                if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
                        /*
@@ -1688,7 +1719,7 @@ generic_make_request_checks(struct bio *bio)
                       "generic_make_request: Trying to access "
                        "nonexistent block-device %s (%Lu)\n",
                        bdevname(bio->bi_bdev, b),
-                       (long long) bio->bi_sector);
+                       (long long) bio->bi_iter.bi_sector);
                goto end_io;
        }
 
@@ -1702,9 +1733,9 @@ generic_make_request_checks(struct bio *bio)
        }
 
        part = bio->bi_bdev->bd_part;
-       if (should_fail_request(part, bio->bi_size) ||
+       if (should_fail_request(part, bio->bi_iter.bi_size) ||
            should_fail_request(&part_to_disk(part)->part0,
-                               bio->bi_size))
+                               bio->bi_iter.bi_size))
                goto end_io;
 
        /*
@@ -1863,7 +1894,7 @@ void submit_bio(int rw, struct bio *bio)
                if (rw & WRITE) {
                        count_vm_events(PGPGOUT, count);
                } else {
-                       task_io_account_read(bio->bi_size);
+                       task_io_account_read(bio->bi_iter.bi_size);
                        count_vm_events(PGPGIN, count);
                }
 
@@ -1872,7 +1903,7 @@ void submit_bio(int rw, struct bio *bio)
                        printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
                        current->comm, task_pid_nr(current),
                                (rw & WRITE) ? "WRITE" : "READ",
-                               (unsigned long long)bio->bi_sector,
+                               (unsigned long long)bio->bi_iter.bi_sector,
                                bdevname(bio->bi_bdev, b),
                                count);
                }
@@ -2005,7 +2036,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
        for (bio = rq->bio; bio; bio = bio->bi_next) {
                if ((bio->bi_rw & ff) != ff)
                        break;
-               bytes += bio->bi_size;
+               bytes += bio->bi_iter.bi_size;
        }
 
        /* this could lead to infinite loop */
@@ -2014,7 +2045,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 
-static void blk_account_io_completion(struct request *req, unsigned int bytes)
+void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
        if (blk_do_io_stat(req)) {
                const int rw = rq_data_dir(req);
@@ -2028,7 +2059,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
        }
 }
 
-static void blk_account_io_done(struct request *req)
+void blk_account_io_done(struct request *req)
 {
        /*
         * Account IO completion.  flush_rq isn't accounted as a
@@ -2076,6 +2107,42 @@ static inline struct request *blk_pm_peek_request(struct request_queue *q,
 }
 #endif
 
+void blk_account_io_start(struct request *rq, bool new_io)
+{
+       struct hd_struct *part;
+       int rw = rq_data_dir(rq);
+       int cpu;
+
+       if (!blk_do_io_stat(rq))
+               return;
+
+       cpu = part_stat_lock();
+
+       if (!new_io) {
+               part = rq->part;
+               part_stat_inc(cpu, part, merges[rw]);
+       } else {
+               part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+               if (!hd_struct_try_get(part)) {
+                       /*
+                        * The partition is already being removed,
+                        * the request will be accounted on the disk only
+                        *
+                        * We take a reference on disk->part0 although that
+                        * partition will never be deleted, so we can treat
+                        * it as any other partition.
+                        */
+                       part = &rq->rq_disk->part0;
+                       hd_struct_get(part);
+               }
+               part_round_stats(cpu, part);
+               part_inc_in_flight(part, rw);
+               rq->part = part;
+       }
+
+       part_stat_unlock();
+}
+
 /**
  * blk_peek_request - peek at the top of a request queue
  * @q: request queue to peek at
@@ -2227,6 +2294,7 @@ void blk_start_request(struct request *req)
        if (unlikely(blk_bidi_rq(req)))
                req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
 
+       BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
        blk_add_timer(req);
 }
 EXPORT_SYMBOL(blk_start_request);
@@ -2339,9 +2407,9 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
        total_bytes = 0;
        while (req->bio) {
                struct bio *bio = req->bio;
-               unsigned bio_bytes = min(bio->bi_size, nr_bytes);
+               unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-               if (bio_bytes == bio->bi_size)
+               if (bio_bytes == bio->bi_iter.bi_size)
                        req->bio = bio->bi_next;
 
                req_bio_endio(req, bio, bio_bytes, error);
@@ -2451,7 +2519,6 @@ static void blk_finish_request(struct request *req, int error)
        if (req->cmd_flags & REQ_DONTPREP)
                blk_unprep_request(req);
 
-
        blk_account_io_done(req);
 
        if (req->end_io)
@@ -2690,7 +2757,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                rq->nr_phys_segments = bio_phys_segments(q, bio);
                rq->buffer = bio_data(bio);
        }
-       rq->__data_len = bio->bi_size;
+       rq->__data_len = bio->bi_iter.bi_size;
        rq->bio = rq->biotail = bio;
 
        if (bio->bi_bdev)
@@ -2708,10 +2775,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 void rq_flush_dcache_pages(struct request *rq)
 {
        struct req_iterator iter;
-       struct bio_vec *bvec;
+       struct bio_vec bvec;
 
        rq_for_each_segment(bvec, rq, iter)
-               flush_dcache_page(bvec->bv_page);
+               flush_dcache_page(bvec.bv_page);
 }
 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
 #endif
@@ -2873,6 +2940,7 @@ void blk_start_plug(struct blk_plug *plug)
 
        plug->magic = PLUG_MAGIC;
        INIT_LIST_HEAD(&plug->list);
+       INIT_LIST_HEAD(&plug->mq_list);
        INIT_LIST_HEAD(&plug->cb_list);
 
        /*
@@ -2970,6 +3038,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
        BUG_ON(plug->magic != PLUG_MAGIC);
 
        flush_plug_callbacks(plug, from_schedule);
+
+       if (!list_empty(&plug->mq_list))
+               blk_mq_flush_plug_list(plug, from_schedule);
+
        if (list_empty(&plug->list))
                return;