X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=block%2Fblk-core.c;h=853f92749202cbfe5b252d557f667b7f970ac872;hb=110720fe57faa9aa8954ac40f56607f1184378b3;hp=0a00e4ecf87cae37a3d310c8dfd32869ac24c161;hpb=fc582aef7dcc27a7120cf232c1e76c569c7b6eab;p=karo-tx-linux.git diff --git a/block/blk-core.c b/block/blk-core.c index 0a00e4ecf87c..853f92749202 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include "blk.h" #include "blk-cgroup.h" +#include "blk-mq.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -48,7 +50,7 @@ DEFINE_IDA(blk_queue_ida); /* * For the allocated request tables */ -static struct kmem_cache *request_cachep; +struct kmem_cache *request_cachep = NULL; /* * For queue allocation @@ -60,42 +62,6 @@ struct kmem_cache *blk_requestq_cachep; */ static struct workqueue_struct *kblockd_workqueue; -static void drive_stat_acct(struct request *rq, int new_io) -{ - struct hd_struct *part; - int rw = rq_data_dir(rq); - int cpu; - - if (!blk_do_io_stat(rq)) - return; - - cpu = part_stat_lock(); - - if (!new_io) { - part = rq->part; - part_stat_inc(cpu, part, merges[rw]); - } else { - part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); - if (!hd_struct_try_get(part)) { - /* - * The partition is already being removed, - * the request will be accounted on the disk only - * - * We take a reference on disk->part0 although that - * partition will never be deleted, so we can treat - * it as any other partition. - */ - part = &rq->rq_disk->part0; - hd_struct_get(part); - } - part_round_stats(cpu, part); - part_inc_in_flight(part, rw); - rq->part = part; - } - - part_stat_unlock(); -} - void blk_queue_congestion_threshold(struct request_queue *q) { int nr; @@ -145,7 +111,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->cmd = rq->__cmd; rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; - rq->ref_count = 1; rq->start_time = jiffies; set_start_time_ns(rq); rq->part = NULL; @@ -166,7 +131,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ - if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) + if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) bio_endio(bio, error); } @@ -174,9 +139,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg) { int bit; - printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, + printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg, rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, - rq->cmd_flags); + (unsigned long long) rq->cmd_flags); printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", (unsigned long long)blk_rq_pos(rq), @@ -281,7 +246,16 @@ EXPORT_SYMBOL(blk_stop_queue); void blk_sync_queue(struct request_queue *q) { del_timer_sync(&q->timeout); - cancel_delayed_work_sync(&q->delay_work); + + if (q->mq_ops) { + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + cancel_delayed_work_sync(&hctx->delayed_work); + } else { + cancel_delayed_work_sync(&q->delay_work); + } } EXPORT_SYMBOL(blk_sync_queue); @@ -533,8 +507,13 @@ void blk_cleanup_queue(struct request_queue *q) * Drain all requests queued before DYING marking. Set DEAD flag to * prevent that q->request_fn() gets invoked after draining finished. */ - spin_lock_irq(lock); - __blk_drain_queue(q, true); + if (q->mq_ops) { + blk_mq_drain_queue(q); + spin_lock_irq(lock); + } else { + spin_lock_irq(lock); + __blk_drain_queue(q, true); + } queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); @@ -595,9 +574,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q) return NULL; + if (percpu_counter_init(&q->mq_usage_counter, 0)) + goto fail_q; + q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); if (q->id < 0) - goto fail_q; + goto fail_c; q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; @@ -644,13 +626,19 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) q->bypass_depth = 1; __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + init_waitqueue_head(&q->mq_freeze_wq); + if (blkcg_init_queue(q)) - goto fail_id; + goto fail_bdi; return q; +fail_bdi: + bdi_destroy(&q->backing_dev_info); fail_id: ida_simple_remove(&blk_queue_ida, q->id); +fail_c: + percpu_counter_destroy(&q->mq_usage_counter); fail_q: kmem_cache_free(blk_requestq_cachep, q); return NULL; @@ -705,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) if (!uninit_q) return NULL; + uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); + if (!uninit_q->flush_rq) + goto out_cleanup_queue; + q = blk_init_allocated_queue(uninit_q, rfn, lock); if (!q) - blk_cleanup_queue(uninit_q); - + goto out_free_flush_rq; return q; + +out_free_flush_rq: + kfree(uninit_q->flush_rq); +out_cleanup_queue: + blk_cleanup_queue(uninit_q); + return NULL; } EXPORT_SYMBOL(blk_init_queue_node); @@ -739,9 +736,17 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, q->sg_reserved_size = INT_MAX; + /* Protect q->elevator from elevator_change */ + mutex_lock(&q->sysfs_lock); + /* init elevator */ - if (elevator_init(q, NULL)) + if (elevator_init(q, NULL)) { + mutex_unlock(&q->sysfs_lock); return NULL; + } + + mutex_unlock(&q->sysfs_lock); + return q; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -1109,7 +1114,8 @@ retry: goto retry; } -struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) +static struct request *blk_old_get_request(struct request_queue *q, int rw, + gfp_t gfp_mask) { struct request *rq; @@ -1126,6 +1132,14 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) return rq; } + +struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) +{ + if (q->mq_ops) + return blk_mq_alloc_request(q, rw, gfp_mask); + else + return blk_old_get_request(q, rw, gfp_mask); +} EXPORT_SYMBOL(blk_get_request); /** @@ -1211,7 +1225,7 @@ EXPORT_SYMBOL(blk_requeue_request); static void add_acct_request(struct request_queue *q, struct request *rq, int where) { - drive_stat_acct(rq, 1); + blk_account_io_start(rq, true); __elv_add_request(q, rq, where); } @@ -1272,8 +1286,11 @@ void __blk_put_request(struct request_queue *q, struct request *req) { if (unlikely(!q)) return; - if (unlikely(--req->ref_count)) + + if (q->mq_ops) { + blk_mq_free_request(req); return; + } blk_pm_put_request(req); @@ -1302,12 +1319,17 @@ EXPORT_SYMBOL_GPL(__blk_put_request); void blk_put_request(struct request *req) { - unsigned long flags; struct request_queue *q = req->q; - spin_lock_irqsave(q->queue_lock, flags); - __blk_put_request(q, req); - spin_unlock_irqrestore(q->queue_lock, flags); + if (q->mq_ops) + blk_mq_free_request(req); + else { + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + __blk_put_request(q, req); + spin_unlock_irqrestore(q->queue_lock, flags); + } } EXPORT_SYMBOL(blk_put_request); @@ -1333,7 +1355,7 @@ void blk_add_request_payload(struct request *rq, struct page *page, bio->bi_io_vec->bv_offset = 0; bio->bi_io_vec->bv_len = len; - bio->bi_size = len; + bio->bi_iter.bi_size = len; bio->bi_vcnt = 1; bio->bi_phys_segments = 1; @@ -1343,8 +1365,8 @@ void blk_add_request_payload(struct request *rq, struct page *page, } EXPORT_SYMBOL_GPL(blk_add_request_payload); -static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, - struct bio *bio) +bool bio_attempt_back_merge(struct request_queue *q, struct request *req, + struct bio *bio) { const int ff = bio->bi_rw & REQ_FAILFAST_MASK; @@ -1358,15 +1380,15 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, req->biotail->bi_next = bio; req->biotail = bio; - req->__data_len += bio->bi_size; + req->__data_len += bio->bi_iter.bi_size; req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); - drive_stat_acct(req, 0); + blk_account_io_start(req, false); return true; } -static bool bio_attempt_front_merge(struct request_queue *q, - struct request *req, struct bio *bio) +bool bio_attempt_front_merge(struct request_queue *q, struct request *req, + struct bio *bio) { const int ff = bio->bi_rw & REQ_FAILFAST_MASK; @@ -1387,16 +1409,16 @@ static bool bio_attempt_front_merge(struct request_queue *q, * not touch req->buffer either... */ req->buffer = bio_data(bio); - req->__sector = bio->bi_sector; - req->__data_len += bio->bi_size; + req->__sector = bio->bi_iter.bi_sector; + req->__data_len += bio->bi_iter.bi_size; req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); - drive_stat_acct(req, 0); + blk_account_io_start(req, false); return true; } /** - * attempt_plug_merge - try to merge with %current's plugged list + * blk_attempt_plug_merge - try to merge with %current's plugged list * @q: request_queue new bio is being queued at * @bio: new bio being queued * @request_count: out parameter for number of traversed plugged requests @@ -1412,19 +1434,28 @@ static bool bio_attempt_front_merge(struct request_queue *q, * reliable access to the elevator outside queue lock. Only check basic * merging parameters without querying the elevator. */ -static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count) +bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, + unsigned int *request_count) { struct blk_plug *plug; struct request *rq; bool ret = false; + struct list_head *plug_list; + + if (blk_queue_nomerges(q)) + goto out; plug = current->plug; if (!plug) goto out; *request_count = 0; - list_for_each_entry_reverse(rq, &plug->list, queuelist) { + if (q->mq_ops) + plug_list = &plug->mq_list; + else + plug_list = &plug->list; + + list_for_each_entry_reverse(rq, plug_list, queuelist) { int el_ret; if (rq->q == q) @@ -1457,7 +1488,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_FAILFAST_MASK; req->errors = 0; - req->__sector = bio->bi_sector; + req->__sector = bio->bi_iter.bi_sector; req->ioprio = bio_prio(bio); blk_rq_bio_prep(req->q, req, bio); } @@ -1492,7 +1523,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. */ - if (attempt_plug_merge(q, bio, &request_count)) + if (blk_attempt_plug_merge(q, bio, &request_count)) return; spin_lock_irq(q->queue_lock); @@ -1560,7 +1591,7 @@ get_rq: } } list_add_tail(&req->queuelist, &plug->list); - drive_stat_acct(req, 1); + blk_account_io_start(req, true); } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); @@ -1581,12 +1612,12 @@ static inline void blk_partition_remap(struct bio *bio) if (bio_sectors(bio) && bdev != bdev->bd_contains) { struct hd_struct *p = bdev->bd_part; - bio->bi_sector += p->start_sect; + bio->bi_iter.bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, bdev->bd_dev, - bio->bi_sector - p->start_sect); + bio->bi_iter.bi_sector - p->start_sect); } } @@ -1652,7 +1683,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) /* Test device or partition size, when known. */ maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; if (maxsector) { - sector_t sector = bio->bi_sector; + sector_t sector = bio->bi_iter.bi_sector; if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { /* @@ -1688,7 +1719,7 @@ generic_make_request_checks(struct bio *bio) "generic_make_request: Trying to access " "nonexistent block-device %s (%Lu)\n", bdevname(bio->bi_bdev, b), - (long long) bio->bi_sector); + (long long) bio->bi_iter.bi_sector); goto end_io; } @@ -1702,9 +1733,9 @@ generic_make_request_checks(struct bio *bio) } part = bio->bi_bdev->bd_part; - if (should_fail_request(part, bio->bi_size) || + if (should_fail_request(part, bio->bi_iter.bi_size) || should_fail_request(&part_to_disk(part)->part0, - bio->bi_size)) + bio->bi_iter.bi_size)) goto end_io; /* @@ -1863,7 +1894,7 @@ void submit_bio(int rw, struct bio *bio) if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { - task_io_account_read(bio->bi_size); + task_io_account_read(bio->bi_iter.bi_size); count_vm_events(PGPGIN, count); } @@ -1872,7 +1903,7 @@ void submit_bio(int rw, struct bio *bio) printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", current->comm, task_pid_nr(current), (rw & WRITE) ? "WRITE" : "READ", - (unsigned long long)bio->bi_sector, + (unsigned long long)bio->bi_iter.bi_sector, bdevname(bio->bi_bdev, b), count); } @@ -2005,7 +2036,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq) for (bio = rq->bio; bio; bio = bio->bi_next) { if ((bio->bi_rw & ff) != ff) break; - bytes += bio->bi_size; + bytes += bio->bi_iter.bi_size; } /* this could lead to infinite loop */ @@ -2014,7 +2045,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq) } EXPORT_SYMBOL_GPL(blk_rq_err_bytes); -static void blk_account_io_completion(struct request *req, unsigned int bytes) +void blk_account_io_completion(struct request *req, unsigned int bytes) { if (blk_do_io_stat(req)) { const int rw = rq_data_dir(req); @@ -2028,7 +2059,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) } } -static void blk_account_io_done(struct request *req) +void blk_account_io_done(struct request *req) { /* * Account IO completion. flush_rq isn't accounted as a @@ -2076,6 +2107,42 @@ static inline struct request *blk_pm_peek_request(struct request_queue *q, } #endif +void blk_account_io_start(struct request *rq, bool new_io) +{ + struct hd_struct *part; + int rw = rq_data_dir(rq); + int cpu; + + if (!blk_do_io_stat(rq)) + return; + + cpu = part_stat_lock(); + + if (!new_io) { + part = rq->part; + part_stat_inc(cpu, part, merges[rw]); + } else { + part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); + if (!hd_struct_try_get(part)) { + /* + * The partition is already being removed, + * the request will be accounted on the disk only + * + * We take a reference on disk->part0 although that + * partition will never be deleted, so we can treat + * it as any other partition. + */ + part = &rq->rq_disk->part0; + hd_struct_get(part); + } + part_round_stats(cpu, part); + part_inc_in_flight(part, rw); + rq->part = part; + } + + part_stat_unlock(); +} + /** * blk_peek_request - peek at the top of a request queue * @q: request queue to peek at @@ -2227,6 +2294,7 @@ void blk_start_request(struct request *req) if (unlikely(blk_bidi_rq(req))) req->next_rq->resid_len = blk_rq_bytes(req->next_rq); + BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); blk_add_timer(req); } EXPORT_SYMBOL(blk_start_request); @@ -2339,9 +2407,9 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) total_bytes = 0; while (req->bio) { struct bio *bio = req->bio; - unsigned bio_bytes = min(bio->bi_size, nr_bytes); + unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); - if (bio_bytes == bio->bi_size) + if (bio_bytes == bio->bi_iter.bi_size) req->bio = bio->bi_next; req_bio_endio(req, bio, bio_bytes, error); @@ -2451,7 +2519,6 @@ static void blk_finish_request(struct request *req, int error) if (req->cmd_flags & REQ_DONTPREP) blk_unprep_request(req); - blk_account_io_done(req); if (req->end_io) @@ -2690,7 +2757,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->nr_phys_segments = bio_phys_segments(q, bio); rq->buffer = bio_data(bio); } - rq->__data_len = bio->bi_size; + rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; if (bio->bi_bdev) @@ -2708,10 +2775,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, void rq_flush_dcache_pages(struct request *rq) { struct req_iterator iter; - struct bio_vec *bvec; + struct bio_vec bvec; rq_for_each_segment(bvec, rq, iter) - flush_dcache_page(bvec->bv_page); + flush_dcache_page(bvec.bv_page); } EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); #endif @@ -2873,6 +2940,7 @@ void blk_start_plug(struct blk_plug *plug) plug->magic = PLUG_MAGIC; INIT_LIST_HEAD(&plug->list); + INIT_LIST_HEAD(&plug->mq_list); INIT_LIST_HEAD(&plug->cb_list); /* @@ -2970,6 +3038,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(plug->magic != PLUG_MAGIC); flush_plug_callbacks(plug, from_schedule); + + if (!list_empty(&plug->mq_list)) + blk_mq_flush_plug_list(plug, from_schedule); + if (list_empty(&plug->list)) return;