X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=block%2Fll_rw_blk.c;h=6b5173ac81313d8adb5c1d7b521559f565bb209b;hb=1abd727ed7abf5c19e7d1760671475cbecbccb0e;hp=a4ff3271d4a88b984b3da89821cbc6a2fd65885a;hpb=e18b890bb0881bbab6f4f1a6cd20d9c60d66b003;p=mv-sheeva.git

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index a4ff3271d4a..6b5173ac813 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -25,9 +25,11 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 
 /*
  * for max sense size
@@ -127,13 +129,6 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
 
-void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
-{
-	q->activity_fn = fn;
-	q->activity_data = data;
-}
-EXPORT_SYMBOL(blk_queue_activity_fn);
-
 /**
  * blk_queue_prep_rq - set a prepare_request function for queue
  * @q:		queue
@@ -236,8 +231,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	 * by default assume old behaviour and bounce for any highmem page
 	 */
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
-
-	blk_queue_activity_fn(q, NULL, NULL);
 }
 
 EXPORT_SYMBOL(blk_queue_make_request);
@@ -1228,7 +1221,7 @@ void blk_recount_segments(request_queue_t *q, struct bio *bio)
 		 * considered part of another segment, since that might
 		 * change with the bounce page.
 		 */
-		high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
+		high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
 		if (high || highprv)
 			goto new_hw_segment;
 		if (cluster) {
@@ -1271,7 +1264,7 @@ new_hw_segment:
 	bio->bi_hw_segments = nr_hw_segs;
 	bio->bi_flags |= (1 << BIO_SEG_VALID);
 }
-
+EXPORT_SYMBOL(blk_recount_segments);
 
 static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
 				   struct bio *nxt)
@@ -1412,8 +1405,7 @@ static inline int ll_new_hw_segment(request_queue_t *q,
 	return 1;
 }
 
-static int ll_back_merge_fn(request_queue_t *q, struct request *req,
-			    struct bio *bio)
+int ll_back_merge_fn(request_queue_t *q, struct request *req, struct bio *bio)
 {
 	unsigned short max_sectors;
 	int len;
@@ -1449,6 +1441,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
 
 	return ll_new_hw_segment(q, req, bio);
 }
+EXPORT_SYMBOL(ll_back_merge_fn);
 
 static int ll_front_merge_fn(request_queue_t *q, struct request *req,
 			     struct bio *bio)
@@ -1711,7 +1704,7 @@ EXPORT_SYMBOL(blk_stop_queue);
  *     on a queue, such as calling the unplug function after a timeout.
  *     A block device may call blk_sync_queue to ensure that any
  *     such activity is cancelled, thus allowing it to release resources
- *     the the callbacks might use. The caller must already have made sure
+ *     that the callbacks might use. The caller must already have made sure
  *     that its ->make_request_fn will not re-add plugging prior to calling
  *     this function.
  *
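
The comment fixed above spells out the contract around blk_sync_queue(): stop any new plugging first, then sync, then tear the queue down. A minimal sketch of a driver removal path that follows that contract is below; struct mydev and mydev_remove() are illustrative names and not part of this patch.

#include <linux/blkdev.h>
#include <linux/spinlock.h>

struct mydev {
	request_queue_t *queue;		/* hypothetical per-device state */
};

static void mydev_remove(struct mydev *md)
{
	unsigned long flags;

	/* stop dispatching new requests; blk_stop_queue() wants queue_lock held */
	spin_lock_irqsave(md->queue->queue_lock, flags);
	blk_stop_queue(md->queue);
	spin_unlock_irqrestore(md->queue->queue_lock, flags);

	/* cancel pending queue callbacks (e.g. the unplug timer) */
	blk_sync_queue(md->queue);

	/* now it is safe to release the queue and its resources */
	blk_cleanup_queue(md->queue);
}
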
@@ -1719,7 +1712,6 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->unplug_timer);
-	kblockd_flush();
 }
 
 EXPORT_SYMBOL(blk_sync_queue);
@@ -1919,9 +1911,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	}
 
 	q->request_fn = rfn;
-	q->back_merge_fn = ll_back_merge_fn;
-	q->front_merge_fn = ll_front_merge_fn;
-	q->merge_requests_fn = ll_merge_requests_fn;
 	q->prep_rq_fn = NULL;
 	q->unplug_fn = generic_unplug_device;
 	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
@@ -1935,6 +1924,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
 	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
 
+	q->sg_reserved_size = INT_MAX;
+
 	/*
 	 * all done
 	 */
@@ -2065,15 +2056,16 @@ static void freed_request(request_queue_t *q, int rw, int priv)
  * Returns NULL on failure, with queue_lock held.
  * Returns !NULL on success, with queue_lock *not held*.
  */
-static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
-				   gfp_t gfp_mask)
+static struct request *get_request(request_queue_t *q, int rw_flags,
+				   struct bio *bio, gfp_t gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = NULL;
+	const int rw = rw_flags & 0x01;
 	int may_queue, priv;
 
-	may_queue = elv_may_queue(q, rw);
+	may_queue = elv_may_queue(q, rw_flags);
 	if (may_queue == ELV_MQUEUE_NO)
 		goto rq_starved;
@@ -2121,7 +2113,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, priv, gfp_mask);
+	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -2169,12 +2161,13 @@ out:
  *
  * Called with q->queue_lock held, and returns with it unlocked.
  */
-static struct request *get_request_wait(request_queue_t *q, int rw,
+static struct request *get_request_wait(request_queue_t *q, int rw_flags,
 					struct bio *bio)
 {
+	const int rw = rw_flags & 0x01;
 	struct request *rq;
 
-	rq = get_request(q, rw, bio, GFP_NOIO);
+	rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	while (!rq) {
 		DEFINE_WAIT(wait);
 		struct request_list *rl = &q->rq;
@@ -2182,7 +2175,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 
-		rq = get_request(q, rw, bio, GFP_NOIO);
+		rq = get_request(q, rw_flags, bio, GFP_NOIO);
 		if (!rq) {
 			struct io_context *ioc;
 
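
get_request() and get_request_wait() above now receive the full rw_flags rather than a bare data direction, so the elevator and the request allocator can see REQ_RW_SYNC at allocation time while the request_list accounting keeps indexing on the direction bit alone. A small sketch of that split, mirroring the __make_request() hunk later in this patch; example_rw_flags() is an illustrative helper, not kernel code.

#include <linux/bio.h>
#include <linux/blkdev.h>

static int example_rw_flags(struct bio *bio)
{
	int rw_flags;

	rw_flags = bio_data_dir(bio);		/* READ (0) or WRITE (1) */
	if (bio_sync(bio))
		rw_flags |= REQ_RW_SYNC;	/* sync hint for the io scheduler */

	/*
	 * everything that sizes or indexes the request_list still wants
	 * just the direction bit, which is what "rw_flags & 0x01" recovers
	 */
	return rw_flags & 0x01;
}
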
@@ -2355,40 +2348,29 @@ static int __blk_rq_map_user(request_queue_t *q, struct request *rq,
 	else
 		bio = bio_copy_user(q, uaddr, len, reading);
 
-	if (IS_ERR(bio)) {
+	if (IS_ERR(bio))
 		return PTR_ERR(bio);
-	}
 
 	orig_bio = bio;
 	blk_queue_bounce(q, &bio);
+
 	/*
 	 * We link the bounce buffer in and could have to traverse it
 	 * later so we have to get a ref to prevent it from being freed
 	 */
 	bio_get(bio);
 
-	/*
-	 * for most (all? don't know of any) queues we could
-	 * skip grabbing the queue lock here. only drivers with
-	 * funky private ->back_merge_fn() function could be
-	 * problematic.
-	 */
-	spin_lock_irq(q->queue_lock);
 	if (!rq->bio)
 		blk_rq_bio_prep(q, rq, bio);
-	else if (!q->back_merge_fn(q, rq, bio)) {
+	else if (!ll_back_merge_fn(q, rq, bio)) {
 		ret = -EINVAL;
-		spin_unlock_irq(q->queue_lock);
 		goto unmap_bio;
 	} else {
 		rq->biotail->bi_next = bio;
 		rq->biotail = bio;
-		rq->nr_sectors += bio_sectors(bio);
-		rq->hard_nr_sectors = rq->nr_sectors;
 		rq->data_len += bio->bi_size;
 	}
-	spin_unlock_irq(q->queue_lock);
 
 	return bio->bi_size;
@@ -2424,6 +2406,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
 		    unsigned long len)
 {
 	unsigned long bytes_read = 0;
+	struct bio *bio = NULL;
 	int ret;
 
 	if (len > (q->max_hw_sectors << 9))
@@ -2450,6 +2433,8 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
 		ret = __blk_rq_map_user(q, rq, ubuf, map_len);
 		if (ret < 0)
 			goto unmap_rq;
+		if (!bio)
+			bio = rq->bio;
 		bytes_read += ret;
 		ubuf += ret;
 	}
@@ -2457,7 +2442,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
 	rq->buffer = rq->data = NULL;
 	return 0;
 unmap_rq:
-	blk_rq_unmap_user(rq);
+	blk_rq_unmap_user(bio);
 	return ret;
 }
 
@@ -2469,6 +2454,7 @@ EXPORT_SYMBOL(blk_rq_map_user);
 * @rq:		request to map data to
 * @iov:	pointer to the iovec
 * @iov_count:	number of elements in the iovec
+ * @len:	I/O byte count
 *
 * Description:
 *    Data will be mapped directly for zero copy io, if possible. Otherwise
@@ -2514,27 +2500,33 @@ EXPORT_SYMBOL(blk_rq_map_user_iov);
 
 /**
  * blk_rq_unmap_user - unmap a request with user data
- * @rq:		rq to be unmapped
+ * @bio:	start of bio list
  *
  * Description:
- *    Unmap a rq previously mapped by blk_rq_map_user().
- *    rq->bio must be set to the original head of the request.
+ *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
+ *    supply the original rq->bio from the blk_rq_map_user() return, since
+ *    the io completion may have changed rq->bio.
  */
-int blk_rq_unmap_user(struct request *rq)
+int blk_rq_unmap_user(struct bio *bio)
 {
-	struct bio *bio, *mapped_bio;
+	struct bio *mapped_bio;
+	int ret = 0, ret2;
 
-	while ((bio = rq->bio)) {
-		if (bio_flagged(bio, BIO_BOUNCED))
+	while (bio) {
+		mapped_bio = bio;
+		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
 			mapped_bio = bio->bi_private;
-		else
-			mapped_bio = bio;
-		__blk_rq_unmap_user(mapped_bio);
-		rq->bio = bio->bi_next;
-		bio_put(bio);
+		ret2 = __blk_rq_unmap_user(mapped_bio);
+		if (ret2 && !ret)
+			ret = ret2;
+
+		mapped_bio = bio;
+		bio = bio->bi_next;
+		bio_put(mapped_bio);
 	}
-	return 0;
+
+	return ret;
 }
 
 EXPORT_SYMBOL(blk_rq_unmap_user);
@@ -2565,6 +2557,7 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
 		bio->bi_rw |= (1 << BIO_RW);
 
 	blk_rq_bio_prep(q, rq, bio);
+	blk_queue_bounce(q, &rq->bio);
 	rq->buffer = rq->data = NULL;
 	return 0;
 }
@@ -2694,9 +2687,6 @@ static inline void add_request(request_queue_t * q, struct request * req)
 {
 	drive_stat_acct(req, req->nr_sectors, 1);
 
-	if (q->activity_fn)
-		q->activity_fn(q->activity_data, rq_data_dir(req));
-
 	/*
 	 * elevator indicated where it wants this request to be
 	 * inserted at elevator_merge time
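
The reworked blk_rq_unmap_user() above takes the original bio chain head rather than the request, because completion may advance rq->bio. A sketch of the new calling convention follows; issue_user_io() is a hypothetical helper, and a real caller would also fill in the command before executing the request.

#include <linux/blkdev.h>

static int issue_user_io(request_queue_t *q, struct gendisk *disk,
			 void __user *ubuf, unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int err;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	err = blk_rq_map_user(q, rq, ubuf, len);
	if (err)
		goto out_put;

	bio = rq->bio;			/* remember the mapped bio head now */
	blk_execute_rq(q, disk, rq, 0);	/* issue and wait for completion */

	err = blk_rq_unmap_user(bio);	/* pass the saved head, not rq->bio */
out_put:
	blk_put_request(rq);
	return err;
}
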
@@ -2830,7 +2820,7 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 	 * will have updated segment counts, update sector
 	 * counts here.
 	 */
-	if (!q->merge_requests_fn(q, req, next))
+	if (!ll_merge_requests_fn(q, req, next))
 		return 0;
 
 	/*
@@ -2920,6 +2910,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	int el_ret, nr_sectors, barrier, err;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
+	int rw_flags;
 
 	nr_sectors = bio_sectors(bio);
 
@@ -2946,7 +2937,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 		case ELEVATOR_BACK_MERGE:
 			BUG_ON(!rq_mergeable(req));
 
-			if (!q->back_merge_fn(q, req, bio))
+			if (!ll_back_merge_fn(q, req, bio))
 				break;
 
 			blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
@@ -2963,7 +2954,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 		case ELEVATOR_FRONT_MERGE:
 			BUG_ON(!rq_mergeable(req));
 
-			if (!q->front_merge_fn(q, req, bio))
+			if (!ll_front_merge_fn(q, req, bio))
 				break;
 
 			blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
@@ -2993,11 +2984,20 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	}
 
 get_rq:
+	/*
+	 * This sync check and mask will be re-done in init_request_from_bio(),
+	 * but we need to set it earlier to expose the sync flag to the
+	 * rq allocator and io schedulers.
+	 */
+	rw_flags = bio_data_dir(bio);
+	if (sync)
+		rw_flags |= REQ_RW_SYNC;
+
 	/*
 	 * Grab a free request. This is might sleep but can not fail.
 	 * Returns with the queue unlocked.
 	 */
-	req = get_request_wait(q, bio_data_dir(bio), bio);
+	req = get_request_wait(q, rw_flags, bio);
 
 	/*
 	 * After dropping the lock and possibly sleeping here, our request
@@ -3056,6 +3056,42 @@ static void handle_bad_sector(struct bio *bio)
 	set_bit(BIO_EOF, &bio->bi_flags);
 }
 
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+
+static DECLARE_FAULT_ATTR(fail_make_request);
+
+static int __init setup_fail_make_request(char *str)
+{
+	return setup_fault_attr(&fail_make_request, str);
+}
+__setup("fail_make_request=", setup_fail_make_request);
+
+static int should_fail_request(struct bio *bio)
+{
+	if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
+	    (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
+		return should_fail(&fail_make_request, bio->bi_size);
+
+	return 0;
+}
+
+static int __init fail_make_request_debugfs(void)
+{
+	return init_fault_attr_dentries(&fail_make_request,
+					"fail_make_request");
+}
+
+late_initcall(fail_make_request_debugfs);
+
+#else /* CONFIG_FAIL_MAKE_REQUEST */
+
+static inline int should_fail_request(struct bio *bio)
+{
+	return 0;
+}
+
+#endif /* CONFIG_FAIL_MAKE_REQUEST */
+
 /**
  * generic_make_request: hand a buffer to its device driver for I/O
  * @bio:	The bio describing the location in memory and on the device.
@@ -3080,7 +3116,7 @@ static void handle_bad_sector(struct bio *bio)
 * bi_sector for remaps as it sees fit. So the values of these fields
 * should NOT be depended on after the call to generic_make_request.
 */
-void generic_make_request(struct bio *bio)
+static inline void __generic_make_request(struct bio *bio)
 {
 	request_queue_t *q;
 	sector_t maxsector;
@@ -3141,6 +3177,9 @@ end_io:
 		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 			goto end_io;
 
+		if (should_fail_request(bio))
+			goto end_io;
+
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
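
The CONFIG_FAIL_MAKE_REQUEST block added above follows the generic fault-injection pattern from linux/fault-inject.h: a fault_attr, a boot parameter hook, a should_fail() check at the injection point, and a debugfs registration. A sketch of the same pattern applied to a made-up attribute is below; fail_foo_alloc and its helpers are purely illustrative.

#include <linux/fault-inject.h>
#include <linux/init.h>

static DECLARE_FAULT_ATTR(fail_foo_alloc);

/* boot-time configuration, same shape as "fail_make_request=" above */
static int __init setup_fail_foo_alloc(char *str)
{
	return setup_fault_attr(&fail_foo_alloc, str);
}
__setup("fail_foo_alloc=", setup_fail_foo_alloc);

/* call this at the point where a failure should occasionally be injected */
static int should_fail_foo_alloc(size_t bytes)
{
	return should_fail(&fail_foo_alloc, bytes);
}

/* expose the knobs under debugfs once it is up */
static int __init fail_foo_alloc_debugfs(void)
{
	return init_fault_attr_dentries(&fail_foo_alloc, "fail_foo_alloc");
}
late_initcall(fail_foo_alloc_debugfs);
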
@@ -3176,6 +3215,57 @@ end_io:
 	} while (ret);
 }
 
+/*
+ * We only want one ->make_request_fn to be active at a time,
+ * else stack usage with stacked devices could be a problem.
+ * So use current->bio_{list,tail} to keep a list of requests
+ * submited by a make_request_fn function.
+ * current->bio_tail is also used as a flag to say if
+ * generic_make_request is currently active in this task or not.
+ * If it is NULL, then no make_request is active.  If it is non-NULL,
+ * then a make_request is active, and new requests should be added
+ * at the tail
+ */
+void generic_make_request(struct bio *bio)
+{
+	if (current->bio_tail) {
+		/* make_request is active */
+		*(current->bio_tail) = bio;
+		bio->bi_next = NULL;
+		current->bio_tail = &bio->bi_next;
+		return;
+	}
+	/* following loop may be a bit non-obvious, and so deserves some
+	 * explanation.
+	 * Before entering the loop, bio->bi_next is NULL (as all callers
+	 * ensure that) so we have a list with a single bio.
+	 * We pretend that we have just taken it off a longer list, so
+	 * we assign bio_list to the next (which is NULL) and bio_tail
+	 * to &bio_list, thus initialising the bio_list of new bios to be
+	 * added.  __generic_make_request may indeed add some more bios
+	 * through a recursive call to generic_make_request.  If it
+	 * did, we find a non-NULL value in bio_list and re-enter the loop
+	 * from the top.  In this case we really did just take the bio
+	 * of the top of the list (no pretending) and so fixup bio_list and
+	 * bio_tail or bi_next, and call into __generic_make_request again.
+	 *
+	 * The loop was structured like this to make only one call to
+	 * __generic_make_request (which is important as it is large and
+	 * inlined) and to keep the structure simple.
+	 */
+	BUG_ON(bio->bi_next);
+	do {
+		current->bio_list = bio->bi_next;
+		if (bio->bi_next == NULL)
+			current->bio_tail = &current->bio_list;
+		else
+			bio->bi_next = NULL;
+		__generic_make_request(bio);
+		bio = current->bio_list;
+	} while (bio);
+	current->bio_tail = NULL; /* deactivate */
+}
+
 EXPORT_SYMBOL(generic_make_request);
 
 /**
@@ -3195,10 +3285,12 @@ void submit_bio(int rw, struct bio *bio)
 	BIO_BUG_ON(!bio->bi_size);
 	BIO_BUG_ON(!bio->bi_io_vec);
 	bio->bi_rw |= rw;
-	if (rw & WRITE)
+	if (rw & WRITE) {
 		count_vm_events(PGPGOUT, count);
-	else
+	} else {
+		task_io_account_read(bio->bi_size);
 		count_vm_events(PGPGIN, count);
+	}
 
 	if (unlikely(block_dump)) {
 		char b[BDEVNAME_SIZE];
@@ -3459,8 +3551,6 @@ static void blk_done_softirq(struct softirq_action *h)
 	}
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
 static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
 			  void *hcpu)
 {
@@ -3468,7 +3558,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
 	 * If a CPU goes away, splice its entries to the current CPU
 	 * and trigger a run of the softirq
 	 */
-	if (action == CPU_DEAD) {
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
 		int cpu = (unsigned long) hcpu;
 
 		local_irq_disable();
@@ -3486,8 +3576,6 @@ static struct notifier_block __devinitdata blk_cpu_notifier = {
 	.notifier_call	= blk_cpu_notify,
 };
 
-#endif /* CONFIG_HOTPLUG_CPU */
-
 /**
  * blk_complete_request - end I/O on a request
  * @req:      the request being processed
@@ -3594,11 +3682,11 @@ int kblockd_schedule_work(struct work_struct *work)
 
 EXPORT_SYMBOL(kblockd_schedule_work);
 
-void kblockd_flush(void)
+void kblockd_flush_work(struct work_struct *work)
 {
-	flush_workqueue(kblockd_workqueue);
+	cancel_work_sync(work);
 }
-EXPORT_SYMBOL(kblockd_flush);
+EXPORT_SYMBOL(kblockd_flush_work);
 
 int __init blk_dev_init(void)
 {
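
The new generic_make_request() above flattens recursive submissions from stacked devices into an iterative loop keyed off current->bio_tail. A standalone sketch of the same trick in plain userspace C, with hypothetical node/submit/dispatch_one names, may make the loop easier to follow.

#include <stdio.h>
#include <stddef.h>

struct node {
	struct node *next;
	int depth;
};

static struct node *list;	/* pending submissions */
static struct node **tail;	/* NULL => no dispatcher active */

static void dispatch_one(struct node *n);

/* mirrors generic_make_request(): queue if a dispatcher is already
 * active on this call chain, otherwise become the dispatcher and
 * drain the list iteratively (callers must pass n->next == NULL). */
static void submit(struct node *n)
{
	if (tail) {
		*tail = n;
		n->next = NULL;
		tail = &n->next;
		return;
	}

	do {
		list = n->next;
		if (n->next == NULL)
			tail = &list;
		else
			n->next = NULL;
		dispatch_one(n);	/* may call submit() again */
		n = list;
	} while (n);
	tail = NULL;			/* deactivate */
}

/* stands in for __generic_make_request(); a "stacked device" here
 * simply resubmits a child node a few levels deep. */
static void dispatch_one(struct node *n)
{
	static struct node child[3];

	printf("handling node at depth %d\n", n->depth);
	if (n->depth < 3) {
		child[n->depth].depth = n->depth + 1;
		submit(&child[n->depth]);
	}
}

int main(void)
{
	struct node first = { .next = NULL, .depth = 0 };

	submit(&first);		/* handles depths 0..3 without recursion piling up */
	return 0;
}
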
@@ -3623,8 +3711,8 @@ int __init blk_dev_init(void)
 	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
 	register_hotcpu_notifier(&blk_cpu_notifier);
 
-	blk_max_low_pfn = max_low_pfn;
-	blk_max_pfn = max_pfn;
+	blk_max_low_pfn = max_low_pfn - 1;
+	blk_max_pfn = max_pfn - 1;
 
 	return 0;
 }
@@ -3706,6 +3794,7 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node)
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		ret->cic_root.rb_node = NULL;
+		ret->ioc_data = NULL;
 		/* make sure set_task_ioprio() sees the settings above */
 		smp_wmb();
 		tsk->io_context = ret;
@@ -3713,7 +3802,6 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node)
 
 	return ret;
 }
-EXPORT_SYMBOL(current_io_context);
 
 /*
  * If the current task has no IO context then create one and initialise it.