Merge branch 'stable/for-jens-3.10' of git://git.kernel.org/pub/scm/linux/kernel...

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index dd5b2fed97e9ae2b7665f13ec3c42db6ee6fb462..bf4b9d282c042ec4d5b3c31a9d3192bd059afcc5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
 #include "common.h"
 
 /*
- * These are rather arbitrary. They are fairly large because adjacent requests
- * pulled from a communication ring are quite likely to end up being part of
- * the same scatter/gather request at the disc.
+ * Maximum number of unused free pages to keep in the internal buffer.
+ * Setting this to too low a value will reduce the memory used by each
+ * backend, but may incur a performance penalty.
  *
- * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
- *
- * This will increase the chances of being able to write whole tracks.
- * 64 should be enough to keep us competitive with Linux.
+ * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, but it
+ * can be set to a lower value, which may degrade performance on some
+ * I/O-intensive workloads.
  */
-static int xen_blkif_reqs = 64;
-module_param_named(reqs, xen_blkif_reqs, int, 0);
-MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
 
-/* Run-time switchable: /sys/module/blkback/parameters/ */
-static unsigned int log_stats;
-module_param(log_stats, int, 0644);
+static int xen_blkif_max_buffer_pages = 1024;
+module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
+MODULE_PARM_DESC(max_buffer_pages,
+"Maximum number of free pages to keep in each block backend buffer");
 
 /*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. Each buffer_head that completes decrements
- * the pendcnt towards zero. When it hits zero, the specified domain has a
- * response queued for it, with the saved 'id' passed back.
+ * Maximum number of grants to map persistently in blkback. For maximum
+ * performance this should be the total number of grants that can be used
+ * to fill the ring, but since this might become too high, especially with
+ * the use of indirect descriptors, we set it to a value that provides good
+ * performance without using too much memory.
+ *
+ * When the list of persistent grants is full we clean it up using an LRU
+ * algorithm.
  */
-struct pending_req {
-       struct xen_blkif        *blkif;
-       u64                     id;
-       int                     nr_pages;
-       atomic_t                pendcnt;
-       unsigned short          operation;
-       int                     status;
-       struct list_head        free_list;
-       DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-};
 
-#define BLKBACK_INVALID_HANDLE (~0)
+static int xen_blkif_max_pgrants = 1056;
+module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+MODULE_PARM_DESC(max_persistent_grants,
+                 "Maximum number of grants to map persistently");
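
For scale, using the figures from the comment removed further down (RING_SIZE
= 32 for all known ring types, BLKIF_MAX_SEGMENTS_PER_REQUEST = 11, and 48
bytes per struct persistent_gnt): a ring of direct requests can use at most
32 * 11 = 352 grants at once, so the 1056 default is three times that, and
the bookkeeping structs alone cost at most 1056 * 48 = 50688 bytes per
backend.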
 
-struct xen_blkbk {
-       struct pending_req      *pending_reqs;
-       /* List of all 'pending_req' available */
-       struct list_head        pending_free;
-       /* And its spinlock. */
-       spinlock_t              pending_free_lock;
-       wait_queue_head_t       pending_free_wq;
-       /* The list of all pages that are available. */
-       struct page             **pending_pages;
-       /* And the grant handles that are available. */
-       grant_handle_t          *pending_grant_handles;
-};
-
-static struct xen_blkbk *blkbk;
+/*
+ * The LRU mechanism to clean the lists of persistent grants needs to
+ * be executed periodically. The time interval between consecutive executions
+ * of the purge mechanism is set in milliseconds.
+ */
+#define LRU_INTERVAL 100
 
 /*
- * Maximum number of grant pages that can be mapped in blkback.
- * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
- * pages that blkback will persistently map.
- * Currently, this is:
- * RING_SIZE = 32 (for all known ring types)
- * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
- * sizeof(struct persistent_gnt) = 48
- * So the maximum memory used to store the grants is:
- * 32 * 11 * 48 = 16896 bytes
+ * When the persistent grants list is full we will remove unused grants
+ * from the list. This is the percentage of grants to remove on each LRU
+ * execution.
  */
-static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+#define LRU_PERCENT_CLEAN 5
+
+/* Run-time switchable: /sys/module/blkback/parameters/ */
+static unsigned int log_stats;
+module_param(log_stats, int, 0644);
+
+#define BLKBACK_INVALID_HANDLE (~0)
+
+/* Number of free pages to remove on each call to free_xenballooned_pages */
+#define NUM_BATCH_FREE_PAGES 10
+
+static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
 {
-       switch (protocol) {
-       case BLKIF_PROTOCOL_NATIVE:
-               return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
-                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       case BLKIF_PROTOCOL_X86_32:
-               return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
-                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       case BLKIF_PROTOCOL_X86_64:
-               return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
-                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       default:
-               BUG();
+       unsigned long flags;
+
+       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+       if (list_empty(&blkif->free_pages)) {
+               BUG_ON(blkif->free_pages_num != 0);
+               spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+               return alloc_xenballooned_pages(1, page, false);
        }
+       BUG_ON(blkif->free_pages_num == 0);
+       page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+       list_del(&page[0]->lru);
+       blkif->free_pages_num--;
+       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+
        return 0;
 }
 
-
-/*
- * Little helpful macro to figure out the index and virtual address of the
- * pending_pages[..]. For each 'pending_req' we have have up to
- * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
- * 10 and would index in the pending_pages[..].
- */
-static inline int vaddr_pagenr(struct pending_req *req, int seg)
+static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+                                  int num)
 {
-       return (req - blkbk->pending_reqs) *
-               BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
-}
+       unsigned long flags;
+       int i;
 
-#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
+       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+       for (i = 0; i < num; i++)
+               list_add(&page[i]->lru, &blkif->free_pages);
+       blkif->free_pages_num += num;
+       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+}
 
-static inline unsigned long vaddr(struct pending_req *req, int seg)
+static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
 {
-       unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
-       return (unsigned long)pfn_to_kaddr(pfn);
-}
+       /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
+       struct page *page[NUM_BATCH_FREE_PAGES];
+       unsigned int num_pages = 0;
+       unsigned long flags;
 
-#define pending_handle(_req, _seg) \
-       (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
+       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+       while (blkif->free_pages_num > num) {
+               BUG_ON(list_empty(&blkif->free_pages));
+               page[num_pages] = list_first_entry(&blkif->free_pages,
+                                                  struct page, lru);
+               list_del(&page[num_pages]->lru);
+               blkif->free_pages_num--;
+               if (++num_pages == NUM_BATCH_FREE_PAGES) {
+                       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+                       free_xenballooned_pages(num_pages, page);
+                       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+                       num_pages = 0;
+               }
+       }
+       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+       if (num_pages != 0)
+               free_xenballooned_pages(num_pages, page);
+}
 
+#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
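
The three helpers above give each backend a small pool of ballooned pages. A
minimal usage sketch for a hypothetical caller (not part of this patch; error
handling elided):

	struct page *pg;

	if (get_free_page(blkif, &pg))
		return -ENOMEM;	/* pool empty and balloon allocation failed */
	/* ... map a grant into pg and submit the I/O ... */
	put_free_pages(blkif, &pg, 1);	/* back onto blkif->free_pages */

The pool is trimmed back to xen_blkif_max_buffer_pages by
shrink_free_pagepool(), which frees in batches: shrinking a pool of 25 pages
down to 0, for instance, drops the lock around two batches of
NUM_BATCH_FREE_PAGES = 10 and a final free of the remaining 5.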
 
 static int do_block_io_op(struct xen_blkif *blkif);
 static int dispatch_rw_block_io(struct xen_blkif *blkif,
@@ -170,13 +178,29 @@ static void make_response(struct xen_blkif *blkif, u64 id,
             (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
 
 
-static void add_persistent_gnt(struct rb_root *root,
+/*
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single thread for each backend, so we
+ * can be sure that these functions will never be called concurrently.
+ *
+ * The only exception to that is put_persistent_gnt, which can be called
+ * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
+ * bit operations to modify the flags of a persistent grant and to count
+ * the number of used grants.
+ */
+static int add_persistent_gnt(struct xen_blkif *blkif,
                               struct persistent_gnt *persistent_gnt)
 {
-       struct rb_node **new = &(root->rb_node), *parent = NULL;
+       struct rb_node **new = NULL, *parent = NULL;
        struct persistent_gnt *this;
 
+       if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+               if (!blkif->vbd.overflow_max_grants)
+                       blkif->vbd.overflow_max_grants = 1;
+               return -EBUSY;
+       }
        /* Figure out where to put new node */
+       new = &blkif->persistent_gnts.rb_node;
        while (*new) {
                this = container_of(*new, struct persistent_gnt, node);
 
@@ -186,22 +210,28 @@ static void add_persistent_gnt(struct rb_root *root,
                else if (persistent_gnt->gnt > this->gnt)
                        new = &((*new)->rb_right);
                else {
-                       pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
-                       BUG();
+                       pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n");
+                       return -EINVAL;
                }
        }
 
+       bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
+       set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
        /* Add new node and rebalance tree. */
        rb_link_node(&(persistent_gnt->node), parent, new);
-       rb_insert_color(&(persistent_gnt->node), root);
+       rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
+       blkif->persistent_gnt_c++;
+       atomic_inc(&blkif->persistent_gnt_in_use);
+       return 0;
 }
 
-static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
                                                 grant_ref_t gref)
 {
        struct persistent_gnt *data;
-       struct rb_node *node = root->rb_node;
+       struct rb_node *node = NULL;
 
+       node = blkif->persistent_gnts.rb_node;
        while (node) {
                data = container_of(node, struct persistent_gnt, node);
 
@@ -209,13 +239,31 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
                        node = node->rb_left;
                else if (gref > data->gnt)
                        node = node->rb_right;
-               else
+               else {
+                       if (test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+                               pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n");
+                               return NULL;
+                       }
+                       set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+                       atomic_inc(&blkif->persistent_gnt_in_use);
                        return data;
+               }
        }
        return NULL;
 }
 
-static void free_persistent_gnts(struct rb_root *root, unsigned int num)
+static void put_persistent_gnt(struct xen_blkif *blkif,
+                               struct persistent_gnt *persistent_gnt)
+{
+       if (!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+               pr_alert_ratelimited(DRV_PFX " freeing a grant already unused\n");
+       set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+       clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+       atomic_dec(&blkif->persistent_gnt_in_use);
+}
+
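Together these helpers implement a check-out/check-in discipline over the
rb-tree. A condensed sketch of the pattern, as xen_blkbk_map() and
xen_blkbk_unmap() use it later in this patch (simplified and hypothetical):

	struct persistent_gnt *gnt;

	gnt = get_persistent_gnt(blkif, gref);	/* sets PERSISTENT_GNT_ACTIVE */
	if (gnt) {
		/* grant already mapped: do the I/O against gnt->page */
		put_persistent_gnt(blkif, gnt);	/* sets WAS_ACTIVE, clears ACTIVE */
	} else {
		/* not cached (or already active): fall back to a regular map */
	}
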
+static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+                                 unsigned int num)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -240,7 +288,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
                        ret = gnttab_unmap_refs(unmap, NULL, pages,
                                segs_to_unmap);
                        BUG_ON(ret);
-                       free_xenballooned_pages(segs_to_unmap, pages);
+                       put_free_pages(blkif, pages, segs_to_unmap);
                        segs_to_unmap = 0;
                }
 
@@ -251,21 +299,148 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
        BUG_ON(num != 0);
 }
 
+static void unmap_purged_grants(struct work_struct *work)
+{
+       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct persistent_gnt *persistent_gnt;
+       int ret, segs_to_unmap = 0;
+       struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+
+       while (!list_empty(&blkif->persistent_purge_list)) {
+               persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+                                                 struct persistent_gnt,
+                                                 remove_node);
+               list_del(&persistent_gnt->remove_node);
+
+               gnttab_set_unmap_op(&unmap[segs_to_unmap],
+                       vaddr(persistent_gnt->page),
+                       GNTMAP_host_map,
+                       persistent_gnt->handle);
+
+               pages[segs_to_unmap] = persistent_gnt->page;
+
+               if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+                       ret = gnttab_unmap_refs(unmap, NULL, pages,
+                               segs_to_unmap);
+                       BUG_ON(ret);
+                       put_free_pages(blkif, pages, segs_to_unmap);
+                       segs_to_unmap = 0;
+               }
+               kfree(persistent_gnt);
+       }
+       if (segs_to_unmap > 0) {
+               ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
+               BUG_ON(ret);
+               put_free_pages(blkif, pages, segs_to_unmap);
+       }
+}
+
+static void purge_persistent_gnt(struct xen_blkif *blkif)
+{
+       struct persistent_gnt *persistent_gnt;
+       struct rb_node *n;
+       unsigned int num_clean, total;
+       bool scan_used = false, clean_used = false;
+       struct rb_root *root;
+
+       if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
+           (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
+           !blkif->vbd.overflow_max_grants)) {
+               return;
+       }
+
+       if (work_pending(&blkif->persistent_purge_work)) {
+               pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n");
+               return;
+       }
+
+       num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+       num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+       num_clean = min(blkif->persistent_gnt_c, num_clean);
+       if ((num_clean == 0) ||
+           (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
+               return;
+
+       /*
+        * At this point, we can be sure that there will be no calls
+        * to get_persistent_gnt (because we are executing this code from
+        * xen_blkif_schedule); there can only be calls to put_persistent_gnt,
+        * which means that the number of currently used grants will go down,
+        * but never up, so we will always be able to remove the requested
+        * number of grants.
+        */
+
+       total = num_clean;
+
+       pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
+
+       INIT_LIST_HEAD(&blkif->persistent_purge_list);
+       root = &blkif->persistent_gnts;
+purge_list:
+       foreach_grant_safe(persistent_gnt, n, root, node) {
+               BUG_ON(persistent_gnt->handle ==
+                       BLKBACK_INVALID_HANDLE);
+
+               if (clean_used) {
+                       clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+                       continue;
+               }
+
+               if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+                       continue;
+               if (!scan_used &&
+                   (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+                       continue;
+
+               rb_erase(&persistent_gnt->node, root);
+               list_add(&persistent_gnt->remove_node,
+                        &blkif->persistent_purge_list);
+               if (--num_clean == 0)
+                       goto finished;
+       }
+       /*
+        * If we get here it means we also need to start cleaning
+        * grants that were used since the last purge in order to reach
+        * the requested number.
+        */
+       if (!scan_used && !clean_used) {
+               pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean);
+               scan_used = true;
+               goto purge_list;
+       }
+finished:
+       if (!clean_used) {
+               pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n");
+               clean_used = true;
+               goto purge_list;
+       }
+
+       blkif->persistent_gnt_c -= (total - num_clean);
+       blkif->vbd.overflow_max_grants = 0;
+
+       /* We can defer this work */
+       INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
+       schedule_work(&blkif->persistent_purge_work);
+       pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
+       return;
+}
+
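A worked example of the sizing logic above, with the default
xen_blkif_max_pgrants = 1056 and a hypothetical blkif->persistent_gnt_c of
1100: num_clean starts as (1056 / 100) * 5 = 50 (integer arithmetic), becomes
1100 - 1056 + 50 = 94, and survives the min() clamp, so up to 94 grants are
purged, provided at least that many are not currently in use. The purge_list
loop then makes up to three passes: first grants never flagged
PERSISTENT_GNT_WAS_ACTIVE, then (scan_used) grants used since the last purge,
and finally (clean_used) a pass that only clears the WAS_ACTIVE bits.
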
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
-static struct pending_req *alloc_req(void)
+static struct pending_req *alloc_req(struct xen_blkif *blkif)
 {
        struct pending_req *req = NULL;
        unsigned long flags;
 
-       spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-       if (!list_empty(&blkbk->pending_free)) {
-               req = list_entry(blkbk->pending_free.next, struct pending_req,
+       spin_lock_irqsave(&blkif->pending_free_lock, flags);
+       if (!list_empty(&blkif->pending_free)) {
+               req = list_entry(blkif->pending_free.next, struct pending_req,
                                 free_list);
                list_del(&req->free_list);
        }
-       spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+       spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
        return req;
 }
 
@@ -273,17 +448,17 @@ static struct pending_req *alloc_req(void)
  * Return the 'pending_req' structure back to the freepool. We also
  * wake up the thread if it was waiting for a free page.
  */
-static void free_req(struct pending_req *req)
+static void free_req(struct xen_blkif *blkif, struct pending_req *req)
 {
        unsigned long flags;
        int was_empty;
 
-       spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-       was_empty = list_empty(&blkbk->pending_free);
-       list_add(&req->free_list, &blkbk->pending_free);
-       spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+       spin_lock_irqsave(&blkif->pending_free_lock, flags);
+       was_empty = list_empty(&blkif->pending_free);
+       list_add(&req->free_list, &blkif->pending_free);
+       spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
        if (was_empty)
-               wake_up(&blkbk->pending_free_wq);
+               wake_up(&blkif->pending_free_wq);
 }
 
 /*
@@ -382,10 +557,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 static void print_stats(struct xen_blkif *blkif)
 {
        pr_info("xen-blkback (%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
-                "  |  ds %4llu\n",
+                "  |  ds %4llu | pg: %4u/%4d\n",
                 current->comm, blkif->st_oo_req,
                 blkif->st_rd_req, blkif->st_wr_req,
-                blkif->st_f_req, blkif->st_ds_req);
+                blkif->st_f_req, blkif->st_ds_req,
+                blkif->persistent_gnt_c,
+                xen_blkif_max_pgrants);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
@@ -397,6 +574,8 @@ int xen_blkif_schedule(void *arg)
 {
        struct xen_blkif *blkif = arg;
        struct xen_vbd *vbd = &blkif->vbd;
+       unsigned long timeout;
+       int ret;
 
        xen_blkif_get(blkif);
 
@@ -406,27 +585,52 @@ int xen_blkif_schedule(void *arg)
                if (unlikely(vbd->size != vbd_sz(vbd)))
                        xen_vbd_resize(blkif);
 
-               wait_event_interruptible(
+               timeout = msecs_to_jiffies(LRU_INTERVAL);
+
+               timeout = wait_event_interruptible_timeout(
                        blkif->wq,
-                       blkif->waiting_reqs || kthread_should_stop());
-               wait_event_interruptible(
-                       blkbk->pending_free_wq,
-                       !list_empty(&blkbk->pending_free) ||
-                       kthread_should_stop());
+                       blkif->waiting_reqs || kthread_should_stop(),
+                       timeout);
+               if (timeout == 0)
+                       goto purge_gnt_list;
+               timeout = wait_event_interruptible_timeout(
+                       blkif->pending_free_wq,
+                       !list_empty(&blkif->pending_free) ||
+                       kthread_should_stop(),
+                       timeout);
+               if (timeout == 0)
+                       goto purge_gnt_list;
 
                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */
 
-               if (do_block_io_op(blkif))
+               ret = do_block_io_op(blkif);
+               if (ret > 0)
                        blkif->waiting_reqs = 1;
+               if (ret == -EACCES)
+                       wait_event_interruptible(blkif->shutdown_wq,
+                                                kthread_should_stop());
+
+purge_gnt_list:
+               if (blkif->vbd.feature_gnt_persistent &&
+                   time_after(jiffies, blkif->next_lru)) {
+                       purge_persistent_gnt(blkif);
+                       blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+               }
+
+               /* Shrink if we have more than xen_blkif_max_buffer_pages */
+               shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
 
                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
        }
 
+       /* Since we are shutting down, remove all pages from the buffer */
+       shrink_free_pagepool(blkif, 0 /* All */);
+
        /* Free all persistent grant pages */
        if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-               free_persistent_gnts(&blkif->persistent_gnts,
+               free_persistent_gnts(blkif, &blkif->persistent_gnts,
                        blkif->persistent_gnt_c);
 
        BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
@@ -441,148 +645,98 @@ int xen_blkif_schedule(void *arg)
        return 0;
 }
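
The scheduler loop in the hunk above moves from plain
wait_event_interruptible() to wait_event_interruptible_timeout() so that the
LRU purge and the pool shrink run even while the ring is idle. Its skeleton,
distilled (a sketch with hypothetical helpers, not the patch's exact control
flow):

	for (;;) {
		long t = wait_event_interruptible_timeout(wq,
				have_work() || kthread_should_stop(),
				msecs_to_jiffies(LRU_INTERVAL));
		if (kthread_should_stop())
			break;
		if (t > 0)
			service_ring();	/* woken by real work */
		/* t == 0: timed out; go straight to housekeeping */
		purge_and_shrink_if_due();
	}
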
 
-struct seg_buf {
-       unsigned int offset;
-       unsigned int nsec;
-};
 /*
  * Unmap the grant references, and also remove the M2P over-rides
  * used in the 'pending_req'.
  */
-static void xen_blkbk_unmap(struct pending_req *req)
+static void xen_blkbk_unmap(struct xen_blkif *blkif,
+                            struct grant_page *pages[],
+                            int num)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int i, invcount = 0;
-       grant_handle_t handle;
        int ret;
 
-       for (i = 0; i < req->nr_pages; i++) {
-               if (!test_bit(i, req->unmap_seg))
+       for (i = 0; i < num; i++) {
+               if (pages[i]->persistent_gnt != NULL) {
+                       put_persistent_gnt(blkif, pages[i]->persistent_gnt);
                        continue;
-               handle = pending_handle(req, i);
-               if (handle == BLKBACK_INVALID_HANDLE)
+               }
+               if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
                        continue;
-               gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
-                                   GNTMAP_host_map, handle);
-               pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
-               pages[invcount] = virt_to_page(vaddr(req, i));
-               invcount++;
+               unmap_pages[invcount] = pages[i]->page;
+               gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
+                                   GNTMAP_host_map, pages[i]->handle);
+               pages[i]->handle = BLKBACK_INVALID_HANDLE;
+               if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+                       ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
+                                               invcount);
+                       BUG_ON(ret);
+                       put_free_pages(blkif, unmap_pages, invcount);
+                       invcount = 0;
+               }
+       }
+       if (invcount) {
+               ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
+               BUG_ON(ret);
+               put_free_pages(blkif, unmap_pages, invcount);
        }
-
-       ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
-       BUG_ON(ret);
 }
 
-static int xen_blkbk_map(struct blkif_request *req,
-                        struct pending_req *pending_req,
-                        struct seg_buf seg[],
-                        struct page *pages[])
+static int xen_blkbk_map(struct xen_blkif *blkif,
+                        struct grant_page *pages[],
+                        int num, bool ro)
 {
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-       struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct persistent_gnt *persistent_gnt = NULL;
-       struct xen_blkif *blkif = pending_req->blkif;
        phys_addr_t addr = 0;
-       int i, j;
-       bool new_map;
-       int nseg = req->u.rw.nr_segments;
+       int i, seg_idx, new_map_idx;
        int segs_to_map = 0;
        int ret = 0;
+       int last_map = 0, map_until = 0;
        int use_persistent_gnts;
 
        use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
-       BUG_ON(blkif->persistent_gnt_c >
-                  max_mapped_grant_pages(pending_req->blkif->blk_protocol));
-
        /*
         * Fill out preq.nr_sects with proper amount of sectors, and setup
         * assign map[..] with the PFN of the page in our domain with the
         * corresponding grant reference for each page.
         */
-       for (i = 0; i < nseg; i++) {
+again:
+       for (i = map_until; i < num; i++) {
                uint32_t flags;
 
                if (use_persistent_gnts)
                        persistent_gnt = get_persistent_gnt(
-                               &blkif->persistent_gnts,
-                               req->u.rw.seg[i].gref);
+                               blkif,
+                               pages[i]->gref);
 
                if (persistent_gnt) {
                        /*
                         * We are using persistent grants and
                         * the grant is already mapped
                         */
-                       new_map = false;
-               } else if (use_persistent_gnts &&
-                          blkif->persistent_gnt_c <
-                          max_mapped_grant_pages(blkif->blk_protocol)) {
-                       /*
-                        * We are using persistent grants, the grant is
-                        * not mapped but we have room for it
-                        */
-                       new_map = true;
-                       persistent_gnt = kmalloc(
-                               sizeof(struct persistent_gnt),
-                               GFP_KERNEL);
-                       if (!persistent_gnt)
-                               return -ENOMEM;
-                       if (alloc_xenballooned_pages(1, &persistent_gnt->page,
-                           false)) {
-                               kfree(persistent_gnt);
-                               return -ENOMEM;
-                       }
-                       persistent_gnt->gnt = req->u.rw.seg[i].gref;
-                       persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
-
-                       pages_to_gnt[segs_to_map] =
-                               persistent_gnt->page;
-                       addr = (unsigned long) pfn_to_kaddr(
-                               page_to_pfn(persistent_gnt->page));
-
-                       add_persistent_gnt(&blkif->persistent_gnts,
-                               persistent_gnt);
-                       blkif->persistent_gnt_c++;
-                       pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
-                                persistent_gnt->gnt, blkif->persistent_gnt_c,
-                                max_mapped_grant_pages(blkif->blk_protocol));
+                       pages[i]->page = persistent_gnt->page;
+                       pages[i]->persistent_gnt = persistent_gnt;
                } else {
-                       /*
-                        * We are either using persistent grants and
-                        * hit the maximum limit of grants mapped,
-                        * or we are not using persistent grants.
-                        */
-                       if (use_persistent_gnts &&
-                               !blkif->vbd.overflow_max_grants) {
-                               blkif->vbd.overflow_max_grants = 1;
-                               pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
-                                        blkif->domid, blkif->vbd.handle);
-                       }
-                       new_map = true;
-                       pages[i] = blkbk->pending_page(pending_req, i);
-                       addr = vaddr(pending_req, i);
-                       pages_to_gnt[segs_to_map] =
-                               blkbk->pending_page(pending_req, i);
-               }
-
-               if (persistent_gnt) {
-                       pages[i] = persistent_gnt->page;
-                       persistent_gnts[i] = persistent_gnt;
-               } else {
-                       persistent_gnts[i] = NULL;
-               }
-
-               if (new_map) {
+                       if (get_free_page(blkif, &pages[i]->page))
+                               goto out_of_memory;
+                       addr = vaddr(pages[i]->page);
+                       pages_to_gnt[segs_to_map] = pages[i]->page;
+                       pages[i]->persistent_gnt = NULL;
                        flags = GNTMAP_host_map;
-                       if (!persistent_gnt &&
-                           (pending_req->operation != BLKIF_OP_READ))
+                       if (!use_persistent_gnts && ro)
                                flags |= GNTMAP_readonly;
                        gnttab_set_map_op(&map[segs_to_map++], addr,
-                                         flags, req->u.rw.seg[i].gref,
+                                         flags, pages[i]->gref,
                                          blkif->domid);
                }
+               map_until = i + 1;
+               if (segs_to_map == BLKIF_MAX_SEGMENTS_PER_REQUEST)
+                       break;
        }
 
        if (segs_to_map) {
@@ -595,49 +749,133 @@ static int xen_blkbk_map(struct blkif_request *req,
         * so that when we access vaddr(pending_req,i) it has the contents of
         * the page from the other domain.
         */
-       bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-       for (i = 0, j = 0; i < nseg; i++) {
-               if (!persistent_gnts[i] ||
-                   persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+       for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) {
+               if (!pages[seg_idx]->persistent_gnt) {
                        /* This is a newly mapped grant */
-                       BUG_ON(j >= segs_to_map);
-                       if (unlikely(map[j].status != 0)) {
+                       BUG_ON(new_map_idx >= segs_to_map);
+                       if (unlikely(map[new_map_idx].status != 0)) {
                                pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-                               map[j].handle = BLKBACK_INVALID_HANDLE;
+                               pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
                                ret |= 1;
-                               if (persistent_gnts[i]) {
-                                       rb_erase(&persistent_gnts[i]->node,
-                                                &blkif->persistent_gnts);
-                                       blkif->persistent_gnt_c--;
-                                       kfree(persistent_gnts[i]);
-                                       persistent_gnts[i] = NULL;
-                               }
+                               goto next;
                        }
+                       pages[seg_idx]->handle = map[new_map_idx].handle;
+               } else {
+                       continue;
                }
-               if (persistent_gnts[i]) {
-                       if (persistent_gnts[i]->handle ==
-                           BLKBACK_INVALID_HANDLE) {
+               if (use_persistent_gnts &&
+                   blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+                       /*
+                        * We are using persistent grants; the grant is
+                        * not mapped but we might have room for it.
+                        */
+                       persistent_gnt = kmalloc(sizeof(struct persistent_gnt),
+                                                GFP_KERNEL);
+                       if (!persistent_gnt) {
                                /*
-                                * If this is a new persistent grant
-                                * save the handler
+                                * If we don't have enough memory to
+                                * allocate the persistent_gnt struct,
+                                * map this grant non-persistently.
                                 */
-                               persistent_gnts[i]->handle = map[j++].handle;
+                               goto next;
                        }
-                       pending_handle(pending_req, i) =
-                               persistent_gnts[i]->handle;
+                       persistent_gnt->gnt = map[new_map_idx].ref;
+                       persistent_gnt->handle = map[new_map_idx].handle;
+                       persistent_gnt->page = pages[seg_idx]->page;
+                       if (add_persistent_gnt(blkif,
+                                              persistent_gnt)) {
+                               kfree(persistent_gnt);
+                               persistent_gnt = NULL;
+                               goto next;
+                       }
+                       pages[seg_idx]->persistent_gnt = persistent_gnt;
+                       pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+                                persistent_gnt->gnt, blkif->persistent_gnt_c,
+                                xen_blkif_max_pgrants);
+                       goto next;
+               }
+               if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
+                       blkif->vbd.overflow_max_grants = 1;
+                       pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+                                blkif->domid, blkif->vbd.handle);
+               }
+               /*
+                * We could not map this grant persistently, so use it as
+                * a non-persistent grant.
+                */
+next:
+               new_map_idx++;
+       }
+       segs_to_map = 0;
+       last_map = map_until;
+       if (map_until != num)
+               goto again;
 
-                       if (ret)
-                               continue;
-               } else {
-                       pending_handle(pending_req, i) = map[j++].handle;
-                       bitmap_set(pending_req->unmap_seg, i, 1);
+       return ret;
+
+out_of_memory:
+       pr_alert(DRV_PFX "%s: out of memory\n", __func__);
+       put_free_pages(blkif, pages_to_gnt, segs_to_map);
+       return -ENOMEM;
+}
+
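Note that the scratch arrays in xen_blkbk_map() are still sized
BLKIF_MAX_SEGMENTS_PER_REQUEST, so the again/map_until loop maps large
requests in batches: one gnttab_map_refs() call per at most 11 newly mapped
grants. A hypothetical 64-segment indirect request with no persistent-grant
hits would therefore take ceil(64 / 11) = 6 passes; persistent-grant hits
skip the hypercall entirely.
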
+static int xen_blkbk_map_seg(struct pending_req *pending_req)
+{
+       int rc;
+
+       rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
+                          pending_req->nr_pages,
+                          (pending_req->operation != BLKIF_OP_READ));
+
+       return rc;
+}
 
-                       if (ret)
-                               continue;
+static int xen_blkbk_parse_indirect(struct blkif_request *req,
+                                   struct pending_req *pending_req,
+                                   struct seg_buf seg[],
+                                   struct phys_req *preq)
+{
+       struct grant_page **pages = pending_req->indirect_pages;
+       struct xen_blkif *blkif = pending_req->blkif;
+       int indirect_grefs, rc, n, nseg, i;
+       struct blkif_request_segment_aligned *segments = NULL;
+
+       nseg = pending_req->nr_pages;
+       indirect_grefs = INDIRECT_PAGES(nseg);
+       BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
+
+       for (i = 0; i < indirect_grefs; i++)
+               pages[i]->gref = req->u.indirect.indirect_grefs[i];
+
+       rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
+       if (rc)
+               goto unmap;
+
+       for (n = 0, i = 0; n < nseg; n++) {
+               if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
+                       /* Map indirect segments */
+                       if (segments)
+                               kunmap_atomic(segments);
+                       segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
+               }
+               i = n % SEGS_PER_INDIRECT_FRAME;
+               pending_req->segments[n]->gref = segments[i].gref;
+               seg[n].nsec = segments[i].last_sect -
+                       segments[i].first_sect + 1;
+               seg[n].offset = (segments[i].first_sect << 9);
+               if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) ||
+                   (segments[i].last_sect < segments[i].first_sect)) {
+                       rc = -EINVAL;
+                       goto unmap;
                }
-               seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+               preq->nr_sects += seg[n].nsec;
        }
-       return ret;
+
+unmap:
+       if (segments)
+               kunmap_atomic(segments);
+       xen_blkbk_unmap(blkif, pages, indirect_grefs);
+       return rc;
 }
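
For the parsing above, each indirect page holds SEGS_PER_INDIRECT_FRAME
descriptors; assuming struct blkif_request_segment_aligned is 8 bytes (gref
plus first_sect/last_sect and padding — an assumption, not spelled out in
this hunk), that is 4096 / 8 = 512 per page on x86, so any request with
nseg <= 512 needs a single indirect grant and INDIRECT_PAGES(nseg) grows by
one page per additional 512 segments.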
 
 static int dispatch_discard_io(struct xen_blkif *blkif,
@@ -647,7 +885,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
        int status = BLKIF_RSP_OKAY;
        struct block_device *bdev = blkif->vbd.bdev;
        unsigned long secure;
+       struct phys_req preq;
+
+       preq.sector_number = req->u.discard.sector_number;
+       preq.nr_sects      = req->u.discard.nr_sectors;
 
+       err = xen_vbd_translate(&preq, blkif, WRITE);
+       if (err) {
+               pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n",
+                       preq.sector_number,
+                       preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
+               goto fail_response;
+       }
        blkif->st_ds_req++;
 
        xen_blkif_get(blkif);
@@ -658,7 +907,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
        err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
                                   req->u.discard.nr_sectors,
                                   GFP_KERNEL, secure);
-
+fail_response:
        if (err == -EOPNOTSUPP) {
                pr_debug(DRV_PFX "discard op failed, not supported\n");
                status = BLKIF_RSP_EOPNOTSUPP;
@@ -674,7 +923,7 @@ static int dispatch_other_io(struct xen_blkif *blkif,
                             struct blkif_request *req,
                             struct pending_req *pending_req)
 {
-       free_req(pending_req);
+       free_req(blkif, pending_req);
        make_response(blkif, req->u.other.id, req->operation,
                      BLKIF_RSP_EOPNOTSUPP);
        return -EIO;
@@ -726,7 +975,9 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
         * the proper response on the ring.
         */
        if (atomic_dec_and_test(&pending_req->pendcnt)) {
-               xen_blkbk_unmap(pending_req);
+               xen_blkbk_unmap(pending_req->blkif,
+                               pending_req->segments,
+                               pending_req->nr_pages);
                make_response(pending_req->blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
                xen_blkif_put(pending_req->blkif);
@@ -734,7 +985,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
                        if (atomic_read(&pending_req->blkif->drain))
                                complete(&pending_req->blkif->drain_complete);
                }
-               free_req(pending_req);
+               free_req(pending_req->blkif, pending_req);
        }
 }
 
@@ -767,6 +1018,12 @@ __do_block_io_op(struct xen_blkif *blkif)
        rp = blk_rings->common.sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */
 
+       if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
+               rc = blk_rings->common.rsp_prod_pvt;
+               pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
+                       rp, rc, rp - rc, blkif->vbd.pdevice);
+               return -EACCES;
+       }
        while (rc != rp) {
 
                if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
@@ -777,7 +1034,7 @@ __do_block_io_op(struct xen_blkif *blkif)
                        break;
                }
 
-               pending_req = alloc_req();
+               pending_req = alloc_req(blkif);
                if (NULL == pending_req) {
                        blkif->st_oo_req++;
                        more_to_do = 1;
@@ -807,11 +1064,12 @@ __do_block_io_op(struct xen_blkif *blkif)
                case BLKIF_OP_WRITE:
                case BLKIF_OP_WRITE_BARRIER:
                case BLKIF_OP_FLUSH_DISKCACHE:
+               case BLKIF_OP_INDIRECT:
                        if (dispatch_rw_block_io(blkif, &req, pending_req))
                                goto done;
                        break;
                case BLKIF_OP_DISCARD:
-                       free_req(pending_req);
+                       free_req(blkif, pending_req);
                        if (dispatch_discard_io(blkif, &req))
                                goto done;
                        break;
@@ -853,17 +1111,28 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                                struct pending_req *pending_req)
 {
        struct phys_req preq;
-       struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct seg_buf *seg = pending_req->seg;
        unsigned int nseg;
        struct bio *bio = NULL;
-       struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct bio **biolist = pending_req->biolist;
        int i, nbio = 0;
        int operation;
        struct blk_plug plug;
        bool drain = false;
-       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct grant_page **pages = pending_req->segments;
+       unsigned short req_operation;
+
+       req_operation = req->operation == BLKIF_OP_INDIRECT ?
+                       req->u.indirect.indirect_op : req->operation;
+       if ((req->operation == BLKIF_OP_INDIRECT) &&
+           (req_operation != BLKIF_OP_READ) &&
+           (req_operation != BLKIF_OP_WRITE)) {
+               pr_debug(DRV_PFX "Invalid indirect operation (%u)\n",
+                        req_operation);
+               goto fail_response;
+       }
 
-       switch (req->operation) {
+       switch (req_operation) {
        case BLKIF_OP_READ:
                blkif->st_rd_req++;
                operation = READ;
@@ -885,33 +1154,47 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        }
 
        /* Check that the number of segments is sane. */
-       nseg = req->u.rw.nr_segments;
+       nseg = req->operation == BLKIF_OP_INDIRECT ?
+              req->u.indirect.nr_segments : req->u.rw.nr_segments;
 
        if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
-           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+           unlikely((req->operation != BLKIF_OP_INDIRECT) &&
+                    (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) ||
+           unlikely((req->operation == BLKIF_OP_INDIRECT) &&
+                    (nseg > MAX_INDIRECT_SEGMENTS))) {
                pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                         nseg);
                /* Haven't submitted any bio's yet. */
                goto fail_response;
        }
 
-       preq.sector_number = req->u.rw.sector_number;
        preq.nr_sects      = 0;
 
        pending_req->blkif     = blkif;
        pending_req->id        = req->u.rw.id;
-       pending_req->operation = req->operation;
+       pending_req->operation = req_operation;
        pending_req->status    = BLKIF_RSP_OKAY;
        pending_req->nr_pages  = nseg;
 
-       for (i = 0; i < nseg; i++) {
-               seg[i].nsec = req->u.rw.seg[i].last_sect -
-                       req->u.rw.seg[i].first_sect + 1;
-               if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
-                   (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
+       if (req->operation != BLKIF_OP_INDIRECT) {
+               preq.dev               = req->u.rw.handle;
+               preq.sector_number     = req->u.rw.sector_number;
+               for (i = 0; i < nseg; i++) {
+                       pages[i]->gref = req->u.rw.seg[i].gref;
+                       seg[i].nsec = req->u.rw.seg[i].last_sect -
+                               req->u.rw.seg[i].first_sect + 1;
+                       seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+                       if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+                           (req->u.rw.seg[i].last_sect <
+                            req->u.rw.seg[i].first_sect))
+                               goto fail_response;
+                       preq.nr_sects += seg[i].nsec;
+               }
+       } else {
+               preq.dev               = req->u.indirect.handle;
+               preq.sector_number     = req->u.indirect.sector_number;
+               if (xen_blkbk_parse_indirect(req, pending_req, seg, &preq))
                        goto fail_response;
-               preq.nr_sects += seg[i].nsec;
-
        }
 
        if (xen_vbd_translate(&preq, blkif, operation) != 0) {
@@ -948,7 +1231,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
         * the hypercall to unmap the grants - that is all done in
         * xen_blkbk_unmap.
         */
-       if (xen_blkbk_map(req, pending_req, seg, pages))
+       if (xen_blkbk_map_seg(pending_req))
                goto fail_flush;
 
        /*
@@ -960,11 +1243,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        for (i = 0; i < nseg; i++) {
                while ((bio == NULL) ||
                       (bio_add_page(bio,
-                                    pages[i],
+                                    pages[i]->page,
                                     seg[i].nsec << 9,
                                     seg[i].offset) == 0)) {
 
-                       bio = bio_alloc(GFP_KERNEL, nseg-i);
+                       int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
+                       bio = bio_alloc(GFP_KERNEL, nr_iovecs);
                        if (unlikely(bio == NULL))
                                goto fail_put_bio;
 
@@ -1009,11 +1293,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        return 0;
 
  fail_flush:
-       xen_blkbk_unmap(pending_req);
+       xen_blkbk_unmap(blkif, pending_req->segments,
+                       pending_req->nr_pages);
  fail_response:
        /* Haven't submitted any bio's yet. */
-       make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
-       free_req(pending_req);
+       make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
+       free_req(blkif, pending_req);
        msleep(1); /* back off a bit */
        return -EIO;
 
@@ -1070,73 +1355,20 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 
 static int __init xen_blkif_init(void)
 {
-       int i, mmap_pages;
        int rc = 0;
 
        if (!xen_domain())
                return -ENODEV;
 
-       blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
-       if (!blkbk) {
-               pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
-               return -ENOMEM;
-       }
-
-       mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-       blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
-                                       xen_blkif_reqs, GFP_KERNEL);
-       blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
-                                       mmap_pages, GFP_KERNEL);
-       blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
-                                       mmap_pages, GFP_KERNEL);
-
-       if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
-           !blkbk->pending_pages) {
-               rc = -ENOMEM;
-               goto out_of_memory;
-       }
-
-       for (i = 0; i < mmap_pages; i++) {
-               blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-               blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
-               if (blkbk->pending_pages[i] == NULL) {
-                       rc = -ENOMEM;
-                       goto out_of_memory;
-               }
-       }
        rc = xen_blkif_interface_init();
        if (rc)
                goto failed_init;
 
-       INIT_LIST_HEAD(&blkbk->pending_free);
-       spin_lock_init(&blkbk->pending_free_lock);
-       init_waitqueue_head(&blkbk->pending_free_wq);
-
-       for (i = 0; i < xen_blkif_reqs; i++)
-               list_add_tail(&blkbk->pending_reqs[i].free_list,
-                             &blkbk->pending_free);
-
        rc = xen_blkif_xenbus_init();
        if (rc)
                goto failed_init;
 
-       return 0;
-
- out_of_memory:
-       pr_alert(DRV_PFX "%s: out of memory\n", __func__);
  failed_init:
-       kfree(blkbk->pending_reqs);
-       kfree(blkbk->pending_grant_handles);
-       if (blkbk->pending_pages) {
-               for (i = 0; i < mmap_pages; i++) {
-                       if (blkbk->pending_pages[i])
-                               __free_page(blkbk->pending_pages[i]);
-               }
-               kfree(blkbk->pending_pages);
-       }
-       kfree(blkbk);
-       blkbk = NULL;
        return rc;
 }