]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
xen-blkfront: set pages as FOREIGN_FRAME when sharing them
author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Tue, 10 Apr 2012 16:25:19 +0000 (17:25 +0100)
committer: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tue, 17 Apr 2012 14:44:32 +0000 (10:44 -0400)
Set pages as FOREIGN_FRAME whenever blkfront shares them with another
domain. Then, when blkfront un-shares them, also remove the
FOREIGN_FRAME_BIT from the p2m.

We do it so that when the source and the destination domain are the same
(blkfront connected to a disk backend in the same domain) we can more easily
recognize which ones are the source pfns and which ones are the
destination pfns (both are going to be pointing to the same mfns).

Without this patch, establishing a connection between blkfront and QEMU
qdisk in the same domain causes QEMU to hang and never return.

The scenario where this is used is when a disk image in QCOW2 format is used
for extracting the kernel and initrd image. The QCOW2 image file cannot
be loopback-ed, so running 'pygrub' requires the weird scaffolding of:
 - setup QEMU and qdisk with the qcow2 image [disk backend]
 - setup xen-blkfront mounting said disk backend in the domain.
 - extract kernel and initrd
 - tear it down.

The MFNs shared by the frontend are going to back two
different sets of PFNs: the original PFNs allocated by the frontend and
the new ones allocated by gntdev for the backend.

The problem is that when Linux calls mfn_to_pfn, passing as argument
one of the MFNs shared by the frontend, we want to get the PFN returned by
m2p_find_override_pfn (that is, the PFN set up by gntdev), but we actually
get the original PFN allocated by the frontend because, given that
the frontend and the backend are in the same domain:

pfn = machine_to_phys_mapping[mfn];
mfn2 = get_phys_to_machine(pfn);

in this case mfn == mfn2.

One possible solution would be to always call m2p_find_override_pfn to
check whether we have an entry for a given MFN. However, that is not
very efficient or scalable.

The other option (that this patch is implementing) is to mark the pages
shared by the frontend as "foreign", so that mfn != mfn2.

It makes sense because, from the frontend's point of view, they are donated
to the backend, and while they are shared they are not supposed to be used
by the frontend. In a way, they don't belong to the frontend anymore, at
least temporarily.

[v3: only set_phys_to_machine if xen_pv_domain]
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
[v1: Redid description a bit]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
drivers/block/xen-blkfront.c

index 4e86393a09cf5c880917fd81d3e67e507a83c0f6..6cd6a08b49a058d7ae7e747967b832a3eb4d65cd 100644 (file)
@@ -260,7 +260,7 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
 static int blkif_queue_request(struct request *req)
 {
        struct blkfront_info *info = req->rq_disk->private_data;
-       unsigned long buffer_mfn;
+       unsigned long buffer_mfn, buffer_pfn;
        struct blkif_request *ring_req;
        unsigned long id;
        unsigned int fsect, lsect;
@@ -319,7 +319,8 @@ static int blkif_queue_request(struct request *req)
                       BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
                for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
-                       buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
+                       buffer_pfn = page_to_pfn(sg_page(sg));
+                       buffer_mfn = pfn_to_mfn(buffer_pfn);
                        fsect = sg->offset >> 9;
                        lsect = fsect + (sg->length >> 9) - 1;
                        /* install a grant reference. */
@@ -338,6 +339,17 @@ static int blkif_queue_request(struct request *req)
                                                .gref       = ref,
                                                .first_sect = fsect,
                                                .last_sect  = lsect };
+                       /* 
+                        * Set the page as foreign, considering that we are giving
+                        * it to a foreign domain.
+                        * This is important in case the destination domain is
+                        * ourselves, so that we can more easily recognize the
+                        * source pfn from destination pfn, both mapping to the same
+                        * mfn.
+                        */
+                       if (xen_pv_domain())
+                               set_phys_to_machine(buffer_pfn,
+                                               FOREIGN_FRAME(buffer_mfn));
                }
        }
 
@@ -713,8 +725,12 @@ static void blkif_completion(struct blk_shadow *s)
        int i;
        /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
         * flag. */
-       for (i = 0; i < s->req.u.rw.nr_segments; i++)
+       for (i = 0; i < s->req.u.rw.nr_segments; i++) {
                gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+               if (xen_pv_domain())
+                       set_phys_to_machine(s->frame[i],
+                                       get_phys_to_machine(s->frame[i]) & ~FOREIGN_FRAME_BIT);
+       }
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1050,13 +1066,20 @@ static int blkif_recover(struct blkfront_info *info)
                memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
 
                if (req->operation != BLKIF_OP_DISCARD) {
+                       unsigned long buffer_pfn;
+                       unsigned long buffer_mfn;
                /* Rewrite any grant references invalidated by susp/resume. */
-                       for (j = 0; j < req->u.rw.nr_segments; j++)
+                       for (j = 0; j < req->u.rw.nr_segments; j++) {
+                               buffer_pfn = info->shadow[req->u.rw.id].frame[j];
+                               buffer_mfn = pfn_to_mfn(buffer_pfn);
                                gnttab_grant_foreign_access_ref(
                                        req->u.rw.seg[j].gref,
                                        info->xbdev->otherend_id,
-                                       pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
+                                       buffer_mfn,
                                        rq_data_dir(info->shadow[req->u.rw.id].request));
+                               if (xen_pv_domain())
+                                       set_phys_to_machine(buffer_pfn, FOREIGN_FRAME(buffer_mfn));
+                       }
                }
                info->shadow[req->u.rw.id].req = *req;