Btrfs: fix barrier flushes

author Chris Mason <chris.mason@oracle.com>

Fri, 18 Nov 2011 20:07:51 +0000 (15:07 -0500)

committer Chris Mason <chris.mason@oracle.com>

Sun, 20 Nov 2011 12:21:14 +0000 (07:21 -0500)
author Chris Mason <chris.mason@oracle.com>
Fri, 18 Nov 2011 20:07:51 +0000 (15:07 -0500)
committer Chris Mason <chris.mason@oracle.com>
Sun, 20 Nov 2011 12:21:14 +0000 (07:21 -0500)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index b6a5c0dd0dd8eac7e3e05731773c39771aa9e87a..48d30138237fe7d62add429a9f4ecdaff4ee123e 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2573,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device,
         int errors = 0;
         u32 crc;
         u64 bytenr;
-       int last_barrier = 0;
  
         if (max_mirrors == 0)
                 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
  
-       /* make sure only the last submit_bh does a barrier */
-       if (do_barriers) {
-               for (i = 0; i < max_mirrors; i++) {
-                       bytenr = btrfs_sb_offset(i);
-                       if (bytenr + BTRFS_SUPER_INFO_SIZE >=
-                           device->total_bytes)
-                               break;
-                       last_barrier = i;
-               }
-       }
-
         for (i = 0; i < max_mirrors; i++) {
                 bytenr = btrfs_sb_offset(i);
                 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
@@ -2634,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device,
                         bh->b_end_io = btrfs_end_buffer_write_sync;
                 }
  
-               if (i == last_barrier && do_barriers)
-                       ret = submit_bh(WRITE_FLUSH_FUA, bh);
-               else
-                       ret = submit_bh(WRITE_SYNC, bh);
-
+               /*
+                * we fua the first super.  The others we allow
+                * to go down lazy.
+                */
+               ret = submit_bh(WRITE_FUA, bh);
                 if (ret)
                         errors++;
         }
         return errors < i ? 0 : -1;
  }
  
+/*
+ * Completion handler for the empty flush bio sent by write_dev_flush.
+ * Records a failure in the bio flags, then wakes the waiter (if any).
+ */
+static void btrfs_end_empty_barrier(struct bio *bio, int err)
+{
+       if (err == -EOPNOTSUPP)
+               set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+       if (err)
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+       if (bio->bi_private)
+               complete(bio->bi_private);
+       /* the waiter holds its own reference (bio_get in write_dev_flush) */
+       bio_put(bio);
+}
+
+/*
+ * Send an empty flush to one device (wait == 0), or wait for the flush sent
+ * by the previous wait == 0 call and collect its result (wait == 1).
+ *
+ * Returns 0 on success or when there is nothing to do (device->nobarriers is
+ * set, or no flush is pending), -ENOMEM if the bio cannot be allocated and
+ * -EIO if the flush bio came back without BIO_UPTODATE.  A device whose
+ * flush fails with eopnotsupp is flagged as not barrier capable.
+ */
+static int write_dev_flush(struct btrfs_device *device, int wait)
+{
+       struct bio *bio;
+       int ret = 0;
+
+       if (device->nobarriers)
+               return 0;
+
+       if (wait) {
+               bio = device->flush_bio;
+               if (!bio)
+                       return 0;
+
+               wait_for_completion(&device->flush_wait);
+
+               if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
+                       printk(KERN_WARNING
+                              "btrfs: disabling barriers on dev %s\n",
+                              device->name);
+                       device->nobarriers = 1;
+               }
+               if (!bio_flagged(bio, BIO_UPTODATE))
+                       ret = -EIO;
+
+               /* drop the reference from the wait == 0 run */
+               bio_put(bio);
+               device->flush_bio = NULL;
+
+               return ret;
+       }
+
+       /* nothing is pending until the new flush is actually set up */
+       device->flush_bio = NULL;
+       bio = bio_alloc(GFP_NOFS, 0);
+       if (!bio)
+               return -ENOMEM;
+
+       bio->bi_end_io = btrfs_end_empty_barrier;
+       bio->bi_bdev = device->bdev;
+       init_completion(&device->flush_wait);
+       bio->bi_private = &device->flush_wait;
+       device->flush_bio = bio;
+
+       /* extra reference for us, dropped by the wait == 1 run */
+       bio_get(bio);
+       submit_bio(WRITE_FLUSH, bio);
+
+       return 0;
+}
+
+/*
+ * Flush every device in two passes: first submit an empty flush to each
+ * device so they all run in parallel, then wait for each one to finish.
+ *
+ * A device without a bdev is counted as an error in both passes; devices
+ * that are not in_fs_metadata or not writeable are skipped.  Returns -EIO
+ * if any error was counted, 0 otherwise.
+ */
+static int barrier_all_devices(struct btrfs_fs_info *info)
+{
+       struct list_head *devices = &info->fs_devices->devices;
+       struct btrfs_device *device;
+       int failed = 0;
+
+       /* pass one: send down all the barriers */
+       list_for_each_entry_rcu(device, devices, dev_list) {
+               if (!device->bdev) {
+                       failed++;
+                       continue;
+               }
+
+               /* only writeable devices that are in_fs_metadata get one */
+               if (device->in_fs_metadata && device->writeable &&
+                   write_dev_flush(device, 0))
+                       failed++;
+       }
+
+       /* pass two: wait for all the barriers */
+       list_for_each_entry_rcu(device, devices, dev_list) {
+               if (!device->bdev) {
+                       failed++;
+                       continue;
+               }
+
+               /* same filter as pass one */
+               if (device->in_fs_metadata && device->writeable &&
+                   write_dev_flush(device, 1))
+                       failed++;
+       }
+
+       /* individual error codes are not kept, any failure becomes -EIO */
+       return failed ? -EIO : 0;
+}
+
  int write_all_supers(struct btrfs_root *root, int max_mirrors)
  {
         struct list_head *head;
@@ -2666,6 +2773,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
  
         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
         head = &root->fs_info->fs_devices->devices;
+
+       if (do_barriers)
+               barrier_all_devices(root->fs_info);
+
         list_for_each_entry_rcu(dev, head, dev_list) {
                 if (!dev->bdev) {
                         total_errors++;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h

index ab5b1c49f3529e9e7e112649b98f22d0a923fb35..78f2d4d4f37fe81317395688a8b090b71e53a612 100644 (file)
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -100,6 +100,12 @@ struct btrfs_device {
         struct reada_zone *reada_curr_zone;
         struct radix_tree_root reada_zones;
         struct radix_tree_root reada_extents;
+
+       /* for sending down flush barriers */
+       struct bio *flush_bio;
+       struct completion flush_wait;
+       int nobarriers;
+
  };
  
  struct btrfs_fs_devices {
author	Chris Mason <chris.mason@oracle.com>
	Fri, 18 Nov 2011 20:07:51 +0000 (15:07 -0500)
committer	Chris Mason <chris.mason@oracle.com>
	Sun, 20 Nov 2011 12:21:14 +0000 (07:21 -0500)
fs/btrfs/disk-io.c		patch \| blob \| history
fs/btrfs/volumes.h		patch \| blob \| history