git.karo-electronics.de Git - mv-sheeva.git/commitdiff
Merge branch 'cleanup-bd_claim' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Jens Axboe <jaxboe@fusionio.com>
Sat, 27 Nov 2010 18:49:18 +0000 (19:49 +0100)
committer Jens Axboe <jaxboe@fusionio.com>
Sat, 27 Nov 2010 18:49:18 +0000 (19:49 +0100)
1  2 
block/ioctl.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/md/md.c
fs/partitions/check.c
fs/xfs/linux-2.6/xfs_super.c

diff --combined block/ioctl.c
index 3d866d0037f240c8d636f8b8523df962e72ab123,cc46d499fd27cf49df2f4b2f2f407be3cda14c49..fefa9a4967083208ff880d5a6b31139042661827
@@@ -125,7 -125,7 +125,7 @@@ static int blk_ioctl_discard(struct blo
        start >>= 9;
        len >>= 9;
  
 -      if (start + len > (bdev->bd_inode->i_size >> 9))
 +      if (start + len > (i_size_read(bdev->bd_inode) >> 9))
                return -EINVAL;
        if (secure)
                flags |= BLKDEV_DISCARD_SECURE;
@@@ -242,7 -242,6 +242,7 @@@ int blkdev_ioctl(struct block_device *b
                 * We need to set the startsect first, the driver may
                 * want to override it.
                 */
 +              memset(&geo, 0, sizeof(geo));
                geo.start = get_start_sect(bdev);
                ret = disk->fops->getgeo(bdev, &geo);
                if (ret)
                        return -EINVAL;
                if (get_user(n, (int __user *) arg))
                        return -EFAULT;
-               if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0)
+               if (!(mode & FMODE_EXCL) &&
+                   blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
                        return -EBUSY;
                ret = set_blocksize(bdev, n);
                if (!(mode & FMODE_EXCL))
-                       bd_release(bdev);
+                       blkdev_put(bdev, mode | FMODE_EXCL);
                return ret;
        case BLKPG:
                ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
                ret = blkdev_reread_part(bdev);
                break;
        case BLKGETSIZE:
 -              size = bdev->bd_inode->i_size;
 +              size = i_size_read(bdev->bd_inode);
                if ((size >> 9) > ~0UL)
                        return -EFBIG;
                return put_ulong(arg, size >> 9);
        case BLKGETSIZE64:
 -              return put_u64(arg, bdev->bd_inode->i_size);
 +              return put_u64(arg, i_size_read(bdev->bd_inode));
        case BLKTRACESTART:
        case BLKTRACESTOP:
        case BLKTRACESETUP:
diff --combined drivers/block/drbd/drbd_int.h
index 1ea1a34e78b281d8ff8658028b38e2f423716917,0590b9f67ec692b888908b09e2a342f74f6c6781..3803a03489372911290cd539988da5e779ac85d2
@@@ -114,11 -114,11 +114,11 @@@ struct drbd_conf
  #define D_ASSERT(exp) if (!(exp)) \
         dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
  
 -#define ERR_IF(exp) if (({                            \
 -      int _b = (exp) != 0;                            \
 -      if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n",     \
 -              __func__, #exp, __FILE__, __LINE__);    \
 -       _b;                                            \
 +#define ERR_IF(exp) if (({                                            \
 +      int _b = (exp) != 0;                                            \
 +      if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n",      \
 +                      __func__, #exp, __FILE__, __LINE__);            \
 +      _b;                                                             \
        }))
  
  /* Defines to control fault insertion */
@@@ -749,12 -749,17 +749,12 @@@ struct drbd_epoch 
  
  /* drbd_epoch flag bits */
  enum {
 -      DE_BARRIER_IN_NEXT_EPOCH_ISSUED,
 -      DE_BARRIER_IN_NEXT_EPOCH_DONE,
 -      DE_CONTAINS_A_BARRIER,
        DE_HAVE_BARRIER_NUMBER,
 -      DE_IS_FINISHING,
  };
  
  enum epoch_event {
        EV_PUT,
        EV_GOT_BARRIER_NR,
 -      EV_BARRIER_DONE,
        EV_BECAME_LAST,
        EV_CLEANUP = 32, /* used as flag */
  };
@@@ -796,6 -801,11 +796,6 @@@ enum 
        __EE_CALL_AL_COMPLETE_IO,
        __EE_MAY_SET_IN_SYNC,
  
 -      /* This epoch entry closes an epoch using a barrier.
 -       * On sucessful completion, the epoch is released,
 -       * and the P_BARRIER_ACK send. */
 -      __EE_IS_BARRIER,
 -
        /* In case a barrier failed,
         * we need to resubmit without the barrier flag. */
        __EE_RESUBMITTED,
  };
  #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
  #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
 -#define EE_IS_BARRIER          (1<<__EE_IS_BARRIER)
  #define       EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
  #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
  #define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
@@@ -832,15 -843,16 +832,15 @@@ enum 
                                 * Gets cleared when the state.conn
                                 * goes into C_CONNECTED state. */
        WRITE_BM_AFTER_RESYNC,  /* A kmalloc() during resync failed */
 -      NO_BARRIER_SUPP,        /* underlying block device doesn't implement barriers */
        CONSIDER_RESYNC,
  
 -      MD_NO_BARRIER,          /* meta data device does not support barriers,
 -                                 so don't even try */
 +      MD_NO_FUA,              /* User wants us to not use FUA/FLUSH on meta data dev */
        SUSPEND_IO,             /* suspend application io */
        BITMAP_IO,              /* suspend application io;
                                   once no more io in flight, start bitmap io */
        BITMAP_IO_QUEUED,       /* Started bitmap IO */
 -      GO_DISKLESS,            /* Disk failed, local_cnt reached zero, we are going diskless */
 +      GO_DISKLESS,            /* Disk is being detached, on io-error or admin request. */
 +      WAS_IO_ERROR,           /* Local disk failed returned IO error */
        RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
        NET_CONGESTED,          /* The data socket is congested */
  
@@@ -911,8 -923,6 +911,6 @@@ struct drbd_md 
  struct drbd_backing_dev {
        struct block_device *backing_bdev;
        struct block_device *md_bdev;
-       struct file *lo_file;
-       struct file *md_file;
        struct drbd_md md;
        struct disk_conf dc; /* The user provided config... */
        sector_t known_size; /* last known size of that backing device */
@@@ -935,6 -945,7 +933,6 @@@ enum write_ordering_e 
        WO_none,
        WO_drain_io,
        WO_bdev_flush,
 -      WO_bio_barrier
  };
  
  struct fifo_buffer {
@@@ -1268,7 -1279,6 +1266,7 @@@ extern int drbd_bmio_set_n_write(struc
  extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
  extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
  extern void drbd_go_diskless(struct drbd_conf *mdev);
 +extern void drbd_ldev_destroy(struct drbd_conf *mdev);
  
  
  /* Meta data layout
@@@ -1786,17 -1796,17 +1784,17 @@@ static inline void __drbd_chk_io_error_
        case EP_PASS_ON:
                if (!forcedetach) {
                        if (__ratelimit(&drbd_ratelimit_state))
 -                              dev_err(DEV, "Local IO failed in %s."
 -                                           "Passing error on...\n", where);
 +                              dev_err(DEV, "Local IO failed in %s.\n", where);
                        break;
                }
                /* NOTE fall through to detach case if forcedetach set */
        case EP_DETACH:
        case EP_CALL_HELPER:
 +              set_bit(WAS_IO_ERROR, &mdev->flags);
                if (mdev->state.disk > D_FAILED) {
                        _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
 -                      dev_err(DEV, "Local IO failed in %s."
 -                                   "Detaching...\n", where);
 +                      dev_err(DEV,
 +                              "Local IO failed in %s. Detaching...\n", where);
                }
                break;
        }
@@@ -1862,7 -1872,7 +1860,7 @@@ static inline sector_t drbd_md_last_sec
  static inline sector_t drbd_get_capacity(struct block_device *bdev)
  {
        /* return bdev ? get_capacity(bdev->bd_disk) : 0; */
 -      return bdev ? bdev->bd_inode->i_size >> 9 : 0;
 +      return bdev ? i_size_read(bdev->bd_inode) >> 9 : 0;
  }
  
  /**
@@@ -2115,11 -2125,7 +2113,11 @@@ static inline void put_ldev(struct drbd
        __release(local);
        D_ASSERT(i >= 0);
        if (i == 0) {
 +              if (mdev->state.disk == D_DISKLESS)
 +                      /* even internal references gone, safe to destroy */
 +                      drbd_ldev_destroy(mdev);
                if (mdev->state.disk == D_FAILED)
 +                      /* all application IO references gone. */
                        drbd_go_diskless(mdev);
                wake_up(&mdev->misc_wait);
        }
@@@ -2130,10 -2136,6 +2128,10 @@@ static inline int _get_ldev_if_state(st
  {
        int io_allowed;
  
 +      /* never get a reference while D_DISKLESS */
 +      if (mdev->state.disk == D_DISKLESS)
 +              return 0;
 +
        atomic_inc(&mdev->local_cnt);
        io_allowed = (mdev->state.disk >= mins);
        if (!io_allowed)
@@@ -2402,12 -2404,12 +2400,12 @@@ static inline void drbd_md_flush(struc
  {
        int r;
  
 -      if (test_bit(MD_NO_BARRIER, &mdev->flags))
 +      if (test_bit(MD_NO_FUA, &mdev->flags))
                return;
  
        r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL);
        if (r) {
 -              set_bit(MD_NO_BARRIER, &mdev->flags);
 +              set_bit(MD_NO_FUA, &mdev->flags);
                dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
        }
  }
diff --combined drivers/block/drbd/drbd_main.c
index 6be5401d0e88fd193b1af75c303c8140d4f6ccd3,7ec1a82064a9a089d7fc6212eabae0e051107100..29cd0dc9fe4f8c9fe6731aaaf6f85220b7bf5db0
@@@ -835,15 -835,6 +835,15 @@@ static union drbd_state sanitize_state(
            ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
                ns.conn = os.conn;
  
 +      /* we cannot fail (again) if we already detached */
 +      if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
 +              ns.disk = D_DISKLESS;
 +
 +      /* if we are only D_ATTACHING yet,
 +       * we can (and should) go directly to D_DISKLESS. */
 +      if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
 +              ns.disk = D_DISKLESS;
 +
        /* After C_DISCONNECTING only C_STANDALONE may follow */
        if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
                ns.conn = os.conn;
@@@ -1065,15 -1056,7 +1065,15 @@@ int __drbd_set_state(struct drbd_conf *
            !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
                set_bit(DEVICE_DYING, &mdev->flags);
  
 -      mdev->state.i = ns.i;
 +      /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
 +       * on the ldev here, to be sure the transition -> D_DISKLESS resp.
 +       * drbd_ldev_destroy() won't happen before our corresponding
 +       * after_state_ch works run, where we put_ldev again. */
 +      if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
 +          (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
 +              atomic_inc(&mdev->local_cnt);
 +
 +      mdev->state = ns;
        wake_up(&mdev->misc_wait);
        wake_up(&mdev->state_wait);
  
@@@ -1285,6 -1268,7 +1285,6 @@@ static void after_state_ch(struct drbd_
                        if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
                                drbd_uuid_new_current(mdev);
                                clear_bit(NEW_CUR_UUID, &mdev->flags);
 -                              drbd_md_sync(mdev);
                        }
                        spin_lock_irq(&mdev->req_lock);
                        _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
            os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
                drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
  
 -      /* first half of local IO error */
 -      if (os.disk > D_FAILED && ns.disk == D_FAILED) {
 -              enum drbd_io_error_p eh = EP_PASS_ON;
 +      /* first half of local IO error, failure to attach,
 +       * or administrative detach */
 +      if (os.disk != D_FAILED && ns.disk == D_FAILED) {
 +              enum drbd_io_error_p eh;
 +              int was_io_error;
 +              /* corresponding get_ldev was in __drbd_set_state, to serialize
 +               * our cleanup here with the transition to D_DISKLESS,
 +               * so it is safe to dereference ldev here. */
 +              eh = mdev->ldev->dc.on_io_error;
 +              was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
 +
 +              /* current state still has to be D_FAILED,
 +               * there is only one way out: to D_DISKLESS,
 +               * and that may only happen after our put_ldev below. */
 +              if (mdev->state.disk != D_FAILED)
 +                      dev_err(DEV,
 +                              "ASSERT FAILED: disk is %s during detach\n",
 +                              drbd_disk_str(mdev->state.disk));
  
                if (drbd_send_state(mdev))
 -                      dev_warn(DEV, "Notified peer that my disk is broken.\n");
 +                      dev_warn(DEV, "Notified peer that I am detaching my disk\n");
                else
 -                      dev_err(DEV, "Sending state for drbd_io_error() failed\n");
 +                      dev_err(DEV, "Sending state for detaching disk failed\n");
  
                drbd_rs_cancel_all(mdev);
  
 -              if (get_ldev_if_state(mdev, D_FAILED)) {
 -                      eh = mdev->ldev->dc.on_io_error;
 -                      put_ldev(mdev);
 -              }
 -              if (eh == EP_CALL_HELPER)
 +              /* In case we want to get something to stable storage still,
 +               * this may be the last chance.
 +               * Following put_ldev may transition to D_DISKLESS. */
 +              drbd_md_sync(mdev);
 +              put_ldev(mdev);
 +
 +              if (was_io_error && eh == EP_CALL_HELPER)
                        drbd_khelper(mdev, "local-io-error");
        }
  
 +        /* second half of local IO error, failure to attach,
 +         * or administrative detach,
 +         * after local_cnt references have reached zero again */
 +        if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
 +                /* We must still be diskless,
 +                 * re-attach has to be serialized with this! */
 +                if (mdev->state.disk != D_DISKLESS)
 +                        dev_err(DEV,
 +                                "ASSERT FAILED: disk is %s while going diskless\n",
 +                                drbd_disk_str(mdev->state.disk));
  
 -      /* second half of local IO error handling,
 -       * after local_cnt references have reached zero: */
 -      if (os.disk == D_FAILED && ns.disk == D_DISKLESS) {
 -              mdev->rs_total = 0;
 -              mdev->rs_failed = 0;
 -              atomic_set(&mdev->rs_pending_cnt, 0);
 -      }
 -
 -      if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
 -              /* We must still be diskless,
 -               * re-attach has to be serialized with this! */
 -              if (mdev->state.disk != D_DISKLESS)
 -                      dev_err(DEV,
 -                              "ASSERT FAILED: disk is %s while going diskless\n",
 -                              drbd_disk_str(mdev->state.disk));
 +                mdev->rs_total = 0;
 +                mdev->rs_failed = 0;
 +                atomic_set(&mdev->rs_pending_cnt, 0);
  
 -              /* we cannot assert local_cnt == 0 here, as get_ldev_if_state
 -               * will inc/dec it frequently. Since we became D_DISKLESS, no
 -               * one has touched the protected members anymore, though, so we
 -               * are safe to free them here. */
                if (drbd_send_state(mdev))
 -                      dev_warn(DEV, "Notified peer that I detached my disk.\n");
 +                      dev_warn(DEV, "Notified peer that I'm now diskless.\n");
                else
 -                      dev_err(DEV, "Sending state for detach failed\n");
 -
 -              lc_destroy(mdev->resync);
 -              mdev->resync = NULL;
 -              lc_destroy(mdev->act_log);
 -              mdev->act_log = NULL;
 -              __no_warn(local,
 -                      drbd_free_bc(mdev->ldev);
 -                      mdev->ldev = NULL;);
 -
 -              if (mdev->md_io_tmpp) {
 -                      __free_page(mdev->md_io_tmpp);
 -                      mdev->md_io_tmpp = NULL;
 -              }
 +                      dev_err(DEV, "Sending state for being diskless failed\n");
 +              /* corresponding get_ldev in __drbd_set_state
 +               * this may finally trigger drbd_ldev_destroy. */
 +              put_ldev(mdev);
        }
  
        /* Disks got bigger while they were detached */
@@@ -2789,6 -2772,11 +2789,6 @@@ void drbd_init_set_defaults(struct drbd
  
        drbd_set_defaults(mdev);
  
 -      /* for now, we do NOT yet support it,
 -       * even though we start some framework
 -       * to eventually support barriers */
 -      set_bit(NO_BARRIER_SUPP, &mdev->flags);
 -
        atomic_set(&mdev->ap_bio_cnt, 0);
        atomic_set(&mdev->ap_pending_cnt, 0);
        atomic_set(&mdev->rs_pending_cnt, 0);
        drbd_thread_init(mdev, &mdev->asender, drbd_asender);
  
        mdev->agreed_pro_version = PRO_VERSION_MAX;
 -      mdev->write_ordering = WO_bio_barrier;
 +      mdev->write_ordering = WO_bdev_flush;
        mdev->resync_wenr = LC_FREE;
  }
  
@@@ -2911,6 -2899,7 +2911,6 @@@ void drbd_mdev_cleanup(struct drbd_con
        D_ASSERT(list_empty(&mdev->resync_work.list));
        D_ASSERT(list_empty(&mdev->unplug_work.list));
        D_ASSERT(list_empty(&mdev->go_diskless.list));
 -
  }
  
  
@@@ -3372,11 -3361,8 +3372,8 @@@ void drbd_free_bc(struct drbd_backing_d
        if (ldev == NULL)
                return;
  
-       bd_release(ldev->backing_bdev);
-       bd_release(ldev->md_bdev);
-       fput(ldev->lo_file);
-       fput(ldev->md_file);
+       blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+       blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
  
        kfree(ldev);
  }
@@@ -3671,8 -3657,6 +3668,8 @@@ void drbd_uuid_new_current(struct drbd_
  
        get_random_bytes(&val, sizeof(u64));
        _drbd_uuid_set(mdev, UI_CURRENT, val);
 +      /* get it to stable storage _now_ */
 +      drbd_md_sync(mdev);
  }
  
  void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
@@@ -3769,31 -3753,19 +3766,31 @@@ static int w_bitmap_io(struct drbd_con
        return 1;
  }
  
 +void drbd_ldev_destroy(struct drbd_conf *mdev)
 +{
 +      lc_destroy(mdev->resync);
 +      mdev->resync = NULL;
 +      lc_destroy(mdev->act_log);
 +      mdev->act_log = NULL;
 +      __no_warn(local,
 +              drbd_free_bc(mdev->ldev);
 +              mdev->ldev = NULL;);
 +
 +      if (mdev->md_io_tmpp) {
 +              __free_page(mdev->md_io_tmpp);
 +              mdev->md_io_tmpp = NULL;
 +      }
 +      clear_bit(GO_DISKLESS, &mdev->flags);
 +}
 +
  static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
  {
        D_ASSERT(mdev->state.disk == D_FAILED);
        /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
         * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
 -       * the protected members anymore, though, so in the after_state_ch work
 -       * it will be safe to free them. */
 +       * the protected members anymore, though, so once put_ldev reaches zero
 +       * again, it will be safe to free them. */
        drbd_force_state(mdev, NS(disk, D_DISKLESS));
 -      /* We need to wait for return of references checked out while we still
 -       * have been D_FAILED, though (drbd_md_sync, bitmap io). */
 -      wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
 -
 -      clear_bit(GO_DISKLESS, &mdev->flags);
        return 1;
  }
  
@@@ -3802,6 -3774,9 +3799,6 @@@ void drbd_go_diskless(struct drbd_conf 
        D_ASSERT(mdev->state.disk == D_FAILED);
        if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
                drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
 -              /* don't drbd_queue_work_front,
 -               * we need to serialize with the after_state_ch work
 -               * of the -> D_FAILED transition. */
  }
  
  /**
diff --combined drivers/block/drbd/drbd_nl.c
index 29e5c70e4e26c7f6e35b8bb1bec0b7f65442f3a0,650e43ba4f7c8d655cb5d776027fa9415545160f..8cbfaa687d723152b27955cfbc4a43c1af858121
@@@ -855,7 -855,7 +855,7 @@@ static int drbd_nl_disk_conf(struct drb
        sector_t max_possible_sectors;
        sector_t min_md_device_sectors;
        struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
-       struct inode *inode, *inode2;
+       struct block_device *bdev;
        struct lru_cache *resync_lru = NULL;
        union drbd_state ns, os;
        unsigned int max_seg_s;
                retcode = ERR_DISK_CONFIGURED;
                goto fail;
        }
 +      /* It may just now have detached because of IO error.  Make sure
 +       * drbd_ldev_destroy is done already, we may end up here very fast,
 +       * e.g. if someone calls attach from the on-io-error handler,
 +       * to realize a "hot spare" feature (not that I'd recommend that) */
 +      wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
  
        /* allocation not in the IO path, cqueue thread context */
        nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
                }
        }
  
-       nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
-       if (IS_ERR(nbc->lo_file)) {
+       bdev = blkdev_get_by_path(nbc->dc.backing_dev,
+                                 FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
+       if (IS_ERR(bdev)) {
                dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
-                   PTR_ERR(nbc->lo_file));
-               nbc->lo_file = NULL;
+                       PTR_ERR(bdev));
                retcode = ERR_OPEN_DISK;
                goto fail;
        }
-       inode = nbc->lo_file->f_dentry->d_inode;
-       if (!S_ISBLK(inode->i_mode)) {
-               retcode = ERR_DISK_NOT_BDEV;
-               goto fail;
-       }
-       nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0);
-       if (IS_ERR(nbc->md_file)) {
+       nbc->backing_bdev = bdev;
+       /*
+        * meta_dev_idx >= 0: external fixed size, possibly multiple
+        * drbd sharing one meta device.  TODO in that case, paranoia
+        * check that [md_bdev, meta_dev_idx] is not yet used by some
+        * other drbd minor!  (if you use drbd.conf + drbdadm, that
+        * should check it for you already; but if you don't, or
+        * someone fooled it, we need to double check here)
+        */
+       bdev = blkdev_get_by_path(nbc->dc.meta_dev,
+                                 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+                                 (nbc->dc.meta_dev_idx < 0) ?
+                                 (void *)mdev : (void *)drbd_m_holder);
+       if (IS_ERR(bdev)) {
                dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
-                   PTR_ERR(nbc->md_file));
-               nbc->md_file = NULL;
+                       PTR_ERR(bdev));
                retcode = ERR_OPEN_MD_DISK;
                goto fail;
        }
+       nbc->md_bdev = bdev;
  
-       inode2 = nbc->md_file->f_dentry->d_inode;
-       if (!S_ISBLK(inode2->i_mode)) {
-               retcode = ERR_MD_NOT_BDEV;
-               goto fail;
-       }
-       nbc->backing_bdev = inode->i_bdev;
-       if (bd_claim(nbc->backing_bdev, mdev)) {
-               printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
-                      nbc->backing_bdev, mdev,
-                      nbc->backing_bdev->bd_holder,
-                      nbc->backing_bdev->bd_contains->bd_holder,
-                      nbc->backing_bdev->bd_holders);
-               retcode = ERR_BDCLAIM_DISK;
+       if ((nbc->backing_bdev == nbc->md_bdev) !=
+           (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+            nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
+               retcode = ERR_MD_IDX_INVALID;
                goto fail;
        }
  
                        offsetof(struct bm_extent, lce));
        if (!resync_lru) {
                retcode = ERR_NOMEM;
-               goto release_bdev_fail;
-       }
-       /* meta_dev_idx >= 0: external fixed size,
-        * possibly multiple drbd sharing one meta device.
-        * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
-        * not yet used by some other drbd minor!
-        * (if you use drbd.conf + drbdadm,
-        * that should check it for you already; but if you don't, or someone
-        * fooled it, we need to double check here) */
-       nbc->md_bdev = inode2->i_bdev;
-       if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
-                               : (void *) drbd_m_holder)) {
-               retcode = ERR_BDCLAIM_MD_DISK;
-               goto release_bdev_fail;
-       }
-       if ((nbc->backing_bdev == nbc->md_bdev) !=
-           (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
-            nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
-               retcode = ERR_MD_IDX_INVALID;
-               goto release_bdev2_fail;
+               goto fail;
        }
  
        /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
                        (unsigned long long) drbd_get_max_capacity(nbc),
                        (unsigned long long) nbc->dc.disk_size);
                retcode = ERR_DISK_TO_SMALL;
-               goto release_bdev2_fail;
+               goto fail;
        }
  
        if (nbc->dc.meta_dev_idx < 0) {
                dev_warn(DEV, "refusing attach: md-device too small, "
                     "at least %llu sectors needed for this meta-disk type\n",
                     (unsigned long long) min_md_device_sectors);
-               goto release_bdev2_fail;
+               goto fail;
        }
  
        /* Make sure the new disk is big enough
        if (drbd_get_max_capacity(nbc) <
            drbd_get_capacity(mdev->this_bdev)) {
                retcode = ERR_DISK_TO_SMALL;
-               goto release_bdev2_fail;
+               goto fail;
        }
  
        nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
        retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
        drbd_resume_io(mdev);
        if (retcode < SS_SUCCESS)
-               goto release_bdev2_fail;
+               goto fail;
  
        if (!get_ldev_if_state(mdev, D_ATTACHING))
                goto force_diskless;
        /* Reset the "barriers don't work" bits here, then force meta data to
         * be written, to ensure we determine if barriers are supported. */
        if (nbc->dc.no_md_flush)
 -              set_bit(MD_NO_BARRIER, &mdev->flags);
 +              set_bit(MD_NO_FUA, &mdev->flags);
        else
 -              clear_bit(MD_NO_BARRIER, &mdev->flags);
 +              clear_bit(MD_NO_FUA, &mdev->flags);
  
        /* Point of no return reached.
         * Devices and memory are no longer released by error cleanup below.
        nbc = NULL;
        resync_lru = NULL;
  
 -      mdev->write_ordering = WO_bio_barrier;
 -      drbd_bump_write_ordering(mdev, WO_bio_barrier);
 +      mdev->write_ordering = WO_bdev_flush;
 +      drbd_bump_write_ordering(mdev, WO_bdev_flush);
  
        if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
                set_bit(CRASHED_PRIMARY, &mdev->flags);
   force_diskless_dec:
        put_ldev(mdev);
   force_diskless:
 -      drbd_force_state(mdev, NS(disk, D_DISKLESS));
 +      drbd_force_state(mdev, NS(disk, D_FAILED));
        drbd_md_sync(mdev);
-  release_bdev2_fail:
-       if (nbc)
-               bd_release(nbc->md_bdev);
-  release_bdev_fail:
-       if (nbc)
-               bd_release(nbc->backing_bdev);
   fail:
        if (nbc) {
-               if (nbc->lo_file)
-                       fput(nbc->lo_file);
-               if (nbc->md_file)
-                       fput(nbc->md_file);
+               if (nbc->backing_bdev)
+                       blkdev_put(nbc->backing_bdev,
+                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+               if (nbc->md_bdev)
+                       blkdev_put(nbc->md_bdev,
+                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL);
                kfree(nbc);
        }
        lc_destroy(resync_lru);
        return 0;
  }
  
 +/* Detaching the disk is a process in multiple stages.  First we need to lock
 + * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
 + * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
 + * internal references as well.
 + * Only then we have finally detached. */
  static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
                          struct drbd_nl_cfg_reply *reply)
  {
 +      drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
        reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
 +      if (mdev->state.disk == D_DISKLESS)
 +              wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
 +      drbd_resume_io(mdev);
        return 0;
  }
  
@@@ -1967,6 -1922,7 +1936,6 @@@ static int drbd_nl_resume_io(struct drb
        if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
                drbd_uuid_new_current(mdev);
                clear_bit(NEW_CUR_UUID, &mdev->flags);
 -              drbd_md_sync(mdev);
        }
        drbd_suspend_io(mdev);
        reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
diff --combined drivers/md/md.c
index 324a3663fcdaa35dc9d0553f532543aa8019dd38,5aaa6bfbe6387492d6722ef364e915826602248f..3bacccab1b8ccc250aefc032bbc6b430e85ee081
@@@ -706,7 -706,7 +706,7 @@@ static struct mdk_personality *find_per
  /* return the offset of the super block in 512byte sectors */
  static inline sector_t calc_dev_sboffset(struct block_device *bdev)
  {
 -      sector_t num_sectors = bdev->bd_inode->i_size / 512;
 +      sector_t num_sectors = i_size_read(bdev->bd_inode) / 512;
        return MD_NEW_SIZE_SECTORS(num_sectors);
  }
  
@@@ -1386,7 -1386,7 +1386,7 @@@ static int super_1_load(mdk_rdev_t *rde
         */
        switch(minor_version) {
        case 0:
 -              sb_start = rdev->bdev->bd_inode->i_size >> 9;
 +              sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
                sb_start -= 8*2;
                sb_start &= ~(sector_t)(4*2-1);
                break;
                        ret = 0;
        }
        if (minor_version)
 -              rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) -
 +              rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
                        le64_to_cpu(sb->data_offset);
        else
                rdev->sectors = rdev->sb_start;
@@@ -1680,7 -1680,7 +1680,7 @@@ super_1_rdev_size_change(mdk_rdev_t *rd
                return 0; /* component must fit device */
        if (rdev->sb_start < rdev->data_offset) {
                /* minor versions 1 and 2; superblock before data */
 -              max_sectors = rdev->bdev->bd_inode->i_size >> 9;
 +              max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
                max_sectors -= rdev->data_offset;
                if (!num_sectors || num_sectors > max_sectors)
                        num_sectors = max_sectors;
        } else {
                /* minor version 0; superblock after data */
                sector_t sb_start;
 -              sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
 +              sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
                sb_start &= ~(sector_t)(4*2 - 1);
                max_sectors = rdev->sectors + sb_start - rdev->sb_start;
                if (!num_sectors || num_sectors > max_sectors)
@@@ -1880,7 -1880,7 +1880,7 @@@ static int bind_rdev_to_array(mdk_rdev_
        rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
  
        list_add_rcu(&rdev->same_set, &mddev->disks);
-       bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+       bd_link_disk_holder(rdev->bdev, mddev->gendisk);
  
        /* May as well allow recovery to be retried once */
        mddev->recovery_disabled = 0;
@@@ -1907,7 -1907,6 +1907,6 @@@ static void unbind_rdev_from_array(mdk_
                MD_BUG();
                return;
        }
-       bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
        list_del_rcu(&rdev->same_set);
        printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
        rdev->mddev = NULL;
@@@ -1935,19 -1934,13 +1934,13 @@@ static int lock_rdev(mdk_rdev_t *rdev, 
        struct block_device *bdev;
        char b[BDEVNAME_SIZE];
  
-       bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+       bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+                                shared ? (mdk_rdev_t *)lock_rdev : rdev);
        if (IS_ERR(bdev)) {
                printk(KERN_ERR "md: could not open %s.\n",
                        __bdevname(dev, b));
                return PTR_ERR(bdev);
        }
-       err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
-       if (err) {
-               printk(KERN_ERR "md: could not bd_claim %s.\n",
-                       bdevname(bdev, b));
-               blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
-               return err;
-       }
        if (!shared)
                set_bit(AllReserved, &rdev->flags);
        rdev->bdev = bdev;
@@@ -1960,8 -1953,7 +1953,7 @@@ static void unlock_rdev(mdk_rdev_t *rde
        rdev->bdev = NULL;
        if (!bdev)
                MD_BUG();
-       bd_release(bdev);
-       blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+       blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  }
  
  void md_autodetect_dev(dev_t dev);
@@@ -2584,7 -2576,7 +2576,7 @@@ rdev_size_store(mdk_rdev_t *rdev, cons
                        if (!sectors)
                                return -EBUSY;
                } else if (!sectors)
 -                      sectors = (rdev->bdev->bd_inode->i_size >> 9) -
 +                      sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
                                rdev->data_offset;
        }
        if (sectors < my_mddev->dev_sectors)
@@@ -2797,7 -2789,7 +2789,7 @@@ static mdk_rdev_t *md_import_device(dev
  
        kobject_init(&rdev->kobj, &rdev_ktype);
  
 -      size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
 +      size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
        if (!size) {
                printk(KERN_WARNING 
                        "md: %s has zero or unknown size, marking faulty!\n",
@@@ -5235,8 -5227,8 +5227,8 @@@ static int add_new_disk(mddev_t * mddev
  
                if (!mddev->persistent) {
                        printk(KERN_INFO "md: nonpersistent superblock ...\n");
 -                      rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 -              } else 
 +                      rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
 +              } else
                        rdev->sb_start = calc_dev_sboffset(rdev->bdev);
                rdev->sectors = rdev->sb_start;
  
@@@ -5306,7 -5298,7 +5298,7 @@@ static int hot_add_disk(mddev_t * mddev
        if (mddev->persistent)
                rdev->sb_start = calc_dev_sboffset(rdev->bdev);
        else
 -              rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 +              rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
  
        rdev->sectors = rdev->sb_start;
  
diff --combined fs/partitions/check.c
index 12213f7ce7a26edeeb5bed0eec3d61504790ff2f,2e6501d034ab74e704ab2f26571a3a93719d36b0..bdf8d3cc95a4deeb89c5aab6a322f9824125862d
@@@ -237,13 -237,6 +237,13 @@@ ssize_t part_size_show(struct device *d
        return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
  }
  
 +ssize_t part_ro_show(struct device *dev,
 +                     struct device_attribute *attr, char *buf)
 +{
 +      struct hd_struct *p = dev_to_part(dev);
 +      return sprintf(buf, "%d\n", p->policy ? 1 : 0);
 +}
 +
  ssize_t part_alignment_offset_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
  {
@@@ -319,7 -312,6 +319,7 @@@ ssize_t part_fail_store(struct device *
  static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
  static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
  static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 +static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
  static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
  static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
                   NULL);
@@@ -334,7 -326,6 +334,7 @@@ static struct attribute *part_attrs[] 
        &dev_attr_partition.attr,
        &dev_attr_start.attr,
        &dev_attr_size.attr,
 +      &dev_attr_ro.attr,
        &dev_attr_alignment_offset.attr,
        &dev_attr_discard_alignment.attr,
        &dev_attr_stat.attr,
@@@ -558,7 -549,7 +558,7 @@@ void register_disk(struct gendisk *disk
                goto exit;
  
        bdev->bd_invalidated = 1;
-       err = blkdev_get(bdev, FMODE_READ);
+       err = blkdev_get(bdev, FMODE_READ, NULL);
        if (err < 0)
                goto exit;
        blkdev_put(bdev, FMODE_READ);
index 064f964d4f3c201ce52bf6bb42e7dbbd3c0526bd,9209cd199c479e43a6b25bda4eb2706b080447a1..2d2ce7f651a7c4ae74d458b576113298f77c039c
@@@ -353,6 -353,9 +353,6 @@@ xfs_parseargs
                        mp->m_qflags &= ~XFS_OQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
                        mp->m_flags |= XFS_MOUNT_DELAYLOG;
 -                      cmn_err(CE_WARN,
 -                              "Enabling EXPERIMENTAL delayed logging feature "
 -                              "- use at your own risk.\n");
                } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
                        mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
                } else if (!strcmp(this_char, "ihashsize")) {
@@@ -606,7 -609,8 +606,8 @@@ xfs_blkdev_get
  {
        int                     error = 0;
  
-       *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp);
+       *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+                                   mp);
        if (IS_ERR(*bdevp)) {
                error = PTR_ERR(*bdevp);
                printk("XFS: Invalid device [%s], error=%d\n", name, error);
@@@ -620,7 -624,7 +621,7 @@@ xfs_blkdev_put
        struct block_device     *bdev)
  {
        if (bdev)
-               close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
+               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  }
  
  /*