Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
diff --combined fs/xfs/linux-2.6/xfs_buf.c

index f3ccaec5760a5e7a386235d0160f250254106edf,52785189212fad95e7428c254b864216f0374f05..ba5312802aa99dfe266148b90d9dfa8e3f54e19d
--- 1/fs/xfs/linux-2.6/xfs_buf.c
--- 2/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@@ -188,8 -188,8 +188,8 @@@ _xfs_buf_initialize
         atomic_set(&bp->b_hold, 1);
         init_completion(&bp->b_iowait);
         INIT_LIST_HEAD(&bp->b_list);
-       INIT_LIST_HEAD(&bp->b_hash_list);
-       init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+       RB_CLEAR_NODE(&bp->b_rbnode);
+       sema_init(&bp->b_sema, 0); /* held, no waiters */
         XB_SET_OWNER(bp);
         bp->b_target = target;
         bp->b_file_offset = range_base;
@@@ -262,8 -262,6 +262,6 @@@ xfs_buf_free
   {
         trace_xfs_buf_free(bp, _RET_IP_);
   
-       ASSERT(list_empty(&bp->b_hash_list));
- 
         if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
                 uint            i;
   
@@@ -422,8 -420,10 +420,10 @@@ _xfs_buf_find
   {
         xfs_off_t               range_base;
         size_t                  range_length;
-       xfs_bufhash_t           *hash;
-       xfs_buf_t               *bp, *n;
+       struct xfs_perag        *pag;
+       struct rb_node          **rbp;
+       struct rb_node          *parent;
+       xfs_buf_t               *bp;
   
         range_base = (ioff << BBSHIFT);
         range_length = (isize << BBSHIFT);
@@@ -432,14 -432,37 +432,37 @@@
         ASSERT(!(range_length < (1 << btp->bt_sshift)));
         ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
   
-       hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
- 
-       spin_lock(&hash->bh_lock);
- 
-       list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-               ASSERT(btp == bp->b_target);
-               if (bp->b_file_offset == range_base &&
-                   bp->b_buffer_length == range_length) {
+       /* get tree root */
+       pag = xfs_perag_get(btp->bt_mount,
+                               xfs_daddr_to_agno(btp->bt_mount, ioff));
+ 
+       /* walk tree */
+       spin_lock(&pag->pag_buf_lock);
+       rbp = &pag->pag_buf_tree.rb_node;
+       parent = NULL;
+       bp = NULL;
+       while (*rbp) {
+               parent = *rbp;
+               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+ 
+               if (range_base < bp->b_file_offset)
+                       rbp = &(*rbp)->rb_left;
+               else if (range_base > bp->b_file_offset)
+                       rbp = &(*rbp)->rb_right;
+               else {
+                       /*
+                        * found a block offset match. If the range doesn't
+                        * match, the only way this is allowed is if the buffer
+                        * in the cache is stale and the transaction that made
+                        * it stale has not yet committed. i.e. we are
+                        * reallocating a busy extent. Skip this buffer and
+                        * continue searching to the right for an exact match.
+                        */
+                       if (bp->b_buffer_length != range_length) {
+                               ASSERT(bp->b_flags & XBF_STALE);
+                               rbp = &(*rbp)->rb_right;
+                               continue;
+                       }
                         atomic_inc(&bp->b_hold);
                         goto found;
                 }
@@@ -449,17 -472,21 +472,21 @@@
         if (new_bp) {
                 _xfs_buf_initialize(new_bp, btp, range_base,
                                 range_length, flags);
-               new_bp->b_hash = hash;
-               list_add(&new_bp->b_hash_list, &hash->bh_list);
+               rb_link_node(&new_bp->b_rbnode, parent, rbp);
+               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+               /* the buffer keeps the perag reference until it is freed */
+               new_bp->b_pag = pag;
+               spin_unlock(&pag->pag_buf_lock);
         } else {
                 XFS_STATS_INC(xb_miss_locked);
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
         }
- 
-       spin_unlock(&hash->bh_lock);
         return new_bp;
   
   found:
-       spin_unlock(&hash->bh_lock);
+       spin_unlock(&pag->pag_buf_lock);
+       xfs_perag_put(pag);
   
         /* Attempt to get the semaphore without sleeping,
          * if this does not work then we need to drop the
@@@ -625,8 -652,7 +652,7 @@@ voi
   xfs_buf_readahead(
         xfs_buftarg_t           *target,
         xfs_off_t               ioff,
-       size_t                  isize,
-       xfs_buf_flags_t         flags)
+       size_t                  isize)
   {
         struct backing_dev_info *bdi;
   
@@@ -634,8 -660,42 +660,42 @@@
         if (bdi_read_congested(bdi))
                 return;
   
-       flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
-       xfs_buf_read(target, ioff, isize, flags);
+       xfs_buf_read(target, ioff, isize,
+                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+ }
+ 
+ /*
+  * Read an uncached buffer from disk. Allocates and returns a locked buffer
+  * holding the disk contents, or NULL if allocation or the read fails.
+  */
+ struct xfs_buf *
+ xfs_buf_read_uncached(
+       struct xfs_mount        *mp,            /* handed to xfsbdstrat() */
+       struct xfs_buftarg      *target,        /* for xfs_buf_get_uncached() */
+       xfs_daddr_t             daddr,          /* disk address to read from */
+       size_t                  length,         /* size of buffer to get */
+       int                     flags)          /* for xfs_buf_get_uncached() */
+ {
+       xfs_buf_t               *bp;
+       int                     error;
+ 
+       bp = xfs_buf_get_uncached(target, length, flags);
+       if (!bp)
+               return NULL;
+ 
+       /* set up the buffer for a read IO */
+       xfs_buf_lock(bp);       /* not dropped here on the success path */
+       XFS_BUF_SET_ADDR(bp, daddr);
+       XFS_BUF_READ(bp);
+       XFS_BUF_BUSY(bp);
+ 
+       xfsbdstrat(mp, bp);
+       error = xfs_buf_iowait(bp);
+       if (error || bp->b_error) {     /* either error source fails the read */
+               xfs_buf_relse(bp);      /* release it; caller gets no buffer */
+               return NULL;
+       }
+       return bp;
  }
   
   xfs_buf_t *
@@@ -707,9 -767,10 +767,10 @@@ xfs_buf_associate_memory
   }
   
   xfs_buf_t *
- xfs_buf_get_noaddr(
+ xfs_buf_get_uncached(
+       struct xfs_buftarg      *target,
         size_t                  len,
-       xfs_buftarg_t           *target)
+       int                     flags)
   {
         unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
         int                     error, i;
@@@ -725,7 -786,7 +786,7 @@@
                 goto fail_free_buf;
   
         for (i = 0; i < page_count; i++) {
-               bp->b_pages[i] = alloc_page(GFP_KERNEL);
+               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
                 if (!bp->b_pages[i])
                         goto fail_free_mem;
         }
@@@ -740,7 -801,7 +801,7 @@@
   
         xfs_buf_unlock(bp);
   
-       trace_xfs_buf_get_noaddr(bp, _RET_IP_);
+       trace_xfs_buf_get_uncached(bp, _RET_IP_);
         return bp;
   
    fail_free_mem:
@@@ -774,29 -835,30 +835,30 @@@ voi
   xfs_buf_rele(
         xfs_buf_t               *bp)
   {
-       xfs_bufhash_t           *hash = bp->b_hash;
+       struct xfs_perag        *pag = bp->b_pag;
   
         trace_xfs_buf_rele(bp, _RET_IP_);
   
-       if (unlikely(!hash)) {
+       if (!pag) {
                 ASSERT(!bp->b_relse);
+               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
                 if (atomic_dec_and_test(&bp->b_hold))
                         xfs_buf_free(bp);
                 return;
         }
   
+       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
         ASSERT(atomic_read(&bp->b_hold) > 0);
-       if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
                 if (bp->b_relse) {
                         atomic_inc(&bp->b_hold);
-                       spin_unlock(&hash->bh_lock);
-                       (*(bp->b_relse)) (bp);
-               } else if (bp->b_flags & XBF_FS_MANAGED) {
-                       spin_unlock(&hash->bh_lock);
+                       spin_unlock(&pag->pag_buf_lock);
+                       bp->b_relse(bp);
                 } else {
                         ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                       list_del_init(&bp->b_hash_list);
-                       spin_unlock(&hash->bh_lock);
+                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+                       spin_unlock(&pag->pag_buf_lock);
+                       xfs_perag_put(pag);
                         xfs_buf_free(bp);
                 }
         }
@@@ -859,7 -921,7 +921,7 @@@ xfs_buf_lock
         trace_xfs_buf_lock(bp, _RET_IP_);
   
         if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_mount, 0);
+               xfs_log_force(bp->b_target->bt_mount, 0);
         if (atomic_read(&bp->b_io_remaining))
                 blk_run_address_space(bp->b_target->bt_mapping);
         down(&bp->b_sema);
@@@ -924,7 -986,19 +986,7 @@@ xfs_buf_iodone_work
         xfs_buf_t               *bp =
                 container_of(work, xfs_buf_t, b_iodone_work);
   
- -      /*
- -       * We can get an EOPNOTSUPP to ordered writes.  Here we clear the
- -       * ordered flag and reissue them.  Because we can't tell the higher
- -       * layers directly that they should not issue ordered I/O anymore, they
- -       * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
- -       */
- -      if ((bp->b_error == EOPNOTSUPP) &&
- -          (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
- -              trace_xfs_buf_ordered_retry(bp, _RET_IP_);
- -              bp->b_flags &= ~XBF_ORDERED;
- -              bp->b_flags |= _XFS_BARRIER_FAILED;
- -              xfs_buf_iorequest(bp);
- -      } else if (bp->b_iodone)
+ +      if (bp->b_iodone)
                 (*(bp->b_iodone))(bp);
         else if (bp->b_flags & XBF_ASYNC)
                 xfs_buf_relse(bp);
@@@ -970,7 -1044,6 +1032,6 @@@ xfs_bwrite
   {
         int                     error;
   
-       bp->b_mount = mp;
         bp->b_flags |= XBF_WRITE;
         bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
   
@@@ -991,8 -1064,6 +1052,6 @@@ xfs_bdwrite
   {
         trace_xfs_buf_bdwrite(bp, _RET_IP_);
   
-       bp->b_mount = mp;
- 
         bp->b_flags &= ~XBF_READ;
         bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
   
@@@ -1001,7 -1072,7 +1060,7 @@@
   
   /*
    * Called when we want to stop a buffer from getting written or read.
-  * We attach the EIO error, muck with its flags, and call biodone
+  * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
    * so that the proper iodone callbacks get called.
    */
   STATIC int
@@@ -1018,21 -1089,21 +1077,21 @@@ xfs_bioerror
         XFS_BUF_ERROR(bp, EIO);
   
         /*
-        * We're calling biodone, so delete XBF_DONE flag.
+        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
          */
         XFS_BUF_UNREAD(bp);
         XFS_BUF_UNDELAYWRITE(bp);
         XFS_BUF_UNDONE(bp);
         XFS_BUF_STALE(bp);
   
-       xfs_biodone(bp);
+       xfs_buf_ioend(bp, 0);
   
         return EIO;
   }
   
   /*
    * Same as xfs_bioerror, except that we are releasing the buffer
-  * here ourselves, and avoiding the biodone call.
+  * here ourselves, and avoiding the xfs_buf_ioend call.
    * This is meant for userdata errors; metadata bufs come with
    * iodone functions attached, so that we can track down errors.
    */
@@@ -1081,7 -1152,7 +1140,7 @@@ in
   xfs_bdstrat_cb(
         struct xfs_buf  *bp)
   {
-       if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
                 trace_xfs_bdstrat_shut(bp, _RET_IP_);
                 /*
                  * Metadata write that didn't get logged but
@@@ -1183,7 -1254,7 +1242,7 @@@ _xfs_buf_ioapply
   
         if (bp->b_flags & XBF_ORDERED) {
                 ASSERT(!(bp->b_flags & XBF_READ));
- -              rw = WRITE_BARRIER;
+ +              rw = WRITE_FLUSH_FUA;
         } else if (bp->b_flags & XBF_LOG_BUFFER) {
                 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
                 bp->b_flags &= ~_XBF_RUN_QUEUES;
@@@ -1387,62 -1458,24 +1446,24 @@@ xfs_buf_iomove
    */
   void
   xfs_wait_buftarg(
-       xfs_buftarg_t   *btp)
- {
-       xfs_buf_t       *bp, *n;
-       xfs_bufhash_t   *hash;
-       uint            i;
- 
-       for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-               hash = &btp->bt_hash[i];
- again:
-               spin_lock(&hash->bh_lock);
-               list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-                       ASSERT(btp == bp->b_target);
-                       if (!(bp->b_flags & XBF_FS_MANAGED)) {
-                               spin_unlock(&hash->bh_lock);
-                               /*
-                                * Catch superblock reference count leaks
-                                * immediately
-                                */
-                               BUG_ON(bp->b_bn == 0);
-                               delay(100);
-                               goto again;
-                       }
-               }
-               spin_unlock(&hash->bh_lock);
-       }
- }
- 
- /*
-  *    Allocate buffer hash table for a given target.
-  *    For devices containing metadata (i.e. not the log/realtime devices)
-  *    we need to allocate a much larger hash table.
-  */
- STATIC void
- xfs_alloc_bufhash(
-       xfs_buftarg_t           *btp,
-       int                     external)
+       struct xfs_buftarg      *btp)
   {
-       unsigned int            i;
+       struct xfs_perag        *pag;
+       uint                    i;
   
-       btp->bt_hashshift = external ? 3 : 12;  /* 8 or 4096 buckets */
-       btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
-                                        sizeof(xfs_bufhash_t));
-       for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-               spin_lock_init(&btp->bt_hash[i].bh_lock);
-               INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+       for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
+               pag = xfs_perag_get(btp->bt_mount, i);
+               spin_lock(&pag->pag_buf_lock);
+               while (rb_first(&pag->pag_buf_tree)) {
+                       spin_unlock(&pag->pag_buf_lock);
+                       delay(100);
+                       spin_lock(&pag->pag_buf_lock);
+               }
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
         }
   }
   
- STATIC void
- xfs_free_bufhash(
-       xfs_buftarg_t           *btp)
- {
-       kmem_free_large(btp->bt_hash);
-       btp->bt_hash = NULL;
- }
- 
   /*
    *    buftarg list for delwrite queue processing
    */
@@@ -1475,7 -1508,6 +1496,6 @@@ xfs_free_buftarg
         xfs_flush_buftarg(btp, 1);
         if (mp->m_flags & XFS_MOUNT_BARRIER)
                 xfs_blkdev_issue_flush(btp);
-       xfs_free_bufhash(btp);
         iput(btp->bt_mapping->host);
   
         /* Unregister the buftarg first so that we don't get a
@@@ -1597,6 -1629,7 +1617,7 @@@ out_error
   
   xfs_buftarg_t *
   xfs_alloc_buftarg(
+       struct xfs_mount        *mp,
         struct block_device     *bdev,
         int                     external,
         const char              *fsname)
@@@ -1605,6 -1638,7 +1626,7 @@@
   
         btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
   
+       btp->bt_mount = mp;
         btp->bt_dev =  bdev->bd_dev;
         btp->bt_bdev = bdev;
         if (xfs_setsize_buftarg_early(btp, bdev))
@@@ -1613,7 -1647,6 +1635,6 @@@
                 goto error;
         if (xfs_alloc_delwrite_queue(btp, fsname))
                 goto error;
-       xfs_alloc_bufhash(btp, external);
         return btp;
   
   error:
@@@ -1904,7 -1937,7 +1925,7 @@@ xfs_flush_buftarg
                         bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
   
                         list_del_init(&bp->b_list);
-                       xfs_iowait(bp);
+                       xfs_buf_iowait(bp);
                         xfs_buf_relse(bp);
                 }
         }
@@@ -1921,7 -1954,7 +1942,7 @@@ xfs_buf_init(void
                 goto out;
   
         xfslogd_workqueue = alloc_workqueue("xfslogd",
- -                                      WQ_RESCUER | WQ_HIGHPRI, 1);
+ +                                      WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
         if (!xfslogd_workqueue)
                 goto out_free_buf_zone;
   
diff --combined fs/xfs/linux-2.6/xfs_buf.h

index 9d021c73ea5234d985b4fa062da7e790d4d58eeb,131c0ebf2c0da9f06efd797b99e5ce742a17c2a6..383a3f37cf987e75da67faad754145acd8f3cd5b
--- 1/fs/xfs/linux-2.6/xfs_buf.h
--- 2/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@@ -51,7 -51,6 +51,6 @@@ typedef enum 
   #define XBF_DONE      (1 << 5) /* all pages in the buffer uptodate */
   #define XBF_DELWRI    (1 << 6) /* buffer has dirty pages */
   #define XBF_STALE     (1 << 7) /* buffer has been staled, do not find it */
- #define XBF_FS_MANAGED        (1 << 8) /* filesystem controls freeing memory */
   #define XBF_ORDERED   (1 << 11)/* use ordered writes */
   #define XBF_READ_AHEAD        (1 << 12)/* asynchronous read-ahead */
   #define XBF_LOG_BUFFER        (1 << 13)/* this is a buffer used for the log */
@@@ -86,6 -85,14 +85,6 @@@
    */
   #define _XBF_PAGE_LOCKED      (1 << 22)
   
- -/*
- - * If we try a barrier write, but it fails we have to communicate
- - * this to the upper layers.  Unfortunately b_error gets overwritten
- - * when the buffer is re-issued so we have to add another flag to
- - * keep this information.
- - */
- -#define _XFS_BARRIER_FAILED   (1 << 23)
- -
   typedef unsigned int xfs_buf_flags_t;
   
   #define XFS_BUF_FLAGS \
@@@ -96,7 -103,6 +95,6 @@@
         { XBF_DONE,             "DONE" }, \
         { XBF_DELWRI,           "DELWRI" }, \
         { XBF_STALE,            "STALE" }, \
-       { XBF_FS_MANAGED,       "FS_MANAGED" }, \
         { XBF_ORDERED,          "ORDERED" }, \
         { XBF_READ_AHEAD,       "READ_AHEAD" }, \
         { XBF_LOCK,             "LOCK" },       /* should never be set */\
@@@ -106,7 -112,8 +104,7 @@@
         { _XBF_PAGES,           "PAGES" }, \
         { _XBF_RUN_QUEUES,      "RUN_QUEUES" }, \
         { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
- -      { _XBF_PAGE_LOCKED,     "PAGE_LOCKED" }, \
- -      { _XFS_BARRIER_FAILED,  "BARRIER_FAILED" }
+ +      { _XBF_PAGE_LOCKED,     "PAGE_LOCKED" }
   
   
   typedef enum {
@@@ -123,14 -130,11 +121,11 @@@ typedef struct xfs_buftarg 
         dev_t                   bt_dev;
         struct block_device     *bt_bdev;
         struct address_space    *bt_mapping;
+       struct xfs_mount        *bt_mount;
         unsigned int            bt_bsize;
         unsigned int            bt_sshift;
         size_t                  bt_smask;
   
-       /* per device buffer hash table */
-       uint                    bt_hashshift;
-       xfs_bufhash_t           *bt_hash;
- 
         /* per device delwri queue */
         struct task_struct      *bt_task;
         struct list_head        bt_list;
@@@ -158,34 -162,41 +153,41 @@@ typedef int (*xfs_buf_bdstrat_t)(struc
   #define XB_PAGES      2
   
   typedef struct xfs_buf {
+       /*
+        * first cacheline holds all the fields needed for an uncontended cache
+        * hit to be fully processed. The semaphore straddles the cacheline
+        * boundary, but the counter and lock sit on the first cacheline,
+        * which is the only bit that is touched if we hit the semaphore
+        * fast-path on locking.
+        */
+       struct rb_node          b_rbnode;       /* rbtree node */
+       xfs_off_t               b_file_offset;  /* offset in file */
+       size_t                  b_buffer_length;/* size of buffer in bytes */
+       atomic_t                b_hold;         /* reference count */
+       xfs_buf_flags_t         b_flags;        /* status flags */
         struct semaphore        b_sema;         /* semaphore for lockables */
-       unsigned long           b_queuetime;    /* time buffer was queued */
-       atomic_t                b_pin_count;    /* pin count */
+ 
         wait_queue_head_t       b_waiters;      /* unpin waiters */
         struct list_head        b_list;
-       xfs_buf_flags_t         b_flags;        /* status flags */
-       struct list_head        b_hash_list;    /* hash table list */
-       xfs_bufhash_t           *b_hash;        /* hash table list start */
+       struct xfs_perag        *b_pag;         /* contains rbtree root */
         xfs_buftarg_t           *b_target;      /* buffer target (device) */
-       atomic_t                b_hold;         /* reference count */
         xfs_daddr_t             b_bn;           /* block number for I/O */
-       xfs_off_t               b_file_offset;  /* offset in file */
-       size_t                  b_buffer_length;/* size of buffer in bytes */
         size_t                  b_count_desired;/* desired transfer size */
         void                    *b_addr;        /* virtual address of buffer */
         struct work_struct      b_iodone_work;
-       atomic_t                b_io_remaining; /* #outstanding I/O requests */
         xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
         xfs_buf_relse_t         b_relse;        /* releasing function */
         struct completion       b_iowait;       /* queue for I/O waiters */
         void                    *b_fspriv;
         void                    *b_fspriv2;
-       struct xfs_mount        *b_mount;
-       unsigned short          b_error;        /* error code on I/O */
-       unsigned int            b_page_count;   /* size of page array */
-       unsigned int            b_offset;       /* page offset in first page */
         struct page             **b_pages;      /* array of page pointers */
         struct page             *b_page_array[XB_PAGES]; /* inline pages */
+       unsigned long           b_queuetime;    /* time buffer was queued */
+       atomic_t                b_pin_count;    /* pin count */
+       atomic_t                b_io_remaining; /* #outstanding I/O requests */
+       unsigned int            b_page_count;   /* size of page array */
+       unsigned int            b_offset;       /* page offset in first page */
+       unsigned short          b_error;        /* error code on I/O */
   #ifdef XFS_BUF_LOCK_TRACKING
         int                     b_last_holder;
   #endif
@@@ -204,11 -215,13 +206,13 @@@ extern xfs_buf_t *xfs_buf_read(xfs_buft
                                 xfs_buf_flags_t);
   
   extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
- extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
+ extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
   extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
   extern void xfs_buf_hold(xfs_buf_t *);
- extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
+ extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+ struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+                               struct xfs_buftarg *target,
+                               xfs_daddr_t daddr, size_t length, int flags);
   
   /* Releasing Buffers */
   extern void xfs_buf_free(xfs_buf_t *);
@@@ -233,6 -246,8 +237,8 @@@ extern int xfs_buf_iorequest(xfs_buf_t 
   extern int xfs_buf_iowait(xfs_buf_t *);
   extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
                                 xfs_buf_rw_t);
+ #define xfs_buf_zero(bp, off, len) \
+           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
   
   static inline int xfs_buf_geterror(xfs_buf_t *bp)
   {
@@@ -267,8 -282,6 +273,6 @@@ extern void xfs_buf_terminate(void)
                                         XFS_BUF_DONE(bp);       \
                                 } while (0)
   
- #define XFS_BUF_UNMANAGE(bp)  ((bp)->b_flags &= ~XBF_FS_MANAGED)
- 
   #define XFS_BUF_DELAYWRITE(bp)                ((bp)->b_flags |= XBF_DELWRI)
   #define XFS_BUF_UNDELAYWRITE(bp)      xfs_buf_delwri_dequeue(bp)
   #define XFS_BUF_ISDELAYWRITE(bp)      ((bp)->b_flags & XBF_DELWRI)
@@@ -347,25 -360,11 +351,11 @@@ static inline void xfs_buf_relse(xfs_bu
         xfs_buf_rele(bp);
   }
   
- #define xfs_biodone(bp)               xfs_buf_ioend(bp, 0)
- 
- #define xfs_biomove(bp, off, len, data, rw) \
-           xfs_buf_iomove((bp), (off), (len), (data), \
-               ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
- 
- #define xfs_biozero(bp, off, len) \
-           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
- 
- #define xfs_iowait(bp)        xfs_buf_iowait(bp)
- 
- #define xfs_baread(target, rablkno, ralen)  \
-       xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
- 
- 
   /*
    *    Handling of buftargs.
    */
- extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
+ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+                       struct block_device *, int, const char *);
   extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
   extern void xfs_wait_buftarg(xfs_buftarg_t *);
   extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --combined fs/xfs/linux-2.6/xfs_super.c

index 08fd3102128ca9ee5bc8bb855f86cb0fe8b5966d,fa1e40ac4b352725dd200d359999608dd8b4ed7c..ab31ce5aeaf9cde4c5c008e1492c7a42f5399863
--- 1/fs/xfs/linux-2.6/xfs_super.c
--- 2/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@@ -44,7 -44,6 +44,6 @@@
   #include "xfs_buf_item.h"
   #include "xfs_utils.h"
   #include "xfs_vnodeops.h"
- #include "xfs_version.h"
   #include "xfs_log_priv.h"
   #include "xfs_trans_priv.h"
   #include "xfs_filestream.h"
@@@ -645,7 -644,7 +644,7 @@@ xfs_barrier_test
         XFS_BUF_ORDERED(sbp);
   
         xfsbdstrat(mp, sbp);
-       error = xfs_iowait(sbp);
+       error = xfs_buf_iowait(sbp);
   
         /*
          * Clear all the flags we set and possible error state in the
@@@ -693,7 -692,8 +692,7 @@@ voi
   xfs_blkdev_issue_flush(
         xfs_buftarg_t           *buftarg)
   {
- -      blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
- -                      BLKDEV_IFL_WAIT);
+ +      blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
   }
   
   STATIC void
@@@ -757,18 -757,20 +756,20 @@@ xfs_open_devices
          * Setup xfs_mount buffer target pointers
          */
         error = ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
+       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
         if (!mp->m_ddev_targp)
                 goto out_close_rtdev;
   
         if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
+               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+                                                       mp->m_fsname);
                 if (!mp->m_rtdev_targp)
                         goto out_free_ddev_targ;
         }
   
         if (logdev && logdev != ddev) {
-               mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
+               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+                                                       mp->m_fsname);
                 if (!mp->m_logdev_targp)
                         goto out_free_rtdev_targ;
         } else {
@@@ -971,12 -973,7 +972,7 @@@ xfs_fs_inode_init_once
   
   /*
    * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
-  * we catch unlogged VFS level updates to the inode. Care must be taken
-  * here - the transaction code calls mark_inode_dirty_sync() to mark the
-  * VFS inode dirty in a transaction and clears the i_update_core field;
-  * it must clear the field after calling mark_inode_dirty_sync() to
-  * correctly indicate that the dirty state has been propagated into the
-  * inode log item.
+  * we catch unlogged VFS level updates to the inode.
    *
    * We need the barrier() to maintain correct ordering between unlogged
    * updates and the transaction commit code that clears the i_update_core
@@@ -1520,8 -1517,9 +1516,9 @@@ xfs_fs_fill_super
         if (error)
                 goto out_free_fsname;
   
-       if (xfs_icsb_init_counters(mp))
-               mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+       error = xfs_icsb_init_counters(mp);
+       if (error)
+               goto out_close_devices;
   
         error = xfs_readsb(mp, flags);
         if (error)
@@@ -1582,6 -1580,7 +1579,7 @@@
         xfs_freesb(mp);
    out_destroy_counters:
         xfs_icsb_destroy_counters(mp);
+  out_close_devices:
         xfs_close_devices(mp);
    out_free_fsname:
         xfs_free_fsname(mp);
diff --combined fs/xfs/linux-2.6/xfs_trace.h

index 8fe311a456e2c6f9838fc998b515a3b17419c412,286dc201c5b90e51582f6ff2c9441a241519d6b3..acef2e98c5940af5f0ed33f1579dd0732841e0ee
--- 1/fs/xfs/linux-2.6/xfs_trace.h
--- 2/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@@ -124,7 -124,7 +124,7 @@@ DEFINE_EVENT(xfs_perag_class, name,        
                  unsigned long caller_ip),                                      \
         TP_ARGS(mp, agno, refcount, caller_ip))
   DEFINE_PERAG_REF_EVENT(xfs_perag_get);
- DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+ DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
   DEFINE_PERAG_REF_EVENT(xfs_perag_put);
   DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
   DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
@@@ -325,12 -325,13 +325,12 @@@ DEFINE_BUF_EVENT(xfs_buf_lock)
   DEFINE_BUF_EVENT(xfs_buf_lock_done);
   DEFINE_BUF_EVENT(xfs_buf_cond_lock);
   DEFINE_BUF_EVENT(xfs_buf_unlock);
- -DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
   DEFINE_BUF_EVENT(xfs_buf_iowait);
   DEFINE_BUF_EVENT(xfs_buf_iowait_done);
   DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
   DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
   DEFINE_BUF_EVENT(xfs_buf_delwri_split);
- DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
+ DEFINE_BUF_EVENT(xfs_buf_get_uncached);
   DEFINE_BUF_EVENT(xfs_bdstrat_shut);
   DEFINE_BUF_EVENT(xfs_buf_item_relse);
   DEFINE_BUF_EVENT(xfs_buf_item_iodone);
diff --combined fs/xfs/xfs_log.c

index ba8e36e0b4e7b59e115f0861348eaca39f52179b,f4fd49c9b987e19eb5f41be42b16e9f3dc682cfd..cee4ab9f8a9ea60d498952d6cd45bd133e8bfc70
--- 1/fs/xfs/xfs_log.c
--- 2/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@@ -916,6 -916,19 +916,6 @@@ xlog_iodone(xfs_buf_t *bp
         aborted = 0;
         l = iclog->ic_log;
   
- -      /*
- -       * If the _XFS_BARRIER_FAILED flag was set by a lower
- -       * layer, it means the underlying device no longer supports
- -       * barrier I/O. Warn loudly and turn off barriers.
- -       */
- -      if (bp->b_flags & _XFS_BARRIER_FAILED) {
- -              bp->b_flags &= ~_XFS_BARRIER_FAILED;
- -              l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
- -              xfs_fs_cmn_err(CE_WARN, l->l_mp,
- -                              "xlog_iodone: Barriers are no longer supported"
- -                              " by device. Disabling barriers\n");
- -      }
- -
         /*
          * Race to shutdown the filesystem if we see an error.
          */
@@@ -1118,7 -1131,8 +1118,8 @@@ xlog_alloc_log(xfs_mount_t      *mp
                 iclog->ic_prev = prev_iclog;
                 prev_iclog = iclog;
   
-               bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+               bp = xfs_buf_get_uncached(mp->m_logdev_targp,
+                                               log->l_iclog_size, 0);
                 if (!bp)
                         goto out_free_iclog;
                 if (!XFS_BUF_CPSEMA(bp))
@@@ -1296,7 -1310,7 +1297,7 @@@ xlog_bdstrat
         if (iclog->ic_state & XLOG_STATE_IOERROR) {
                 XFS_BUF_ERROR(bp, EIO);
                 XFS_BUF_STALE(bp);
-               xfs_biodone(bp);
+               xfs_buf_ioend(bp, 0);
                 /*
                  * It would seem logical to return EIO here, but we rely on
                  * the log state machine to propagate I/O errors instead of
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
		1	2
fs/xfs/linux-2.6/xfs_buf.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/linux-2.6/xfs_buf.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/linux-2.6/xfs_super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/linux-2.6/xfs_trace.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_log.c	patch \|	diff1 \|	diff2 \|	blob \| history