Btrfs: Add an extent buffer LRU to reduce radix tree hits
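
Each extent_map_tree now carries a small per-tree LRU of recently used
extent_buffers (capped at BUFFER_LRU_MAX, 64 entries), protected by
tree->lru_lock.  __alloc_extent_buffer() scans that list before doing
anything else; a buffer that was filled on a previous use still has
EXTENT_BUFFER_FILLED set, so alloc_extent_buffer()/find_extent_buffer()
return it without touching the per-page radix tree at all.  Only on a
miss is a new buffer allocated from the new extent_buffer_cache slab and
its pages looked up in the page cache.  The LRU takes its own reference
on every buffer it caches and evicts the oldest entry once the list is
full; extent_map_tree_cleanup() drops the cached references when the
tree is torn down.

A minimal userspace sketch of the same pattern, a bounded, MRU-ordered
cache in front of a slow lookup (every name and type below is invented
for the illustration; none of it is kernel API):

	/* lru_sketch.c - bounded LRU in front of a slow lookup */
	#include <stdio.h>
	#include <string.h>

	#define LRU_MAX 4	/* plays the role of BUFFER_LRU_MAX */

	struct buffer {
		unsigned long start;	/* lookup key, like eb->start */
		int valid;
	};

	static struct buffer lru[LRU_MAX];	/* [0] is most recently used */

	/* expensive path, stands in for the radix tree / page cache lookup */
	static struct buffer slow_lookup(unsigned long start)
	{
		struct buffer b = { start, 1 };

		printf("slow lookup for %lu\n", start);
		return b;
	}

	/* move a hit to the front, or insert at the front and drop the oldest */
	static void lru_touch(struct buffer b, int hit_idx)
	{
		int shift = (hit_idx >= 0) ? hit_idx : LRU_MAX - 1;

		memmove(&lru[1], &lru[0], shift * sizeof(lru[0]));
		lru[0] = b;
	}

	static struct buffer lookup(unsigned long start)
	{
		struct buffer b;
		int i;

		for (i = 0; i < LRU_MAX; i++) {
			if (lru[i].valid && lru[i].start == start) {
				b = lru[i];
				lru_touch(b, i);	/* hit: just reorder */
				return b;
			}
		}
		b = slow_lookup(start);		/* miss: slow path, then cache */
		lru_touch(b, -1);
		return b;
	}

	int main(void)
	{
		lookup(4096);
		lookup(8192);
		lookup(4096);	/* served from the LRU, no "slow lookup" line */
		return 0;
	}

The kernel version differs in the obvious ways (reference counting via
extent_buffer_get()/free_extent_buffer(), a list_head instead of an
array, and the lru_lock spinlock around every list walk), but the
lookup order is the same: cheap cache first, radix tree only on a miss.
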
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index ff2b1dd25bf92ff0398bbdb810217de45d42867a..85b28a6a4e05d601cfafb3a257c01be897175c32 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/swap.h>
 #include "extent_map.h"
 
 /* temporary define until extent_map moves out of btrfs */
@@ -18,6 +19,13 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
 
 static struct kmem_cache *extent_map_cache;
 static struct kmem_cache *extent_state_cache;
+static struct kmem_cache *extent_buffer_cache;
+
+static LIST_HEAD(buffers);
+static LIST_HEAD(states);
+
+static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
+#define BUFFER_LRU_MAX 64
 
 struct tree_entry {
        u64 start;
@@ -26,34 +34,37 @@ struct tree_entry {
        struct rb_node rb_node;
 };
 
-/* bits for the extent state */
-#define EXTENT_DIRTY 1
-#define EXTENT_WRITEBACK (1 << 1)
-#define EXTENT_UPTODATE (1 << 2)
-#define EXTENT_LOCKED (1 << 3)
-#define EXTENT_NEW (1 << 4)
-#define EXTENT_DELALLOC (1 << 5)
-
-#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
-
 void __init extent_map_init(void)
 {
        extent_map_cache = btrfs_cache_create("extent_map",
-                                           sizeof(struct extent_map),
-                                           SLAB_DESTROY_BY_RCU,
+                                           sizeof(struct extent_map), 0,
                                            NULL);
        extent_state_cache = btrfs_cache_create("extent_state",
-                                           sizeof(struct extent_state),
-                                           SLAB_DESTROY_BY_RCU,
+                                           sizeof(struct extent_state), 0,
+                                           NULL);
+       extent_buffer_cache = btrfs_cache_create("extent_buffers",
+                                           sizeof(struct extent_buffer), 0,
                                            NULL);
 }
 
 void __exit extent_map_exit(void)
 {
+       struct extent_state *state;
+
+       while (!list_empty(&states)) {
+               state = list_entry(states.next, struct extent_state, list);
+               printk("state leak: start %Lu end %Lu state %lu in tree %d "
+                      "refs %d\n", state->start, state->end, state->state,
+                      state->in_tree, atomic_read(&state->refs));
+               list_del(&state->list);
+               kmem_cache_free(extent_state_cache, state);
+       }
+
        if (extent_map_cache)
                kmem_cache_destroy(extent_map_cache);
        if (extent_state_cache)
                kmem_cache_destroy(extent_state_cache);
+       if (extent_buffer_cache)
+               kmem_cache_destroy(extent_buffer_cache);
 }
 
 void extent_map_tree_init(struct extent_map_tree *tree,
@@ -63,10 +74,25 @@ void extent_map_tree_init(struct extent_map_tree *tree,
        tree->state.rb_node = NULL;
        tree->ops = NULL;
        rwlock_init(&tree->lock);
+       spin_lock_init(&tree->lru_lock);
        tree->mapping = mapping;
+       INIT_LIST_HEAD(&tree->buffer_lru);
+       tree->lru_size = 0;
 }
 EXPORT_SYMBOL(extent_map_tree_init);
 
+void extent_map_tree_cleanup(struct extent_map_tree *tree)
+{
+       struct extent_buffer *eb;
+       while(!list_empty(&tree->buffer_lru)) {
+               eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
+                               lru);
+               list_del(&eb->lru);
+               free_extent_buffer(eb);
+       }
+}
+EXPORT_SYMBOL(extent_map_tree_cleanup);
+
 struct extent_map *alloc_extent_map(gfp_t mask)
 {
        struct extent_map *em;
@@ -94,12 +120,19 @@ EXPORT_SYMBOL(free_extent_map);
 struct extent_state *alloc_extent_state(gfp_t mask)
 {
        struct extent_state *state;
+       unsigned long flags;
+
        state = kmem_cache_alloc(extent_state_cache, mask);
        if (!state || IS_ERR(state))
                return state;
        state->state = 0;
        state->in_tree = 0;
        state->private = 0;
+
+       spin_lock_irqsave(&state_lock, flags);
+       list_add(&state->list, &states);
+       spin_unlock_irqrestore(&state_lock, flags);
+
        atomic_set(&state->refs, 1);
        init_waitqueue_head(&state->wq);
        return state;
@@ -108,10 +141,14 @@ EXPORT_SYMBOL(alloc_extent_state);
 
 void free_extent_state(struct extent_state *state)
 {
+       unsigned long flags;
        if (!state)
                return;
        if (atomic_dec_and_test(&state->refs)) {
                WARN_ON(state->in_tree);
+               spin_lock_irqsave(&state_lock, flags);
+               list_del(&state->list);
+               spin_unlock_irqrestore(&state_lock, flags);
                kmem_cache_free(extent_state_cache, state);
        }
 }
@@ -223,7 +260,8 @@ int add_extent_mapping(struct extent_map_tree *tree,
                if (rb)
                        prev = rb_entry(rb, struct extent_map, rb_node);
                if (prev && prev->end + 1 == em->start &&
-                   ((em->block_start == 0 && prev->block_start == 0) ||
+                   ((em->block_start == EXTENT_MAP_HOLE &&
+                     prev->block_start == EXTENT_MAP_HOLE) ||
                             (em->block_start == prev->block_end + 1))) {
                        em->start = prev->start;
                        em->block_start = prev->block_start;
@@ -353,10 +391,6 @@ static int insert_state(struct extent_map_tree *tree,
        state->state |= bits;
        state->start = start;
        state->end = end;
-       if ((end & 4095) == 0) {
-               printk("insert state %Lu %Lu strange end\n", start, end);
-               WARN_ON(1);
-       }
        node = tree_insert(&tree->state, end, &state->rb_node);
        if (node) {
                struct extent_state *found;
@@ -391,11 +425,7 @@ static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
        prealloc->end = split - 1;
        prealloc->state = orig->state;
        orig->start = split;
-       if ((prealloc->end & 4095) == 0) {
-               printk("insert state %Lu %Lu strange end\n", prealloc->start,
-                      prealloc->end);
-               WARN_ON(1);
-       }
+
        node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
        if (node) {
                struct extent_state *found;
@@ -541,7 +571,7 @@ out:
        return set;
 
 search_again:
-       if (start >= end)
+       if (start > end)
                goto out;
        write_unlock_irqrestore(&tree->lock, flags);
        if (mask & __GFP_WAIT)
@@ -786,6 +816,21 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
 }
 EXPORT_SYMBOL(set_extent_dirty);
 
+int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
+                   int bits, gfp_t mask)
+{
+       return set_extent_bit(tree, start, end, bits, 0, NULL,
+                             mask);
+}
+EXPORT_SYMBOL(set_extent_bits);
+
+int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
+                     int bits, gfp_t mask)
+{
+       return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_bits);
+
 int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
@@ -926,6 +971,41 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
 }
 EXPORT_SYMBOL(set_range_writeback);
 
+int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
+                         u64 *start_ret, u64 *end_ret, int bits)
+{
+       struct rb_node *node;
+       struct extent_state *state;
+       int ret = 1;
+
+       read_lock_irq(&tree->lock);
+       /*
+        * this search will find all the extents that end after
+        * our range starts.
+        */
+       node = tree_search(&tree->state, start);
+       if (!node || IS_ERR(node)) {
+               goto out;
+       }
+
+       while(1) {
+               state = rb_entry(node, struct extent_state, rb_node);
+               if (state->end >= start && (state->state & bits)) {
+                       *start_ret = state->start;
+                       *end_ret = state->end;
+                       ret = 0;
+                       break;
+               }
+               node = rb_next(node);
+               if (!node)
+                       break;
+       }
+out:
+       read_unlock_irq(&tree->lock);
+       return ret;
+}
+EXPORT_SYMBOL(find_first_extent_bit);
+
 u64 find_lock_delalloc_range(struct extent_map_tree *tree,
                             u64 start, u64 lock_start, u64 *end, u64 max_bytes)
 {
@@ -1070,7 +1150,6 @@ int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
 out:
        write_unlock_irq(&tree->lock);
        return ret;
-
 }
 
 int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
@@ -1106,8 +1185,8 @@ out:
  * has the bits set.  Otherwise, 1 is returned if any bit in the
  * range is found set.
  */
-static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
-                         int bits, int filled)
+int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
+                  int bits, int filled)
 {
        struct extent_state *state = NULL;
        struct rb_node *node;
@@ -1140,6 +1219,7 @@ static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
        read_unlock_irq(&tree->lock);
        return bitset;
 }
+EXPORT_SYMBOL(test_range_bit);
 
 /*
  * helper function to set a given page up to date if all the
@@ -1231,6 +1311,8 @@ static int end_bio_extent_writepage(struct bio *bio,
                        end_page_writeback(page);
                else
                        check_page_writeback(tree, page);
+               if (tree->ops && tree->ops->writepage_end_io_hook)
+                       tree->ops->writepage_end_io_hook(page, start, end);
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
@@ -1377,6 +1459,16 @@ static int submit_extent_page(int rw, struct extent_map_tree *tree,
        return ret;
 }
 
+void set_page_extent_mapped(struct page *page)
+{
+       if (!PagePrivate(page)) {
+               SetPagePrivate(page);
+               WARN_ON(!page->mapping->a_ops->invalidatepage);
+               set_page_private(page, 1);
+               page_cache_get(page);
+       }
+}
+
 /*
  * basic readpage implementation.  Locked extent state structs are inserted
  * into the tree that are removed when the IO is done (by the end_io
@@ -1403,12 +1495,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
        size_t iosize;
        size_t blocksize = inode->i_sb->s_blocksize;
 
-       if (!PagePrivate(page)) {
-               SetPagePrivate(page);
-               WARN_ON(!page->mapping->a_ops->invalidatepage);
-               set_page_private(page, 1);
-               page_cache_get(page);
-       }
+       set_page_extent_mapped(page);
 
        end = page_end;
        lock_extent(tree, start, end, GFP_NOFS);
@@ -1443,7 +1530,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
                em = NULL;
 
                /* we've found a hole, just zero and go on */
-               if (block_start == 0) {
+               if (block_start == EXTENT_MAP_HOLE) {
                        zero_user_page(page, page_offset, iosize, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            GFP_NOFS);
@@ -1529,12 +1616,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                               PAGE_CACHE_SIZE - offset, KM_USER0);
        }
 
-       if (!PagePrivate(page)) {
-               SetPagePrivate(page);
-               set_page_private(page, 1);
-               WARN_ON(!page->mapping->a_ops->invalidatepage);
-               page_cache_get(page);
-       }
+       set_page_extent_mapped(page);
 
        lock_extent(tree, start, page_end, GFP_NOFS);
        nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
@@ -1591,7 +1673,8 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                free_extent_map(em);
                em = NULL;
 
-               if (block_start == 0 || block_start == EXTENT_MAP_INLINE) {
+               if (block_start == EXTENT_MAP_HOLE ||
+                   block_start == EXTENT_MAP_INLINE) {
                        clear_extent_dirty(tree, cur,
                                           cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
@@ -1607,7 +1690,12 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                        continue;
                }
                clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
-               ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1);
+               if (tree->ops && tree->ops->writepage_io_hook) {
+                       ret = tree->ops->writepage_io_hook(page, cur,
+                                               cur + iosize - 1);
+               } else {
+                       ret = 0;
+               }
                if (ret)
                        SetPageError(page);
                else {
@@ -1623,7 +1711,6 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                nr++;
        }
 done:
-       WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0));
        unlock_extent(tree, start, page_end, GFP_NOFS);
        unlock_page(page);
        return 0;
@@ -1665,13 +1752,7 @@ int extent_commit_write(struct extent_map_tree *tree,
 {
        loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 
-       if (!PagePrivate(page)) {
-               SetPagePrivate(page);
-               set_page_private(page, 1);
-               WARN_ON(!page->mapping->a_ops->invalidatepage);
-               page_cache_get(page);
-       }
-
+       set_page_extent_mapped(page);
        set_page_dirty(page);
 
        if (pos > inode->i_size) {
@@ -1702,12 +1783,8 @@ int extent_prepare_write(struct extent_map_tree *tree,
        int ret = 0;
        int isnew;
 
-       if (!PagePrivate(page)) {
-               SetPagePrivate(page);
-               set_page_private(page, 1);
-               WARN_ON(!page->mapping->a_ops->invalidatepage);
-               page_cache_get(page);
-       }
+       set_page_extent_mapped(page);
+
        block_start = (page_start + from) & ~((u64)blocksize - 1);
        block_end = (page_start + to - 1) | (blocksize - 1);
        orig_block_start = block_start;
@@ -1828,10 +1905,767 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
        if (!em || IS_ERR(em))
                return 0;
 
-       // XXX(hch): block 0 is valid in some cases, e.g. XFS RT device
        if (em->block_start == EXTENT_MAP_INLINE ||
-           em->block_start == 0)
-               return 0;
+           em->block_start == EXTENT_MAP_HOLE)
+               return 0;
 
        return (em->block_start + start - em->start) >> inode->i_blkbits;
 }
+
+static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
+{
+       if (list_empty(&eb->lru)) {
+               extent_buffer_get(eb);
+               list_add(&eb->lru, &tree->buffer_lru);
+               tree->lru_size++;
+               if (tree->lru_size >= BUFFER_LRU_MAX) {
+                       struct extent_buffer *rm;
+                       rm = list_entry(tree->buffer_lru.prev,
+                                       struct extent_buffer, lru);
+                       tree->lru_size--;
+                       list_del(&rm->lru);
+                       free_extent_buffer(rm);
+               }
+       } else
+               list_move(&eb->lru, &tree->buffer_lru);
+       return 0;
+}
+
+static struct extent_buffer *find_lru(struct extent_map_tree *tree,
+                                     u64 start, unsigned long len)
+{
+       struct list_head *lru = &tree->buffer_lru;
+       struct list_head *cur = lru->next;
+       struct extent_buffer *eb;
+
+       if (list_empty(lru))
+               return NULL;
+
+       do {
+               eb = list_entry(cur, struct extent_buffer, lru);
+               if (eb->start == start && eb->len == len) {
+                       extent_buffer_get(eb);
+                       return eb;
+               }
+               cur = cur->next;
+       } while (cur != lru);
+       return NULL;
+}
+
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+               (start >> PAGE_CACHE_SHIFT);
+}
+
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+                                             unsigned long i)
+{
+       struct page *p;
+
+       if (i == 0)
+               return eb->last_page;
+       i += eb->start >> PAGE_CACHE_SHIFT;
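+       /*
+        * the buffer already holds a reference on each of its pages (taken
+        * when it was first set up), so the temporary reference from
+        * find_get_page() can be dropped again right away.
+        */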
+       p = find_get_page(eb->last_page->mapping, i);
+       page_cache_release(p);
+       return p;
+}
+
+static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
+                                                  u64 start,
+                                                  unsigned long len,
+                                                  gfp_t mask)
+{
+       struct extent_buffer *eb = NULL;
+
+       spin_lock(&tree->lru_lock);
+       eb = find_lru(tree, start, len);
+       if (eb)
+               goto lru_add;
+       spin_unlock(&tree->lru_lock);
+
+       eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+       if (!eb)
+               return NULL;
+       INIT_LIST_HEAD(&eb->lru);
+       eb->start = start;
+       eb->len = len;
+       atomic_set(&eb->refs, 1);
+
+       spin_lock(&tree->lru_lock);
+lru_add:
+       add_lru(tree, eb);
+       spin_unlock(&tree->lru_lock);
+       return eb;
+}
+
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+       kmem_cache_free(extent_buffer_cache, eb);
+}
+
+struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
+                                         u64 start, unsigned long len,
+                                         gfp_t mask)
+{
+       unsigned long num_pages = num_extent_pages(start, len);
+       unsigned long i;
+       unsigned long index = start >> PAGE_CACHE_SHIFT;
+       struct extent_buffer *eb;
+       struct page *p;
+       struct address_space *mapping = tree->mapping;
+       int uptodate = 0;
+
+       eb = __alloc_extent_buffer(tree, start, len, mask);
+       if (!eb || IS_ERR(eb))
+               return NULL;
+
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               return eb;
+
+       for (i = 0; i < num_pages; i++, index++) {
+               p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+               if (!p) {
+                       WARN_ON(1);
+                       /* make sure the free only frees the pages we've
+                        * grabbed a reference on
+                        */
+                       eb->len = i << PAGE_CACHE_SHIFT;
+                       eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
+                       goto fail;
+               }
+               set_page_extent_mapped(p);
+               if (i == 0)
+                       eb->last_page = p;
+               if (!PageUptodate(p))
+                       uptodate = 0;
+               unlock_page(p);
+       }
+       if (uptodate)
+               eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
+       return eb;
+fail:
+       free_extent_buffer(eb);
+       return NULL;
+}
+EXPORT_SYMBOL(alloc_extent_buffer);
+
+struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
+                                        u64 start, unsigned long len,
+                                         gfp_t mask)
+{
+       unsigned long num_pages = num_extent_pages(start, len);
+       unsigned long i;
+       unsigned long index = start >> PAGE_CACHE_SHIFT;
+       struct extent_buffer *eb;
+       struct page *p;
+       struct address_space *mapping = tree->mapping;
+       int uptodate = 1;
+
+       eb = __alloc_extent_buffer(tree, start, len, mask);
+       if (!eb || IS_ERR(eb))
+               return NULL;
+
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               return eb;
+
+       for (i = 0; i < num_pages; i++, index++) {
+               p = find_lock_page(mapping, index);
+               if (!p) {
+                       /* make sure the free only frees the pages we've
+                        * grabbed a reference on
+                        */
+                       eb->len = i << PAGE_CACHE_SHIFT;
+                       eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
+                       goto fail;
+               }
+               set_page_extent_mapped(p);
+               if (i == 0)
+                       eb->last_page = p;
+               if (!PageUptodate(p))
+                       uptodate = 0;
+               unlock_page(p);
+       }
+       if (uptodate)
+               eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
+       return eb;
+fail:
+       free_extent_buffer(eb);
+       return NULL;
+}
+EXPORT_SYMBOL(find_extent_buffer);
+
+void free_extent_buffer(struct extent_buffer *eb)
+{
+       unsigned long i;
+       unsigned long num_pages;
+
+       if (!eb)
+               return;
+
+       if (!atomic_dec_and_test(&eb->refs))
+               return;
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+
+       for (i = 0; i < num_pages; i++) {
+               page_cache_release(extent_buffer_page(eb, i));
+       }
+       __free_extent_buffer(eb);
+}
+EXPORT_SYMBOL(free_extent_buffer);
+
+int clear_extent_buffer_dirty(struct extent_map_tree *tree,
+                             struct extent_buffer *eb)
+{
+       int set;
+       unsigned long i;
+       unsigned long num_pages;
+       struct page *page;
+
+       u64 start = eb->start;
+       u64 end = start + eb->len - 1;
+
+       set = clear_extent_dirty(tree, start, end, GFP_NOFS);
+       num_pages = num_extent_pages(eb->start, eb->len);
+
+       for (i = 0; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               lock_page(page);
+               /*
+                * if we're on the last page or the first page and the
+                * block isn't aligned on a page boundary, do extra checks
+                * to make sure we don't clean a page that is partially dirty
+                */
+               if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+                   ((i == num_pages - 1) &&
+                    ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
+                       start = page->index << PAGE_CACHE_SHIFT;
+                       end  = start + PAGE_CACHE_SIZE - 1;
+                       if (test_range_bit(tree, start, end,
+                                          EXTENT_DIRTY, 0)) {
+                               unlock_page(page);
+                               continue;
+                       }
+               }
+               clear_page_dirty_for_io(page);
+               unlock_page(page);
+       }
+       return 0;
+}
+EXPORT_SYMBOL(clear_extent_buffer_dirty);
+
+int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
+                                   struct extent_buffer *eb)
+{
+       return wait_on_extent_writeback(tree, eb->start,
+                                       eb->start + eb->len - 1);
+}
+EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
+
+int set_extent_buffer_dirty(struct extent_map_tree *tree,
+                            struct extent_buffer *eb)
+{
+       return set_range_dirty(tree, eb->start, eb->start + eb->len - 1);
+}
+EXPORT_SYMBOL(set_extent_buffer_dirty);
+
+int set_extent_buffer_uptodate(struct extent_map_tree *tree,
+                               struct extent_buffer *eb)
+{
+       unsigned long i;
+       struct page *page;
+       unsigned long num_pages;
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+
+       set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                           GFP_NOFS);
+       for (i = 0; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+                   ((i == num_pages - 1) &&
+                    ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
+                       check_page_uptodate(tree, page);
+                       continue;
+               }
+               SetPageUptodate(page);
+       }
+       return 0;
+}
+EXPORT_SYMBOL(set_extent_buffer_uptodate);
+
+int extent_buffer_uptodate(struct extent_map_tree *tree,
+                            struct extent_buffer *eb)
+{
+       if (eb->flags & EXTENT_UPTODATE)
+               return 1;
+       return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                          EXTENT_UPTODATE, 1);
+}
+EXPORT_SYMBOL(extent_buffer_uptodate);
+
+int read_extent_buffer_pages(struct extent_map_tree *tree,
+                            struct extent_buffer *eb, int wait)
+{
+       unsigned long i;
+       struct page *page;
+       int err;
+       int ret = 0;
+       unsigned long num_pages;
+
+       if (eb->flags & EXTENT_UPTODATE)
+               return 0;
+
+       if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                          EXTENT_UPTODATE, 1)) {
+               return 0;
+       }
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+       for (i = 0; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               if (PageUptodate(page)) {
+                       continue;
+               }
+               if (!wait) {
+                       if (TestSetPageLocked(page)) {
+                               continue;
+                       }
+               } else {
+                       lock_page(page);
+               }
+               if (!PageUptodate(page)) {
+                       err = page->mapping->a_ops->readpage(NULL, page);
+                       if (err) {
+                               ret = err;
+                       }
+               } else {
+                       unlock_page(page);
+               }
+       }
+
+       if (ret || !wait) {
+               return ret;
+       }
+
+       for (i = 0; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               wait_on_page_locked(page);
+               if (!PageUptodate(page)) {
+                       ret = -EIO;
+               }
+       }
+       if (!ret)
+               eb->flags |= EXTENT_UPTODATE;
+       return ret;
+}
+EXPORT_SYMBOL(read_extent_buffer_pages);
+
+void read_extent_buffer(struct extent_buffer *eb, void *dstv,
+                       unsigned long start,
+                       unsigned long len)
+{
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       char *dst = (char *)dstv;
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       unsigned long num_pages = num_extent_pages(eb->start, eb->len);
+
+       WARN_ON(start > eb->len);
+       WARN_ON(start + len > eb->start + eb->len);
+
+       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       if (i == 0)
+               offset += start_offset;
+
+       while(len > 0) {
+               page = extent_buffer_page(eb, i);
+               if (!PageUptodate(page)) {
+                       printk("page %lu not up to date i %lu, total %lu, "
+                              "len %lu\n", page->index, i, num_pages,
+                              eb->len);
+                       WARN_ON(1);
+               }
+               WARN_ON(!PageUptodate(page));
+
+               cur = min(len, (PAGE_CACHE_SIZE - offset));
+               kaddr = kmap_atomic(page, KM_USER0);
+               memcpy(dst, kaddr + offset, cur);
+               kunmap_atomic(kaddr, KM_USER0);
+
+               dst += cur;
+               len -= cur;
+               offset = 0;
+               i++;
+       }
+}
+EXPORT_SYMBOL(read_extent_buffer);
+
+static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start,
+                              unsigned long min_len, char **token, char **map,
+                              unsigned long *map_start,
+                              unsigned long *map_len, int km)
+{
+       size_t offset = start & (PAGE_CACHE_SIZE - 1);
+       char *kaddr;
+       struct page *p;
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       unsigned long end_i = (start_offset + start + min_len) >>
+                               PAGE_CACHE_SHIFT;
+
+       if (i != end_i)
+               return -EINVAL;
+
+       if (start >= eb->len) {
+               printk("bad start in map eb start %Lu len %lu caller start "
+                      "%lu min %lu\n", eb->start, eb->len, start, min_len);
+               WARN_ON(1);
+       }
+
+       if (i == 0) {
+               offset = start_offset;
+               *map_start = 0;
+       } else {
+               offset = 0;
+               *map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
+       }
+
+       p = extent_buffer_page(eb, i);
+       WARN_ON(!PageUptodate(p));
+       kaddr = kmap_atomic(p, km);
+       *token = kaddr;
+       *map = kaddr + offset;
+       *map_len = PAGE_CACHE_SIZE - offset;
+       return 0;
+}
+
+int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
+                     unsigned long min_len,
+                     char **token, char **map,
+                     unsigned long *map_start,
+                     unsigned long *map_len, int km)
+{
+       int err;
+       int save = 0;
+       if (eb->map_token) {
+               if (start >= eb->map_start &&
+                   start + min_len <= eb->map_start + eb->map_len) {
+                       *token = eb->map_token;
+                       *map = eb->kaddr;
+                       *map_start = eb->map_start;
+                       *map_len = eb->map_len;
+                       return 0;
+               }
+               unmap_extent_buffer(eb, eb->map_token, km);
+               eb->map_token = NULL;
+               save = 1;
+       }
+       err = __map_extent_buffer(eb, start, min_len, token, map,
+                                  map_start, map_len, km);
+       if (!err && save) {
+               eb->map_token = *token;
+               eb->kaddr = *map;
+               eb->map_start = *map_start;
+               eb->map_len = *map_len;
+       }
+       return err;
+}
+EXPORT_SYMBOL(map_extent_buffer);
+
+void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
+{
+       kunmap_atomic(token, km);
+}
+EXPORT_SYMBOL(unmap_extent_buffer);
+
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+                         unsigned long start,
+                         unsigned long len)
+{
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       char *ptr = (char *)ptrv;
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       int ret = 0;
+
+       WARN_ON(start > eb->len);
+       WARN_ON(start + len > eb->start + eb->len);
+
+       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       if (i == 0)
+               offset += start_offset;
+
+       while(len > 0) {
+               page = extent_buffer_page(eb, i);
+               WARN_ON(!PageUptodate(page));
+
+               cur = min(len, (PAGE_CACHE_SIZE - offset));
+
+               kaddr = kmap_atomic(page, KM_USER0);
+               ret = memcmp(ptr, kaddr + offset, cur);
+               kunmap_atomic(kaddr, KM_USER0);
+               if (ret)
+                       break;
+
+               ptr += cur;
+               len -= cur;
+               offset = 0;
+               i++;
+       }
+       return ret;
+}
+EXPORT_SYMBOL(memcmp_extent_buffer);
+
+void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
+                        unsigned long start, unsigned long len)
+{
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       char *src = (char *)srcv;
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+       WARN_ON(start > eb->len);
+       WARN_ON(start + len > eb->start + eb->len);
+
+       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       if (i == 0)
+               offset += start_offset;
+
+       while(len > 0) {
+               page = extent_buffer_page(eb, i);
+               WARN_ON(!PageUptodate(page));
+
+               cur = min(len, PAGE_CACHE_SIZE - offset);
+               kaddr = kmap_atomic(page, KM_USER0);
+               memcpy(kaddr + offset, src, cur);
+               kunmap_atomic(kaddr, KM_USER0);
+
+               src += cur;
+               len -= cur;
+               offset = 0;
+               i++;
+       }
+}
+EXPORT_SYMBOL(write_extent_buffer);
+
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+                         unsigned long start, unsigned long len)
+{
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+       WARN_ON(start > eb->len);
+       WARN_ON(start + len > eb->start + eb->len);
+
+       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       if (i == 0)
+               offset += start_offset;
+
+       while(len > 0) {
+               page = extent_buffer_page(eb, i);
+               WARN_ON(!PageUptodate(page));
+
+               cur = min(len, PAGE_CACHE_SIZE - offset);
+               kaddr = kmap_atomic(page, KM_USER0);
+               memset(kaddr + offset, c, cur);
+               kunmap_atomic(kaddr, KM_USER0);
+
+               len -= cur;
+               offset = 0;
+               i++;
+       }
+}
+EXPORT_SYMBOL(memset_extent_buffer);
+
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+                       unsigned long dst_offset, unsigned long src_offset,
+                       unsigned long len)
+{
+       u64 dst_len = dst->len;
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+
+       WARN_ON(src->len != dst_len);
+
+       offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       if (i == 0)
+               offset += start_offset;
+
+       while(len > 0) {
+               page = extent_buffer_page(dst, i);
+               WARN_ON(!PageUptodate(page));
+
+               cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
+
+               kaddr = kmap_atomic(page, KM_USER1);
+               read_extent_buffer(src, kaddr + offset, src_offset, cur);
+               kunmap_atomic(kaddr, KM_USER1);
+
+               src_offset += cur;
+               len -= cur;
+               offset = 0;
+               i++;
+       }
+}
+EXPORT_SYMBOL(copy_extent_buffer);
+
+static void move_pages(struct page *dst_page, struct page *src_page,
+                      unsigned long dst_off, unsigned long src_off,
+                      unsigned long len)
+{
+       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       if (dst_page == src_page) {
+               memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
+       } else {
+               char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+               char *p = dst_kaddr + dst_off + len;
+               char *s = src_kaddr + src_off + len;
+
+               while (len--)
+                       *--p = *--s;
+
+               kunmap_atomic(src_kaddr, KM_USER1);
+       }
+       kunmap_atomic(dst_kaddr, KM_USER0);
+}
+
+static void copy_pages(struct page *dst_page, struct page *src_page,
+                      unsigned long dst_off, unsigned long src_off,
+                      unsigned long len)
+{
+       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *src_kaddr;
+
+       if (dst_page != src_page)
+               src_kaddr = kmap_atomic(src_page, KM_USER1);
+       else
+               src_kaddr = dst_kaddr;
+
+       memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
+       kunmap_atomic(dst_kaddr, KM_USER0);
+       if (dst_page != src_page)
+               kunmap_atomic(src_kaddr, KM_USER1);
+}
+
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+                          unsigned long src_offset, unsigned long len)
+{
+       size_t cur;
+       size_t dst_off_in_page;
+       size_t src_off_in_page;
+       size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long dst_i;
+       unsigned long src_i;
+
+       if (src_offset + len > dst->len) {
+               printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+                      src_offset, len, dst->len);
+               BUG_ON(1);
+       }
+       if (dst_offset + len > dst->len) {
+               printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+                      dst_offset, len, dst->len);
+               BUG_ON(1);
+       }
+
+       while(len > 0) {
+               dst_off_in_page = dst_offset &
+                       ((unsigned long)PAGE_CACHE_SIZE - 1);
+               src_off_in_page = src_offset &
+                       ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+               dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+               src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
+
+               if (src_i == 0)
+                       src_off_in_page += start_offset;
+               if (dst_i == 0)
+                       dst_off_in_page += start_offset;
+
+               cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
+                                              src_off_in_page));
+               cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE -
+                                              dst_off_in_page));
+
+               copy_pages(extent_buffer_page(dst, dst_i),
+                          extent_buffer_page(dst, src_i),
+                          dst_off_in_page, src_off_in_page, cur);
+
+               src_offset += cur;
+               dst_offset += cur;
+               len -= cur;
+       }
+}
+EXPORT_SYMBOL(memcpy_extent_buffer);
+
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+                          unsigned long src_offset, unsigned long len)
+{
+       size_t cur;
+       size_t dst_off_in_page;
+       size_t src_off_in_page;
+       unsigned long dst_end = dst_offset + len - 1;
+       unsigned long src_end = src_offset + len - 1;
+       size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long dst_i;
+       unsigned long src_i;
+
+       if (src_offset + len > dst->len) {
+               printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+                      src_offset, len, dst->len);
+               BUG_ON(1);
+       }
+       if (dst_offset + len > dst->len) {
+               printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+                      dst_offset, len, dst->len);
+               BUG_ON(1);
+       }
+       if (dst_offset < src_offset) {
+               memcpy_extent_buffer(dst, dst_offset, src_offset, len);
+               return;
+       }
+       while(len > 0) {
+               dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
+               src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
+
+               dst_off_in_page = dst_end &
+                       ((unsigned long)PAGE_CACHE_SIZE - 1);
+               src_off_in_page = src_end &
+                       ((unsigned long)PAGE_CACHE_SIZE - 1);
+               if (src_i == 0)
+                       src_off_in_page += start_offset;
+               if (dst_i == 0)
+                       dst_off_in_page += start_offset;
+
+               cur = min(len, src_off_in_page + 1);
+               cur = min(cur, dst_off_in_page + 1);
+               move_pages(extent_buffer_page(dst, dst_i),
+                          extent_buffer_page(dst, src_i),
+                          dst_off_in_page - cur + 1,
+                          src_off_in_page - cur + 1, cur);
+
+               dst_end -= cur;
+               src_end -= cur;
+               len -= cur;
+       }
+}
+EXPORT_SYMBOL(memmove_extent_buffer);
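
For completeness, this is roughly how a caller would drive the new
extent buffer helpers added above; the function below is a hypothetical
sketch for illustration (error handling and the surrounding btrfs
context are simplified), not code from this patch:

	/* hypothetical caller, for illustration only */
	static int read_tree_block_sketch(struct extent_map_tree *tree,
					  u64 start, unsigned long len,
					  void *dst)
	{
		struct extent_buffer *eb;
		int ret;

		/* an LRU hit returns the cached buffer; a miss grabs the pages */
		eb = alloc_extent_buffer(tree, start, len, GFP_NOFS);
		if (!eb)
			return -ENOMEM;

		/* issue reads for any pages that are not uptodate, then wait */
		ret = read_extent_buffer_pages(tree, eb, 1);
		if (!ret)
			read_extent_buffer(eb, dst, 0, len);

		/* drop this caller's reference; the LRU may still hold its own */
		free_extent_buffer(eb);
		return ret;
	}

Because the LRU pins up to BUFFER_LRU_MAX buffers (and their pages) per
tree, extent_map_tree_cleanup() has to be called when the tree goes
away, otherwise those references are leaked.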