]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/btrfs/extent_map.c
Btrfs: Add an extent buffer LRU to reduce radix tree hits
[karo-tx-linux.git] / fs / btrfs / extent_map.c
index f658703c42e6de1a63ce881549583ab9851b1a35..85b28a6a4e05d601cfafb3a257c01be897175c32 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/swap.h>
 #include "extent_map.h"
 
 /* temporary define until extent_map moves out of btrfs */
@@ -20,14 +21,11 @@ static struct kmem_cache *extent_map_cache;
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
 
-static LIST_HEAD(extent_buffers);
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
-static spinlock_t extent_buffers_lock;
 static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
-static int nr_extent_buffers;
-#define MAX_EXTENT_BUFFER_CACHE 128
+#define BUFFER_LRU_MAX 64
 
 struct tree_entry {
        u64 start;
@@ -47,20 +45,12 @@ void __init extent_map_init(void)
        extent_buffer_cache = btrfs_cache_create("extent_buffers",
                                            sizeof(struct extent_buffer), 0,
                                            NULL);
-       spin_lock_init(&extent_buffers_lock);
 }
 
 void __exit extent_map_exit(void)
 {
-       struct extent_buffer *eb;
        struct extent_state *state;
 
-       while (!list_empty(&extent_buffers)) {
-               eb = list_entry(extent_buffers.next,
-                               struct extent_buffer, list);
-               list_del(&eb->list);
-               kmem_cache_free(extent_buffer_cache, eb);
-       }
        while (!list_empty(&states)) {
                state = list_entry(states.next, struct extent_state, list);
                printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
@@ -68,14 +58,6 @@ void __exit extent_map_exit(void)
                kmem_cache_free(extent_state_cache, state);
 
        }
-       while (!list_empty(&buffers)) {
-               eb = list_entry(buffers.next,
-                               struct extent_buffer, leak_list);
-               printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr);
-               list_del(&eb->leak_list);
-               kmem_cache_free(extent_buffer_cache, eb);
-       }
-
 
        if (extent_map_cache)
                kmem_cache_destroy(extent_map_cache);
@@ -92,10 +74,25 @@ void extent_map_tree_init(struct extent_map_tree *tree,
        tree->state.rb_node = NULL;
        tree->ops = NULL;
        rwlock_init(&tree->lock);
+       spin_lock_init(&tree->lru_lock);
        tree->mapping = mapping;
+       INIT_LIST_HEAD(&tree->buffer_lru);
+       tree->lru_size = 0;
 }
 EXPORT_SYMBOL(extent_map_tree_init);
 
+void extent_map_tree_cleanup(struct extent_map_tree *tree)
+{
+       struct extent_buffer *eb;
+       while(!list_empty(&tree->buffer_lru)) {
+               eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
+                               lru);
+               list_del(&eb->lru);
+               free_extent_buffer(eb);
+       }
+}
+EXPORT_SYMBOL(extent_map_tree_cleanup);
+
 struct extent_map *alloc_extent_map(gfp_t mask)
 {
        struct extent_map *em;
@@ -981,7 +978,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
        struct extent_state *state;
        int ret = 1;
 
-       write_lock_irq(&tree->lock);
+       read_lock_irq(&tree->lock);
        /*
         * this search will find all the extents that end after
         * our range starts.
@@ -993,7 +990,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
 
        while(1) {
                state = rb_entry(node, struct extent_state, rb_node);
-               if (state->state & bits) {
+               if (state->end >= start && (state->state & bits)) {
                        *start_ret = state->start;
                        *end_ret = state->end;
                        ret = 0;
@@ -1004,7 +1001,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
                        break;
        }
 out:
-       write_unlock_irq(&tree->lock);
+       read_unlock_irq(&tree->lock);
        return ret;
 }
 EXPORT_SYMBOL(find_first_extent_bit);
@@ -1915,70 +1912,99 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
        return (em->block_start + start - em->start) >> inode->i_blkbits;
 }
 
-static struct extent_buffer *__alloc_extent_buffer(gfp_t mask)
+static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
 {
-       struct extent_buffer *eb = NULL;
-
-       spin_lock(&extent_buffers_lock);
-       if (!list_empty(&extent_buffers)) {
-               eb = list_entry(extent_buffers.next, struct extent_buffer,
-                               list);
-               list_del(&eb->list);
-               WARN_ON(nr_extent_buffers == 0);
-               nr_extent_buffers--;
-       }
-       spin_unlock(&extent_buffers_lock);
+       if (list_empty(&eb->lru)) {
+               extent_buffer_get(eb);
+               list_add(&eb->lru, &tree->buffer_lru);
+               tree->lru_size++;
+               if (tree->lru_size >= BUFFER_LRU_MAX) {
+                       struct extent_buffer *rm;
+                       rm = list_entry(tree->buffer_lru.prev,
+                                       struct extent_buffer, lru);
+                       tree->lru_size--;
+                       list_del(&rm->lru);
+                       free_extent_buffer(rm);
+               }
+       } else
+               list_move(&eb->lru, &tree->buffer_lru);
+       return 0;
+}
+static struct extent_buffer *find_lru(struct extent_map_tree *tree,
+                                     u64 start, unsigned long len)
+{
+       struct list_head *lru = &tree->buffer_lru;
+       struct list_head *cur = lru->next;
+       struct extent_buffer *eb;
 
-       if (eb) {
-               memset(eb, 0, sizeof(*eb));
-       } else {
-               eb = kmem_cache_zalloc(extent_buffer_cache, mask);
-       }
-       spin_lock(&extent_buffers_lock);
-       list_add(&eb->leak_list, &buffers);
-       spin_unlock(&extent_buffers_lock);
+       if (list_empty(lru))
+               return NULL;
 
-       return eb;
+       do {
+               eb = list_entry(cur, struct extent_buffer, lru);
+               if (eb->start == start && eb->len == len) {
+                       extent_buffer_get(eb);
+                       return eb;
+               }
+               cur = cur->next;
+       } while (cur != lru);
+       return NULL;
 }
 
-static void __free_extent_buffer(struct extent_buffer *eb)
+static inline unsigned long num_extent_pages(u64 start, u64 len)
 {
-
-       spin_lock(&extent_buffers_lock);
-       list_del_init(&eb->leak_list);
-       spin_unlock(&extent_buffers_lock);
-
-       if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) {
-               kmem_cache_free(extent_buffer_cache, eb);
-       } else {
-               spin_lock(&extent_buffers_lock);
-               list_add(&eb->list, &extent_buffers);
-               nr_extent_buffers++;
-               spin_unlock(&extent_buffers_lock);
-       }
+       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+               (start >> PAGE_CACHE_SHIFT);
 }
 
-static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i)
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+                                             unsigned long i)
 {
        struct page *p;
-       if (i == 0)
-               return eb->first_page;
 
-       i += eb->start >> PAGE_CACHE_SHIFT;
-       if (eb->last_page && eb->last_page->index == i)
+       if (i == 0)
                return eb->last_page;
-
-       p = find_get_page(eb->first_page->mapping, i);
+       i += eb->start >> PAGE_CACHE_SHIFT;
+       p = find_get_page(eb->last_page->mapping, i);
        page_cache_release(p);
-       eb->last_page = p;
        return p;
 }
 
-static inline unsigned long num_extent_pages(u64 start, u64 len)
+static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
+                                                  u64 start,
+                                                  unsigned long len,
+                                                  gfp_t mask)
 {
-       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-               (start >> PAGE_CACHE_SHIFT);
+       struct extent_buffer *eb = NULL;
+
+       spin_lock(&tree->lru_lock);
+       eb = find_lru(tree, start, len);
+       if (eb)
+               goto lru_add;
+       spin_unlock(&tree->lru_lock);
+
+       if (eb) {
+               memset(eb, 0, sizeof(*eb));
+       } else {
+               eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+       }
+       INIT_LIST_HEAD(&eb->lru);
+       eb->start = start;
+       eb->len = len;
+       atomic_set(&eb->refs, 1);
+
+       spin_lock(&tree->lru_lock);
+lru_add:
+       add_lru(tree, eb);
+       spin_unlock(&tree->lru_lock);
+       return eb;
 }
+
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+       kmem_cache_free(extent_buffer_cache, eb);
+}
+
 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
                                          u64 start, unsigned long len,
                                          gfp_t mask)
@@ -1991,14 +2017,12 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
        struct address_space *mapping = tree->mapping;
        int uptodate = 0;
 
-       eb = __alloc_extent_buffer(mask);
+       eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb || IS_ERR(eb))
                return NULL;
 
-       eb->alloc_addr = (unsigned long)__builtin_return_address(0);
-       eb->start = start;
-       eb->len = len;
-       atomic_set(&eb->refs, 1);
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               return eb;
 
        for (i = 0; i < num_pages; i++, index++) {
                p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
@@ -2013,13 +2037,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
                }
                set_page_extent_mapped(p);
                if (i == 0)
-                       eb->first_page = p;
+                       eb->last_page = p;
                if (!PageUptodate(p))
                        uptodate = 0;
                unlock_page(p);
        }
        if (uptodate)
                eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
        return eb;
 fail:
        free_extent_buffer(eb);
@@ -2037,18 +2062,17 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
        struct extent_buffer *eb;
        struct page *p;
        struct address_space *mapping = tree->mapping;
+       int uptodate = 1;
 
-       eb = __alloc_extent_buffer(mask);
+       eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb || IS_ERR(eb))
                return NULL;
 
-       eb->alloc_addr = (unsigned long)__builtin_return_address(0);
-       eb->start = start;
-       eb->len = len;
-       atomic_set(&eb->refs, 1);
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               return eb;
 
        for (i = 0; i < num_pages; i++, index++) {
-               p = find_get_page(mapping, index);
+               p = find_lock_page(mapping, index);
                if (!p) {
                        /* make sure the free only frees the pages we've
                         * grabbed a reference on
@@ -2059,8 +2083,14 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
                }
                set_page_extent_mapped(p);
                if (i == 0)
-                       eb->first_page = p;
+                       eb->last_page = p;
+               if (!PageUptodate(p))
+                       uptodate = 0;
+               unlock_page(p);
        }
+       if (uptodate)
+               eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
        return eb;
 fail:
        free_extent_buffer(eb);
@@ -2081,9 +2111,7 @@ void free_extent_buffer(struct extent_buffer *eb)
 
        num_pages = num_extent_pages(eb->start, eb->len);
 
-       if (eb->first_page)
-               page_cache_release(eb->first_page);
-       for (i = 1; i < num_pages; i++) {
+       for (i = 0; i < num_pages; i++) {
                page_cache_release(extent_buffer_page(eb, i));
        }
        __free_extent_buffer(eb);
@@ -2192,7 +2220,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree,
        if (eb->flags & EXTENT_UPTODATE)
                return 0;
 
-       if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+       if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
                           EXTENT_UPTODATE, 1)) {
                return 0;
        }
@@ -2231,7 +2259,8 @@ int read_extent_buffer_pages(struct extent_map_tree *tree,
                        ret = -EIO;
                }
        }
-       eb->flags |= EXTENT_UPTODATE;
+       if (!ret)
+               eb->flags |= EXTENT_UPTODATE;
        return ret;
 }
 EXPORT_SYMBOL(read_extent_buffer_pages);
@@ -2247,6 +2276,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
        char *dst = (char *)dstv;
        size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
        unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       unsigned long num_pages = num_extent_pages(eb->start, eb->len);
 
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
@@ -2257,6 +2287,10 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
+               if (!PageUptodate(page)) {
+                       printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
+                       WARN_ON(1);
+               }
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));