From 6af118ce51b52ceda357c671550c79628b9c4a65 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:07 -0400 Subject: [PATCH] Btrfs: Index extent buffers in an rbtree Before, extent buffers were a temporary object, meant to map a number of pages at once and collect operations on them. But, a few extra fields have crept in, and they are also the best place to store a per-tree block lock field as well. This commit puts the extent buffers into an rbtree, and ensures a single extent buffer for each tree block. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 ++-- fs/btrfs/extent_io.c | 309 ++++++++++++++++--------------------------- fs/btrfs/extent_io.h | 11 +- fs/btrfs/inode.c | 3 - 4 files changed, 129 insertions(+), 220 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 99bb385c2982..86e84a8579e3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -381,7 +381,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, end = min_t(u64, eb->len, PAGE_CACHE_SIZE); end = eb->start + end - 1; - release_extent_buffer_tail_pages(eb); err: free_extent_buffer(eb); out: @@ -563,21 +562,21 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) struct extent_map_tree *map; int ret; - if (page_count(page) > 3) { - /* once for page->private, once for the caller, once - * once for the page cache - */ - return 0; - } tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_state(map, tree, page, gfp_flags); + if (!ret) { + return 0; + } + + ret = try_release_extent_buffer(tree, page); if (ret == 1) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); } + return ret; } @@ -588,7 +587,8 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); + printk("warning page private not zero on page %Lu\n", + page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -1456,7 +1456,6 @@ fail_tree_root: free_extent_buffer(tree_root->node); fail_sys_array: fail_sb_buffer: - extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); @@ -1705,13 +1704,6 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_io_tree_empty_lru(&fs_info->free_space_cache); - extent_io_tree_empty_lru(&fs_info->block_group_cache); - extent_io_tree_empty_lru(&fs_info->pinned_extents); - extent_io_tree_empty_lru(&fs_info->pending_del); - extent_io_tree_empty_lru(&fs_info->extent_ins); - extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); btrfs_stop_workers(&fs_info->fixup_workers); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d4a63ae7ed1b..32bb4ed3723d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -91,29 +91,16 @@ void extent_io_tree_init(struct extent_io_tree *tree, struct address_space *mapping, gfp_t mask) { tree->state.rb_node = NULL; + tree->buffer.rb_node = NULL; tree->ops = NULL; tree->dirty_bytes = 0; spin_lock_init(&tree->lock); - spin_lock_init(&tree->lru_lock); + spin_lock_init(&tree->buffer_lock); tree->mapping = mapping; - INIT_LIST_HEAD(&tree->buffer_lru); - tree->lru_size = 0; tree->last = NULL; } EXPORT_SYMBOL(extent_io_tree_init); -void extent_io_tree_empty_lru(struct extent_io_tree *tree) -{ - struct extent_buffer *eb; - while(!list_empty(&tree->buffer_lru)) { - eb = list_entry(tree->buffer_lru.next, struct extent_buffer, - lru); - list_del_init(&eb->lru); - free_extent_buffer(eb); - } -} -EXPORT_SYMBOL(extent_io_tree_empty_lru); - struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; @@ -245,6 +232,50 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, return ret; } +static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, + u64 offset, struct rb_node *node) +{ + struct rb_root *root = &tree->buffer; + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct extent_buffer *eb; + + while(*p) { + parent = *p; + eb = rb_entry(parent, struct extent_buffer, rb_node); + + if (offset < eb->start) + p = &(*p)->rb_left; + else if (offset > eb->start) + p = &(*p)->rb_right; + else + return eb; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct extent_buffer *buffer_search(struct extent_io_tree *tree, + u64 offset) +{ + struct rb_root *root = &tree->buffer; + struct rb_node * n = root->rb_node; + struct extent_buffer *eb; + + while(n) { + eb = rb_entry(n, struct extent_buffer, rb_node); + if (offset < eb->start) + n = n->rb_left; + else if (offset > eb->start) + n = n->rb_right; + else + return eb; + } + return NULL; +} + /* * utility function to look for merge candidates inside a given range. * Any extents with matching state are merged together into a single @@ -1817,9 +1848,8 @@ void set_page_extent_mapped(struct page *page) { if (!PagePrivate(page)) { SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, EXTENT_PAGE_PRIVATE); page_cache_get(page); + set_page_private(page, EXTENT_PAGE_PRIVATE); } } @@ -2627,51 +2657,6 @@ out: return sector; } -static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb) -{ - if (list_empty(&eb->lru)) { - extent_buffer_get(eb); - list_add(&eb->lru, &tree->buffer_lru); - tree->lru_size++; - if (tree->lru_size >= BUFFER_LRU_MAX) { - struct extent_buffer *rm; - rm = list_entry(tree->buffer_lru.prev, - struct extent_buffer, lru); - tree->lru_size--; - list_del_init(&rm->lru); - free_extent_buffer(rm); - } - } else - list_move(&eb->lru, &tree->buffer_lru); - return 0; -} -static struct extent_buffer *find_lru(struct extent_io_tree *tree, - u64 start, unsigned long len) -{ - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - - if (list_empty(lru)) - return NULL; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start == start && eb->len == len) { - extent_buffer_get(eb); - return eb; - } - cur = cur->next; - } while (cur != lru); - return NULL; -} - -static inline unsigned long num_extent_pages(u64 start, u64 len) -{ - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); -} - static inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { @@ -2688,44 +2673,10 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, return p; } -int release_extent_buffer_tail_pages(struct extent_buffer *eb) -{ - unsigned long num_pages = num_extent_pages(eb->start, eb->len); - struct page *page; - unsigned long i; - - if (num_pages == 1) - return 0; - for (i = 1; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - page_cache_release(page); - } - return 0; -} - - -int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, - unsigned long len) +static inline unsigned long num_extent_pages(u64 start, u64 len) { - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - int found = 0; - - spin_lock(&tree->lru_lock); - if (list_empty(lru)) - goto out; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start <= start && eb->start + eb->len > start) { - eb->flags &= ~EXTENT_UPTODATE; - } - cur = cur->next; - } while (cur != lru); -out: - spin_unlock(&tree->lru_lock); - return found; + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); } static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, @@ -2736,15 +2687,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, struct extent_buffer *eb = NULL; unsigned long flags; - spin_lock(&tree->lru_lock); - eb = find_lru(tree, start, len); - spin_unlock(&tree->lru_lock); - if (eb) { - return eb; - } - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - INIT_LIST_HEAD(&eb->lru); eb->start = start; eb->len = len; spin_lock_irqsave(&leak_lock, flags); @@ -2773,17 +2716,24 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; + struct extent_buffer *exists = NULL; struct page *p; struct address_space *mapping = tree->mapping; int uptodate = 1; + spin_lock(&tree->buffer_lock); + eb = buffer_search(tree, start); + if (eb) { + atomic_inc(&eb->refs); + spin_unlock(&tree->buffer_lock); + return eb; + } + spin_unlock(&tree->buffer_lock); + eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb) return NULL; - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - if (page0) { eb->first_page = page0; i = 1; @@ -2800,7 +2750,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { WARN_ON(1); - goto fail; + goto free_eb; } set_page_extent_mapped(p); mark_page_accessed(p); @@ -2818,25 +2768,28 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, eb->flags |= EXTENT_UPTODATE; eb->flags |= EXTENT_BUFFER_FILLED; -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); + spin_lock(&tree->buffer_lock); + exists = buffer_tree_insert(tree, start, &eb->rb_node); + if (exists) { + /* add one reference for the caller */ + atomic_inc(&exists->refs); + spin_unlock(&tree->buffer_lock); + goto free_eb; + } + spin_unlock(&tree->buffer_lock); + + /* add one reference for the tree */ + atomic_inc(&eb->refs); return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); +free_eb: if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { + return exists; + for (index = 1; index < i; index++) page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); + page_cache_release(extent_buffer_page(eb, 0)); __free_extent_buffer(eb); - return NULL; + return exists; } EXPORT_SYMBOL(alloc_extent_buffer); @@ -2844,89 +2797,27 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - for (i = 0; i < num_pages; i++, index++) { - p = find_get_page(mapping, index); - if (!p) { - goto fail; - } - if (TestSetPageLocked(p)) { - page_cache_release(p); - goto fail; - } - - set_page_extent_mapped(p); - mark_page_accessed(p); - - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; + spin_lock(&tree->buffer_lock); + eb = buffer_search(tree, start); + if (eb) + atomic_inc(&eb->refs); + spin_unlock(&tree->buffer_lock); -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; } EXPORT_SYMBOL(find_extent_buffer); void free_extent_buffer(struct extent_buffer *eb) { - unsigned long i; - unsigned long num_pages; - if (!eb) return; if (!atomic_dec_and_test(&eb->refs)) return; - WARN_ON(!list_empty(&eb->lru)); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 1; i < num_pages; i++) { - page_cache_release(extent_buffer_page(eb, i)); - } - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); + WARN_ON(1); } EXPORT_SYMBOL(free_extent_buffer); @@ -3583,3 +3474,35 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, } } EXPORT_SYMBOL(memmove_extent_buffer); + +int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) +{ + u64 start = page_offset(page); + struct extent_buffer *eb; + int ret = 1; + unsigned long i; + unsigned long num_pages; + + spin_lock(&tree->buffer_lock); + eb = buffer_search(tree, start); + if (!eb) + goto out; + + if (atomic_read(&eb->refs) > 1) { + ret = 0; + goto out; + } + /* at this point we can safely release the extent buffer */ + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + page_cache_release(page); + } + rb_erase(&eb->rb_node, &tree->buffer); + __free_extent_buffer(eb); +out: + spin_unlock(&tree->buffer_lock); + return ret; +} +EXPORT_SYMBOL(try_release_extent_buffer); + diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 23affd27af5e..dd367617d780 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -54,13 +54,12 @@ struct extent_io_ops { struct extent_io_tree { struct rb_root state; + struct rb_root buffer; struct address_space *mapping; u64 dirty_bytes; spinlock_t lock; + spinlock_t buffer_lock; struct extent_io_ops *ops; - spinlock_t lru_lock; - struct list_head buffer_lru; - int lru_size; struct extent_state *last; }; @@ -87,10 +86,10 @@ struct extent_buffer { unsigned long map_start; unsigned long map_len; struct page *first_page; - struct list_head lru; atomic_t refs; int flags; struct list_head leak_list; + struct rb_node rb_node; }; struct extent_map_tree; @@ -112,10 +111,10 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode, void extent_io_tree_init(struct extent_io_tree *tree, struct address_space *mapping, gfp_t mask); -void extent_io_tree_empty_lru(struct extent_io_tree *tree); int try_release_extent_mapping(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); +int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page); int try_release_extent_state(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); @@ -241,8 +240,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_start, unsigned long *map_len, int km); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); -int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, - unsigned long len); int release_extent_buffer_tail_pages(struct extent_buffer *eb); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8fb6dc25e7a5..60852ada658e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2670,7 +2670,6 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(map, tree, page, gfp_flags); if (ret == 1) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -2721,8 +2720,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) ClearPageChecked(page); if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), - PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); -- 2.39.5