]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/ext4/extents_status.c
ext4: ext4_split_extent should take care of extent zeroout
[karo-tx-linux.git] / fs / ext4 / extents_status.c
index 76f4351ea82183eaa8d420a2e4f9a9027eb12dab..95796a1b7522b7e02dd72dfab15fbf2cebdb6b13 100644 (file)
 
 static struct kmem_cache *ext4_es_cachep;
 
-static int __es_insert_extent(struct ext4_es_tree *tree,
-                             struct extent_status *newes);
-static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
+static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
+static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                              ext4_lblk_t end);
+static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
+                                      int nr_to_scan);
 
 int __init ext4_init_es(void)
 {
-       ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
+       ext4_es_cachep = kmem_cache_create("ext4_extent_status",
+                                          sizeof(struct extent_status),
+                                          0, (SLAB_RECLAIM_ACCOUNT), NULL);
        if (ext4_es_cachep == NULL)
                return -ENOMEM;
        return 0;
@@ -281,11 +284,13 @@ out:
 
        read_unlock(&EXT4_I(inode)->i_es_lock);
 
+       ext4_es_lru_add(inode);
        trace_ext4_es_find_delayed_extent_exit(inode, es);
 }
 
 static struct extent_status *
-ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
+ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
+                    ext4_fsblk_t pblk)
 {
        struct extent_status *es;
        es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
@@ -294,11 +299,27 @@ ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
        es->es_lblk = lblk;
        es->es_len = len;
        es->es_pblk = pblk;
+
+       /*
+        * We don't count delayed extents because we never try to reclaim them
+        */
+       if (!ext4_es_is_delayed(es)) {
+               EXT4_I(inode)->i_es_lru_nr++;
+               percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+       }
+
        return es;
 }
 
-static void ext4_es_free_extent(struct extent_status *es)
+/*
+ * Release @es back to ext4_es_cachep.
+ *
+ * For an extent that is not delayed, the per-inode i_es_lru_nr and the
+ * per-superblock s_extent_cache_cnt are both decremented before the object
+ * is freed; @inode is only used to reach those two counters.
+ */
+static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 {
+       /*
+        * Decrease the lru counter when this es is not delayed.  The
+        * BUG_ON catches an attempt to drop the per-inode count below zero.
+        */
+       if (!ext4_es_is_delayed(es)) {
+               BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
+               EXT4_I(inode)->i_es_lru_nr--;
+               percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+       }
+
        kmem_cache_free(ext4_es_cachep, es);
 }
 
@@ -326,8 +347,9 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
 }
 
 static struct extent_status *
-ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
+ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
 {
+       struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct extent_status *es1;
        struct rb_node *node;
 
@@ -339,7 +361,7 @@ ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
        if (ext4_es_can_be_merged(es1, es)) {
                es1->es_len += es->es_len;
                rb_erase(&es->rb_node, &tree->root);
-               ext4_es_free_extent(es);
+               ext4_es_free_extent(inode, es);
                es = es1;
        }
 
@@ -347,8 +369,9 @@ ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
 }
 
 static struct extent_status *
-ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es)
+ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
 {
+       struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct extent_status *es1;
        struct rb_node *node;
 
@@ -360,15 +383,15 @@ ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es)
        if (ext4_es_can_be_merged(es, es1)) {
                es->es_len += es1->es_len;
                rb_erase(node, &tree->root);
-               ext4_es_free_extent(es1);
+               ext4_es_free_extent(inode, es1);
        }
 
        return es;
 }
 
-static int __es_insert_extent(struct ext4_es_tree *tree,
-                             struct extent_status *newes)
+static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
 {
+       struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct rb_node **p = &tree->root.rb_node;
        struct rb_node *parent = NULL;
        struct extent_status *es;
@@ -389,14 +412,14 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
                                    ext4_es_is_unwritten(es))
                                        ext4_es_store_pblock(es,
                                                             newes->es_pblk);
-                               es = ext4_es_try_to_merge_left(tree, es);
+                               es = ext4_es_try_to_merge_left(inode, es);
                                goto out;
                        }
                        p = &(*p)->rb_left;
                } else if (newes->es_lblk > ext4_es_end(es)) {
                        if (ext4_es_can_be_merged(es, newes)) {
                                es->es_len += newes->es_len;
-                               es = ext4_es_try_to_merge_right(tree, es);
+                               es = ext4_es_try_to_merge_right(inode, es);
                                goto out;
                        }
                        p = &(*p)->rb_right;
@@ -406,7 +429,7 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
                }
        }
 
-       es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len,
+       es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
                                  newes->es_pblk);
        if (!es)
                return -ENOMEM;
@@ -430,7 +453,6 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
                          ext4_lblk_t len, ext4_fsblk_t pblk,
                          unsigned long long status)
 {
-       struct ext4_es_tree *tree;
        struct extent_status newes;
        ext4_lblk_t end = lblk + len - 1;
        int err = 0;
@@ -438,6 +460,9 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
        es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
                 lblk, len, pblk, status, inode->i_ino);
 
+       if (!len)
+               return 0;
+
        BUG_ON(end < lblk);
 
        newes.es_lblk = lblk;
@@ -447,23 +472,85 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
        trace_ext4_es_insert_extent(inode, &newes);
 
        write_lock(&EXT4_I(inode)->i_es_lock);
-       tree = &EXT4_I(inode)->i_es_tree;
-       err = __es_remove_extent(tree, lblk, end);
+       err = __es_remove_extent(inode, lblk, end);
        if (err != 0)
                goto error;
-       err = __es_insert_extent(tree, &newes);
+       err = __es_insert_extent(inode, &newes);
 
 error:
        write_unlock(&EXT4_I(inode)->i_es_lock);
 
+       ext4_es_lru_add(inode);
        ext4_es_print_tree(inode);
 
        return err;
 }
 
-static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
-                                ext4_lblk_t end)
+/*
+ * ext4_es_lookup_extent() looks up the extent covering logical block @lblk
+ * in the extent status tree of @inode.
+ *
+ * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
+ *
+ * @es is zeroed first; if an extent containing @lblk is found, its es_lblk,
+ * es_len and es_pblk are copied into @es.  The search runs under the read
+ * side of i_es_lock.  Whether or not anything is found, the inode is passed
+ * to ext4_es_lru_add() before returning.
+ *
+ * Return: 1 if an extent containing @lblk was found, 0 otherwise
+ */
+int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
+                         struct extent_status *es)
+{
+       struct ext4_es_tree *tree;
+       struct extent_status *es1 = NULL;
+       struct rb_node *node;
+       int found = 0;
+
+       trace_ext4_es_lookup_extent_enter(inode, lblk);
+       es_debug("lookup extent in block %u\n", lblk);
+
+       tree = &EXT4_I(inode)->i_es_tree;
+       read_lock(&EXT4_I(inode)->i_es_lock);
+
+       /* reset the result, then try the cached extent before the rb-tree */
+       es->es_lblk = es->es_len = es->es_pblk = 0;
+       if (tree->cache_es) {
+               es1 = tree->cache_es;
+               if (in_range(lblk, es1->es_lblk, es1->es_len)) {
+                       es_debug("%u cached by [%u/%u)\n",
+                                lblk, es1->es_lblk, es1->es_len);
+                       found = 1;
+                       goto out;
+               }
+       }
+
+       node = tree->root.rb_node;
+       while (node) {
+               es1 = rb_entry(node, struct extent_status, rb_node);
+               if (lblk < es1->es_lblk)
+                       node = node->rb_left;
+               else if (lblk > ext4_es_end(es1))
+                       node = node->rb_right;
+               else {
+                       found = 1;
+                       break;
+               }
+       }
+
+out:
+       if (found) {
+               BUG_ON(!es1);
+               es->es_lblk = es1->es_lblk;
+               es->es_len = es1->es_len;
+               es->es_pblk = es1->es_pblk;
+       }
+
+       read_unlock(&EXT4_I(inode)->i_es_lock);
+
+       ext4_es_lru_add(inode);
+       trace_ext4_es_lookup_extent_exit(inode, es, found);
+       return found;
+}
+
+static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
+                             ext4_lblk_t end)
 {
+       struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct rb_node *node;
        struct extent_status *es;
        struct extent_status orig_es;
@@ -501,7 +588,7 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
                                ext4_es_store_pblock(&newes, block);
                        }
                        ext4_es_store_status(&newes, ext4_es_status(&orig_es));
-                       err = __es_insert_extent(tree, &newes);
+                       err = __es_insert_extent(inode, &newes);
                        if (err) {
                                es->es_lblk = orig_es.es_lblk;
                                es->es_len = orig_es.es_len;
@@ -530,7 +617,7 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
        while (es && ext4_es_end(es) <= end) {
                node = rb_next(&es->rb_node);
                rb_erase(&es->rb_node, &tree->root);
-               ext4_es_free_extent(es);
+               ext4_es_free_extent(inode, es);
                if (!node) {
                        es = NULL;
                        break;
@@ -562,7 +649,6 @@ out:
 int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                          ext4_lblk_t len)
 {
-       struct ext4_es_tree *tree;
        ext4_lblk_t end;
        int err = 0;
 
@@ -570,14 +656,135 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
        es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
                 lblk, len, inode->i_ino);
 
+       if (!len)
+               return err;
+
        end = lblk + len - 1;
        BUG_ON(end < lblk);
 
-       tree = &EXT4_I(inode)->i_es_tree;
-
        write_lock(&EXT4_I(inode)->i_es_lock);
-       err = __es_remove_extent(tree, lblk, end);
+       err = __es_remove_extent(inode, lblk, end);
        write_unlock(&EXT4_I(inode)->i_es_lock);
        ext4_es_print_tree(inode);
        return err;
 }
+
+/*
+ * ext4_es_shrink() is the callback installed in s_es_shrinker by
+ * ext4_es_register_shrinker().
+ *
+ * When sc->nr_to_scan is 0 it only reports the current s_extent_cache_cnt
+ * (as read by percpu_counter_read_positive()).  Otherwise it walks the
+ * per-superblock list s_es_lru under s_es_lru_lock and calls
+ * __es_try_to_reclaim_extents() on each inode until nr_to_scan extents
+ * have been freed or the list has been fully visited.
+ *
+ * Return: s_extent_cache_cnt as read after the scan (or before it, in the
+ * nr_to_scan == 0 case).
+ */
+static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+{
+       struct ext4_sb_info *sbi = container_of(shrink,
+                                       struct ext4_sb_info, s_es_shrinker);
+       struct ext4_inode_info *ei;
+       struct list_head *cur, *tmp, scanned;
+       int nr_to_scan = sc->nr_to_scan;
+       int ret, nr_shrunk = 0;
+
+       ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+       trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+
+       if (!nr_to_scan)
+               return ret;
+
+       INIT_LIST_HEAD(&scanned);
+
+       spin_lock(&sbi->s_es_lru_lock);
+       list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
+               /*
+                * Every visited inode is parked on the local "scanned" list;
+                * the whole list is spliced back to the tail of s_es_lru
+                * once the walk is over.
+                */
+               list_move_tail(cur, &scanned);
+
+               ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+
+               /* Skip inodes that hold no counted (non-delayed) extents */
+               read_lock(&ei->i_es_lock);
+               if (ei->i_es_lru_nr == 0) {
+                       read_unlock(&ei->i_es_lock);
+                       continue;
+               }
+               read_unlock(&ei->i_es_lock);
+
+               /*
+                * The read lock was dropped above, so the count may have
+                * changed; __es_try_to_reclaim_extents() checks
+                * i_es_lru_nr again under the write lock.
+                */
+               write_lock(&ei->i_es_lock);
+               ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
+               write_unlock(&ei->i_es_lock);
+
+               nr_shrunk += ret;
+               nr_to_scan -= ret;
+               if (nr_to_scan == 0)
+                       break;
+       }
+       list_splice_tail(&scanned, &sbi->s_es_lru);
+       spin_unlock(&sbi->s_es_lru_lock);
+
+       ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+       trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
+       return ret;
+}
+
+/*
+ * Set up the extent status LRU state of @sb and register its shrinker.
+ *
+ * Initialises the s_es_lru list head and s_es_lru_lock, points
+ * s_es_shrinker at ext4_es_shrink() with DEFAULT_SEEKS, and hands it to
+ * register_shrinker().
+ *
+ * NOTE(review): any result of register_shrinker() is ignored here --
+ * confirm whether it can fail in this kernel version.
+ */
+void ext4_es_register_shrinker(struct super_block *sb)
+{
+       struct ext4_sb_info *sbi;
+
+       sbi = EXT4_SB(sb);
+       INIT_LIST_HEAD(&sbi->s_es_lru);
+       spin_lock_init(&sbi->s_es_lru_lock);
+       sbi->s_es_shrinker.shrink = ext4_es_shrink;
+       sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
+       register_shrinker(&sbi->s_es_shrinker);
+}
+
+/*
+ * Unregister the extent status shrinker (s_es_shrinker) of @sb.
+ */
+void ext4_es_unregister_shrinker(struct super_block *sb)
+{
+       unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker);
+}
+
+/*
+ * Put @inode at the tail of the per-superblock list s_es_lru.
+ *
+ * An inode whose i_es_lru entry is not linked yet is added; one that is
+ * already on the list is moved to the tail.  The list is modified under
+ * s_es_lru_lock.
+ */
+void ext4_es_lru_add(struct inode *inode)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+       spin_lock(&sbi->s_es_lru_lock);
+       if (list_empty(&ei->i_es_lru))
+               list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
+       else
+               list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
+       spin_unlock(&sbi->s_es_lru_lock);
+}
+
+/*
+ * Take @inode off the per-superblock list s_es_lru, if it is on it.
+ *
+ * The entry is removed with list_del_init() under s_es_lru_lock; calling
+ * this for an inode whose i_es_lru is already empty changes nothing.
+ */
+void ext4_es_lru_del(struct inode *inode)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+       spin_lock(&sbi->s_es_lru_lock);
+       if (!list_empty(&ei->i_es_lru))
+               list_del_init(&ei->i_es_lru);
+       spin_unlock(&sbi->s_es_lru_lock);
+}
+
+/*
+ * Free up to @nr_to_scan non-delayed extents from the extent status tree
+ * of @ei, visiting entries in rb_first()/rb_next() order.
+ *
+ * The caller in this file (ext4_es_shrink) holds ei->i_es_lock for writing
+ * around the call.  Returns immediately with 0 when i_es_lru_nr is 0;
+ * otherwise tree->cache_es is reset to NULL before returning.
+ *
+ * Return: the number of extents that were freed.
+ */
+static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
+                                      int nr_to_scan)
+{
+       struct inode *inode = &ei->vfs_inode;
+       struct ext4_es_tree *tree = &ei->i_es_tree;
+       struct rb_node *node;
+       struct extent_status *es;
+       int nr_shrunk = 0;
+
+       if (ei->i_es_lru_nr == 0)
+               return 0;
+
+       node = rb_first(&tree->root);
+       while (node != NULL) {
+               es = rb_entry(node, struct extent_status, rb_node);
+               /* fetch the successor now: es may be erased and freed below */
+               node = rb_next(&es->rb_node);
+               /*
+                * We can't reclaim delayed extents from the status tree
+                * because fiemap, bigalloc, and seek_data/hole need to use
+                * them.
+                */
+               if (!ext4_es_is_delayed(es)) {
+                       rb_erase(&es->rb_node, &tree->root);
+                       ext4_es_free_extent(inode, es);
+                       nr_shrunk++;
+                       if (--nr_to_scan == 0)
+                               break;
+               }
+       }
+       /* entries were removed above, so drop the cached-extent pointer */
+       tree->cache_es = NULL;
+       return nr_shrunk;
+}