git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'for-chris' of git://git.jan-o-sch.net/btrfs-unstable into for-linus
author Chris Mason <chris.mason@fusionio.com>
Fri, 15 Jun 2012 01:33:34 +0000 (21:33 -0400)
committer Chris Mason <chris.mason@oracle.com>
Fri, 15 Jun 2012 15:33:16 +0000 (11:33 -0400)
13 files changed:
fs/btrfs/btrfs_inode.h
fs/btrfs/check-integrity.c
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/rcu-string.h [new file with mode: 0644]
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index e616f8872e69bb0cf3b9a3f369ba49eeca57f2de..12394a90d60fb7115d5b8feee0e7df701f9e743d 100644 (file)
@@ -37,6 +37,7 @@
 #define BTRFS_INODE_IN_DEFRAG                  3
 #define BTRFS_INODE_DELALLOC_META_RESERVED     4
 #define BTRFS_INODE_HAS_ORPHAN_ITEM            5
+#define BTRFS_INODE_HAS_ASYNC_EXTENT           6
 
 /* in memory btrfs inode */
 struct btrfs_inode {
index 9cebb1fd6a3cc59919c7c990d3016caee52b5849..da6e9364a5e3caa48b67c5e17f95a21ded5ee9ec 100644 (file)
@@ -93,6 +93,7 @@
 #include "print-tree.h"
 #include "locking.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 
 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
@@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror(
                superblock_tmp->never_written = 0;
                superblock_tmp->mirror_num = 1 + superblock_mirror_num;
                if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
-                       printk(KERN_INFO "New initial S-block (bdev %p, %s)"
-                              " @%llu (%s/%llu/%d)\n",
-                              superblock_bdev, device->name,
-                              (unsigned long long)dev_bytenr,
-                              dev_state->name,
-                              (unsigned long long)dev_bytenr,
-                              superblock_mirror_num);
+                       printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
+                                    " @%llu (%s/%llu/%d)\n",
+                                    superblock_bdev,
+                                    rcu_str_deref(device->name),
+                                    (unsigned long long)dev_bytenr,
+                                    dev_state->name,
+                                    (unsigned long long)dev_bytenr,
+                                    superblock_mirror_num);
                list_add(&superblock_tmp->all_blocks_node,
                         &state->all_blocks_list);
                btrfsic_block_hashtable_add(superblock_tmp,
index b99d5127ba18997a6aab35f8946711ae0b3ff5b4..9a569aef72eafa6586d768c9969c46de1341c60f 100644 (file)
@@ -44,6 +44,7 @@
 #include "free-space-cache.h"
 #include "inode-map.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
@@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb,
 
        features = btrfs_super_incompat_flags(disk_super);
        features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
-       if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
+       if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
                features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
        /*
@@ -2575,8 +2576,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
                struct btrfs_device *device = (struct btrfs_device *)
                        bh->b_private;
 
-               printk_ratelimited(KERN_WARNING "lost page write due to "
-                                  "I/O error on %s\n", device->name);
+               printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
+                                         "I/O error on %s\n",
+                                         rcu_str_deref(device->name));
                /* note, we dont' set_buffer_write_io_error because we have
                 * our own ways of dealing with the IO errors
                 */
@@ -2749,8 +2751,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
                wait_for_completion(&device->flush_wait);
 
                if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-                       printk("btrfs: disabling barriers on dev %s\n",
-                              device->name);
+                       printk_in_rcu("btrfs: disabling barriers on dev %s\n",
+                                     rcu_str_deref(device->name));
                        device->nobarriers = 1;
                }
                if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3400,7 +3402,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
        delayed_refs = &trans->delayed_refs;
 
-again:
        spin_lock(&delayed_refs->lock);
        if (delayed_refs->num_entries == 0) {
                spin_unlock(&delayed_refs->lock);
@@ -3408,31 +3409,36 @@ again:
                return ret;
        }
 
-       node = rb_first(&delayed_refs->root);
-       while (node) {
+       while ((node = rb_first(&delayed_refs->root)) != NULL) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               node = rb_next(node);
-
-               ref->in_tree = 0;
-               rb_erase(&ref->rb_node, &delayed_refs->root);
-               delayed_refs->num_entries--;
 
                atomic_set(&ref->refs, 1);
                if (btrfs_delayed_ref_is_head(ref)) {
                        struct btrfs_delayed_ref_head *head;
 
                        head = btrfs_delayed_node_to_head(ref);
-                       spin_unlock(&delayed_refs->lock);
-                       mutex_lock(&head->mutex);
+                       if (!mutex_trylock(&head->mutex)) {
+                               atomic_inc(&ref->refs);
+                               spin_unlock(&delayed_refs->lock);
+
+                               /* Need to wait for the delayed ref to run */
+                               mutex_lock(&head->mutex);
+                               mutex_unlock(&head->mutex);
+                               btrfs_put_delayed_ref(ref);
+
+                               continue;
+                       }
+
                        kfree(head->extent_op);
                        delayed_refs->num_heads--;
                        if (list_empty(&head->cluster))
                                delayed_refs->num_heads_ready--;
                        list_del_init(&head->cluster);
-                       mutex_unlock(&head->mutex);
-                       btrfs_put_delayed_ref(ref);
-                       goto again;
                }
+               ref->in_tree = 0;
+               rb_erase(&ref->rb_node, &delayed_refs->root);
+               delayed_refs->num_entries--;
+
                spin_unlock(&delayed_refs->lock);
                btrfs_put_delayed_ref(ref);
 
@@ -3520,11 +3526,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                             &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
                                               offset >> PAGE_CACHE_SHIFT);
                        spin_unlock(&dirty_pages->buffer_lock);
-                       if (eb) {
+                       if (eb)
                                ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
                                                         &eb->bflags);
-                               atomic_set(&eb->refs, 1);
-                       }
                        if (PageWriteback(page))
                                end_page_writeback(page);
 
@@ -3538,8 +3542,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                                spin_unlock_irq(&page->mapping->tree_lock);
                        }
 
-                       page->mapping->a_ops->invalidatepage(page, 0);
                        unlock_page(page);
+                       page_cache_release(page);
                }
        }
 
@@ -3585,16 +3589,13 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
        /* FIXME: cleanup wait for commit */
        cur_trans->in_commit = 1;
        cur_trans->blocked = 1;
-       if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
-               wake_up(&root->fs_info->transaction_blocked_wait);
+       wake_up(&root->fs_info->transaction_blocked_wait);
 
        cur_trans->blocked = 0;
-       if (waitqueue_active(&root->fs_info->transaction_wait))
-               wake_up(&root->fs_info->transaction_wait);
+       wake_up(&root->fs_info->transaction_wait);
 
        cur_trans->commit_done = 1;
-       if (waitqueue_active(&cur_trans->commit_wait))
-               wake_up(&cur_trans->commit_wait);
+       wake_up(&cur_trans->commit_wait);
 
        btrfs_destroy_pending_snapshots(cur_trans);
 
index 2c8f7b2046173954f720125a6e53e96de3c7727e..aaa12c1eb3483a8371df48e5765b986c436451c4 100644 (file)
@@ -20,6 +20,7 @@
 #include "volumes.h"
 #include "check-integrity.h"
 #include "locking.h"
+#include "rcu-string.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
                return -EIO;
        }
 
-       printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
-                       "sector %llu)\n", page->mapping->host->i_ino, start,
-                       dev->name, sector);
+       printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
+                     "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+                     start, rcu_str_deref(dev->name), sector);
 
        bio_put(bio);
        return 0;
index 92df0a5d1d942c56d48d754ec4b4f901ce984c29..7a090fb4eb988a03d6c5b827ecffffca2a64ac50 100644 (file)
@@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode,
        if (IS_ERR(trans)) {
                extent_clear_unlock_delalloc(inode,
                             &BTRFS_I(inode)->io_tree,
-                            start, end, NULL,
+                            start, end, locked_page,
                             EXTENT_CLEAR_UNLOCK_PAGE |
                             EXTENT_CLEAR_UNLOCK |
                             EXTENT_CLEAR_DELALLOC |
@@ -963,7 +963,7 @@ out:
 out_unlock:
        extent_clear_unlock_delalloc(inode,
                     &BTRFS_I(inode)->io_tree,
-                    start, end, NULL,
+                    start, end, locked_page,
                     EXTENT_CLEAR_UNLOCK_PAGE |
                     EXTENT_CLEAR_UNLOCK |
                     EXTENT_CLEAR_DELALLOC |
@@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work)
        compress_file_range(async_cow->inode, async_cow->locked_page,
                            async_cow->start, async_cow->end, async_cow,
                            &num_added);
-       if (num_added == 0)
+       if (num_added == 0) {
+               iput(async_cow->inode);
                async_cow->inode = NULL;
+       }
 }
 
 /*
@@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
 {
        struct async_cow *async_cow;
        async_cow = container_of(work, struct async_cow, work);
+       if (async_cow->inode)
+               iput(async_cow->inode);
        kfree(async_cow);
 }
 
@@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
        while (start < end) {
                async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
                BUG_ON(!async_cow); /* -ENOMEM */
-               async_cow->inode = inode;
+               async_cow->inode = igrab(inode);
                async_cow->root = root;
                async_cow->locked_page = locked_page;
                async_cow->start = start;
@@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        u64 ino = btrfs_ino(inode);
 
        path = btrfs_alloc_path();
-       if (!path)
+       if (!path) {
+               extent_clear_unlock_delalloc(inode,
+                            &BTRFS_I(inode)->io_tree,
+                            start, end, locked_page,
+                            EXTENT_CLEAR_UNLOCK_PAGE |
+                            EXTENT_CLEAR_UNLOCK |
+                            EXTENT_CLEAR_DELALLOC |
+                            EXTENT_CLEAR_DIRTY |
+                            EXTENT_SET_WRITEBACK |
+                            EXTENT_END_WRITEBACK);
                return -ENOMEM;
+       }
 
        nolock = btrfs_is_free_space_inode(root, inode);
 
@@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode,
                trans = btrfs_join_transaction(root);
 
        if (IS_ERR(trans)) {
+               extent_clear_unlock_delalloc(inode,
+                            &BTRFS_I(inode)->io_tree,
+                            start, end, locked_page,
+                            EXTENT_CLEAR_UNLOCK_PAGE |
+                            EXTENT_CLEAR_UNLOCK |
+                            EXTENT_CLEAR_DELALLOC |
+                            EXTENT_CLEAR_DIRTY |
+                            EXTENT_SET_WRITEBACK |
+                            EXTENT_END_WRITEBACK);
                btrfs_free_path(path);
                return PTR_ERR(trans);
        }
@@ -1327,8 +1350,11 @@ out_check:
        }
        btrfs_release_path(path);
 
-       if (cur_offset <= end && cow_start == (u64)-1)
+       if (cur_offset <= end && cow_start == (u64)-1) {
                cow_start = cur_offset;
+               cur_offset = end;
+       }
+
        if (cow_start != (u64)-1) {
                ret = cow_file_range(inode, locked_page, cow_start, end,
                                     page_started, nr_written, 1);
@@ -1347,6 +1373,17 @@ error:
        if (!ret)
                ret = err;
 
+       if (ret && cur_offset < end)
+               extent_clear_unlock_delalloc(inode,
+                            &BTRFS_I(inode)->io_tree,
+                            cur_offset, end, locked_page,
+                            EXTENT_CLEAR_UNLOCK_PAGE |
+                            EXTENT_CLEAR_UNLOCK |
+                            EXTENT_CLEAR_DELALLOC |
+                            EXTENT_CLEAR_DIRTY |
+                            EXTENT_SET_WRITEBACK |
+                            EXTENT_END_WRITEBACK);
+
        btrfs_free_path(path);
        return ret;
 }
@@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
        int ret;
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
-       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)
+       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 1, nr_written);
-       else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
+       } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 0, nr_written);
-       else if (!btrfs_test_opt(root, COMPRESS) &&
-                !(BTRFS_I(inode)->force_compress) &&
-                !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
+       } else if (!btrfs_test_opt(root, COMPRESS) &&
+                  !(BTRFS_I(inode)->force_compress) &&
+                  !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
                ret = cow_file_range(inode, locked_page, start, end,
                                      page_started, nr_written, 1);
-       else
+       } else {
+               set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                       &BTRFS_I(inode)->runtime_flags);
                ret = cow_file_range_async(inode, locked_page, start, end,
                                           page_started, nr_written);
+       }
        return ret;
 }
 
index 24b776c08d99f7bbb621076f68500464b6829435..a98f7d252829aa14ef6c530885d7dadc98f5bb12 100644 (file)
@@ -52,6 +52,7 @@
 #include "locking.h"
 #include "inode-map.h"
 #include "backref.h"
+#include "rcu-string.h"
 
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -785,39 +786,57 @@ none:
        return -ENOENT;
 }
 
-/*
- * Validaty check of prev em and next em:
- * 1) no prev/next em
- * 2) prev/next em is an hole/inline extent
- */
-static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
+static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
 {
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-       struct extent_map *prev = NULL, *next = NULL;
-       int ret = 0;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_map *em;
+       u64 len = PAGE_CACHE_SIZE;
 
+       /*
+        * hopefully we have this extent in the tree already, try without
+        * the full extent lock
+        */
        read_lock(&em_tree->lock);
-       prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1);
-       next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
+       em = lookup_extent_mapping(em_tree, start, len);
        read_unlock(&em_tree->lock);
 
-       if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) &&
-           (!next || next->block_start >= EXTENT_MAP_LAST_BYTE))
-               ret = 1;
-       free_extent_map(prev);
-       free_extent_map(next);
+       if (!em) {
+               /* get the big lock and read metadata off disk */
+               lock_extent(io_tree, start, start + len - 1);
+               em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+               unlock_extent(io_tree, start, start + len - 1);
+
+               if (IS_ERR(em))
+                       return NULL;
+       }
+
+       return em;
+}
 
+static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
+{
+       struct extent_map *next;
+       bool ret = true;
+
+       /* this is the last extent */
+       if (em->start + em->len >= i_size_read(inode))
+               return false;
+
+       next = defrag_lookup_extent(inode, em->start + em->len);
+       if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
+               ret = false;
+
+       free_extent_map(next);
        return ret;
 }
 
-static int should_defrag_range(struct inode *inode, u64 start, u64 len,
-                              int thresh, u64 *last_len, u64 *skip,
-                              u64 *defrag_end)
+static int should_defrag_range(struct inode *inode, u64 start, int thresh,
+                              u64 *last_len, u64 *skip, u64 *defrag_end)
 {
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-       struct extent_map *em = NULL;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
        int ret = 1;
+       bool next_mergeable = true;
 
        /*
         * make sure that once we start defragging an extent, we keep on
@@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
 
        *skip = 0;
 
-       /*
-        * hopefully we have this extent in the tree already, try without
-        * the full extent lock
-        */
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, start, len);
-       read_unlock(&em_tree->lock);
-
-       if (!em) {
-               /* get the big lock and read metadata off disk */
-               lock_extent(io_tree, start, start + len - 1);
-               em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-               unlock_extent(io_tree, start, start + len - 1);
-
-               if (IS_ERR(em))
-                       return 0;
-       }
+       em = defrag_lookup_extent(inode, start);
+       if (!em)
+               return 0;
 
        /* this will cover holes, and inline extents */
        if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
@@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
                goto out;
        }
 
-       /* If we have nothing to merge with us, just skip. */
-       if (check_adjacent_extents(inode, em)) {
-               ret = 0;
-               goto out;
-       }
+       next_mergeable = defrag_check_next_extent(inode, em);
 
        /*
-        * we hit a real extent, if it is big don't bother defragging it again
+        * we hit a real extent, if it is big or the next extent is not a
+        * real extent, don't bother defragging it
         */
-       if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
+       if ((*last_len == 0 || *last_len >= thresh) &&
+           (em->len >= thresh || !next_mergeable))
                ret = 0;
-
 out:
        /*
         * last_len ends up being a counter of how many bytes we've defragged.
@@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        break;
 
                if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
-                                        PAGE_CACHE_SIZE, extent_thresh,
-                                        &last_len, &skip, &defrag_end)) {
+                                        extent_thresh, &last_len, &skip,
+                                        &defrag_end)) {
                        unsigned long next;
                        /*
                         * the should_defrag function tells us how much to skip
@@ -1345,8 +1347,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
        do_div(new_size, root->sectorsize);
        new_size *= root->sectorsize;
 
-       printk(KERN_INFO "btrfs: new size for %s is %llu\n",
-               device->name, (unsigned long long)new_size);
+       printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
+                     rcu_str_deref(device->name),
+                     (unsigned long long)new_size);
 
        if (new_size > old_size) {
                trans = btrfs_start_transaction(root, 0);
@@ -2264,7 +2267,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
        di_args->total_bytes = dev->total_bytes;
        memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
        if (dev->name) {
-               strncpy(di_args->path, dev->name, sizeof(di_args->path));
+               struct rcu_string *name;
+
+               rcu_read_lock();
+               name = rcu_dereference(dev->name);
+               strncpy(di_args->path, name->str, sizeof(di_args->path));
+               rcu_read_unlock();
                di_args->path[sizeof(di_args->path) - 1] = 0;
        } else {
                di_args->path[0] = '\0';
index 9e138cdc36c5eb7d66bf80dfc37829878eeaa6e2..643335a4fe3c6699a894c19d01e79bed9ef631c7 100644 (file)
@@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
        /* start IO across the range first to instantiate any delalloc
         * extents
         */
-       filemap_write_and_wait_range(inode->i_mapping, start, orig_end);
+       filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+
+       /*
+        * So with compression we will find and lock a dirty page and clear the
+        * first one as dirty, setup an async extent, and immediately return
+        * with the entire range locked but with nobody actually marked with
+        * writeback.  So we can't just filemap_write_and_wait_range() and
+        * expect it to work since it will just kick off a thread to do the
+        * actual work.  So we need to call filemap_fdatawrite_range _again_
+        * since it will wait on the page lock, which won't be unlocked until
+        * after the pages have been marked as writeback and so we're good to go
+        * from there.  We have to do this otherwise we'll miss the ordered
+        * extents and that results in badness.  Please Josef, do not think you
+        * know better and pull this out at some point in the future, it is
+        * right and you are wrong.
+        */
+       if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                    &BTRFS_I(inode)->runtime_flags))
+               filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+
+       filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 
        end = orig_end;
        found = 0;
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
new file mode 100644 (file)
index 0000000..9e111e4
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+struct rcu_string {
+       struct rcu_head rcu;
+       char str[0];
+};
+
+static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
+{
+       size_t len = strlen(src) + 1;
+       struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) +
+                                        (len * sizeof(char)), mask);
+       if (!ret)
+               return ret;
+       strncpy(ret->str, src, len);
+       return ret;
+}
+
+static inline void rcu_string_free(struct rcu_string *str)
+{
+       if (str)
+               kfree_rcu(str, rcu);
+}
+
+#define printk_in_rcu(fmt, ...) do {   \
+       rcu_read_lock();                \
+       printk(fmt, __VA_ARGS__);       \
+       rcu_read_unlock();              \
+} while (0)
+
+#define printk_ratelimited_in_rcu(fmt, ...) do {       \
+       rcu_read_lock();                                \
+       printk_ratelimited(fmt, __VA_ARGS__);           \
+       rcu_read_unlock();                              \
+} while (0)
+
+#define rcu_str_deref(rcu_str) ({                              \
+       struct rcu_string *__str = rcu_dereference(rcu_str);    \
+       __str->str;                                             \
+})
index a38cfa4f251ec1065410f561188c4adf5868cea3..b223620cd5a6d59aa4b707e2539e59a8244d2dd1 100644 (file)
@@ -26,6 +26,7 @@
 #include "backref.h"
 #include "extent_io.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 
 /*
  * This is only the first step towards a full-features scrub. It reads all
@@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
         * hold all of the paths here
         */
        for (i = 0; i < ipath->fspath->elem_cnt; ++i)
-               printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
+               printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
                        "%s, sector %llu, root %llu, inode %llu, offset %llu, "
                        "length %llu, links %u (path: %s)\n", swarn->errstr,
-                       swarn->logical, swarn->dev->name,
+                       swarn->logical, rcu_str_deref(swarn->dev->name),
                        (unsigned long long)swarn->sector, root, inum, offset,
                        min(isize - offset, (u64)PAGE_SIZE), nlink,
                        (char *)(unsigned long)ipath->fspath->val[i]);
@@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
        return 0;
 
 err:
-       printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
+       printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
                "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
                "resolving failed with ret=%d\n", swarn->errstr,
-               swarn->logical, swarn->dev->name,
+               swarn->logical, rcu_str_deref(swarn->dev->name),
                (unsigned long long)swarn->sector, root, inum, offset, ret);
 
        free_ipath(ipath);
@@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
                do {
                        ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
                                                        &ref_root, &ref_level);
-                       printk(KERN_WARNING
+                       printk_in_rcu(KERN_WARNING
                                "btrfs: %s at logical %llu on dev %s, "
                                "sector %llu: metadata %s (level %d) in tree "
-                               "%llu\n", errstr, swarn.logical, dev->name,
+                               "%llu\n", errstr, swarn.logical,
+                               rcu_str_deref(dev->name),
                                (unsigned long long)swarn.sector,
                                ref_level ? "node" : "leaf",
                                ret < 0 ? -1 : ref_level,
@@ -580,9 +582,11 @@ out:
                spin_lock(&sdev->stat_lock);
                ++sdev->stat.uncorrectable_errors;
                spin_unlock(&sdev->stat_lock);
-               printk_ratelimited(KERN_ERR
+
+               printk_ratelimited_in_rcu(KERN_ERR
                        "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
-                       (unsigned long long)fixup->logical, sdev->dev->name);
+                       (unsigned long long)fixup->logical,
+                       rcu_str_deref(sdev->dev->name));
        }
 
        btrfs_free_path(path);
@@ -936,18 +940,20 @@ corrected_error:
                        spin_lock(&sdev->stat_lock);
                        sdev->stat.corrected_errors++;
                        spin_unlock(&sdev->stat_lock);
-                       printk_ratelimited(KERN_ERR
+                       printk_ratelimited_in_rcu(KERN_ERR
                                "btrfs: fixed up error at logical %llu on dev %s\n",
-                               (unsigned long long)logical, sdev->dev->name);
+                               (unsigned long long)logical,
+                               rcu_str_deref(sdev->dev->name));
                }
        } else {
 did_not_correct_error:
                spin_lock(&sdev->stat_lock);
                sdev->stat.uncorrectable_errors++;
                spin_unlock(&sdev->stat_lock);
-               printk_ratelimited(KERN_ERR
+               printk_ratelimited_in_rcu(KERN_ERR
                        "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
-                       (unsigned long long)logical, sdev->dev->name);
+                       (unsigned long long)logical,
+                       rcu_str_deref(sdev->dev->name));
        }
 
 out:
index 96eb9fef7bd279584cf4dd8b6ed42cc09e425c1d..0eb9a4da069e6cb1e3c4294d4ad02e2ab92da666 100644 (file)
@@ -54,6 +54,7 @@
 #include "version.h"
 #include "export.h"
 #include "compression.h"
+#include "rcu-string.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/btrfs.h>
@@ -1482,12 +1483,54 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
                                   "error %d\n", btrfs_ino(inode), ret);
 }
 
+/*
+ * Show the path of the device with the lowest devid as the mount source.
+ *
+ * Walks this filesystem's device list and every fs_devices reached through
+ * the ->seed chain while holding device_list_mutex.  Devices without a name
+ * are skipped because there is no path to print for them and dereferencing
+ * a NULL name would oops.  Always returns 0; if no named device exists,
+ * nothing is written to @m and a warning is triggered.
+ */
+static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
+{
+       struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
+       struct btrfs_fs_devices *cur_devices;
+       struct btrfs_device *dev, *first_dev = NULL;
+       struct rcu_string *name;
+
+       mutex_lock(&fs_info->fs_devices->device_list_mutex);
+       for (cur_devices = fs_info->fs_devices; cur_devices;
+            cur_devices = cur_devices->seed) {
+               list_for_each_entry(dev, &cur_devices->devices, dev_list) {
+                       /* no name recorded, so nothing we could print */
+                       if (!dev->name)
+                               continue;
+                       if (!first_dev || dev->devid < first_dev->devid)
+                               first_dev = dev;
+               }
+       }
+
+       if (first_dev) {
+               /* the name is an rcu_string, read it under rcu_read_lock */
+               rcu_read_lock();
+               name = rcu_dereference(first_dev->name);
+               seq_escape(m, name->str, " \t\n\\");
+               rcu_read_unlock();
+       } else {
+               WARN_ON(1);
+       }
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+       return 0;
+}
+
 static const struct super_operations btrfs_super_ops = {
        .drop_inode     = btrfs_drop_inode,
        .evict_inode    = btrfs_evict_inode,
        .put_super      = btrfs_put_super,
        .sync_fs        = btrfs_sync_fs,
        .show_options   = btrfs_show_options,
+       .show_devname   = btrfs_show_devname,
        .write_inode    = btrfs_write_inode,
        .dirty_inode    = btrfs_fs_dirty_inode,
        .alloc_inode    = btrfs_alloc_inode,
index 1791c6e3d83487d82c9ffe80ab0239976cfd1c96..b72b068183ec6bb334a1cb9088ffa02d7eb13029 100644 (file)
@@ -100,6 +100,10 @@ loop:
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                cur_trans = fs_info->running_transaction;
                goto loop;
+       } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+               spin_unlock(&root->fs_info->trans_lock);
+               kmem_cache_free(btrfs_transaction_cachep, cur_trans);
+               return -EROFS;
        }
 
        atomic_set(&cur_trans->num_writers, 1);
@@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 
 
 static void cleanup_transaction(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root)
+                               struct btrfs_root *root, int err)
 {
        struct btrfs_transaction *cur_trans = trans->transaction;
 
        WARN_ON(trans->use_count > 1);
 
+       btrfs_abort_transaction(trans, root, err);
+
        spin_lock(&root->fs_info->trans_lock);
        list_del_init(&cur_trans->list);
+       if (cur_trans == root->fs_info->running_transaction) {
+               root->fs_info->running_transaction = NULL;
+               root->fs_info->trans_no_join = 0;
+       }
        spin_unlock(&root->fs_info->trans_lock);
 
        btrfs_cleanup_one_transaction(trans->transaction, root);
@@ -1526,7 +1536,7 @@ cleanup_transaction:
 //     WARN_ON(1);
        if (current->journal_info == trans)
                current->journal_info = NULL;
-       cleanup_transaction(trans, root);
+       cleanup_transaction(trans, root, ret);
 
        return ret;
 }
index 7782020996feccd4b7103528a4c2989230f79b71..8a3d2594b80726b3a4da08c5924cb51b8e28a0ba 100644 (file)
@@ -35,6 +35,7 @@
 #include "volumes.h"
 #include "async-thread.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
@@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
                device = list_entry(fs_devices->devices.next,
                                    struct btrfs_device, dev_list);
                list_del(&device->dev_list);
-               kfree(device->name);
+               rcu_string_free(device->name);
                kfree(device);
        }
        kfree(fs_devices);
@@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,
 {
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices;
+       struct rcu_string *name;
        u64 found_transid = btrfs_super_generation(disk_super);
-       char *name;
 
        fs_devices = find_fsid(disk_super->fsid);
        if (!fs_devices) {
@@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,
                memcpy(device->uuid, disk_super->dev_item.uuid,
                       BTRFS_UUID_SIZE);
                spin_lock_init(&device->io_lock);
-               device->name = kstrdup(path, GFP_NOFS);
-               if (!device->name) {
+
+               name = rcu_string_strdup(path, GFP_NOFS);
+               if (!name) {
                        kfree(device);
                        return -ENOMEM;
                }
+               rcu_assign_pointer(device->name, name);
                INIT_LIST_HEAD(&device->dev_alloc_list);
 
                /* init readahead state */
@@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,
 
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
-       } else if (!device->name || strcmp(device->name, path)) {
-               name = kstrdup(path, GFP_NOFS);
+       } else if (!device->name || strcmp(device->name->str, path)) {
+               name = rcu_string_strdup(path, GFP_NOFS);
                if (!name)
                        return -ENOMEM;
-               kfree(device->name);
-               device->name = name;
+               rcu_string_free(device->name);
+               rcu_assign_pointer(device->name, name);
                if (device->missing) {
                        fs_devices->missing_devices--;
                        device->missing = 0;
@@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
 
        /* We have held the volume lock, it is safe to get the devices. */
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
+               struct rcu_string *name;
+
                device = kzalloc(sizeof(*device), GFP_NOFS);
                if (!device)
                        goto error;
 
-               device->name = kstrdup(orig_dev->name, GFP_NOFS);
-               if (!device->name) {
+               /*
+                * This is ok to do without rcu read locked because we hold the
+                * uuid mutex so nothing we touch in here is going to disappear.
+                */
+               name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+               if (!name) {
                        kfree(device);
                        goto error;
                }
+               rcu_assign_pointer(device->name, name);
 
                device->devid = orig_dev->devid;
                device->work.func = pending_bios_fn;
@@ -491,7 +501,7 @@ again:
                }
                list_del_init(&device->dev_list);
                fs_devices->num_devices--;
-               kfree(device->name);
+               rcu_string_free(device->name);
                kfree(device);
        }
 
@@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)
        if (device->bdev)
                blkdev_put(device->bdev, device->mode);
 
-       kfree(device->name);
+       rcu_string_free(device->name);
        kfree(device);
 }
 
@@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
        mutex_lock(&fs_devices->device_list_mutex);
        list_for_each_entry(device, &fs_devices->devices, dev_list) {
                struct btrfs_device *new_device;
+               struct rcu_string *name;
 
                if (device->bdev)
                        fs_devices->open_devices--;
@@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
                BUG_ON(!new_device); /* -ENOMEM */
                memcpy(new_device, device, sizeof(*new_device));
-               new_device->name = kstrdup(device->name, GFP_NOFS);
-               BUG_ON(device->name && !new_device->name); /* -ENOMEM */
+
+               /* Safe because we are under uuid_mutex */
+               name = rcu_string_strdup(device->name->str, GFP_NOFS);
+               BUG_ON(device->name && !name); /* -ENOMEM */
+               rcu_assign_pointer(new_device->name, name);
                new_device->bdev = NULL;
                new_device->writeable = 0;
                new_device->in_fs_metadata = 0;
@@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                if (!device->name)
                        continue;
 
-               bdev = blkdev_get_by_path(device->name, flags, holder);
+               bdev = blkdev_get_by_path(device->name->str, flags, holder);
                if (IS_ERR(bdev)) {
-                       printk(KERN_INFO "open %s failed\n", device->name);
+                       printk(KERN_INFO "open %s failed\n", device->name->str);
                        goto error;
                }
                filemap_write_and_wait(bdev->bd_inode->i_mapping);
@@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        struct block_device *bdev;
        struct list_head *devices;
        struct super_block *sb = root->fs_info->sb;
+       struct rcu_string *name;
        u64 total_bytes;
        int seeding_dev = 0;
        int ret = 0;
@@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                goto error;
        }
 
-       device->name = kstrdup(device_path, GFP_NOFS);
-       if (!device->name) {
+       name = rcu_string_strdup(device_path, GFP_NOFS);
+       if (!name) {
                kfree(device);
                ret = -ENOMEM;
                goto error;
        }
+       rcu_assign_pointer(device->name, name);
 
        ret = find_next_devid(root, &device->devid);
        if (ret) {
-               kfree(device->name);
+               rcu_string_free(device->name);
                kfree(device);
                goto error;
        }
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
-               kfree(device->name);
+               rcu_string_free(device->name);
                kfree(device);
                ret = PTR_ERR(trans);
                goto error;
@@ -1796,7 +1812,7 @@ error_trans:
        unlock_chunks(root);
        btrfs_abort_transaction(trans, root, ret);
        btrfs_end_transaction(trans, root);
-       kfree(device->name);
+       rcu_string_free(device->name);
        kfree(device);
 error:
        blkdev_put(bdev, FMODE_EXCL);
@@ -4204,10 +4220,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
                dev = bbio->stripes[dev_nr].dev;
                if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
+#ifdef DEBUG
+                       struct rcu_string *name;
+
+                       rcu_read_lock();
+                       name = rcu_dereference(dev->name);
                        pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
                                 "(%s id %llu), size=%u\n", rw,
                                 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
-                                dev->name, dev->devid, bio->bi_size);
+                                name->str, dev->devid, bio->bi_size);
+                       rcu_read_unlock();
+#endif
                        bio->bi_bdev = dev->bdev;
                        if (async_submit)
                                schedule_bio(root, dev, rw, bio);
@@ -4694,8 +4717,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
                key.offset = device->devid;
                ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
                if (ret) {
-                       printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
-                              device->name, (unsigned long long)device->devid);
+                       printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
+                                     rcu_str_deref(device->name),
+                                     (unsigned long long)device->devid);
                        __btrfs_reset_dev_stats(device);
                        device->dev_stats_valid = 1;
                        btrfs_release_path(path);
@@ -4747,8 +4771,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
        ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
        if (ret < 0) {
-               printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
-                      ret, device->name);
+               printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
+                             ret, rcu_str_deref(device->name));
                goto out;
        }
 
@@ -4757,8 +4781,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
                /* need to delete old one and insert a new one */
                ret = btrfs_del_item(trans, dev_root, path);
                if (ret != 0) {
-                       printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
-                              device->name, ret);
+                       printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
+                                     rcu_str_deref(device->name), ret);
                        goto out;
                }
                ret = 1;
@@ -4770,8 +4794,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
                ret = btrfs_insert_empty_item(trans, dev_root, path,
                                              &key, sizeof(*ptr));
                if (ret < 0) {
-                       printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
-                              device->name, ret);
+                       printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
+                                     rcu_str_deref(device->name), ret);
                        goto out;
                }
        }
@@ -4823,9 +4847,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
 {
        if (!dev->dev_stats_valid)
                return;
-       printk_ratelimited(KERN_ERR
+       printk_ratelimited_in_rcu(KERN_ERR
                           "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
-                          dev->name,
+                          rcu_str_deref(dev->name),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
@@ -4837,8 +4861,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
 
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
 {
-       printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
-              dev->name,
+       printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+              rcu_str_deref(dev->name),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
index 3406a88ca83e023429b8af19f2d6aa64d4cac6f8..74366f27a76bbc7272e4b41f2f92bf9520bca813 100644 (file)
@@ -58,7 +58,7 @@ struct btrfs_device {
        /* the mode sent to blkdev_get */
        fmode_t mode;
 
-       char *name;
+       struct rcu_string *name;
 
        /* the internal btrfs device id */
        u64 devid;