]> git.karo-electronics.de Git - mv-sheeva.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 13 Mar 2011 23:00:49 +0000 (16:00 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 13 Mar 2011 23:00:49 +0000 (16:00 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: break out of shrink_delalloc earlier
  btrfs: fix not enough reserved space
  btrfs: fix dip leak
  Btrfs: make sure not to return overlapping extents to fiemap
  Btrfs: deal with short returns from copy_from_user
  Btrfs: fix regressions in copy_from_user handling

fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/inode.c

index 6f820fa23df4f362e5d553d06db021d7e20e6065..7f78cc78fdd0a3dcc77218be33be516b5f2ac038 100644 (file)
@@ -729,6 +729,15 @@ struct btrfs_space_info {
        u64 disk_total;         /* total bytes on disk, takes mirrors into
                                   account */
 
+       /*
+        * we bump reservation progress every time we decrement
+        * bytes_reserved.  This way people waiting for reservations
+        * know something good has happened and they can check
+        * for progress.  The number here isn't to be trusted, it
+        * just shows reclaim activity
+        */
+       unsigned long reservation_progress;
+
        int full;               /* indicates that we cannot allocate any more
                                   chunks for this space */
        int force_alloc;        /* set if we need to force a chunk alloc for
index 588ff9849873c5dc6690ae5baebd2a1111de00ac..7b3089b5c2df816522e2356d3064f0b2bdb40c4f 100644 (file)
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        u64 max_reclaim;
        u64 reclaimed = 0;
        long time_left;
-       int pause = 1;
        int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
        int loops = 0;
+       unsigned long progress;
 
        block_rsv = &root->fs_info->delalloc_block_rsv;
        space_info = block_rsv->space_info;
 
        smp_mb();
        reserved = space_info->bytes_reserved;
+       progress = space_info->reservation_progress;
 
        if (reserved == 0)
                return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
                spin_lock(&space_info->lock);
-               if (reserved > space_info->bytes_reserved) {
-                       loops = 0;
+               if (reserved > space_info->bytes_reserved)
                        reclaimed += reserved - space_info->bytes_reserved;
-               } else {
-                       loops++;
-               }
                reserved = space_info->bytes_reserved;
                spin_unlock(&space_info->lock);
 
+               loops++;
+
                if (reserved == 0 || reclaimed >= max_reclaim)
                        break;
 
                if (trans && trans->transaction->blocked)
                        return -EAGAIN;
 
-               __set_current_state(TASK_INTERRUPTIBLE);
-               time_left = schedule_timeout(pause);
+               time_left = schedule_timeout_interruptible(1);
 
                /* We were interrupted, exit */
                if (time_left)
                        break;
 
-               pause <<= 1;
-               if (pause > HZ / 10)
-                       pause = HZ / 10;
+               /* we've kicked the IO a few times, if anything has been freed,
+                * exit.  There is no sense in looping here for a long time
+                * when we really need to commit the transaction, or there are
+                * just too many writers without enough free space
+                */
+
+               if (loops > 3) {
+                       smp_mb();
+                       if (progress != space_info->reservation_progress)
+                               break;
+               }
 
        }
        return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
                if (num_bytes) {
                        spin_lock(&space_info->lock);
                        space_info->bytes_reserved -= num_bytes;
+                       space_info->reservation_progress++;
                        spin_unlock(&space_info->lock);
                }
        }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
        if (block_rsv->reserved >= block_rsv->size) {
                num_bytes = block_rsv->reserved - block_rsv->size;
                sinfo->bytes_reserved -= num_bytes;
+               sinfo->reservation_progress++;
                block_rsv->reserved = block_rsv->size;
                block_rsv->full = 1;
        }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
                to_reserve = 0;
        }
        spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
        ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
        if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        btrfs_set_block_group_used(&cache->item, old_val);
                        cache->reserved -= num_bytes;
                        cache->space_info->bytes_reserved -= num_bytes;
+                       cache->space_info->reservation_progress++;
                        cache->space_info->bytes_used += num_bytes;
                        cache->space_info->disk_used += num_bytes * factor;
                        spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
        if (reserved) {
                cache->reserved -= num_bytes;
                cache->space_info->bytes_reserved -= num_bytes;
+               cache->space_info->reservation_progress++;
        }
        spin_unlock(&cache->lock);
        spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
                                space_info->bytes_readonly += num_bytes;
                        cache->reserved -= num_bytes;
                        space_info->bytes_reserved -= num_bytes;
+                       space_info->reservation_progress++;
                }
                spin_unlock(&cache->lock);
                spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                if (ret) {
                        spin_lock(&cache->space_info->lock);
                        cache->space_info->bytes_reserved -= buf->len;
+                       cache->space_info->reservation_progress++;
                        spin_unlock(&cache->space_info->lock);
                }
                goto out;
index fd3f172e94e69491d80c3b1be950b5f98a95c41b..714adc4ac4c24eaae26900bb9e862bc8b874432b 100644 (file)
@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
 
        while (!end) {
-               off = extent_map_end(em);
-               if (off >= max)
-                       end = 1;
+               u64 offset_in_extent;
+
+               /* break if the extent we found is outside the range */
+               if (em->start >= max || extent_map_end(em) < off)
+                       break;
+
+               /*
+                * get_extent may return an extent that starts before our
+                * requested range.  We have to make sure the ranges
+                * we return to fiemap always move forward and don't
+                * overlap, so adjust the offsets here
+                */
+               em_start = max(em->start, off);
 
-               em_start = em->start;
-               em_len = em->len;
+               /*
+                * record the offset from the start of the extent
+                * for adjusting the disk offset below
+                */
+               offset_in_extent = em_start - em->start;
                em_end = extent_map_end(em);
+               em_len = em_end - em_start;
                emflags = em->flags;
                disko = 0;
                flags = 0;
 
+               /*
+                * bump off for our next call to get_extent
+                */
+               off = extent_map_end(em);
+               if (off >= max)
+                       end = 1;
+
                if (em->block_start == EXTENT_MAP_LAST_BYTE) {
                        end = 1;
                        flags |= FIEMAP_EXTENT_LAST;
@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        flags |= (FIEMAP_EXTENT_DELALLOC |
                                  FIEMAP_EXTENT_UNKNOWN);
                } else {
-                       disko = em->block_start;
+                       disko = em->block_start + offset_in_extent;
                }
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
index 7084140d5940e68f653ad8919a0084549d6b8e1b..f447b783bb84ce25af44cb2276b3d49684948166 100644 (file)
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 
                /* Flush processor's dcache for this page */
                flush_dcache_page(page);
+
+               /*
+                * if we get a partial write, we can end up with
+                * partially up to date pages.  These add
+                * a lot of complexity, so make sure they don't
+                * happen by forcing this copy to be retried.
+                *
+                * The rest of the btrfs_file_write code will fall
+                * back to page at a time copies after we return 0.
+                */
+               if (!PageUptodate(page) && copied < count)
+                       copied = 0;
+
                iov_iter_advance(i, copied);
                write_bytes -= copied;
                total_copied += copied;
@@ -762,6 +775,27 @@ out:
        return 0;
 }
 
+/*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+       int ret = 0;
+
+       if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+               ret = btrfs_readpage(NULL, page);
+               if (ret)
+                       return ret;
+               lock_page(page);
+               if (!PageUptodate(page)) {
+                       unlock_page(page);
+                       return -EIO;
+               }
+       }
+       return 0;
+}
+
 /*
  * this gets pages into the page cache and locks them down, it also properly
  * waits for data=ordered extents to finish before allowing the pages to be
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
        unsigned long index = pos >> PAGE_CACHE_SHIFT;
        struct inode *inode = fdentry(file)->d_inode;
        int err = 0;
+       int faili = 0;
        u64 start_pos;
        u64 last_pos;
 
@@ -794,15 +829,24 @@ again:
        for (i = 0; i < num_pages; i++) {
                pages[i] = grab_cache_page(inode->i_mapping, index + i);
                if (!pages[i]) {
-                       int c;
-                       for (c = i - 1; c >= 0; c--) {
-                               unlock_page(pages[c]);
-                               page_cache_release(pages[c]);
-                       }
-                       return -ENOMEM;
+                       faili = i - 1;
+                       err = -ENOMEM;
+                       goto fail;
+               }
+
+               if (i == 0)
+                       err = prepare_uptodate_page(pages[i], pos);
+               if (i == num_pages - 1)
+                       err = prepare_uptodate_page(pages[i],
+                                                   pos + write_bytes);
+               if (err) {
+                       page_cache_release(pages[i]);
+                       faili = i - 1;
+                       goto fail;
                }
                wait_on_page_writeback(pages[i]);
        }
+       err = 0;
        if (start_pos < inode->i_size) {
                struct btrfs_ordered_extent *ordered;
                lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
                WARN_ON(!PageLocked(pages[i]));
        }
        return 0;
+fail:
+       while (faili >= 0) {
+               unlock_page(pages[faili]);
+               page_cache_release(pages[faili]);
+               faili--;
+       }
+       return err;
+
 }
 
 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
        struct inode *inode = fdentry(file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct page *pinned[2];
        struct page **pages = NULL;
        struct iov_iter i;
        loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
                      (file->f_flags & O_DIRECT));
 
-       pinned[0] = NULL;
-       pinned[1] = NULL;
-
        start_pos = pos;
 
        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        first_index = pos >> PAGE_CACHE_SHIFT;
        last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
 
-       /*
-        * there are lots of better ways to do this, but this code
-        * makes sure the first and last page in the file range are
-        * up to date and ready for cow
-        */
-       if ((pos & (PAGE_CACHE_SIZE - 1))) {
-               pinned[0] = grab_cache_page(inode->i_mapping, first_index);
-               if (!PageUptodate(pinned[0])) {
-                       ret = btrfs_readpage(NULL, pinned[0]);
-                       BUG_ON(ret);
-                       wait_on_page_locked(pinned[0]);
-               } else {
-                       unlock_page(pinned[0]);
-               }
-       }
-       if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
-               pinned[1] = grab_cache_page(inode->i_mapping, last_index);
-               if (!PageUptodate(pinned[1])) {
-                       ret = btrfs_readpage(NULL, pinned[1]);
-                       BUG_ON(ret);
-                       wait_on_page_locked(pinned[1]);
-               } else {
-                       unlock_page(pinned[1]);
-               }
-       }
-
        while (iov_iter_count(&i) > 0) {
                size_t offset = pos & (PAGE_CACHE_SIZE - 1);
                size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
                copied = btrfs_copy_from_user(pos, num_pages,
                                           write_bytes, pages, &i);
-               dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
-                               PAGE_CACHE_SHIFT;
+
+               /*
+                * if we have trouble faulting in the pages, fall
+                * back to one page at a time
+                */
+               if (copied < write_bytes)
+                       nrptrs = 1;
+
+               if (copied == 0)
+                       dirty_pages = 0;
+               else
+                       dirty_pages = (copied + offset +
+                                      PAGE_CACHE_SIZE - 1) >>
+                                      PAGE_CACHE_SHIFT;
 
                if (num_pages > dirty_pages) {
                        if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
                err = ret;
 
        kfree(pages);
-       if (pinned[0])
-               page_cache_release(pinned[0]);
-       if (pinned[1])
-               page_cache_release(pinned[1]);
        *ppos = pos;
 
        /*
index 0efdb65953c577cc27fb7d190dc4f66cd7a933a2..9007bbd01dbf69fd53dbd8c9269afe35ee3f79f6 100644 (file)
@@ -4821,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                goto fail;
 
        /*
-        * 1 item for inode ref
+        * 2 items for inode and inode ref
         * 2 items for dir items
+        * 1 item for parent inode
         */
-       trans = btrfs_start_transaction(root, 3);
+       trans = btrfs_start_transaction(root, 5);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto fail;
@@ -6056,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        if (!skip_sum) {
                dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
                if (!dip->csums) {
+                       kfree(dip);
                        ret = -ENOMEM;
                        goto free_ordered;
                }