]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - fs/ext4/extents.c
Merge tag 'v2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[mv-sheeva.git] / fs / ext4 / extents.c
index 0554c48cb1fddbc97bc81c3443218b6a9e97fca8..ccce8a7e94edc9ca2d23b76e94bd45339869970d 100644 (file)
@@ -117,11 +117,33 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                struct ext4_extent *ex;
                depth = path->p_depth;
 
-               /* try to predict block placement */
+               /*
+                * Try to predict block placement assuming that we are
+                * filling in a file which will eventually be
+                * non-sparse --- i.e., in the case of libbfd writing
+                * an ELF object sections out-of-order but in a way
+                * the eventually results in a contiguous object or
+                * executable file, or some database extending a table
+                * space file.  However, this is actually somewhat
+                * non-ideal if we are writing a sparse file such as
+                * qemu or KVM writing a raw image file that is going
+                * to stay fairly sparse, since it will end up
+                * fragmenting the file system's free space.  Maybe we
+                * should have some hueristics or some way to allow
+                * userspace to pass a hint to file system,
+                * especiially if the latter case turns out to be
+                * common.
+                */
                ex = path[depth].p_ext;
-               if (ex)
-                       return (ext4_ext_pblock(ex) +
-                               (block - le32_to_cpu(ex->ee_block)));
+               if (ex) {
+                       ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
+                       ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
+
+                       if (block > ext_block)
+                               return ext_pblk + (block - ext_block);
+                       else
+                               return ext_pblk - (ext_block - block);
+               }
 
                /* it looks like index is empty;
                 * try to find starting block from index itself */
@@ -244,7 +266,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
  * to allocate @blocks
  * Worse case is one block per extent
  */
-int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
+int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
        int idxs, num = 0;
@@ -1872,12 +1894,10 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
                        cbex.ec_block = start;
                        cbex.ec_len = end - start;
                        cbex.ec_start = 0;
-                       cbex.ec_type = EXT4_EXT_CACHE_GAP;
                } else {
                        cbex.ec_block = le32_to_cpu(ex->ee_block);
                        cbex.ec_len = ext4_ext_get_actual_len(ex);
                        cbex.ec_start = ext4_ext_pblock(ex);
-                       cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
                }
 
                if (unlikely(cbex.ec_len == 0)) {
@@ -1917,13 +1937,12 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 
 static void
 ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
-                       __u32 len, ext4_fsblk_t start, int type)
+                       __u32 len, ext4_fsblk_t start)
 {
        struct ext4_ext_cache *cex;
        BUG_ON(len == 0);
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        cex = &EXT4_I(inode)->i_cached_extent;
-       cex->ec_type = type;
        cex->ec_block = block;
        cex->ec_len = len;
        cex->ec_start = start;
@@ -1976,15 +1995,18 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
        }
 
        ext_debug(" -> %u:%lu\n", lblock, len);
-       ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
+       ext4_ext_put_in_cache(inode, lblock, len, 0);
 }
 
+/*
+ * Return 0 if cache is invalid; 1 if the cache is valid
+ */
 static int
 ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
                        struct ext4_extent *ex)
 {
        struct ext4_ext_cache *cex;
-       int ret = EXT4_EXT_CACHE_NO;
+       int ret = 0;
 
        /*
         * We borrow i_block_reservation_lock to protect i_cached_extent
@@ -1993,11 +2015,9 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
        cex = &EXT4_I(inode)->i_cached_extent;
 
        /* has cache valid data? */
-       if (cex->ec_type == EXT4_EXT_CACHE_NO)
+       if (cex->ec_len == 0)
                goto errout;
 
-       BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
-                       cex->ec_type != EXT4_EXT_CACHE_EXTENT);
        if (in_range(block, cex->ec_block, cex->ec_len)) {
                ex->ee_block = cpu_to_le32(cex->ec_block);
                ext4_ext_store_pblock(ex, cex->ec_start);
@@ -2005,7 +2025,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
                ext_debug("%u cached by %u:%u:%llu\n",
                                block,
                                cex->ec_block, cex->ec_len, cex->ec_start);
-               ret = cex->ec_type;
+               ret = 1;
        }
 errout:
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2825,14 +2845,14 @@ fix_extent_len:
  * to an uninitialized extent.
  *
  * Writing to an uninitized extent may result in splitting the uninitialized
- * extent into multiple /intialized unintialized extents (up to three)
+ * extent into multiple /initialized uninitialized extents (up to three)
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be uninitialized
  *   b> Splits in two extents: Write is happening at either end of the extent
  *   c> Splits in three extents: Somone is writing in middle of the extent
  *
  * One of more index blocks maybe needed if the extent tree grow after
- * the unintialized extent split. To prevent ENOSPC occur at the IO
+ * the uninitialized extent split. To prevent ENOSPC occur at the IO
  * complete, we need to split the uninitialized extent before DIO submit
  * the IO. The uninitialized extent called at this time will be split
  * into three uninitialized extent(at most). After IO complete, the part
@@ -3082,7 +3102,7 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
  * Handle EOFBLOCKS_FL flag, clearing it if necessary
  */
 static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
-                             struct ext4_map_blocks *map,
+                             ext4_lblk_t lblk,
                              struct ext4_ext_path *path,
                              unsigned int len)
 {
@@ -3112,7 +3132,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
         * this turns out to be false, we can bail out from this
         * function immediately.
         */
-       if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+       if (lblk + len < le32_to_cpu(last_ex->ee_block) +
            ext4_ext_get_actual_len(last_ex))
                return 0;
        /*
@@ -3154,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                 * that this IO needs to convertion to written when IO is
                 * completed
                 */
-               if (io)
+               if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
                        io->flag = EXT4_IO_END_UNWRITTEN;
-               else
+                       atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+               } else
                        ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
                if (ext4_should_dioread_nolock(inode))
                        map->m_flags |= EXT4_MAP_UNINIT;
@@ -3168,8 +3189,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                                                        path);
                if (ret >= 0) {
                        ext4_update_inode_fsync_trans(handle, inode, 1);
-                       err = check_eofblocks_fl(handle, inode, map, path,
-                                                map->m_len);
+                       err = check_eofblocks_fl(handle, inode, map->m_lblk,
+                                                path, map->m_len);
                } else
                        err = ret;
                goto out2;
@@ -3199,7 +3220,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
        ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
        if (ret >= 0) {
                ext4_update_inode_fsync_trans(handle, inode, 1);
-               err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+               err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+                                        map->m_len);
                if (err < 0)
                        goto out2;
        }
@@ -3276,7 +3298,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex;
        ext4_fsblk_t newblock;
-       int err = 0, depth, ret, cache_type;
+       int err = 0, depth, ret;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
        ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3285,9 +3307,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                  map->m_lblk, map->m_len, inode->i_ino);
 
        /* check in cache */
-       cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);
-       if (cache_type) {
-               if (cache_type == EXT4_EXT_CACHE_GAP) {
+       if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
+               if (!newex.ee_start_lo && !newex.ee_start_hi) {
                        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
                                /*
                                 * block isn't allocated yet and
@@ -3296,7 +3317,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                                goto out2;
                        }
                        /* we should allocate requested block */
-               } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
+               } else {
                        /* block is already allocated */
                        newblock = map->m_lblk
                                   - le32_to_cpu(newex.ee_block)
@@ -3305,8 +3326,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        allocated = ext4_ext_get_actual_len(&newex) -
                                (map->m_lblk - le32_to_cpu(newex.ee_block));
                        goto out;
-               } else {
-                       BUG();
                }
        }
 
@@ -3357,8 +3376,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        /* Do not put uninitialized extent in the cache */
                        if (!ext4_ext_is_uninitialized(ex)) {
                                ext4_ext_put_in_cache(inode, ee_block,
-                                                       ee_len, ee_start,
-                                                       EXT4_EXT_CACHE_EXTENT);
+                                                       ee_len, ee_start);
                                goto out;
                        }
                        ret = ext4_ext_handle_uninitialized_extents(handle,
@@ -3446,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                 * that we need to perform convertion when IO is done.
                 */
                if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-                       if (io)
+                       if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
                                io->flag = EXT4_IO_END_UNWRITTEN;
-                       else
+                               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+                       } else
                                ext4_set_inode_state(inode,
                                                     EXT4_STATE_DIO_UNWRITTEN);
                }
@@ -3456,7 +3475,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        map->m_flags |= EXT4_MAP_UNINIT;
        }
 
-       err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+       err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
        if (err)
                goto out2;
 
@@ -3490,8 +3509,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
         * when it is _not_ an uninitialized extent.
         */
        if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
-               ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,
-                                               EXT4_EXT_CACHE_EXTENT);
+               ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
                ext4_update_inode_fsync_trans(handle, inode, 1);
        } else
                ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -3518,6 +3536,12 @@ void ext4_ext_truncate(struct inode *inode)
        handle_t *handle;
        int err = 0;
 
+       /*
+        * finish any pending end_io work so we won't run the risk of
+        * converting any truncated blocks to initialized later
+        */
+       ext4_flush_completed_IO(inode);
+
        /*
         * probably first extent we're gonna free will be last in block
         */
@@ -3605,14 +3629,15 @@ static void ext4_falloc_update_inode(struct inode *inode,
 }
 
 /*
- * preallocate space for a file. This implements ext4's fallocate inode
+ * preallocate space for a file. This implements ext4's fallocate file
  * operation, which gets called from sys_fallocate system call.
  * For block-mapped files, posix_fallocate should fall back to the method
  * of writing zeroes to the required new blocks (the same behavior which is
  * expected for file systems which do not support fallocate() system call).
  */
-long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
+long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
+       struct inode *inode = file->f_path.dentry->d_inode;
        handle_t *handle;
        loff_t new_size;
        unsigned int max_blocks;
@@ -3622,6 +3647,10 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
        struct ext4_map_blocks map;
        unsigned int credits, blkbits = inode->i_blkbits;
 
+       /* We only support the FALLOC_FL_KEEP_SIZE mode */
+       if (mode & ~FALLOC_FL_KEEP_SIZE)
+               return -EOPNOTSUPP;
+
        /*
         * currently supporting (pre)allocate mode for extent-based
         * files _only_
@@ -3629,10 +3658,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
 
-       /* preallocation to directories is currently not supported */
-       if (S_ISDIR(inode->i_mode))
-               return -ENODEV;
-
        map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks because
@@ -3767,7 +3792,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 
        logical =  (__u64)newex->ec_block << blksize_bits;
 
-       if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
+       if (newex->ec_start == 0) {
                pgoff_t offset;
                struct page *page;
                struct buffer_head *bh = NULL;