]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - fs/ocfs2/move_extents.c
Merge branch 'for-usb-next' of git://git.kernel.org/pub/scm/linux/kernel/git/sarah...
[mv-sheeva.git] / fs / ocfs2 / move_extents.c
index 800552168d8a8052c56ebb0ac95c93fc1964a39e..cd9427023d2e72dd1d5109113569f7dee6eb1066 100644 (file)
@@ -44,6 +44,7 @@ struct ocfs2_move_extents_context {
        struct inode *inode;
        struct file *file;
        int auto_defrag;
+       int partial;
        int credits;
        u32 new_phys_cpos;
        u32 clusters_moved;
@@ -219,11 +220,14 @@ out:
 /*
  * Using one journal handle to guarantee the data consistency in case
  * crash happens anywhere.
+ *
+ * XXX: when partial moves are requested, defrag may end up moving only
+ * part of the extent if the allocator lacks enough contiguous clusters.
  */
 static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
-                              u32 cpos, u32 phys_cpos, u32 len, int ext_flags)
+                              u32 cpos, u32 phys_cpos, u32 *len, int ext_flags)
 {
-       int ret, credits = 0, extra_blocks = 0;
+       int ret, credits = 0, extra_blocks = 0, partial = context->partial;
        handle_t *handle;
        struct inode *inode = context->inode;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -232,7 +236,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
        u32 new_phys_cpos, new_len;
        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 
-       if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
+       if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
 
                BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
                         OCFS2_HAS_REFCOUNT_FL));
@@ -249,7 +253,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                ret = ocfs2_prepare_refcount_change_for_del(inode,
                                                        context->refcount_loc,
                                                        phys_blkno,
-                                                       len,
+                                                       *len,
                                                        &credits,
                                                        &extra_blocks);
                if (ret) {
@@ -258,7 +262,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                }
        }
 
-       ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
+       ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
                                                 &context->meta_ac,
                                                 &context->data_ac,
                                                 extra_blocks, &credits);
@@ -291,7 +295,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                goto out_unlock_mutex;
        }
 
-       ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, len,
+       ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len,
                                     &new_phys_cpos, &new_len);
        if (ret) {
                mlog_errno(ret);
@@ -299,33 +303,36 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
        }
 
        /*
-        * we're not quite patient here to make multiple attempts for claiming
-        * enough clusters, failure to claim clusters per-requested is not a
-        * disaster though, it can only mean partial range of defragmentation
-        * or extent movements gets gone, users anyway is able to have another
-        * try as they wish anytime, since they're going to be returned a
-        * '-ENOSPC' and completed length of this movement.
+        * allowing partial extent moves is a trade-off: it makes the whole
+        * defragmentation less likely to fail, but on the other hand it may
+        * leave the fs even more fragmented after moving, so let userspace
+        * make the decision here.
         */
-       if (new_len != len) {
-               mlog(0, "len_claimed: %u, len: %u\n", new_len, len);
-               context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
-               ret = -ENOSPC;
-               goto out_commit;
+       if (new_len != *len) {
+               mlog(0, "len_claimed: %u, len: %u\n", new_len, *len);
+               if (!partial) {
+                       context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
+                       ret = -ENOSPC;
+                       goto out_commit;
+               }
        }
 
        mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos,
             phys_cpos, new_phys_cpos);
 
-       ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos,
+       ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos,
                                  new_phys_cpos, ext_flags);
        if (ret)
                mlog_errno(ret);
 
+       if (partial && (new_len != *len))
+               *len = new_len;
+
        /*
         * Here we should write the new page out first if we are
         * in write-back mode.
         */
-       ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len);
+       ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len);
        if (ret)
                mlog_errno(ret);
 
@@ -361,7 +368,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
                                         int *vict_bit,
                                         struct buffer_head **ret_bh)
 {
-       int ret, i, blocks_per_unit = 1;
+       int ret, i, bits_per_unit = 0;
        u64 blkno;
        char namebuf[40];
 
@@ -391,14 +398,14 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
        rec = &(cl->cl_recs[0]);
 
        if (type == GLOBAL_BITMAP_SYSTEM_INODE)
-               blocks_per_unit <<= (osb->s_clustersize_bits -
-                                               inode->i_sb->s_blocksize_bits);
+               bits_per_unit = osb->s_clustersize_bits -
+                                       inode->i_sb->s_blocksize_bits;
        /*
         * 'vict_blkno' was out of the valid range.
         */
        if ((vict_blkno < le64_to_cpu(rec->c_blkno)) ||
-           (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) *
-                               blocks_per_unit))) {
+           (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) <<
+                               bits_per_unit))) {
                ret = -EINVAL;
                goto out;
        }
@@ -434,8 +441,8 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
                                                le16_to_cpu(bg->bg_bits))) {
 
                                *ret_bh = gd_bh;
-                               *vict_bit = (vict_blkno - blkno) /
-                                                       blocks_per_unit;
+                               *vict_bit = (vict_blkno - blkno) >>
+                                                       bits_per_unit;
                                mlog(0, "find the victim group: #%llu, "
                                     "total_bits: %u, vict_bit: %u\n",
                                     blkno, le16_to_cpu(bg->bg_bits),
@@ -465,11 +472,23 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
        int ret, goal_bit = 0;
 
        struct buffer_head *gd_bh = NULL;
-       struct ocfs2_group_desc *bg;
+       struct ocfs2_group_desc *bg = NULL;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        int c_to_b = 1 << (osb->s_clustersize_bits -
                                        inode->i_sb->s_blocksize_bits);
 
+       /*
+        * make goal become cluster aligned.
+        */
+       range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb,
+                                                     range->me_goal);
+       /*
+        * moving goal is not allowed to start with a group desc block
+        * (#0 blk), let's compromise to the next cluster.
+        */
+       if (range->me_goal == le64_to_cpu(bg->bg_blkno))
+               range->me_goal += c_to_b;
+
        /*
         * validate goal sits within global_bitmap, and return the victim
         * group desc
@@ -483,19 +502,6 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
 
        bg = (struct ocfs2_group_desc *)gd_bh->b_data;
 
-       /*
-        * make goal become cluster aligned.
-        */
-       if (range->me_goal % c_to_b)
-               range->me_goal = range->me_goal / c_to_b * c_to_b;
-
-       /*
-        * moving goal is not allowd to start with a group desc blok(#0 blk)
-        * let's compromise to the latter cluster.
-        */
-       if (range->me_goal == le64_to_cpu(bg->bg_blkno))
-               range->me_goal += c_to_b;
-
        /*
         * movement is not gonna cross two groups.
         */
@@ -872,9 +878,11 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
        else
                len_to_move = 0;
 
-       if (do_defrag)
+       if (do_defrag) {
                defrag_thresh = range->me_threshold >> osb->s_clustersize_bits;
-       else
+               if (defrag_thresh <= 1)
+                       goto done;
+       } else
                new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
                                                         range->me_goal);
 
@@ -926,7 +934,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
                             cpos, phys_cpos, alloc_size, len_defraged);
 
                        ret = ocfs2_defrag_extent(context, cpos, phys_cpos,
-                                                 alloc_size, flags);
+                                                 &alloc_size, flags);
                } else {
                        ret = ocfs2_move_extent(context, cpos, phys_cpos,
                                                &new_phys_cpos, alloc_size,
@@ -946,6 +954,7 @@ next:
                len_to_move -= alloc_size;
        }
 
+done:
        range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE;
 
 out:
@@ -1094,13 +1103,19 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
 
        if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
                context->auto_defrag = 1;
+               /*
+                * ok, the default threshold for the defragmentation
+                * is 1M, since our maximum clustersize was 1M also.
+                * any thoughts?
+                */
                if (!range.me_threshold)
-                       /*
-                        * ok, the default theshold for the defragmentation
-                        * is 1M, since our maximum clustersize was 1M also.
-                        * any thought?
-                        */
                        range.me_threshold = 1024 * 1024;
+
+               if (range.me_threshold > i_size_read(inode))
+                       range.me_threshold = i_size_read(inode);
+
+               if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
+                       context->partial = 1;
        } else {
                /*
                 * first best-effort attempt to validate and adjust the goal