diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ab8a66d852f91cb04206361a551b8c57760c9c40..017b67daa3bbf375919019e5c089b94f97d3e2a5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -139,6 +139,11 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+                            enum btrfs_map_op op,
+                            u64 logical, u64 *length,
+                            struct btrfs_bio **bbio_ret,
+                            int mirror_num, int need_raid_map);
 
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
@@ -1008,14 +1013,13 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                q = bdev_get_queue(bdev);
                if (blk_queue_discard(q))
                        device->can_discard = 1;
+               if (!blk_queue_nonrot(q))
+                       fs_devices->rotating = 1;
 
                device->bdev = bdev;
                device->in_fs_metadata = 0;
                device->mode = flags;
 
-               if (!blk_queue_nonrot(bdev_get_queue(bdev)))
-                       fs_devices->rotating = 1;
-
                fs_devices->open_devices++;
                if (device->writeable &&
                    device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -2417,7 +2421,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        fs_info->free_chunk_space += device->total_bytes;
        spin_unlock(&fs_info->free_chunk_lock);
 
-       if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+       if (!blk_queue_nonrot(q))
                fs_info->fs_devices->rotating = 1;
 
        tmp = btrfs_super_total_bytes(fs_info->super_copy);
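A note for readers of this hunk in isolation: q is not declared here; it comes
from a bdev_get_queue() call made earlier in btrfs_init_new_device(), outside
the hunk. A minimal sketch of the assumed surrounding pattern, mirroring the
__btrfs_open_devices() hunk above:

	struct request_queue *q = bdev_get_queue(bdev);

	if (blk_queue_discard(q))
		device->can_discard = 1;
	/* ... */
	if (!blk_queue_nonrot(q))
		fs_info->fs_devices->rotating = 1;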
@@ -2795,10 +2799,38 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info,
        return ret;
 }
 
+static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
+                                       u64 logical, u64 length)
+{
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+
+       em_tree = &fs_info->mapping_tree.map_tree;
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, logical, length);
+       read_unlock(&em_tree->lock);
+
+       if (!em) {
+               btrfs_crit(fs_info, "unable to find logical %llu length %llu",
+                          logical, length);
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (em->start > logical || em->start + em->len < logical) {
+               btrfs_crit(fs_info,
+                          "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
+                          logical, length, em->start, em->start + em->len);
+               free_extent_map(em);
+               return ERR_PTR(-EINVAL);
+       }
+
+       /* callers are responsible for dropping em's ref. */
+       return em;
+}
+
 int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info, u64 chunk_offset)
 {
-       struct extent_map_tree *em_tree;
        struct extent_map *em;
        struct map_lookup *map;
        u64 dev_extent_len = 0;
@@ -2806,23 +2838,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
        int i, ret = 0;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 
-       em_tree = &fs_info->mapping_tree.map_tree;
-
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, chunk_offset, 1);
-       read_unlock(&em_tree->lock);
-
-       if (!em || em->start > chunk_offset ||
-           em->start + em->len < chunk_offset) {
+       em = get_chunk_map(fs_info, chunk_offset, 1);
+       if (IS_ERR(em)) {
                /*
                 * This is a logic error, but we don't want to just rely on the
                 * user having built with ASSERT enabled, so if ASSERT doesn't
                 * do anything we still error out.
                 */
                ASSERT(0);
-               if (em)
-                       free_extent_map(em);
-               return -EINVAL;
+               return PTR_ERR(em);
        }
        map = em->map_lookup;
        mutex_lock(&fs_info->chunk_mutex);
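The get_chunk_map() helper added above centralizes the lookup/validate/ref
pattern that the rest of this patch converts callers to. A caller-side sketch,
illustrative only, following the btrfs_remove_chunk() conversion in this hunk:

	struct extent_map *em;
	struct map_lookup *map;

	em = get_chunk_map(fs_info, chunk_offset, 1);
	if (IS_ERR(em))
		return PTR_ERR(em);	/* missing or misplaced mapping */

	map = em->map_lookup;
	/* ... use map ... */
	free_extent_map(em);		/* drop the reference taken by the helper */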
@@ -3736,7 +3760,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        if (ret)
                btrfs_handle_fs_error(fs_info, ret, NULL);
 
-       atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+       clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 }
 
 /* Non-zero return value signifies invalidity */
@@ -3755,6 +3779,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                  struct btrfs_ioctl_balance_args *bargs)
 {
        struct btrfs_fs_info *fs_info = bctl->fs_info;
+       u64 meta_target, data_target;
        u64 allowed;
        int mixed = 0;
        int ret;
@@ -3851,11 +3876,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                }
        } while (read_seqretry(&fs_info->profiles_lock, seq));
 
-       if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
-               btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
+       /* if we're not converting, the target field is uninitialized */
+       meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+               bctl->meta.target : fs_info->avail_metadata_alloc_bits;
+       data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+               bctl->data.target : fs_info->avail_data_alloc_bits;
+       if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
+               btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
                btrfs_warn(fs_info,
                           "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
-                          bctl->meta.target, bctl->data.target);
+                          meta_target, data_target);
        }
 
        if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
@@ -3910,7 +3940,7 @@ out:
                __cancel_balance(fs_info);
        else {
                kfree(bctl);
-               atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+               clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
        }
        return ret;
 }
@@ -4000,7 +4030,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
        btrfs_balance_sys(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
-       WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
+       WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
 
        mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&fs_info->balance_mutex);
@@ -4785,7 +4815,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        stripe_size = div_u64(stripe_size, dev_stripes);
 
        /* align to BTRFS_STRIPE_LEN */
-       stripe_size = div_u64(stripe_size, raid_stripe_len);
+       stripe_size = div64_u64(stripe_size, raid_stripe_len);
        stripe_size *= raid_stripe_len;
 
        map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -4833,7 +4863,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        ret = add_extent_mapping(em_tree, em, 0);
        if (!ret) {
                list_add_tail(&em->list, &trans->transaction->pending_chunks);
-               atomic_inc(&em->refs);
+               refcount_inc(&em->refs);
        }
        write_unlock(&em_tree->lock);
        if (ret) {
@@ -4888,7 +4918,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
        struct btrfs_device *device;
        struct btrfs_chunk *chunk;
        struct btrfs_stripe *stripe;
-       struct extent_map_tree *em_tree;
        struct extent_map *em;
        struct map_lookup *map;
        size_t item_size;
@@ -4897,24 +4926,9 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
        int i = 0;
        int ret = 0;
 
-       em_tree = &fs_info->mapping_tree.map_tree;
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
-       read_unlock(&em_tree->lock);
-
-       if (!em) {
-               btrfs_crit(fs_info, "unable to find logical %Lu len %Lu",
-                          chunk_offset, chunk_size);
-               return -EINVAL;
-       }
-
-       if (em->start != chunk_offset || em->len != chunk_size) {
-               btrfs_crit(fs_info,
-                          "found a bad mapping, wanted %Lu-%Lu, found %Lu-%Lu",
-                           chunk_offset, chunk_size, em->start, em->len);
-               free_extent_map(em);
-               return -EINVAL;
-       }
+       em = get_chunk_map(fs_info, chunk_offset, chunk_size);
+       if (IS_ERR(em))
+               return PTR_ERR(em);
 
        map = em->map_lookup;
        item_size = btrfs_chunk_item_size(map->num_stripes);
@@ -5055,15 +5069,12 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 {
        struct extent_map *em;
        struct map_lookup *map;
-       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
        int readonly = 0;
        int miss_ndevs = 0;
        int i;
 
-       read_lock(&map_tree->map_tree.lock);
-       em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
-       read_unlock(&map_tree->map_tree.lock);
-       if (!em)
+       em = get_chunk_map(fs_info, chunk_offset, 1);
+       if (IS_ERR(em))
                return 1;
 
        map = em->map_lookup;
@@ -5117,34 +5128,19 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
 
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 {
-       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
        struct extent_map *em;
        struct map_lookup *map;
-       struct extent_map_tree *em_tree = &map_tree->map_tree;
        int ret;
 
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, logical, len);
-       read_unlock(&em_tree->lock);
-
-       /*
-        * We could return errors for these cases, but that could get ugly and
-        * we'd probably do the same thing which is just not do anything else
-        * and exit, so return 1 so the callers don't try to use other copies.
-        */
-       if (!em) {
-               btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical,
-                           logical+len);
-               return 1;
-       }
-
-       if (em->start > logical || em->start + em->len < logical) {
-               btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got %Lu-%Lu",
-                          logical, logical+len, em->start,
-                          em->start + em->len);
-               free_extent_map(em);
+       em = get_chunk_map(fs_info, logical, len);
+       if (IS_ERR(em))
+               /*
+                * We could return an error here, but that could get ugly and
+                * we would probably end up doing the same thing anyway: do
+                * nothing else and exit. So return 1 so the callers don't try
+                * to use other copies.
+                */
                return 1;
-       }
 
        map = em->map_lookup;
        if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
@@ -5160,7 +5156,8 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
        free_extent_map(em);
 
        btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
-       if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
+       if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
+           fs_info->dev_replace.tgtdev)
                ret++;
        btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
 
@@ -5173,15 +5170,11 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 {
        struct extent_map *em;
        struct map_lookup *map;
-       struct extent_map_tree *em_tree = &map_tree->map_tree;
        unsigned long len = fs_info->sectorsize;
 
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, logical, len);
-       read_unlock(&em_tree->lock);
-       BUG_ON(!em);
+       em = get_chunk_map(fs_info, logical, len);
+       WARN_ON(IS_ERR(em));
 
-       BUG_ON(em->start > logical || em->start + em->len < logical);
        map = em->map_lookup;
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                len = map->stripe_len * nr_data_stripes(map);
@@ -5189,20 +5182,16 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
        return len;
 }
 
-int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
+int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
                           u64 logical, u64 len, int mirror_num)
 {
        struct extent_map *em;
        struct map_lookup *map;
-       struct extent_map_tree *em_tree = &map_tree->map_tree;
        int ret = 0;
 
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, logical, len);
-       read_unlock(&em_tree->lock);
-       BUG_ON(!em);
+       em = get_chunk_map(fs_info, logical, len);
+       WARN_ON(IS_ERR(em));
 
-       BUG_ON(em->start > logical || em->start + em->len < logical);
        map = em->map_lookup;
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                ret = 1;
@@ -5295,25 +5284,353 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
                GFP_NOFS|__GFP_NOFAIL);
 
        atomic_set(&bbio->error, 0);
-       atomic_set(&bbio->refs, 1);
+       refcount_set(&bbio->refs, 1);
 
        return bbio;
 }
 
 void btrfs_get_bbio(struct btrfs_bio *bbio)
 {
-       WARN_ON(!atomic_read(&bbio->refs));
-       atomic_inc(&bbio->refs);
+       WARN_ON(!refcount_read(&bbio->refs));
+       refcount_inc(&bbio->refs);
 }
 
 void btrfs_put_bbio(struct btrfs_bio *bbio)
 {
        if (!bbio)
                return;
-       if (atomic_dec_and_test(&bbio->refs))
+       if (refcount_dec_and_test(&bbio->refs))
                kfree(bbio);
 }
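The bbio reference count is switched from atomic_t to refcount_t; the API
mapping above is one-to-one, with refcount_t additionally warning on
increment-from-zero and saturating instead of overflowing. A minimal sketch of
the conversion pattern (assumes <linux/refcount.h>):

	refcount_set(&bbio->refs, 1);			/* was atomic_set()          */
	refcount_inc(&bbio->refs);			/* was atomic_inc()           */
	if (refcount_dec_and_test(&bbio->refs))		/* was atomic_dec_and_test() */
		kfree(bbio);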
 
+/* Can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
+/*
+ * Note that a discard will not be sent to the target device of a device
+ * replace operation.
+ */
+static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+                                        u64 logical, u64 length,
+                                        struct btrfs_bio **bbio_ret)
+{
+       struct extent_map *em;
+       struct map_lookup *map;
+       struct btrfs_bio *bbio;
+       u64 offset;
+       u64 stripe_nr;
+       u64 stripe_nr_end;
+       u64 stripe_end_offset;
+       u64 stripe_cnt;
+       u64 stripe_len;
+       u64 stripe_offset;
+       u64 num_stripes;
+       u32 stripe_index;
+       u32 factor = 0;
+       u32 sub_stripes = 0;
+       u64 stripes_per_dev = 0;
+       u32 remaining_stripes = 0;
+       u32 last_stripe = 0;
+       int ret = 0;
+       int i;
+
+       /* discard always returns a bbio */
+       ASSERT(bbio_ret);
+
+       em = get_chunk_map(fs_info, logical, length);
+       if (IS_ERR(em))
+               return PTR_ERR(em);
+
+       map = em->map_lookup;
+       /* we don't discard raid56 yet */
+       if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       offset = logical - em->start;
+       length = min_t(u64, em->len - offset, length);
+
+       stripe_len = map->stripe_len;
+       /*
+        * stripe_nr counts the total number of stripes we have to stride
+        * to get to this block
+        */
+       stripe_nr = div64_u64(offset, stripe_len);
+
+       /* stripe_offset is the offset of this block in its stripe */
+       stripe_offset = offset - stripe_nr * stripe_len;
+
+       stripe_nr_end = round_up(offset + length, map->stripe_len);
+       stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
+       stripe_cnt = stripe_nr_end - stripe_nr;
+       stripe_end_offset = stripe_nr_end * map->stripe_len -
+                           (offset + length);
+       /*
+        * after this, stripe_nr is the number of stripes on this
+        * device we have to walk to find the data, and stripe_index is
+        * the number of our device in the stripe array
+        */
+       num_stripes = 1;
+       stripe_index = 0;
+       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                        BTRFS_BLOCK_GROUP_RAID10)) {
+               if (map->type & BTRFS_BLOCK_GROUP_RAID0)
+                       sub_stripes = 1;
+               else
+                       sub_stripes = map->sub_stripes;
+
+               factor = map->num_stripes / sub_stripes;
+               num_stripes = min_t(u64, map->num_stripes,
+                                   sub_stripes * stripe_cnt);
+               stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+               stripe_index *= sub_stripes;
+               stripes_per_dev = div_u64_rem(stripe_cnt, factor,
+                                             &remaining_stripes);
+               div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
+               last_stripe *= sub_stripes;
+       } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+                               BTRFS_BLOCK_GROUP_DUP)) {
+               num_stripes = map->num_stripes;
+       } else {
+               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+                                       &stripe_index);
+       }
+
+       bbio = alloc_btrfs_bio(num_stripes, 0);
+       if (!bbio) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       for (i = 0; i < num_stripes; i++) {
+               bbio->stripes[i].physical =
+                       map->stripes[stripe_index].physical +
+                       stripe_offset + stripe_nr * map->stripe_len;
+               bbio->stripes[i].dev = map->stripes[stripe_index].dev;
+
+               if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                                BTRFS_BLOCK_GROUP_RAID10)) {
+                       bbio->stripes[i].length = stripes_per_dev *
+                               map->stripe_len;
+
+                       if (i / sub_stripes < remaining_stripes)
+                               bbio->stripes[i].length +=
+                                       map->stripe_len;
+
+                       /*
+                        * Special for the first stripe and
+                        * the last stripe:
+                        *
+                        * |-------|...|-------|
+                        *     |----------|
+                        *    off     end_off
+                        */
+                       if (i < sub_stripes)
+                               bbio->stripes[i].length -=
+                                       stripe_offset;
+
+                       if (stripe_index >= last_stripe &&
+                           stripe_index <= (last_stripe +
+                                            sub_stripes - 1))
+                               bbio->stripes[i].length -=
+                                       stripe_end_offset;
+
+                       if (i == sub_stripes - 1)
+                               stripe_offset = 0;
+               } else {
+                       bbio->stripes[i].length = length;
+               }
+
+               stripe_index++;
+               if (stripe_index == map->num_stripes) {
+                       stripe_index = 0;
+                       stripe_nr++;
+               }
+       }
+
+       *bbio_ret = bbio;
+       bbio->map_type = map->type;
+       bbio->num_stripes = num_stripes;
+out:
+       free_extent_map(em);
+       return ret;
+}
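A worked example of the stripe arithmetic above, using hypothetical values
(stripe_len = 64K, a discard starting 96K into the chunk, length 192K):

	/*
	 * stripe_nr         = 96K / 64K                  = 1
	 * stripe_offset     = 96K - 1 * 64K               = 32K
	 * stripe_nr_end     = round_up(288K, 64K) / 64K   = 5
	 * stripe_cnt        = 5 - 1                        = 4
	 * stripe_end_offset = 5 * 64K - 288K               = 32K
	 *
	 * The discard covers stripes 1-4 of the chunk, trimmed by 32K at the
	 * start of the first stripe and by 32K at the end of the last one.
	 */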
+
+/*
+ * In dev-replace case, for repair case (that's the only case where the mirror
+ * is selected explicitly when calling btrfs_map_block), blocks left of the
+ * left cursor can also be read from the target drive.
+ *
+ * For BTRFS_MAP_GET_READ_MIRRORS, the target drive is added as the last one
+ * to the array of stripes.
+ * For READ, it also needs to be supported using the same mirror number.
+ *
+ * If the requested block is not left of the left cursor, EIO is returned. This
+ * can happen because btrfs_num_copies() returns one more in the dev-replace
+ * case.
+ */
+static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
+                                        u64 logical, u64 length,
+                                        u64 srcdev_devid, int *mirror_num,
+                                        u64 *physical)
+{
+       struct btrfs_bio *bbio = NULL;
+       int num_stripes;
+       int index_srcdev = 0;
+       int found = 0;
+       u64 physical_of_found = 0;
+       int i;
+       int ret = 0;
+
+       ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+                               logical, &length, &bbio, 0, 0);
+       if (ret) {
+               ASSERT(bbio == NULL);
+               return ret;
+       }
+
+       num_stripes = bbio->num_stripes;
+       if (*mirror_num > num_stripes) {
+               /*
+                * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
+                * which means that the requested area is not left of the
+                * left cursor.
+                */
+               btrfs_put_bbio(bbio);
+               return -EIO;
+       }
+
+       /*
+        * Process the rest of the function using the mirror_num of the
+        * source drive, so look it up first. At the end, patch the device
+        * pointer to that of the target drive.
+        */
+       for (i = 0; i < num_stripes; i++) {
+               if (bbio->stripes[i].dev->devid != srcdev_devid)
+                       continue;
+
+               /*
+                * In case of DUP, in order to keep it simple, only add the
+                * mirror with the lowest physical address
+                */
+               if (found &&
+                   physical_of_found <= bbio->stripes[i].physical)
+                       continue;
+
+               index_srcdev = i;
+               found = 1;
+               physical_of_found = bbio->stripes[i].physical;
+       }
+
+       btrfs_put_bbio(bbio);
+
+       ASSERT(found);
+       if (!found)
+               return -EIO;
+
+       *mirror_num = index_srcdev + 1;
+       *physical = physical_of_found;
+       return ret;
+}
+
+static void handle_ops_on_dev_replace(enum btrfs_map_op op,
+                                     struct btrfs_bio **bbio_ret,
+                                     struct btrfs_dev_replace *dev_replace,
+                                     int *num_stripes_ret, int *max_errors_ret)
+{
+       struct btrfs_bio *bbio = *bbio_ret;
+       u64 srcdev_devid = dev_replace->srcdev->devid;
+       int tgtdev_indexes = 0;
+       int num_stripes = *num_stripes_ret;
+       int max_errors = *max_errors_ret;
+       int i;
+
+       if (op == BTRFS_MAP_WRITE) {
+               int index_where_to_add;
+
+               /*
+                * duplicate the write operations while the dev replace
+                * procedure is running. Since the copying of the old disk to
+                * the new disk takes place at run time while the filesystem is
+                * mounted writable, the regular write operations to the old
+                * disk have to be duplicated to go to the new disk as well.
+                *
+                * Note that device->missing is handled by the caller, and that
+                * the write to the old disk is already set up in the stripes
+                * array.
+                */
+               index_where_to_add = num_stripes;
+               for (i = 0; i < num_stripes; i++) {
+                       if (bbio->stripes[i].dev->devid == srcdev_devid) {
+                               /* write to new disk, too */
+                               struct btrfs_bio_stripe *new =
+                                       bbio->stripes + index_where_to_add;
+                               struct btrfs_bio_stripe *old =
+                                       bbio->stripes + i;
+
+                               new->physical = old->physical;
+                               new->length = old->length;
+                               new->dev = dev_replace->tgtdev;
+                               bbio->tgtdev_map[i] = index_where_to_add;
+                               index_where_to_add++;
+                               max_errors++;
+                               tgtdev_indexes++;
+                       }
+               }
+               num_stripes = index_where_to_add;
+       } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
+               int index_srcdev = 0;
+               int found = 0;
+               u64 physical_of_found = 0;
+
+               /*
+                * During the dev-replace procedure, the target drive can also
+                * be used to read data in case it is needed to repair a corrupt
+                * block elsewhere. This is possible if the requested area is
+                * left of the left cursor. In this area, the target drive is a
+                * full copy of the source drive.
+                */
+               for (i = 0; i < num_stripes; i++) {
+                       if (bbio->stripes[i].dev->devid == srcdev_devid) {
+                               /*
+                                * In case of DUP, in order to keep it simple,
+                                * only add the mirror with the lowest physical
+                                * address
+                                */
+                               if (found &&
+                                   physical_of_found <=
+                                    bbio->stripes[i].physical)
+                                       continue;
+                               index_srcdev = i;
+                               found = 1;
+                               physical_of_found = bbio->stripes[i].physical;
+                       }
+               }
+               if (found) {
+                       struct btrfs_bio_stripe *tgtdev_stripe =
+                               bbio->stripes + num_stripes;
+
+                       tgtdev_stripe->physical = physical_of_found;
+                       tgtdev_stripe->length =
+                               bbio->stripes[index_srcdev].length;
+                       tgtdev_stripe->dev = dev_replace->tgtdev;
+                       bbio->tgtdev_map[index_srcdev] = num_stripes;
+
+                       tgtdev_indexes++;
+                       num_stripes++;
+               }
+       }
+
+       *num_stripes_ret = num_stripes;
+       *max_errors_ret = max_errors;
+       bbio->num_tgtdevs = tgtdev_indexes;
+       *bbio_ret = bbio;
+}
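An illustration of the BTRFS_MAP_WRITE duplication above for a hypothetical
two-stripe RAID1 chunk while devid 1 is being replaced (tgt denotes
dev_replace->tgtdev):

	/*
	 * before:  stripes[] = { devid 1, devid 2 }              num_stripes = 2
	 * after:   stripes[] = { devid 1, devid 2, tgt copy of stripe 0 }
	 *                                                         num_stripes = 3
	 * and      tgtdev_map[0] = 2, num_tgtdevs = 1, max_errors incremented
	 */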
+
+static bool need_full_stripe(enum btrfs_map_op op)
+{
+       return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
+}
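The need_full_stripe() predicate is a naming convenience; later in this patch
it replaces open-coded checks such as:

	/* before */
	if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
		max_errors = btrfs_chunk_max_errors(map);

	/* after */
	if (need_full_stripe(op))
		max_errors = btrfs_chunk_max_errors(map);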
+
 static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                             enum btrfs_map_op op,
                             u64 logical, u64 *length,
@@ -5322,14 +5639,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 {
        struct extent_map *em;
        struct map_lookup *map;
-       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
-       struct extent_map_tree *em_tree = &map_tree->map_tree;
        u64 offset;
        u64 stripe_offset;
-       u64 stripe_end_offset;
        u64 stripe_nr;
-       u64 stripe_nr_orig;
-       u64 stripe_nr_end;
        u64 stripe_len;
        u32 stripe_index;
        int i;
@@ -5345,23 +5657,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
        u64 physical_to_patch_in_first_stripe = 0;
        u64 raid56_full_stripe_start = (u64)-1;
 
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, logical, *length);
-       read_unlock(&em_tree->lock);
-
-       if (!em) {
-               btrfs_crit(fs_info, "unable to find logical %llu len %llu",
-                       logical, *length);
-               return -EINVAL;
-       }
+       if (op == BTRFS_MAP_DISCARD)
+               return __btrfs_map_block_for_discard(fs_info, logical,
+                                                    *length, bbio_ret);
 
-       if (em->start > logical || em->start + em->len < logical) {
-               btrfs_crit(fs_info,
-                          "found a bad mapping, wanted %Lu, found %Lu-%Lu",
-                          logical, em->start, em->start + em->len);
-               free_extent_map(em);
-               return -EINVAL;
-       }
+       em = get_chunk_map(fs_info, logical, *length);
+       if (IS_ERR(em))
+               return PTR_ERR(em);
 
        map = em->map_lookup;
        offset = logical - em->start;
@@ -5400,14 +5702,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                raid56_full_stripe_start *= full_stripe_len;
        }
 
-       if (op == BTRFS_MAP_DISCARD) {
-               /* we don't discard raid56 yet */
-               if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-                       ret = -EOPNOTSUPP;
-                       goto out;
-               }
-               *length = min_t(u64, em->len - offset, *length);
-       } else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+       if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
                u64 max_len;
                /* For writes to RAID[56], allow a full stripeset across all disks.
                   For other RAID types and for RAID[56] reads, just allow a single
@@ -5438,105 +5733,28 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                btrfs_dev_replace_set_lock_blocking(dev_replace);
 
        if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
-           op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-           op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) {
-               /*
-                * in dev-replace case, for repair case (that's the only
-                * case where the mirror is selected explicitly when
-                * calling btrfs_map_block), blocks left of the left cursor
-                * can also be read from the target drive.
-                * For REQ_GET_READ_MIRRORS, the target drive is added as
-                * the last one to the array of stripes. For READ, it also
-                * needs to be supported using the same mirror number.
-                * If the requested block is not left of the left cursor,
-                * EIO is returned. This can happen because btrfs_num_copies()
-                * returns one more in the dev-replace case.
-                */
-               u64 tmp_length = *length;
-               struct btrfs_bio *tmp_bbio = NULL;
-               int tmp_num_stripes;
-               u64 srcdev_devid = dev_replace->srcdev->devid;
-               int index_srcdev = 0;
-               int found = 0;
-               u64 physical_of_found = 0;
-
-               ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
-                            logical, &tmp_length, &tmp_bbio, 0, 0);
-               if (ret) {
-                       WARN_ON(tmp_bbio != NULL);
-                       goto out;
-               }
-
-               tmp_num_stripes = tmp_bbio->num_stripes;
-               if (mirror_num > tmp_num_stripes) {
-                       /*
-                        * BTRFS_MAP_GET_READ_MIRRORS does not contain this
-                        * mirror, that means that the requested area
-                        * is not left of the left cursor
-                        */
-                       ret = -EIO;
-                       btrfs_put_bbio(tmp_bbio);
-                       goto out;
-               }
-
-               /*
-                * process the rest of the function using the mirror_num
-                * of the source drive. Therefore look it up first.
-                * At the end, patch the device pointer to the one of the
-                * target drive.
-                */
-               for (i = 0; i < tmp_num_stripes; i++) {
-                       if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
-                               continue;
-
-                       /*
-                        * In case of DUP, in order to keep it simple, only add
-                        * the mirror with the lowest physical address
-                        */
-                       if (found &&
-                           physical_of_found <= tmp_bbio->stripes[i].physical)
-                               continue;
-
-                       index_srcdev = i;
-                       found = 1;
-                       physical_of_found = tmp_bbio->stripes[i].physical;
-               }
-
-               btrfs_put_bbio(tmp_bbio);
-
-               if (!found) {
-                       WARN_ON(1);
-                       ret = -EIO;
+           !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
+               ret = get_extra_mirror_from_replace(fs_info, logical, *length,
+                                                   dev_replace->srcdev->devid,
+                                                   &mirror_num,
+                                           &physical_to_patch_in_first_stripe);
+               if (ret)
                        goto out;
-               }
-
-               mirror_num = index_srcdev + 1;
-               patch_the_first_stripe_for_dev_replace = 1;
-               physical_to_patch_in_first_stripe = physical_of_found;
+               else
+                       patch_the_first_stripe_for_dev_replace = 1;
        } else if (mirror_num > map->num_stripes) {
                mirror_num = 0;
        }
 
        num_stripes = 1;
        stripe_index = 0;
-       stripe_nr_orig = stripe_nr;
-       stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
-       stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
-       stripe_end_offset = stripe_nr_end * map->stripe_len -
-                           (offset + *length);
-
        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-               if (op == BTRFS_MAP_DISCARD)
-                       num_stripes = min_t(u64, map->num_stripes,
-                                           stripe_nr_end - stripe_nr_orig);
                stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
                                &stripe_index);
-               if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-                   op != BTRFS_MAP_GET_READ_MIRRORS)
+               if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS)
                        mirror_num = 1;
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-               if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
-                   op == BTRFS_MAP_GET_READ_MIRRORS)
+               if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -5549,8 +5767,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                }
 
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-               if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
-                   op == BTRFS_MAP_GET_READ_MIRRORS) {
+               if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) {
                        num_stripes = map->num_stripes;
                } else if (mirror_num) {
                        stripe_index = mirror_num - 1;
@@ -5566,10 +5783,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
                if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
                        num_stripes = map->sub_stripes;
-               else if (op == BTRFS_MAP_DISCARD)
-                       num_stripes = min_t(u64, map->sub_stripes *
-                                           (stripe_nr_end - stripe_nr_orig),
-                                           map->num_stripes);
                else if (mirror_num)
                        stripe_index += mirror_num - 1;
                else {
@@ -5587,7 +5800,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                    (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS ||
                     mirror_num > 1)) {
                        /* push stripe_nr back to the start of the full stripe */
-                       stripe_nr = div_u64(raid56_full_stripe_start,
+                       stripe_nr = div64_u64(raid56_full_stripe_start,
                                        stripe_len * nr_data_stripes(map));
 
                        /* RAID[56] write or recovery. Return all stripes */
@@ -5612,8 +5825,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                        /* We distribute the parity blocks across stripes */
                        div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
                                        &stripe_index);
-                       if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-                           op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1)
+                       if ((op != BTRFS_MAP_WRITE &&
+                            op != BTRFS_MAP_GET_READ_MIRRORS) &&
+                           mirror_num <= 1)
                                mirror_num = 1;
                }
        } else {
@@ -5635,8 +5849,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
        }
 
        num_alloc_stripes = num_stripes;
-       if (dev_replace_is_ongoing) {
-               if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD)
+       if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
+               if (op == BTRFS_MAP_WRITE)
                        num_alloc_stripes <<= 1;
                if (op == BTRFS_MAP_GET_READ_MIRRORS)
                        num_alloc_stripes++;
@@ -5648,14 +5862,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                ret = -ENOMEM;
                goto out;
        }
-       if (dev_replace_is_ongoing)
+       if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
                bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
 
        /* build raid_map */
-       if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
-           need_raid_map &&
-           ((op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) ||
-           mirror_num > 1)) {
+       if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
+           (need_full_stripe(op) || mirror_num > 1)) {
                u64 tmp;
                unsigned rot;
 
@@ -5679,173 +5891,27 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                                RAID6_Q_STRIPE;
        }
 
-       if (op == BTRFS_MAP_DISCARD) {
-               u32 factor = 0;
-               u32 sub_stripes = 0;
-               u64 stripes_per_dev = 0;
-               u32 remaining_stripes = 0;
-               u32 last_stripe = 0;
 
-               if (map->type &
-                   (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
-                       if (map->type & BTRFS_BLOCK_GROUP_RAID0)
-                               sub_stripes = 1;
-                       else
-                               sub_stripes = map->sub_stripes;
-
-                       factor = map->num_stripes / sub_stripes;
-                       stripes_per_dev = div_u64_rem(stripe_nr_end -
-                                                     stripe_nr_orig,
-                                                     factor,
-                                                     &remaining_stripes);
-                       div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
-                       last_stripe *= sub_stripes;
-               }
-
-               for (i = 0; i < num_stripes; i++) {
-                       bbio->stripes[i].physical =
-                               map->stripes[stripe_index].physical +
-                               stripe_offset + stripe_nr * map->stripe_len;
-                       bbio->stripes[i].dev = map->stripes[stripe_index].dev;
-
-                       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
-                                        BTRFS_BLOCK_GROUP_RAID10)) {
-                               bbio->stripes[i].length = stripes_per_dev *
-                                                         map->stripe_len;
-
-                               if (i / sub_stripes < remaining_stripes)
-                                       bbio->stripes[i].length +=
-                                               map->stripe_len;
-
-                               /*
-                                * Special for the first stripe and
-                                * the last stripe:
-                                *
-                                * |-------|...|-------|
-                                *     |----------|
-                                *    off     end_off
-                                */
-                               if (i < sub_stripes)
-                                       bbio->stripes[i].length -=
-                                               stripe_offset;
-
-                               if (stripe_index >= last_stripe &&
-                                   stripe_index <= (last_stripe +
-                                                    sub_stripes - 1))
-                                       bbio->stripes[i].length -=
-                                               stripe_end_offset;
-
-                               if (i == sub_stripes - 1)
-                                       stripe_offset = 0;
-                       } else
-                               bbio->stripes[i].length = *length;
-
-                       stripe_index++;
-                       if (stripe_index == map->num_stripes) {
-                               /* This could only happen for RAID0/10 */
-                               stripe_index = 0;
-                               stripe_nr++;
-                       }
-               }
-       } else {
-               for (i = 0; i < num_stripes; i++) {
-                       bbio->stripes[i].physical =
-                               map->stripes[stripe_index].physical +
-                               stripe_offset +
-                               stripe_nr * map->stripe_len;
-                       bbio->stripes[i].dev =
-                               map->stripes[stripe_index].dev;
-                       stripe_index++;
-               }
+       for (i = 0; i < num_stripes; i++) {
+               bbio->stripes[i].physical =
+                       map->stripes[stripe_index].physical +
+                       stripe_offset +
+                       stripe_nr * map->stripe_len;
+               bbio->stripes[i].dev =
+                       map->stripes[stripe_index].dev;
+               stripe_index++;
        }
 
-       if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
+       if (need_full_stripe(op))
                max_errors = btrfs_chunk_max_errors(map);
 
        if (bbio->raid_map)
                sort_parity_stripes(bbio, num_stripes);
 
-       tgtdev_indexes = 0;
-       if (dev_replace_is_ongoing &&
-          (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) &&
-           dev_replace->tgtdev != NULL) {
-               int index_where_to_add;
-               u64 srcdev_devid = dev_replace->srcdev->devid;
-
-               /*
-                * duplicate the write operations while the dev replace
-                * procedure is running. Since the copying of the old disk
-                * to the new disk takes place at run time while the
-                * filesystem is mounted writable, the regular write
-                * operations to the old disk have to be duplicated to go
-                * to the new disk as well.
-                * Note that device->missing is handled by the caller, and
-                * that the write to the old disk is already set up in the
-                * stripes array.
-                */
-               index_where_to_add = num_stripes;
-               for (i = 0; i < num_stripes; i++) {
-                       if (bbio->stripes[i].dev->devid == srcdev_devid) {
-                               /* write to new disk, too */
-                               struct btrfs_bio_stripe *new =
-                                       bbio->stripes + index_where_to_add;
-                               struct btrfs_bio_stripe *old =
-                                       bbio->stripes + i;
-
-                               new->physical = old->physical;
-                               new->length = old->length;
-                               new->dev = dev_replace->tgtdev;
-                               bbio->tgtdev_map[i] = index_where_to_add;
-                               index_where_to_add++;
-                               max_errors++;
-                               tgtdev_indexes++;
-                       }
-               }
-               num_stripes = index_where_to_add;
-       } else if (dev_replace_is_ongoing &&
-                  op == BTRFS_MAP_GET_READ_MIRRORS &&
-                  dev_replace->tgtdev != NULL) {
-               u64 srcdev_devid = dev_replace->srcdev->devid;
-               int index_srcdev = 0;
-               int found = 0;
-               u64 physical_of_found = 0;
-
-               /*
-                * During the dev-replace procedure, the target drive can
-                * also be used to read data in case it is needed to repair
-                * a corrupt block elsewhere. This is possible if the
-                * requested area is left of the left cursor. In this area,
-                * the target drive is a full copy of the source drive.
-                */
-               for (i = 0; i < num_stripes; i++) {
-                       if (bbio->stripes[i].dev->devid == srcdev_devid) {
-                               /*
-                                * In case of DUP, in order to keep it
-                                * simple, only add the mirror with the
-                                * lowest physical address
-                                */
-                               if (found &&
-                                   physical_of_found <=
-                                    bbio->stripes[i].physical)
-                                       continue;
-                               index_srcdev = i;
-                               found = 1;
-                               physical_of_found = bbio->stripes[i].physical;
-                       }
-               }
-               if (found) {
-                       struct btrfs_bio_stripe *tgtdev_stripe =
-                               bbio->stripes + num_stripes;
-
-                       tgtdev_stripe->physical = physical_of_found;
-                       tgtdev_stripe->length =
-                               bbio->stripes[index_srcdev].length;
-                       tgtdev_stripe->dev = dev_replace->tgtdev;
-                       bbio->tgtdev_map[index_srcdev] = num_stripes;
-
-                       tgtdev_indexes++;
-                       num_stripes++;
-               }
+       if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
+           need_full_stripe(op)) {
+               handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
+                                         &max_errors);
        }
 
        *bbio_ret = bbio;
@@ -5853,7 +5919,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
        bbio->num_stripes = num_stripes;
        bbio->max_errors = max_errors;
        bbio->mirror_num = mirror_num;
-       bbio->num_tgtdevs = tgtdev_indexes;
 
        /*
         * this is the case that REQ_READ && dev_replace_is_ongoing &&
@@ -5886,19 +5951,15 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 /* For Scrub/replace */
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
                     u64 logical, u64 *length,
-                    struct btrfs_bio **bbio_ret, int mirror_num,
-                    int need_raid_map)
+                    struct btrfs_bio **bbio_ret)
 {
-       return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
-                                mirror_num, need_raid_map);
+       return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
 }
 
 int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
                     u64 chunk_start, u64 physical, u64 devid,
                     u64 **logical, int *naddrs, int *stripe_len)
 {
-       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
-       struct extent_map_tree *em_tree = &map_tree->map_tree;
        struct extent_map *em;
        struct map_lookup *map;
        u64 *buf;
@@ -5908,24 +5969,11 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
        u64 rmap_len;
        int i, j, nr = 0;
 
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, chunk_start, 1);
-       read_unlock(&em_tree->lock);
-
-       if (!em) {
-               btrfs_err(fs_info, "couldn't find em for chunk %Lu",
-                       chunk_start);
+       em = get_chunk_map(fs_info, chunk_start, 1);
+       if (IS_ERR(em))
                return -EIO;
-       }
 
-       if (em->start != chunk_start) {
-               btrfs_err(fs_info, "bad chunk start, em=%Lu, wanted=%Lu",
-                      em->start, chunk_start);
-               free_extent_map(em);
-               return -EIO;
-       }
        map = em->map_lookup;
-
        length = em->len;
        rmap_len = map->stripe_len;
 
@@ -5949,7 +5997,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
                        continue;
 
                stripe_nr = physical - map->stripes[i].physical;
-               stripe_nr = div_u64(stripe_nr, map->stripe_len);
+               stripe_nr = div64_u64(stripe_nr, map->stripe_len);
 
                if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                        stripe_nr = stripe_nr * map->num_stripes + i;