From: Joe Thornber Date: Thu, 22 Sep 2016 10:15:21 +0000 (-0400) Subject: dm cache metadata: add "metadata2" feature X-Git-Tag: v4.11-rc1~139^2~9 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=629d0a8a1a104187db8fbf966e4cc5cfb6aa9a3c;p=karo-tx-linux.git dm cache metadata: add "metadata2" feature If "metadata2" is provided as a table argument when creating/loading a cache target a more compact metadata format, with separate dirty bits, is used. "metadata2" improves speed of shutting down a cache target. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt index 785eab87aa71..f228604ddbcd 100644 --- a/Documentation/device-mapper/cache.txt +++ b/Documentation/device-mapper/cache.txt @@ -207,6 +207,10 @@ Optional feature arguments are: block, then the cache block is invalidated. To enable passthrough mode the cache must be clean. + metadata2 : use version 2 of the metadata. This stores the dirty bits + in a separate btree, which improves speed of shutting + down the cache. + A policy called 'default' is always registered. This is an alias for the policy we currently think is giving best all round performance. diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 9364a02e1646..0610be7846dc 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -25,7 +25,7 @@ * defines a range of metadata versions that this module can handle. */ #define MIN_CACHE_VERSION 1 -#define MAX_CACHE_VERSION 1 +#define MAX_CACHE_VERSION 2 #define CACHE_METADATA_CACHE_SIZE 64 @@ -55,6 +55,7 @@ enum mapping_bits { /* * The data on the cache is different from that on the origin. + * This flag is only used by metadata format 1. */ M_DIRTY = 2 }; @@ -93,12 +94,18 @@ struct cache_disk_superblock { __le32 write_misses; __le32 policy_version[CACHE_POLICY_VERSION_SIZE]; + + /* + * Metadata format 2 fields. + */ + __le64 dirty_root; } __packed; struct dm_cache_metadata { atomic_t ref_count; struct list_head list; + unsigned version; struct block_device *bdev; struct dm_block_manager *bm; struct dm_space_map *metadata_sm; @@ -141,12 +148,19 @@ struct dm_cache_metadata { */ bool fail_io:1; + /* + * Metadata format 2 fields. + */ + dm_block_t dirty_root; + struct dm_disk_bitset dirty_info; + /* * These structures are used when loading metadata. They're too * big to put on the stack. */ struct dm_array_cursor mapping_cursor; struct dm_array_cursor hint_cursor; + struct dm_bitset_cursor dirty_cursor; }; /*------------------------------------------------------------------- @@ -170,6 +184,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v, static int check_metadata_version(struct cache_disk_superblock *disk_super) { uint32_t metadata_version = le32_to_cpu(disk_super->version); + if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) { DMERR("Cache metadata version %u found, but only versions between %u and %u supported.", metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION); @@ -310,6 +325,11 @@ static void __copy_sm_root(struct dm_cache_metadata *cmd, sizeof(cmd->metadata_space_map_root)); } +static bool separate_dirty_bits(struct dm_cache_metadata *cmd) +{ + return cmd->version >= 2; +} + static int __write_initial_superblock(struct dm_cache_metadata *cmd) { int r; @@ -341,7 +361,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->flags = 0; memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); - disk_super->version = cpu_to_le32(MAX_CACHE_VERSION); + disk_super->version = cpu_to_le32(cmd->version); memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); disk_super->policy_hint_size = 0; @@ -362,6 +382,9 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->write_hits = cpu_to_le32(0); disk_super->write_misses = cpu_to_le32(0); + if (separate_dirty_bits(cmd)) + disk_super->dirty_root = cpu_to_le64(cmd->dirty_root); + return dm_tm_commit(cmd->tm, sblock); } @@ -382,6 +405,13 @@ static int __format_metadata(struct dm_cache_metadata *cmd) if (r < 0) goto bad; + if (separate_dirty_bits(cmd)) { + dm_disk_bitset_init(cmd->tm, &cmd->dirty_info); + r = dm_bitset_empty(&cmd->dirty_info, &cmd->dirty_root); + if (r < 0) + goto bad; + } + dm_disk_bitset_init(cmd->tm, &cmd->discard_info); r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); if (r < 0) @@ -407,9 +437,10 @@ bad: static int __check_incompat_features(struct cache_disk_superblock *disk_super, struct dm_cache_metadata *cmd) { - uint32_t features; + uint32_t incompat_flags, features; - features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP; + incompat_flags = le32_to_cpu(disk_super->incompat_flags); + features = incompat_flags & ~DM_CACHE_FEATURE_INCOMPAT_SUPP; if (features) { DMERR("could not access metadata due to unsupported optional features (%lx).", (unsigned long)features); @@ -470,6 +501,7 @@ static int __open_metadata(struct dm_cache_metadata *cmd) } __setup_mapping_info(cmd); + dm_disk_bitset_init(cmd->tm, &cmd->dirty_info); dm_disk_bitset_init(cmd->tm, &cmd->discard_info); sb_flags = le32_to_cpu(disk_super->flags); cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags); @@ -548,6 +580,7 @@ static unsigned long clear_clean_shutdown(unsigned long flags) static void read_superblock_fields(struct dm_cache_metadata *cmd, struct cache_disk_superblock *disk_super) { + cmd->version = le32_to_cpu(disk_super->version); cmd->flags = le32_to_cpu(disk_super->flags); cmd->root = le64_to_cpu(disk_super->mapping_root); cmd->hint_root = le64_to_cpu(disk_super->hint_root); @@ -567,6 +600,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits); cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses); + if (separate_dirty_bits(cmd)) + cmd->dirty_root = le64_to_cpu(disk_super->dirty_root); + cmd->changed = false; } @@ -625,6 +661,13 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, */ BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512); + if (separate_dirty_bits(cmd)) { + r = dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root, + &cmd->dirty_root); + if (r) + return r; + } + r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, &cmd->discard_root); if (r) @@ -649,6 +692,8 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, update_flags(disk_super, mutator); disk_super->mapping_root = cpu_to_le64(cmd->root); + if (separate_dirty_bits(cmd)) + disk_super->dirty_root = cpu_to_le64(cmd->dirty_root); disk_super->hint_root = cpu_to_le64(cmd->hint_root); disk_super->discard_root = cpu_to_le64(cmd->discard_root); disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); @@ -698,7 +743,8 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) static struct dm_cache_metadata *metadata_open(struct block_device *bdev, sector_t data_block_size, bool may_format_device, - size_t policy_hint_size) + size_t policy_hint_size, + unsigned metadata_version) { int r; struct dm_cache_metadata *cmd; @@ -709,6 +755,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev, return ERR_PTR(-ENOMEM); } + cmd->version = metadata_version; atomic_set(&cmd->ref_count, 1); init_rwsem(&cmd->root_lock); cmd->bdev = bdev; @@ -757,7 +804,8 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev) static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, sector_t data_block_size, bool may_format_device, - size_t policy_hint_size) + size_t policy_hint_size, + unsigned metadata_version) { struct dm_cache_metadata *cmd, *cmd2; @@ -768,7 +816,8 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, if (cmd) return cmd; - cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size); + cmd = metadata_open(bdev, data_block_size, may_format_device, + policy_hint_size, metadata_version); if (!IS_ERR(cmd)) { mutex_lock(&table_lock); cmd2 = lookup(bdev); @@ -800,10 +849,11 @@ static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, sector_t data_block_size, bool may_format_device, - size_t policy_hint_size) + size_t policy_hint_size, + unsigned metadata_version) { - struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, - may_format_device, policy_hint_size); + struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device, + policy_hint_size, metadata_version); if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) { dm_cache_metadata_close(cmd); @@ -829,8 +879,8 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd) /* * Checks that the given cache block is either unmapped or clean. */ -static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, - bool *result) +static int block_clean_combined_dirty(struct dm_cache_metadata *cmd, dm_cblock_t b, + bool *result) { int r; __le64 value; @@ -838,10 +888,8 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, unsigned flags; r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value); - if (r) { - DMERR("block_unmapped_or_clean failed"); + if (r) return r; - } unpack_value(value, &ob, &flags); *result = !((flags & M_VALID) && (flags & M_DIRTY)); @@ -849,17 +897,19 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, return 0; } -static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, - dm_cblock_t begin, dm_cblock_t end, - bool *result) +static int blocks_are_clean_combined_dirty(struct dm_cache_metadata *cmd, + dm_cblock_t begin, dm_cblock_t end, + bool *result) { int r; *result = true; while (begin != end) { - r = block_unmapped_or_clean(cmd, begin, result); - if (r) + r = block_clean_combined_dirty(cmd, begin, result); + if (r) { + DMERR("block_clean_combined_dirty failed"); return r; + } if (!*result) { DMERR("cache block %llu is dirty", @@ -873,6 +923,48 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, return 0; } +static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, + dm_cblock_t begin, dm_cblock_t end, + bool *result) +{ + int r; + bool dirty_flag; + *result = true; + + // FIXME: use a cursor so we can benefit from preloading metadata. + while (begin != end) { + /* + * We assume that unmapped blocks have their dirty bit + * cleared. + */ + r = dm_bitset_test_bit(&cmd->dirty_info, cmd->dirty_root, + from_cblock(begin), &cmd->dirty_root, &dirty_flag); + if (r) + return r; + + if (dirty_flag) { + DMERR("cache block %llu is dirty", + (unsigned long long) from_cblock(begin)); + *result = false; + return 0; + } + + begin = to_cblock(from_cblock(begin) + 1); + } + + return 0; +} + +static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, + dm_cblock_t begin, dm_cblock_t end, + bool *result) +{ + if (separate_dirty_bits(cmd)) + return blocks_are_clean_separate_dirty(cmd, begin, end, result); + else + return blocks_are_clean_combined_dirty(cmd, begin, end, result); +} + static bool cmd_write_lock(struct dm_cache_metadata *cmd) { down_write(&cmd->root_lock); @@ -950,8 +1042,18 @@ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), from_cblock(new_cache_size), &null_mapping, &cmd->root); - if (!r) - cmd->cache_blocks = new_cache_size; + if (r) + goto out; + + if (separate_dirty_bits(cmd)) { + r = dm_bitset_resize(&cmd->dirty_info, cmd->dirty_root, + from_cblock(cmd->cache_blocks), from_cblock(new_cache_size), + false, &cmd->dirty_root); + if (r) + goto out; + } + + cmd->cache_blocks = new_cache_size; cmd->changed = true; out: @@ -1185,11 +1287,11 @@ static bool hints_array_available(struct dm_cache_metadata *cmd, hints_array_initialized(cmd); } -static int __load_mapping(struct dm_cache_metadata *cmd, - uint64_t cb, bool hints_valid, - struct dm_array_cursor *mapping_cursor, - struct dm_array_cursor *hint_cursor, - load_mapping_fn fn, void *context) +static int __load_mapping_v1(struct dm_cache_metadata *cmd, + uint64_t cb, bool hints_valid, + struct dm_array_cursor *mapping_cursor, + struct dm_array_cursor *hint_cursor, + load_mapping_fn fn, void *context) { int r = 0; @@ -1221,6 +1323,45 @@ static int __load_mapping(struct dm_cache_metadata *cmd, return r; } +static int __load_mapping_v2(struct dm_cache_metadata *cmd, + uint64_t cb, bool hints_valid, + struct dm_array_cursor *mapping_cursor, + struct dm_array_cursor *hint_cursor, + struct dm_bitset_cursor *dirty_cursor, + load_mapping_fn fn, void *context) +{ + int r = 0; + + __le64 mapping; + __le32 hint = 0; + + __le64 *mapping_value_le; + __le32 *hint_value_le; + + dm_oblock_t oblock; + unsigned flags; + bool dirty; + + dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); + memcpy(&mapping, mapping_value_le, sizeof(mapping)); + unpack_value(mapping, &oblock, &flags); + + if (flags & M_VALID) { + if (hints_valid) { + dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); + memcpy(&hint, hint_value_le, sizeof(hint)); + } + + dirty = dm_bitset_cursor_get_value(dirty_cursor); + r = fn(context, oblock, to_cblock(cb), dirty, + le32_to_cpu(hint), hints_valid); + if (r) + DMERR("policy couldn't load cblock"); + } + + return r; +} + static int __load_mappings(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy, load_mapping_fn fn, void *context) @@ -1246,10 +1387,28 @@ static int __load_mappings(struct dm_cache_metadata *cmd, } } + if (separate_dirty_bits(cmd)) { + r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, + from_cblock(cmd->cache_blocks), + &cmd->dirty_cursor); + if (r) { + dm_array_cursor_end(&cmd->hint_cursor); + dm_array_cursor_end(&cmd->mapping_cursor); + return r; + } + } + for (cb = 0; ; cb++) { - r = __load_mapping(cmd, cb, hints_valid, - &cmd->mapping_cursor, &cmd->hint_cursor, - fn, context); + if (separate_dirty_bits(cmd)) + r = __load_mapping_v2(cmd, cb, hints_valid, + &cmd->mapping_cursor, + &cmd->hint_cursor, + &cmd->dirty_cursor, + fn, context); + else + r = __load_mapping_v1(cmd, cb, hints_valid, + &cmd->mapping_cursor, &cmd->hint_cursor, + fn, context); if (r) goto out; @@ -1272,12 +1431,23 @@ static int __load_mappings(struct dm_cache_metadata *cmd, goto out; } } + + if (separate_dirty_bits(cmd)) { + r = dm_bitset_cursor_next(&cmd->dirty_cursor); + if (r) { + DMERR("dm_bitset_cursor_next for dirty failed"); + goto out; + } + } } out: dm_array_cursor_end(&cmd->mapping_cursor); if (hints_valid) dm_array_cursor_end(&cmd->hint_cursor); + if (separate_dirty_bits(cmd)) + dm_bitset_cursor_end(&cmd->dirty_cursor); + return r; } @@ -1360,13 +1530,55 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty } -int dm_cache_set_dirty(struct dm_cache_metadata *cmd, - dm_cblock_t cblock, bool dirty) +static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits) +{ + int r; + unsigned i; + for (i = 0; i < nr_bits; i++) { + r = __dirty(cmd, to_cblock(i), test_bit(i, bits)); + if (r) + return r; + } + + return 0; +} + +static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits) +{ + int r = 0; + unsigned i; + + /* nr_bits is really just a sanity check */ + if (nr_bits != from_cblock(cmd->cache_blocks)) { + DMERR("dirty bitset is wrong size"); + return -EINVAL; + } + + for (i = 0; i < nr_bits; i++) { + if (test_bit(i, bits)) + r = dm_bitset_set_bit(&cmd->dirty_info, cmd->dirty_root, i, &cmd->dirty_root); + else + r = dm_bitset_clear_bit(&cmd->dirty_info, cmd->dirty_root, i, &cmd->dirty_root); + + if (r) + return r; + } + + cmd->changed = true; + return dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root, &cmd->dirty_root); +} + +int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd, + unsigned nr_bits, + unsigned long *bits) { int r; WRITE_LOCK(cmd); - r = __dirty(cmd, cblock, dirty); + if (separate_dirty_bits(cmd)) + r = __set_dirty_bits_v2(cmd, nr_bits, bits); + else + r = __set_dirty_bits_v1(cmd, nr_bits, bits); WRITE_UNLOCK(cmd); return r; diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index 8528744195e5..4f07c08cf107 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -45,18 +45,20 @@ * As these various flags are defined they should be added to the * following masks. */ + #define DM_CACHE_FEATURE_COMPAT_SUPP 0UL #define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL #define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL /* - * Reopens or creates a new, empty metadata volume. - * Returns an ERR_PTR on failure. + * Reopens or creates a new, empty metadata volume. Returns an ERR_PTR on + * failure. If reopening then features must match. */ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, sector_t data_block_size, bool may_format_device, - size_t policy_hint_size); + size_t policy_hint_size, + unsigned metadata_version); void dm_cache_metadata_close(struct dm_cache_metadata *cmd); @@ -91,7 +93,8 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd, load_mapping_fn fn, void *context); -int dm_cache_set_dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty); +int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd, + unsigned nr_bits, unsigned long *bits); struct dm_cache_statistics { uint32_t read_hits; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 897dc72f07c9..5813d2a7eefe 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -179,6 +179,7 @@ enum cache_io_mode { struct cache_features { enum cache_metadata_mode mode; enum cache_io_mode io_mode; + unsigned metadata_version; }; struct cache_stats { @@ -2541,13 +2542,14 @@ static void init_features(struct cache_features *cf) { cf->mode = CM_WRITE; cf->io_mode = CM_IO_WRITEBACK; + cf->metadata_version = 1; } static int parse_features(struct cache_args *ca, struct dm_arg_set *as, char **error) { static struct dm_arg _args[] = { - {0, 1, "Invalid number of cache feature arguments"}, + {0, 2, "Invalid number of cache feature arguments"}, }; int r; @@ -2573,6 +2575,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, else if (!strcasecmp(arg, "passthrough")) cf->io_mode = CM_IO_PASSTHROUGH; + else if (!strcasecmp(arg, "metadata2")) + cf->metadata_version = 2; + else { *error = "Unrecognised cache feature requested"; return -EINVAL; @@ -2827,7 +2832,8 @@ static int cache_create(struct cache_args *ca, struct cache **result) cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, ca->block_size, may_format, - dm_cache_policy_get_hint_size(cache->policy)); + dm_cache_policy_get_hint_size(cache->policy), + ca->features.metadata_version); if (IS_ERR(cmd)) { *error = "Error creating metadata object"; r = PTR_ERR(cmd); @@ -3172,21 +3178,16 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) static int write_dirty_bitset(struct cache *cache) { - unsigned i, r; + int r; if (get_cache_mode(cache) >= CM_READ_ONLY) return -EINVAL; - for (i = 0; i < from_cblock(cache->cache_size); i++) { - r = dm_cache_set_dirty(cache->cmd, to_cblock(i), - is_dirty(cache, to_cblock(i))); - if (r) { - metadata_operation_failed(cache, "dm_cache_set_dirty", r); - return r; - } - } + r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset); + if (r) + metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r); - return 0; + return r; } static int write_discard_bitset(struct cache *cache) @@ -3562,14 +3563,19 @@ static void cache_status(struct dm_target *ti, status_type_t type, (unsigned) atomic_read(&cache->stats.promotion), (unsigned long) atomic_read(&cache->nr_dirty)); + if (cache->features.metadata_version == 2) + DMEMIT("2 metadata2 "); + else + DMEMIT("1 "); + if (writethrough_mode(&cache->features)) - DMEMIT("1 writethrough "); + DMEMIT("writethrough "); else if (passthrough_mode(&cache->features)) - DMEMIT("1 passthrough "); + DMEMIT("passthrough "); else if (writeback_mode(&cache->features)) - DMEMIT("1 writeback "); + DMEMIT("writeback "); else { DMERR("%s: internal error: unknown io mode: %d", @@ -3817,7 +3823,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 9, 0}, + .version = {1, 10, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr,