]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/md/dm-cache-metadata.c
dm transaction manager: fix corruption due to non-atomic transaction commit
[karo-tx-linux.git] / drivers / md / dm-cache-metadata.c
1 /*
2  * Copyright (C) 2012 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6
7 #include "dm-cache-metadata.h"
8
9 #include "persistent-data/dm-array.h"
10 #include "persistent-data/dm-bitset.h"
11 #include "persistent-data/dm-space-map.h"
12 #include "persistent-data/dm-space-map-disk.h"
13 #include "persistent-data/dm-transaction-manager.h"
14
15 #include <linux/device-mapper.h>
16
17 /*----------------------------------------------------------------*/
18
/* NOTE(review): presumably picked up by the DMERR() logging macro - confirm. */
#define DM_MSG_PREFIX   "cache metadata"

/*
 * Magic number stored in the superblock and verified by sb_check().
 * NOTE: the leading zero makes this an octal literal in C.
 */
#define CACHE_SUPERBLOCK_MAGIC 06142003
/* Block number at which the superblock is locked, read and written. */
#define CACHE_SUPERBLOCK_LOCATION 0

/*
 * defines a range of metadata versions that this module can handle.
 */
#define MIN_CACHE_VERSION 1
#define MAX_CACHE_VERSION 1

/* Passed to dm_block_manager_create() as its cache size argument. */
#define CACHE_METADATA_CACHE_SIZE 64

/*
 *  3 for btree insert +
 *  2 for btree lookup used within space map
 */
#define CACHE_MAX_CONCURRENT_LOCKS 5
/* Size in bytes of the metadata_space_map_root area in the superblock. */
#define SPACE_MAP_ROOT_SIZE 128
38
/*
 * Bit numbers within the superblock 'flags' field; they are used with
 * set_bit()/clear_bit()/test_bit() on a copy of that field.
 */
enum superblock_flag_bits {
        /* for spotting crashes that would invalidate the dirty bitset */
        CLEAN_SHUTDOWN,
};
43
/*
 * Each mapping from cache block -> origin block carries a set of flags.
 * These are bit masks (not bit numbers); pack_value() stores them in the
 * low 16 bits of the mapping value.
 */
enum mapping_bits {
        /*
         * A valid mapping.  Because we're using an array we clear this
         * flag for a non-existent mapping.
         */
        M_VALID = 1,

        /*
         * The data on the cache is different from that on the origin.
         */
        M_DIRTY = 2
};
59
/*
 * On-disk layout of the cache metadata superblock.  Multi-byte fields are
 * stored little-endian.  The structure is packed and must not exceed 512
 * bytes (enforced by a BUILD_BUG_ON() in __commit_transaction()).
 */
struct cache_disk_superblock {
        /* Checksum over the rest of the block, starting at 'flags'. */
        __le32 csum;
        /* Bit field indexed by enum superblock_flag_bits. */
        __le32 flags;
        /* Location of this block; verified by sb_check(). */
        __le64 blocknr;

        __u8 uuid[16];
        /* Must equal CACHE_SUPERBLOCK_MAGIC. */
        __le64 magic;
        /* Must lie in [MIN_CACHE_VERSION, MAX_CACHE_VERSION]. */
        __le32 version;

        __u8 policy_name[CACHE_POLICY_NAME_SIZE];
        __le32 policy_hint_size;

        /* Space map root, filled in via dm_sm_copy_root() on commit. */
        __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
        /* Root of the cblock -> oblock mapping array. */
        __le64 mapping_root;
        /* Root of the policy hint array. */
        __le64 hint_root;

        /* Root and geometry of the discard bitset. */
        __le64 discard_root;
        __le64 discard_block_size;
        __le64 discard_nr_blocks;

        __le32 data_block_size;
        __le32 metadata_block_size;
        __le32 cache_blocks;

        /* Feature flags; see __check_incompat_features(). */
        __le32 compat_flags;
        __le32 compat_ro_flags;
        __le32 incompat_flags;

        /* Persisted copy of struct dm_cache_statistics. */
        __le32 read_hits;
        __le32 read_misses;
        __le32 write_hits;
        __le32 write_misses;

        __le32 policy_version[CACHE_POLICY_VERSION_SIZE];
} __packed;
95
/*
 * In-core state for one open cache metadata device.  Allocated by
 * dm_cache_metadata_open() and freed by dm_cache_metadata_close().
 */
struct dm_cache_metadata {
        /* Metadata device and the persistent-data objects built on it. */
        struct block_device *bdev;
        struct dm_block_manager *bm;
        struct dm_space_map *metadata_sm;
        struct dm_transaction_manager *tm;

        /* Descriptors for the mapping array, hint array and discard bitset. */
        struct dm_array_info info;
        struct dm_array_info hint_info;
        struct dm_disk_bitset discard_info;

        /* Taken by the public entry points around access to the fields below. */
        struct rw_semaphore root_lock;
        dm_block_t root;
        dm_block_t hint_root;
        dm_block_t discard_root;

        sector_t discard_block_size;
        dm_oblock_t discard_nr_blocks;

        sector_t data_block_size;
        dm_cblock_t cache_blocks;
        /* Set whenever in-core metadata is modified; cleared on superblock read. */
        bool changed:1;
        /* True if CLEAN_SHUTDOWN was set in the superblock when it was opened. */
        bool clean_when_opened:1;

        char policy_name[CACHE_POLICY_NAME_SIZE];
        unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
        size_t policy_hint_size;
        struct dm_cache_statistics stats;
};
124
125 /*-------------------------------------------------------------------
126  * superblock validator
127  *-----------------------------------------------------------------*/
128
129 #define SUPERBLOCK_CSUM_XOR 9031977
130
131 static void sb_prepare_for_write(struct dm_block_validator *v,
132                                  struct dm_block *b,
133                                  size_t sb_block_size)
134 {
135         struct cache_disk_superblock *disk_super = dm_block_data(b);
136
137         disk_super->blocknr = cpu_to_le64(dm_block_location(b));
138         disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
139                                                       sb_block_size - sizeof(__le32),
140                                                       SUPERBLOCK_CSUM_XOR));
141 }
142
143 static int check_metadata_version(struct cache_disk_superblock *disk_super)
144 {
145         uint32_t metadata_version = le32_to_cpu(disk_super->version);
146         if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
147                 DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
148                       metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
149                 return -EINVAL;
150         }
151
152         return 0;
153 }
154
155 static int sb_check(struct dm_block_validator *v,
156                     struct dm_block *b,
157                     size_t sb_block_size)
158 {
159         struct cache_disk_superblock *disk_super = dm_block_data(b);
160         __le32 csum_le;
161
162         if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
163                 DMERR("sb_check failed: blocknr %llu: wanted %llu",
164                       le64_to_cpu(disk_super->blocknr),
165                       (unsigned long long)dm_block_location(b));
166                 return -ENOTBLK;
167         }
168
169         if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
170                 DMERR("sb_check failed: magic %llu: wanted %llu",
171                       le64_to_cpu(disk_super->magic),
172                       (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
173                 return -EILSEQ;
174         }
175
176         csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
177                                              sb_block_size - sizeof(__le32),
178                                              SUPERBLOCK_CSUM_XOR));
179         if (csum_le != disk_super->csum) {
180                 DMERR("sb_check failed: csum %u: wanted %u",
181                       le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
182                 return -EILSEQ;
183         }
184
185         return check_metadata_version(disk_super);
186 }
187
/*
 * Validator passed to the block manager whenever the superblock is locked
 * through superblock_read_lock(), superblock_lock_zero() or superblock_lock().
 */
static struct dm_block_validator sb_validator = {
        .name = "superblock",
        .prepare_for_write = sb_prepare_for_write,
        .check = sb_check
};
193
194 /*----------------------------------------------------------------*/
195
196 static int superblock_read_lock(struct dm_cache_metadata *cmd,
197                                 struct dm_block **sblock)
198 {
199         return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
200                                &sb_validator, sblock);
201 }
202
203 static int superblock_lock_zero(struct dm_cache_metadata *cmd,
204                                 struct dm_block **sblock)
205 {
206         return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
207                                      &sb_validator, sblock);
208 }
209
210 static int superblock_lock(struct dm_cache_metadata *cmd,
211                            struct dm_block **sblock)
212 {
213         return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
214                                 &sb_validator, sblock);
215 }
216
217 /*----------------------------------------------------------------*/
218
219 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
220 {
221         int r;
222         unsigned i;
223         struct dm_block *b;
224         __le64 *data_le, zero = cpu_to_le64(0);
225         unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
226
227         /*
228          * We can't use a validator here - it may be all zeroes.
229          */
230         r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
231         if (r)
232                 return r;
233
234         data_le = dm_block_data(b);
235         *result = true;
236         for (i = 0; i < sb_block_size; i++) {
237                 if (data_le[i] != zero) {
238                         *result = false;
239                         break;
240                 }
241         }
242
243         return dm_bm_unlock(b);
244 }
245
246 static void __setup_mapping_info(struct dm_cache_metadata *cmd)
247 {
248         struct dm_btree_value_type vt;
249
250         vt.context = NULL;
251         vt.size = sizeof(__le64);
252         vt.inc = NULL;
253         vt.dec = NULL;
254         vt.equal = NULL;
255         dm_array_info_init(&cmd->info, cmd->tm, &vt);
256
257         if (cmd->policy_hint_size) {
258                 vt.size = sizeof(__le32);
259                 dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
260         }
261 }
262
263 static int __write_initial_superblock(struct dm_cache_metadata *cmd)
264 {
265         int r;
266         struct dm_block *sblock;
267         size_t metadata_len;
268         struct cache_disk_superblock *disk_super;
269         sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
270
271         /* FIXME: see if we can lose the max sectors limit */
272         if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
273                 bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
274
275         r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
276         if (r < 0)
277                 return r;
278
279         r = dm_tm_pre_commit(cmd->tm);
280         if (r < 0)
281                 return r;
282
283         r = superblock_lock_zero(cmd, &sblock);
284         if (r)
285                 return r;
286
287         disk_super = dm_block_data(sblock);
288         disk_super->flags = 0;
289         memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
290         disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
291         disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
292         memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
293         memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
294         disk_super->policy_hint_size = 0;
295
296         r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
297                             metadata_len);
298         if (r < 0)
299                 goto bad_locked;
300
301         disk_super->mapping_root = cpu_to_le64(cmd->root);
302         disk_super->hint_root = cpu_to_le64(cmd->hint_root);
303         disk_super->discard_root = cpu_to_le64(cmd->discard_root);
304         disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
305         disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
306         disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
307         disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
308         disk_super->cache_blocks = cpu_to_le32(0);
309
310         disk_super->read_hits = cpu_to_le32(0);
311         disk_super->read_misses = cpu_to_le32(0);
312         disk_super->write_hits = cpu_to_le32(0);
313         disk_super->write_misses = cpu_to_le32(0);
314
315         return dm_tm_commit(cmd->tm, sblock);
316
317 bad_locked:
318         dm_bm_unlock(sblock);
319         return r;
320 }
321
322 static int __format_metadata(struct dm_cache_metadata *cmd)
323 {
324         int r;
325
326         r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
327                                  &cmd->tm, &cmd->metadata_sm);
328         if (r < 0) {
329                 DMERR("tm_create_with_sm failed");
330                 return r;
331         }
332
333         __setup_mapping_info(cmd);
334
335         r = dm_array_empty(&cmd->info, &cmd->root);
336         if (r < 0)
337                 goto bad;
338
339         dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
340
341         r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
342         if (r < 0)
343                 goto bad;
344
345         cmd->discard_block_size = 0;
346         cmd->discard_nr_blocks = 0;
347
348         r = __write_initial_superblock(cmd);
349         if (r)
350                 goto bad;
351
352         cmd->clean_when_opened = true;
353         return 0;
354
355 bad:
356         dm_tm_destroy(cmd->tm);
357         dm_sm_destroy(cmd->metadata_sm);
358
359         return r;
360 }
361
362 static int __check_incompat_features(struct cache_disk_superblock *disk_super,
363                                      struct dm_cache_metadata *cmd)
364 {
365         uint32_t features;
366
367         features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
368         if (features) {
369                 DMERR("could not access metadata due to unsupported optional features (%lx).",
370                       (unsigned long)features);
371                 return -EINVAL;
372         }
373
374         /*
375          * Check for read-only metadata to skip the following RDWR checks.
376          */
377         if (get_disk_ro(cmd->bdev->bd_disk))
378                 return 0;
379
380         features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
381         if (features) {
382                 DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
383                       (unsigned long)features);
384                 return -EINVAL;
385         }
386
387         return 0;
388 }
389
390 static int __open_metadata(struct dm_cache_metadata *cmd)
391 {
392         int r;
393         struct dm_block *sblock;
394         struct cache_disk_superblock *disk_super;
395         unsigned long sb_flags;
396
397         r = superblock_read_lock(cmd, &sblock);
398         if (r < 0) {
399                 DMERR("couldn't read lock superblock");
400                 return r;
401         }
402
403         disk_super = dm_block_data(sblock);
404
405         r = __check_incompat_features(disk_super, cmd);
406         if (r < 0)
407                 goto bad;
408
409         r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
410                                disk_super->metadata_space_map_root,
411                                sizeof(disk_super->metadata_space_map_root),
412                                &cmd->tm, &cmd->metadata_sm);
413         if (r < 0) {
414                 DMERR("tm_open_with_sm failed");
415                 goto bad;
416         }
417
418         __setup_mapping_info(cmd);
419         dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
420         sb_flags = le32_to_cpu(disk_super->flags);
421         cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
422         return dm_bm_unlock(sblock);
423
424 bad:
425         dm_bm_unlock(sblock);
426         return r;
427 }
428
429 static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
430                                      bool format_device)
431 {
432         int r;
433         bool unformatted = false;
434
435         r = __superblock_all_zeroes(cmd->bm, &unformatted);
436         if (r)
437                 return r;
438
439         if (unformatted)
440                 return format_device ? __format_metadata(cmd) : -EPERM;
441
442         return __open_metadata(cmd);
443 }
444
445 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
446                                             bool may_format_device)
447 {
448         int r;
449         cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
450                                           CACHE_METADATA_CACHE_SIZE,
451                                           CACHE_MAX_CONCURRENT_LOCKS);
452         if (IS_ERR(cmd->bm)) {
453                 DMERR("could not create block manager");
454                 return PTR_ERR(cmd->bm);
455         }
456
457         r = __open_or_format_metadata(cmd, may_format_device);
458         if (r)
459                 dm_block_manager_destroy(cmd->bm);
460
461         return r;
462 }
463
/*
 * Tear down the persistent-data objects owned by @cmd: the space map
 * first, then the transaction manager, then the block manager.
 */
static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
{
        dm_sm_destroy(cmd->metadata_sm);
        dm_tm_destroy(cmd->tm);
        dm_block_manager_destroy(cmd->bm);
}
470
471 typedef unsigned long (*flags_mutator)(unsigned long);
472
473 static void update_flags(struct cache_disk_superblock *disk_super,
474                          flags_mutator mutator)
475 {
476         uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
477         disk_super->flags = cpu_to_le32(sb_flags);
478 }
479
480 static unsigned long set_clean_shutdown(unsigned long flags)
481 {
482         set_bit(CLEAN_SHUTDOWN, &flags);
483         return flags;
484 }
485
486 static unsigned long clear_clean_shutdown(unsigned long flags)
487 {
488         clear_bit(CLEAN_SHUTDOWN, &flags);
489         return flags;
490 }
491
492 static void read_superblock_fields(struct dm_cache_metadata *cmd,
493                                    struct cache_disk_superblock *disk_super)
494 {
495         cmd->root = le64_to_cpu(disk_super->mapping_root);
496         cmd->hint_root = le64_to_cpu(disk_super->hint_root);
497         cmd->discard_root = le64_to_cpu(disk_super->discard_root);
498         cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
499         cmd->discard_nr_blocks = to_oblock(le64_to_cpu(disk_super->discard_nr_blocks));
500         cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
501         cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
502         strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
503         cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
504         cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
505         cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
506         cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
507
508         cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
509         cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
510         cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
511         cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
512
513         cmd->changed = false;
514 }
515
516 /*
517  * The mutator updates the superblock flags.
518  */
519 static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
520                                      flags_mutator mutator)
521 {
522         int r;
523         struct cache_disk_superblock *disk_super;
524         struct dm_block *sblock;
525
526         r = superblock_lock(cmd, &sblock);
527         if (r)
528                 return r;
529
530         disk_super = dm_block_data(sblock);
531         update_flags(disk_super, mutator);
532         read_superblock_fields(cmd, disk_super);
533         dm_bm_unlock(sblock);
534
535         return dm_bm_flush(cmd->bm);
536 }
537
/*
 * Start a transaction by reloading the in-core fields from the superblock.
 */
static int __begin_transaction(struct dm_cache_metadata *cmd)
{
        struct dm_block *sblock;
        int r;

        /*
         * The superblock is re-read on every call, although that should
         * not really be necessary.
         */
        r = superblock_read_lock(cmd, &sblock);
        if (r)
                return r;

        read_superblock_fields(cmd, dm_block_data(sblock));
        dm_bm_unlock(sblock);

        return 0;
}
558
559 static int __commit_transaction(struct dm_cache_metadata *cmd,
560                                 flags_mutator mutator)
561 {
562         int r;
563         size_t metadata_len;
564         struct cache_disk_superblock *disk_super;
565         struct dm_block *sblock;
566
567         /*
568          * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
569          */
570         BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
571
572         r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
573                             &cmd->discard_root);
574         if (r)
575                 return r;
576
577         r = dm_tm_pre_commit(cmd->tm);
578         if (r < 0)
579                 return r;
580
581         r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
582         if (r < 0)
583                 return r;
584
585         r = superblock_lock(cmd, &sblock);
586         if (r)
587                 return r;
588
589         disk_super = dm_block_data(sblock);
590
591         if (mutator)
592                 update_flags(disk_super, mutator);
593
594         disk_super->mapping_root = cpu_to_le64(cmd->root);
595         disk_super->hint_root = cpu_to_le64(cmd->hint_root);
596         disk_super->discard_root = cpu_to_le64(cmd->discard_root);
597         disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
598         disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
599         disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
600         strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
601         disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
602         disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
603         disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
604
605         disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
606         disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
607         disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
608         disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
609
610         r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
611                             metadata_len);
612         if (r < 0) {
613                 dm_bm_unlock(sblock);
614                 return r;
615         }
616
617         return dm_tm_commit(cmd->tm, sblock);
618 }
619
620 /*----------------------------------------------------------------*/
621
622 /*
623  * The mappings are held in a dm-array that has 64-bit values stored in
624  * little-endian format.  The index is the cblock, the high 48bits of the
625  * value are the oblock and the low 16 bit the flags.
626  */
627 #define FLAGS_MASK ((1 << 16) - 1)
628
629 static __le64 pack_value(dm_oblock_t block, unsigned flags)
630 {
631         uint64_t value = from_oblock(block);
632         value <<= 16;
633         value = value | (flags & FLAGS_MASK);
634         return cpu_to_le64(value);
635 }
636
637 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
638 {
639         uint64_t value = le64_to_cpu(value_le);
640         uint64_t b = value >> 16;
641         *block = to_oblock(b);
642         *flags = value & FLAGS_MASK;
643 }
644
645 /*----------------------------------------------------------------*/
646
647 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
648                                                  sector_t data_block_size,
649                                                  bool may_format_device,
650                                                  size_t policy_hint_size)
651 {
652         int r;
653         struct dm_cache_metadata *cmd;
654
655         cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
656         if (!cmd) {
657                 DMERR("could not allocate metadata struct");
658                 return NULL;
659         }
660
661         init_rwsem(&cmd->root_lock);
662         cmd->bdev = bdev;
663         cmd->data_block_size = data_block_size;
664         cmd->cache_blocks = 0;
665         cmd->policy_hint_size = policy_hint_size;
666         cmd->changed = true;
667
668         r = __create_persistent_data_objects(cmd, may_format_device);
669         if (r) {
670                 kfree(cmd);
671                 return ERR_PTR(r);
672         }
673
674         r = __begin_transaction_flags(cmd, clear_clean_shutdown);
675         if (r < 0) {
676                 dm_cache_metadata_close(cmd);
677                 return ERR_PTR(r);
678         }
679
680         return cmd;
681 }
682
/*
 * Destroy the persistent-data objects owned by @cmd and free @cmd itself.
 * @cmd must not be used after this call.
 */
void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
{
        __destroy_persistent_data_objects(cmd);
        kfree(cmd);
}
688
689 /*
690  * Checks that the given cache block is either unmapped or clean.
691  */
692 static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
693                                    bool *result)
694 {
695         int r;
696         __le64 value;
697         dm_oblock_t ob;
698         unsigned flags;
699
700         r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
701         if (r) {
702                 DMERR("block_unmapped_or_clean failed");
703                 return r;
704         }
705
706         unpack_value(value, &ob, &flags);
707         *result = !((flags & M_VALID) && (flags & M_DIRTY));
708
709         return 0;
710 }
711
712 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
713                                         dm_cblock_t begin, dm_cblock_t end,
714                                         bool *result)
715 {
716         int r;
717         *result = true;
718
719         while (begin != end) {
720                 r = block_unmapped_or_clean(cmd, begin, result);
721                 if (r)
722                         return r;
723
724                 if (!*result) {
725                         DMERR("cache block %llu is dirty",
726                               (unsigned long long) from_cblock(begin));
727                         return 0;
728                 }
729
730                 begin = to_cblock(from_cblock(begin) + 1);
731         }
732
733         return 0;
734 }
735
736 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
737 {
738         int r;
739         bool clean;
740         __le64 null_mapping = pack_value(0, 0);
741
742         down_write(&cmd->root_lock);
743         __dm_bless_for_disk(&null_mapping);
744
745         if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
746                 r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
747                 if (r) {
748                         __dm_unbless_for_disk(&null_mapping);
749                         goto out;
750                 }
751
752                 if (!clean) {
753                         DMERR("unable to shrink cache due to dirty blocks");
754                         r = -EINVAL;
755                         __dm_unbless_for_disk(&null_mapping);
756                         goto out;
757                 }
758         }
759
760         r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
761                             from_cblock(new_cache_size),
762                             &null_mapping, &cmd->root);
763         if (!r)
764                 cmd->cache_blocks = new_cache_size;
765         cmd->changed = true;
766
767 out:
768         up_write(&cmd->root_lock);
769
770         return r;
771 }
772
773 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
774                                    sector_t discard_block_size,
775                                    dm_oblock_t new_nr_entries)
776 {
777         int r;
778
779         down_write(&cmd->root_lock);
780         r = dm_bitset_resize(&cmd->discard_info,
781                              cmd->discard_root,
782                              from_oblock(cmd->discard_nr_blocks),
783                              from_oblock(new_nr_entries),
784                              false, &cmd->discard_root);
785         if (!r) {
786                 cmd->discard_block_size = discard_block_size;
787                 cmd->discard_nr_blocks = new_nr_entries;
788         }
789
790         cmd->changed = true;
791         up_write(&cmd->root_lock);
792
793         return r;
794 }
795
796 static int __set_discard(struct dm_cache_metadata *cmd, dm_oblock_t b)
797 {
798         return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
799                                  from_oblock(b), &cmd->discard_root);
800 }
801
802 static int __clear_discard(struct dm_cache_metadata *cmd, dm_oblock_t b)
803 {
804         return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
805                                    from_oblock(b), &cmd->discard_root);
806 }
807
808 static int __is_discarded(struct dm_cache_metadata *cmd, dm_oblock_t b,
809                           bool *is_discarded)
810 {
811         return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
812                                   from_oblock(b), &cmd->discard_root,
813                                   is_discarded);
814 }
815
816 static int __discard(struct dm_cache_metadata *cmd,
817                      dm_oblock_t dblock, bool discard)
818 {
819         int r;
820
821         r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
822         if (r)
823                 return r;
824
825         cmd->changed = true;
826         return 0;
827 }
828
829 int dm_cache_set_discard(struct dm_cache_metadata *cmd,
830                          dm_oblock_t dblock, bool discard)
831 {
832         int r;
833
834         down_write(&cmd->root_lock);
835         r = __discard(cmd, dblock, discard);
836         up_write(&cmd->root_lock);
837
838         return r;
839 }
840
841 static int __load_discards(struct dm_cache_metadata *cmd,
842                            load_discard_fn fn, void *context)
843 {
844         int r = 0;
845         dm_block_t b;
846         bool discard;
847
848         for (b = 0; b < from_oblock(cmd->discard_nr_blocks); b++) {
849                 dm_oblock_t dblock = to_oblock(b);
850
851                 if (cmd->clean_when_opened) {
852                         r = __is_discarded(cmd, dblock, &discard);
853                         if (r)
854                                 return r;
855                 } else
856                         discard = false;
857
858                 r = fn(context, cmd->discard_block_size, dblock, discard);
859                 if (r)
860                         break;
861         }
862
863         return r;
864 }
865
866 int dm_cache_load_discards(struct dm_cache_metadata *cmd,
867                            load_discard_fn fn, void *context)
868 {
869         int r;
870
871         down_read(&cmd->root_lock);
872         r = __load_discards(cmd, fn, context);
873         up_read(&cmd->root_lock);
874
875         return r;
876 }
877
878 dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd)
879 {
880         dm_cblock_t r;
881
882         down_read(&cmd->root_lock);
883         r = cmd->cache_blocks;
884         up_read(&cmd->root_lock);
885
886         return r;
887 }
888
889 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
890 {
891         int r;
892         __le64 value = pack_value(0, 0);
893
894         __dm_bless_for_disk(&value);
895         r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
896                                &value, &cmd->root);
897         if (r)
898                 return r;
899
900         cmd->changed = true;
901         return 0;
902 }
903
904 int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
905 {
906         int r;
907
908         down_write(&cmd->root_lock);
909         r = __remove(cmd, cblock);
910         up_write(&cmd->root_lock);
911
912         return r;
913 }
914
915 static int __insert(struct dm_cache_metadata *cmd,
916                     dm_cblock_t cblock, dm_oblock_t oblock)
917 {
918         int r;
919         __le64 value = pack_value(oblock, M_VALID);
920         __dm_bless_for_disk(&value);
921
922         r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
923                                &value, &cmd->root);
924         if (r)
925                 return r;
926
927         cmd->changed = true;
928         return 0;
929 }
930
931 int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
932                             dm_cblock_t cblock, dm_oblock_t oblock)
933 {
934         int r;
935
936         down_write(&cmd->root_lock);
937         r = __insert(cmd, cblock, oblock);
938         up_write(&cmd->root_lock);
939
940         return r;
941 }
942
943 struct thunk {
944         load_mapping_fn fn;
945         void *context;
946
947         struct dm_cache_metadata *cmd;
948         bool respect_dirty_flags;
949         bool hints_valid;
950 };
951
952 static bool policy_unchanged(struct dm_cache_metadata *cmd,
953                              struct dm_cache_policy *policy)
954 {
955         const char *policy_name = dm_cache_policy_get_name(policy);
956         const unsigned *policy_version = dm_cache_policy_get_version(policy);
957         size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
958
959         /*
960          * Ensure policy names match.
961          */
962         if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
963                 return false;
964
965         /*
966          * Ensure policy major versions match.
967          */
968         if (cmd->policy_version[0] != policy_version[0])
969                 return false;
970
971         /*
972          * Ensure policy hint sizes match.
973          */
974         if (cmd->policy_hint_size != policy_hint_size)
975                 return false;
976
977         return true;
978 }
979
980 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
981 {
982         return cmd->hint_root && cmd->policy_hint_size;
983 }
984
985 static bool hints_array_available(struct dm_cache_metadata *cmd,
986                                   struct dm_cache_policy *policy)
987 {
988         return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
989                 hints_array_initialized(cmd);
990 }
991
992 static int __load_mapping(void *context, uint64_t cblock, void *leaf)
993 {
994         int r = 0;
995         bool dirty;
996         __le64 value;
997         __le32 hint_value = 0;
998         dm_oblock_t oblock;
999         unsigned flags;
1000         struct thunk *thunk = context;
1001         struct dm_cache_metadata *cmd = thunk->cmd;
1002
1003         memcpy(&value, leaf, sizeof(value));
1004         unpack_value(value, &oblock, &flags);
1005
1006         if (flags & M_VALID) {
1007                 if (thunk->hints_valid) {
1008                         r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
1009                                                cblock, &hint_value);
1010                         if (r && r != -ENODATA)
1011                                 return r;
1012                 }
1013
1014                 dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
1015                 r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
1016                               dirty, le32_to_cpu(hint_value), thunk->hints_valid);
1017         }
1018
1019         return r;
1020 }
1021
1022 static int __load_mappings(struct dm_cache_metadata *cmd,
1023                            struct dm_cache_policy *policy,
1024                            load_mapping_fn fn, void *context)
1025 {
1026         struct thunk thunk;
1027
1028         thunk.fn = fn;
1029         thunk.context = context;
1030
1031         thunk.cmd = cmd;
1032         thunk.respect_dirty_flags = cmd->clean_when_opened;
1033         thunk.hints_valid = hints_array_available(cmd, policy);
1034
1035         return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
1036 }
1037
1038 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
1039                            struct dm_cache_policy *policy,
1040                            load_mapping_fn fn, void *context)
1041 {
1042         int r;
1043
1044         down_read(&cmd->root_lock);
1045         r = __load_mappings(cmd, policy, fn, context);
1046         up_read(&cmd->root_lock);
1047
1048         return r;
1049 }
1050
1051 static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
1052 {
1053         int r = 0;
1054         __le64 value;
1055         dm_oblock_t oblock;
1056         unsigned flags;
1057
1058         memcpy(&value, leaf, sizeof(value));
1059         unpack_value(value, &oblock, &flags);
1060
1061         return r;
1062 }
1063
1064 static int __dump_mappings(struct dm_cache_metadata *cmd)
1065 {
1066         return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
1067 }
1068
1069 void dm_cache_dump(struct dm_cache_metadata *cmd)
1070 {
1071         down_read(&cmd->root_lock);
1072         __dump_mappings(cmd);
1073         up_read(&cmd->root_lock);
1074 }
1075
1076 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
1077 {
1078         int r;
1079
1080         down_read(&cmd->root_lock);
1081         r = cmd->changed;
1082         up_read(&cmd->root_lock);
1083
1084         return r;
1085 }
1086
1087 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1088 {
1089         int r;
1090         unsigned flags;
1091         dm_oblock_t oblock;
1092         __le64 value;
1093
1094         r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1095         if (r)
1096                 return r;
1097
1098         unpack_value(value, &oblock, &flags);
1099
1100         if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1101                 /* nothing to be done */
1102                 return 0;
1103
1104         value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1105         __dm_bless_for_disk(&value);
1106
1107         r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1108                                &value, &cmd->root);
1109         if (r)
1110                 return r;
1111
1112         cmd->changed = true;
1113         return 0;
1114
1115 }
1116
1117 int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
1118                        dm_cblock_t cblock, bool dirty)
1119 {
1120         int r;
1121
1122         down_write(&cmd->root_lock);
1123         r = __dirty(cmd, cblock, dirty);
1124         up_write(&cmd->root_lock);
1125
1126         return r;
1127 }
1128
1129 void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
1130                                  struct dm_cache_statistics *stats)
1131 {
1132         down_read(&cmd->root_lock);
1133         *stats = cmd->stats;
1134         up_read(&cmd->root_lock);
1135 }
1136
1137 void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
1138                                  struct dm_cache_statistics *stats)
1139 {
1140         down_write(&cmd->root_lock);
1141         cmd->stats = *stats;
1142         up_write(&cmd->root_lock);
1143 }
1144
1145 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1146 {
1147         int r;
1148         flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1149                                  clear_clean_shutdown);
1150
1151         down_write(&cmd->root_lock);
1152         r = __commit_transaction(cmd, mutator);
1153         if (r)
1154                 goto out;
1155
1156         r = __begin_transaction(cmd);
1157
1158 out:
1159         up_write(&cmd->root_lock);
1160         return r;
1161 }
1162
1163 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1164                                            dm_block_t *result)
1165 {
1166         int r = -EINVAL;
1167
1168         down_read(&cmd->root_lock);
1169         r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1170         up_read(&cmd->root_lock);
1171
1172         return r;
1173 }
1174
1175 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1176                                    dm_block_t *result)
1177 {
1178         int r = -EINVAL;
1179
1180         down_read(&cmd->root_lock);
1181         r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1182         up_read(&cmd->root_lock);
1183
1184         return r;
1185 }
1186
1187 /*----------------------------------------------------------------*/
1188
1189 static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1190 {
1191         int r;
1192         __le32 value;
1193         size_t hint_size;
1194         const char *policy_name = dm_cache_policy_get_name(policy);
1195         const unsigned *policy_version = dm_cache_policy_get_version(policy);
1196
1197         if (!policy_name[0] ||
1198             (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
1199                 return -EINVAL;
1200
1201         if (!policy_unchanged(cmd, policy)) {
1202                 strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
1203                 memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
1204
1205                 hint_size = dm_cache_policy_get_hint_size(policy);
1206                 if (!hint_size)
1207                         return 0; /* short-circuit hints initialization */
1208                 cmd->policy_hint_size = hint_size;
1209
1210                 if (cmd->hint_root) {
1211                         r = dm_array_del(&cmd->hint_info, cmd->hint_root);
1212                         if (r)
1213                                 return r;
1214                 }
1215
1216                 r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
1217                 if (r)
1218                         return r;
1219
1220                 value = cpu_to_le32(0);
1221                 __dm_bless_for_disk(&value);
1222                 r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
1223                                     from_cblock(cmd->cache_blocks),
1224                                     &value, &cmd->hint_root);
1225                 if (r)
1226                         return r;
1227         }
1228
1229         return 0;
1230 }
1231
1232 int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1233 {
1234         int r;
1235
1236         down_write(&cmd->root_lock);
1237         r = begin_hints(cmd, policy);
1238         up_write(&cmd->root_lock);
1239
1240         return r;
1241 }
1242
1243 static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
1244                      uint32_t hint)
1245 {
1246         int r;
1247         __le32 value = cpu_to_le32(hint);
1248         __dm_bless_for_disk(&value);
1249
1250         r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
1251                                from_cblock(cblock), &value, &cmd->hint_root);
1252         cmd->changed = true;
1253
1254         return r;
1255 }
1256
1257 int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
1258                        uint32_t hint)
1259 {
1260         int r;
1261
1262         if (!hints_array_initialized(cmd))
1263                 return 0;
1264
1265         down_write(&cmd->root_lock);
1266         r = save_hint(cmd, cblock, hint);
1267         up_write(&cmd->root_lock);
1268
1269         return r;
1270 }
1271
1272 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
1273 {
1274         return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
1275 }