]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/btrfs/check-integrity.c
Merge branches 'fix/core', 'fix/cs4271', 'fix/cs42l52', 'fix/mxs', 'fix/samsung'...
[karo-tx-linux.git] / fs / btrfs / check-integrity.c
1 /*
2  * Copyright (C) STRATO AG 2011.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 /*
20  * This module can be used to catch cases when the btrfs kernel
21  * code executes write requests to the disk that bring the file
22  * system in an inconsistent state. In such a state, a power-loss
23  * or kernel panic event would cause that the data on disk is
24  * lost or at least damaged.
25  *
26  * Code is added that examines all block write requests during
27  * runtime (including writes of the super block). Three rules
28  * are verified and an error is printed on violation of the
29  * rules:
30  * 1. It is not allowed to write a disk block which is
31  *    currently referenced by the super block (either directly
32  *    or indirectly).
33  * 2. When a super block is written, it is verified that all
34  *    referenced (directly or indirectly) blocks fulfill the
35  *    following requirements:
36  *    2a. All referenced blocks have either been present when
37  *        the file system was mounted, (i.e., they have been
38  *        referenced by the super block) or they have been
39  *        written since then and the write completion callback
40  *        was called and no write error was indicated and a
41  *        FLUSH request to the device where these blocks are
42  *        located was received and completed.
43  *    2b. All referenced blocks need to have a generation
44  *        number which is equal to the parent's number.
45  *
46  * One issue that was found using this module was that the log
47  * tree on disk became temporarily corrupted because disk blocks
48  * that had been in use for the log tree had been freed and
49  * reused too early, while being referenced by the written super
50  * block.
51  *
52  * The search term in the kernel log that can be used to filter
53  * on the existence of detected integrity issues is
54  * "btrfs: attempt".
55  *
56  * The integrity check is enabled via mount options. These
57  * mount options are only supported if the integrity check
58  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
59  *
60  * Example #1, apply integrity checks to all metadata:
61  * mount /dev/sdb1 /mnt -o check_int
62  *
63  * Example #2, apply integrity checks to all metadata and
64  * to data extents:
65  * mount /dev/sdb1 /mnt -o check_int_data
66  *
67  * Example #3, apply integrity checks to all metadata and dump
68  * the tree that the super block references to kernel messages
69  * each time after a super block was written:
70  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
71  *
72  * If the integrity check tool is included and activated in
73  * the mount options, plenty of kernel memory is used, and
74  * plenty of additional CPU cycles are spent. Enabling this
75  * functionality is not intended for normal use. In most
76  * cases, unless you are a btrfs developer who needs to verify
77  * the integrity of (super)-block write requests, do not
78  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
79  * include and compile the integrity check tool.
80  */
81
82 #include <linux/sched.h>
83 #include <linux/slab.h>
84 #include <linux/buffer_head.h>
85 #include <linux/mutex.h>
86 #include <linux/crc32c.h>
87 #include <linux/genhd.h>
88 #include <linux/blkdev.h>
89 #include "ctree.h"
90 #include "disk-io.h"
91 #include "transaction.h"
92 #include "extent_io.h"
93 #include "volumes.h"
94 #include "print-tree.h"
95 #include "locking.h"
96 #include "check-integrity.h"
97 #include "rcu-string.h"
98
99 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
100 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
101 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
102 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
103 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
104 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
105 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
106 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)    /* in characters,
107                                                          * excluding " [...]" */
108 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
109
110 /*
111  * The definition of the bitmask fields for the print_mask.
112  * They are specified with the mount option check_integrity_print_mask.
113  */
114 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE                     0x00000001
115 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION         0x00000002
116 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE                  0x00000004
117 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE                 0x00000008
118 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH                        0x00000010
119 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH                        0x00000020
120 #define BTRFSIC_PRINT_MASK_VERBOSE                              0x00000040
121 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE                         0x00000080
122 #define BTRFSIC_PRINT_MASK_INITIAL_TREE                         0x00000100
123 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES                    0x00000200
124 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE                     0x00000400
125 #define BTRFSIC_PRINT_MASK_NUM_COPIES                           0x00000800
126 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS                0x00001000
127
128 struct btrfsic_dev_state;
129 struct btrfsic_state;
130
131 struct btrfsic_block {
132         u32 magic_num;          /* only used for debug purposes */
133         unsigned int is_metadata:1;     /* if it is meta-data, not data-data */
134         unsigned int is_superblock:1;   /* if it is one of the superblocks */
135         unsigned int is_iodone:1;       /* if is done by lower subsystem */
136         unsigned int iodone_w_error:1;  /* error was indicated to endio */
137         unsigned int never_written:1;   /* block was added because it was
138                                          * referenced, not because it was
139                                          * written */
140         unsigned int mirror_num:2;      /* large enough to hold
141                                          * BTRFS_SUPER_MIRROR_MAX */
142         struct btrfsic_dev_state *dev_state;
143         u64 dev_bytenr;         /* key, physical byte num on disk */
144         u64 logical_bytenr;     /* logical byte num on disk */
145         u64 generation;
146         struct btrfs_disk_key disk_key; /* extra info to print in case of
147                                          * issues, will not always be correct */
148         struct list_head collision_resolving_node;      /* list node */
149         struct list_head all_blocks_node;       /* list node */
150
151         /* the following two lists contain block_link items */
152         struct list_head ref_to_list;   /* list */
153         struct list_head ref_from_list; /* list */
154         struct btrfsic_block *next_in_same_bio;
155         void *orig_bio_bh_private;
156         union {
157                 bio_end_io_t *bio;
158                 bh_end_io_t *bh;
159         } orig_bio_bh_end_io;
160         int submit_bio_bh_rw;
161         u64 flush_gen; /* only valid if !never_written */
162 };
163
164 /*
165  * Elements of this type are allocated dynamically and required because
166  * each block object can refer to and can be ref from multiple blocks.
167  * The key to lookup them in the hashtable is the dev_bytenr of
168  * the block ref to plus the one from the block refered from.
169  * The fact that they are searchable via a hashtable and that a
170  * ref_cnt is maintained is not required for the btrfs integrity
171  * check algorithm itself, it is only used to make the output more
172  * beautiful in case that an error is detected (an error is defined
173  * as a write operation to a block while that block is still referenced).
174  */
175 struct btrfsic_block_link {
176         u32 magic_num;          /* only used for debug purposes */
177         u32 ref_cnt;
178         struct list_head node_ref_to;   /* list node */
179         struct list_head node_ref_from; /* list node */
180         struct list_head collision_resolving_node;      /* list node */
181         struct btrfsic_block *block_ref_to;
182         struct btrfsic_block *block_ref_from;
183         u64 parent_generation;
184 };
185
186 struct btrfsic_dev_state {
187         u32 magic_num;          /* only used for debug purposes */
188         struct block_device *bdev;
189         struct btrfsic_state *state;
190         struct list_head collision_resolving_node;      /* list node */
191         struct btrfsic_block dummy_block_for_bio_bh_flush;
192         u64 last_flush_gen;
193         char name[BDEVNAME_SIZE];
194 };
195
196 struct btrfsic_block_hashtable {
197         struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
198 };
199
200 struct btrfsic_block_link_hashtable {
201         struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
202 };
203
204 struct btrfsic_dev_state_hashtable {
205         struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
206 };
207
208 struct btrfsic_block_data_ctx {
209         u64 start;              /* virtual bytenr */
210         u64 dev_bytenr;         /* physical bytenr on device */
211         u32 len;
212         struct btrfsic_dev_state *dev;
213         char **datav;
214         struct page **pagev;
215         void *mem_to_free;
216 };
217
218 /* This structure is used to implement recursion without occupying
219  * any stack space, refer to btrfsic_process_metablock() */
220 struct btrfsic_stack_frame {
221         u32 magic;
222         u32 nr;
223         int error;
224         int i;
225         int limit_nesting;
226         int num_copies;
227         int mirror_num;
228         struct btrfsic_block *block;
229         struct btrfsic_block_data_ctx *block_ctx;
230         struct btrfsic_block *next_block;
231         struct btrfsic_block_data_ctx next_block_ctx;
232         struct btrfs_header *hdr;
233         struct btrfsic_stack_frame *prev;
234 };
235
236 /* Some state per mounted filesystem */
237 struct btrfsic_state {
238         u32 print_mask;
239         int include_extent_data;
240         int csum_size;
241         struct list_head all_blocks_list;
242         struct btrfsic_block_hashtable block_hashtable;
243         struct btrfsic_block_link_hashtable block_link_hashtable;
244         struct btrfs_root *root;
245         u64 max_superblock_generation;
246         struct btrfsic_block *latest_superblock;
247         u32 metablock_size;
248         u32 datablock_size;
249 };
250
251 static void btrfsic_block_init(struct btrfsic_block *b);
252 static struct btrfsic_block *btrfsic_block_alloc(void);
253 static void btrfsic_block_free(struct btrfsic_block *b);
254 static void btrfsic_block_link_init(struct btrfsic_block_link *n);
255 static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
256 static void btrfsic_block_link_free(struct btrfsic_block_link *n);
257 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
258 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
259 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
260 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
261 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
262                                         struct btrfsic_block_hashtable *h);
263 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
264 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
265                 struct block_device *bdev,
266                 u64 dev_bytenr,
267                 struct btrfsic_block_hashtable *h);
268 static void btrfsic_block_link_hashtable_init(
269                 struct btrfsic_block_link_hashtable *h);
270 static void btrfsic_block_link_hashtable_add(
271                 struct btrfsic_block_link *l,
272                 struct btrfsic_block_link_hashtable *h);
273 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
274 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
275                 struct block_device *bdev_ref_to,
276                 u64 dev_bytenr_ref_to,
277                 struct block_device *bdev_ref_from,
278                 u64 dev_bytenr_ref_from,
279                 struct btrfsic_block_link_hashtable *h);
280 static void btrfsic_dev_state_hashtable_init(
281                 struct btrfsic_dev_state_hashtable *h);
282 static void btrfsic_dev_state_hashtable_add(
283                 struct btrfsic_dev_state *ds,
284                 struct btrfsic_dev_state_hashtable *h);
285 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
286 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
287                 struct block_device *bdev,
288                 struct btrfsic_dev_state_hashtable *h);
289 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
290 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
291 static int btrfsic_process_superblock(struct btrfsic_state *state,
292                                       struct btrfs_fs_devices *fs_devices);
293 static int btrfsic_process_metablock(struct btrfsic_state *state,
294                                      struct btrfsic_block *block,
295                                      struct btrfsic_block_data_ctx *block_ctx,
296                                      int limit_nesting, int force_iodone_flag);
297 static void btrfsic_read_from_block_data(
298         struct btrfsic_block_data_ctx *block_ctx,
299         void *dst, u32 offset, size_t len);
300 static int btrfsic_create_link_to_next_block(
301                 struct btrfsic_state *state,
302                 struct btrfsic_block *block,
303                 struct btrfsic_block_data_ctx
304                 *block_ctx, u64 next_bytenr,
305                 int limit_nesting,
306                 struct btrfsic_block_data_ctx *next_block_ctx,
307                 struct btrfsic_block **next_blockp,
308                 int force_iodone_flag,
309                 int *num_copiesp, int *mirror_nump,
310                 struct btrfs_disk_key *disk_key,
311                 u64 parent_generation);
312 static int btrfsic_handle_extent_data(struct btrfsic_state *state,
313                                       struct btrfsic_block *block,
314                                       struct btrfsic_block_data_ctx *block_ctx,
315                                       u32 item_offset, int force_iodone_flag);
316 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
317                              struct btrfsic_block_data_ctx *block_ctx_out,
318                              int mirror_num);
319 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
320                                   u32 len, struct block_device *bdev,
321                                   struct btrfsic_block_data_ctx *block_ctx_out);
322 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
323 static int btrfsic_read_block(struct btrfsic_state *state,
324                               struct btrfsic_block_data_ctx *block_ctx);
325 static void btrfsic_dump_database(struct btrfsic_state *state);
326 static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
327 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
328                                      char **datav, unsigned int num_pages);
329 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
330                                           u64 dev_bytenr, char **mapped_datav,
331                                           unsigned int num_pages,
332                                           struct bio *bio, int *bio_is_patched,
333                                           struct buffer_head *bh,
334                                           int submit_bio_bh_rw);
335 static int btrfsic_process_written_superblock(
336                 struct btrfsic_state *state,
337                 struct btrfsic_block *const block,
338                 struct btrfs_super_block *const super_hdr);
339 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
340 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
341 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
342                                               const struct btrfsic_block *block,
343                                               int recursion_level);
344 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
345                                         struct btrfsic_block *const block,
346                                         int recursion_level);
347 static void btrfsic_print_add_link(const struct btrfsic_state *state,
348                                    const struct btrfsic_block_link *l);
349 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
350                                    const struct btrfsic_block_link *l);
351 static char btrfsic_get_block_type(const struct btrfsic_state *state,
352                                    const struct btrfsic_block *block);
353 static void btrfsic_dump_tree(const struct btrfsic_state *state);
354 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
355                                   const struct btrfsic_block *block,
356                                   int indent_level);
357 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
358                 struct btrfsic_state *state,
359                 struct btrfsic_block_data_ctx *next_block_ctx,
360                 struct btrfsic_block *next_block,
361                 struct btrfsic_block *from_block,
362                 u64 parent_generation);
363 static struct btrfsic_block *btrfsic_block_lookup_or_add(
364                 struct btrfsic_state *state,
365                 struct btrfsic_block_data_ctx *block_ctx,
366                 const char *additional_string,
367                 int is_metadata,
368                 int is_iodone,
369                 int never_written,
370                 int mirror_num,
371                 int *was_created);
372 static int btrfsic_process_superblock_dev_mirror(
373                 struct btrfsic_state *state,
374                 struct btrfsic_dev_state *dev_state,
375                 struct btrfs_device *device,
376                 int superblock_mirror_num,
377                 struct btrfsic_dev_state **selected_dev_state,
378                 struct btrfs_super_block *selected_super);
379 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
380                 struct block_device *bdev);
381 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
382                                            u64 bytenr,
383                                            struct btrfsic_dev_state *dev_state,
384                                            u64 dev_bytenr);
385
386 static struct mutex btrfsic_mutex;
387 static int btrfsic_is_initialized;
388 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
389
390
391 static void btrfsic_block_init(struct btrfsic_block *b)
392 {
393         b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
394         b->dev_state = NULL;
395         b->dev_bytenr = 0;
396         b->logical_bytenr = 0;
397         b->generation = BTRFSIC_GENERATION_UNKNOWN;
398         b->disk_key.objectid = 0;
399         b->disk_key.type = 0;
400         b->disk_key.offset = 0;
401         b->is_metadata = 0;
402         b->is_superblock = 0;
403         b->is_iodone = 0;
404         b->iodone_w_error = 0;
405         b->never_written = 0;
406         b->mirror_num = 0;
407         b->next_in_same_bio = NULL;
408         b->orig_bio_bh_private = NULL;
409         b->orig_bio_bh_end_io.bio = NULL;
410         INIT_LIST_HEAD(&b->collision_resolving_node);
411         INIT_LIST_HEAD(&b->all_blocks_node);
412         INIT_LIST_HEAD(&b->ref_to_list);
413         INIT_LIST_HEAD(&b->ref_from_list);
414         b->submit_bio_bh_rw = 0;
415         b->flush_gen = 0;
416 }
417
418 static struct btrfsic_block *btrfsic_block_alloc(void)
419 {
420         struct btrfsic_block *b;
421
422         b = kzalloc(sizeof(*b), GFP_NOFS);
423         if (NULL != b)
424                 btrfsic_block_init(b);
425
426         return b;
427 }
428
429 static void btrfsic_block_free(struct btrfsic_block *b)
430 {
431         BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
432         kfree(b);
433 }
434
435 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
436 {
437         l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
438         l->ref_cnt = 1;
439         INIT_LIST_HEAD(&l->node_ref_to);
440         INIT_LIST_HEAD(&l->node_ref_from);
441         INIT_LIST_HEAD(&l->collision_resolving_node);
442         l->block_ref_to = NULL;
443         l->block_ref_from = NULL;
444 }
445
446 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
447 {
448         struct btrfsic_block_link *l;
449
450         l = kzalloc(sizeof(*l), GFP_NOFS);
451         if (NULL != l)
452                 btrfsic_block_link_init(l);
453
454         return l;
455 }
456
457 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
458 {
459         BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
460         kfree(l);
461 }
462
463 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
464 {
465         ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
466         ds->bdev = NULL;
467         ds->state = NULL;
468         ds->name[0] = '\0';
469         INIT_LIST_HEAD(&ds->collision_resolving_node);
470         ds->last_flush_gen = 0;
471         btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
472         ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
473         ds->dummy_block_for_bio_bh_flush.dev_state = ds;
474 }
475
476 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
477 {
478         struct btrfsic_dev_state *ds;
479
480         ds = kzalloc(sizeof(*ds), GFP_NOFS);
481         if (NULL != ds)
482                 btrfsic_dev_state_init(ds);
483
484         return ds;
485 }
486
487 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
488 {
489         BUG_ON(!(NULL == ds ||
490                  BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
491         kfree(ds);
492 }
493
494 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
495 {
496         int i;
497
498         for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
499                 INIT_LIST_HEAD(h->table + i);
500 }
501
502 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
503                                         struct btrfsic_block_hashtable *h)
504 {
505         const unsigned int hashval =
506             (((unsigned int)(b->dev_bytenr >> 16)) ^
507              ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
508              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
509
510         list_add(&b->collision_resolving_node, h->table + hashval);
511 }
512
513 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
514 {
515         list_del(&b->collision_resolving_node);
516 }
517
518 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
519                 struct block_device *bdev,
520                 u64 dev_bytenr,
521                 struct btrfsic_block_hashtable *h)
522 {
523         const unsigned int hashval =
524             (((unsigned int)(dev_bytenr >> 16)) ^
525              ((unsigned int)((uintptr_t)bdev))) &
526              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
527         struct list_head *elem;
528
529         list_for_each(elem, h->table + hashval) {
530                 struct btrfsic_block *const b =
531                     list_entry(elem, struct btrfsic_block,
532                                collision_resolving_node);
533
534                 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
535                         return b;
536         }
537
538         return NULL;
539 }
540
541 static void btrfsic_block_link_hashtable_init(
542                 struct btrfsic_block_link_hashtable *h)
543 {
544         int i;
545
546         for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
547                 INIT_LIST_HEAD(h->table + i);
548 }
549
550 static void btrfsic_block_link_hashtable_add(
551                 struct btrfsic_block_link *l,
552                 struct btrfsic_block_link_hashtable *h)
553 {
554         const unsigned int hashval =
555             (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
556              ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
557              ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
558              ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
559              & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
560
561         BUG_ON(NULL == l->block_ref_to);
562         BUG_ON(NULL == l->block_ref_from);
563         list_add(&l->collision_resolving_node, h->table + hashval);
564 }
565
566 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
567 {
568         list_del(&l->collision_resolving_node);
569 }
570
571 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
572                 struct block_device *bdev_ref_to,
573                 u64 dev_bytenr_ref_to,
574                 struct block_device *bdev_ref_from,
575                 u64 dev_bytenr_ref_from,
576                 struct btrfsic_block_link_hashtable *h)
577 {
578         const unsigned int hashval =
579             (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
580              ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
581              ((unsigned int)((uintptr_t)bdev_ref_to)) ^
582              ((unsigned int)((uintptr_t)bdev_ref_from))) &
583              (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
584         struct list_head *elem;
585
586         list_for_each(elem, h->table + hashval) {
587                 struct btrfsic_block_link *const l =
588                     list_entry(elem, struct btrfsic_block_link,
589                                collision_resolving_node);
590
591                 BUG_ON(NULL == l->block_ref_to);
592                 BUG_ON(NULL == l->block_ref_from);
593                 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
594                     l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
595                     l->block_ref_from->dev_state->bdev == bdev_ref_from &&
596                     l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
597                         return l;
598         }
599
600         return NULL;
601 }
602
603 static void btrfsic_dev_state_hashtable_init(
604                 struct btrfsic_dev_state_hashtable *h)
605 {
606         int i;
607
608         for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
609                 INIT_LIST_HEAD(h->table + i);
610 }
611
612 static void btrfsic_dev_state_hashtable_add(
613                 struct btrfsic_dev_state *ds,
614                 struct btrfsic_dev_state_hashtable *h)
615 {
616         const unsigned int hashval =
617             (((unsigned int)((uintptr_t)ds->bdev)) &
618              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
619
620         list_add(&ds->collision_resolving_node, h->table + hashval);
621 }
622
623 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
624 {
625         list_del(&ds->collision_resolving_node);
626 }
627
628 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
629                 struct block_device *bdev,
630                 struct btrfsic_dev_state_hashtable *h)
631 {
632         const unsigned int hashval =
633             (((unsigned int)((uintptr_t)bdev)) &
634              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
635         struct list_head *elem;
636
637         list_for_each(elem, h->table + hashval) {
638                 struct btrfsic_dev_state *const ds =
639                     list_entry(elem, struct btrfsic_dev_state,
640                                collision_resolving_node);
641
642                 if (ds->bdev == bdev)
643                         return ds;
644         }
645
646         return NULL;
647 }
648
649 static int btrfsic_process_superblock(struct btrfsic_state *state,
650                                       struct btrfs_fs_devices *fs_devices)
651 {
652         int ret = 0;
653         struct btrfs_super_block *selected_super;
654         struct list_head *dev_head = &fs_devices->devices;
655         struct btrfs_device *device;
656         struct btrfsic_dev_state *selected_dev_state = NULL;
657         int pass;
658
659         BUG_ON(NULL == state);
660         selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
661         if (NULL == selected_super) {
662                 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
663                 return -1;
664         }
665
666         list_for_each_entry(device, dev_head, dev_list) {
667                 int i;
668                 struct btrfsic_dev_state *dev_state;
669
670                 if (!device->bdev || !device->name)
671                         continue;
672
673                 dev_state = btrfsic_dev_state_lookup(device->bdev);
674                 BUG_ON(NULL == dev_state);
675                 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
676                         ret = btrfsic_process_superblock_dev_mirror(
677                                         state, dev_state, device, i,
678                                         &selected_dev_state, selected_super);
679                         if (0 != ret && 0 == i) {
680                                 kfree(selected_super);
681                                 return ret;
682                         }
683                 }
684         }
685
686         if (NULL == state->latest_superblock) {
687                 printk(KERN_INFO "btrfsic: no superblock found!\n");
688                 kfree(selected_super);
689                 return -1;
690         }
691
692         state->csum_size = btrfs_super_csum_size(selected_super);
693
694         for (pass = 0; pass < 3; pass++) {
695                 int num_copies;
696                 int mirror_num;
697                 u64 next_bytenr;
698
699                 switch (pass) {
700                 case 0:
701                         next_bytenr = btrfs_super_root(selected_super);
702                         if (state->print_mask &
703                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
704                                 printk(KERN_INFO "root@%llu\n",
705                                        (unsigned long long)next_bytenr);
706                         break;
707                 case 1:
708                         next_bytenr = btrfs_super_chunk_root(selected_super);
709                         if (state->print_mask &
710                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
711                                 printk(KERN_INFO "chunk@%llu\n",
712                                        (unsigned long long)next_bytenr);
713                         break;
714                 case 2:
715                         next_bytenr = btrfs_super_log_root(selected_super);
716                         if (0 == next_bytenr)
717                                 continue;
718                         if (state->print_mask &
719                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
720                                 printk(KERN_INFO "log@%llu\n",
721                                        (unsigned long long)next_bytenr);
722                         break;
723                 }
724
725                 num_copies =
726                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
727                                      next_bytenr, state->metablock_size);
728                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
729                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
730                                (unsigned long long)next_bytenr, num_copies);
731
732                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
733                         struct btrfsic_block *next_block;
734                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
735                         struct btrfsic_block_link *l;
736
737                         ret = btrfsic_map_block(state, next_bytenr,
738                                                 state->metablock_size,
739                                                 &tmp_next_block_ctx,
740                                                 mirror_num);
741                         if (ret) {
742                                 printk(KERN_INFO "btrfsic:"
743                                        " btrfsic_map_block(root @%llu,"
744                                        " mirror %d) failed!\n",
745                                        (unsigned long long)next_bytenr,
746                                        mirror_num);
747                                 kfree(selected_super);
748                                 return -1;
749                         }
750
751                         next_block = btrfsic_block_hashtable_lookup(
752                                         tmp_next_block_ctx.dev->bdev,
753                                         tmp_next_block_ctx.dev_bytenr,
754                                         &state->block_hashtable);
755                         BUG_ON(NULL == next_block);
756
757                         l = btrfsic_block_link_hashtable_lookup(
758                                         tmp_next_block_ctx.dev->bdev,
759                                         tmp_next_block_ctx.dev_bytenr,
760                                         state->latest_superblock->dev_state->
761                                         bdev,
762                                         state->latest_superblock->dev_bytenr,
763                                         &state->block_link_hashtable);
764                         BUG_ON(NULL == l);
765
766                         ret = btrfsic_read_block(state, &tmp_next_block_ctx);
767                         if (ret < (int)PAGE_CACHE_SIZE) {
768                                 printk(KERN_INFO
769                                        "btrfsic: read @logical %llu failed!\n",
770                                        (unsigned long long)
771                                        tmp_next_block_ctx.start);
772                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
773                                 kfree(selected_super);
774                                 return -1;
775                         }
776
777                         ret = btrfsic_process_metablock(state,
778                                                         next_block,
779                                                         &tmp_next_block_ctx,
780                                                         BTRFS_MAX_LEVEL + 3, 1);
781                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
782                 }
783         }
784
785         kfree(selected_super);
786         return ret;
787 }
788
789 static int btrfsic_process_superblock_dev_mirror(
790                 struct btrfsic_state *state,
791                 struct btrfsic_dev_state *dev_state,
792                 struct btrfs_device *device,
793                 int superblock_mirror_num,
794                 struct btrfsic_dev_state **selected_dev_state,
795                 struct btrfs_super_block *selected_super)
796 {
797         struct btrfs_super_block *super_tmp;
798         u64 dev_bytenr;
799         struct buffer_head *bh;
800         struct btrfsic_block *superblock_tmp;
801         int pass;
802         struct block_device *const superblock_bdev = device->bdev;
803
804         /* super block bytenr is always the unmapped device bytenr */
805         dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
806         if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
807                 return -1;
808         bh = __bread(superblock_bdev, dev_bytenr / 4096,
809                      BTRFS_SUPER_INFO_SIZE);
810         if (NULL == bh)
811                 return -1;
812         super_tmp = (struct btrfs_super_block *)
813             (bh->b_data + (dev_bytenr & 4095));
814
815         if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
816             strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
817                     sizeof(super_tmp->magic)) ||
818             memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
819             btrfs_super_nodesize(super_tmp) != state->metablock_size ||
820             btrfs_super_leafsize(super_tmp) != state->metablock_size ||
821             btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
822                 brelse(bh);
823                 return 0;
824         }
825
826         superblock_tmp =
827             btrfsic_block_hashtable_lookup(superblock_bdev,
828                                            dev_bytenr,
829                                            &state->block_hashtable);
830         if (NULL == superblock_tmp) {
831                 superblock_tmp = btrfsic_block_alloc();
832                 if (NULL == superblock_tmp) {
833                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
834                         brelse(bh);
835                         return -1;
836                 }
837                 /* for superblock, only the dev_bytenr makes sense */
838                 superblock_tmp->dev_bytenr = dev_bytenr;
839                 superblock_tmp->dev_state = dev_state;
840                 superblock_tmp->logical_bytenr = dev_bytenr;
841                 superblock_tmp->generation = btrfs_super_generation(super_tmp);
842                 superblock_tmp->is_metadata = 1;
843                 superblock_tmp->is_superblock = 1;
844                 superblock_tmp->is_iodone = 1;
845                 superblock_tmp->never_written = 0;
846                 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
847                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
848                         printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
849                                      " @%llu (%s/%llu/%d)\n",
850                                      superblock_bdev,
851                                      rcu_str_deref(device->name),
852                                      (unsigned long long)dev_bytenr,
853                                      dev_state->name,
854                                      (unsigned long long)dev_bytenr,
855                                      superblock_mirror_num);
856                 list_add(&superblock_tmp->all_blocks_node,
857                          &state->all_blocks_list);
858                 btrfsic_block_hashtable_add(superblock_tmp,
859                                             &state->block_hashtable);
860         }
861
862         /* select the one with the highest generation field */
863         if (btrfs_super_generation(super_tmp) >
864             state->max_superblock_generation ||
865             0 == state->max_superblock_generation) {
866                 memcpy(selected_super, super_tmp, sizeof(*selected_super));
867                 *selected_dev_state = dev_state;
868                 state->max_superblock_generation =
869                     btrfs_super_generation(super_tmp);
870                 state->latest_superblock = superblock_tmp;
871         }
872
873         for (pass = 0; pass < 3; pass++) {
874                 u64 next_bytenr;
875                 int num_copies;
876                 int mirror_num;
877                 const char *additional_string = NULL;
878                 struct btrfs_disk_key tmp_disk_key;
879
880                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
881                 tmp_disk_key.offset = 0;
882                 switch (pass) {
883                 case 0:
884                         tmp_disk_key.objectid =
885                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
886                         additional_string = "initial root ";
887                         next_bytenr = btrfs_super_root(super_tmp);
888                         break;
889                 case 1:
890                         tmp_disk_key.objectid =
891                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
892                         additional_string = "initial chunk ";
893                         next_bytenr = btrfs_super_chunk_root(super_tmp);
894                         break;
895                 case 2:
896                         tmp_disk_key.objectid =
897                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
898                         additional_string = "initial log ";
899                         next_bytenr = btrfs_super_log_root(super_tmp);
900                         if (0 == next_bytenr)
901                                 continue;
902                         break;
903                 }
904
905                 num_copies =
906                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
907                                      next_bytenr, state->metablock_size);
908                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
909                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
910                                (unsigned long long)next_bytenr, num_copies);
911                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
912                         struct btrfsic_block *next_block;
913                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
914                         struct btrfsic_block_link *l;
915
916                         if (btrfsic_map_block(state, next_bytenr,
917                                               state->metablock_size,
918                                               &tmp_next_block_ctx,
919                                               mirror_num)) {
920                                 printk(KERN_INFO "btrfsic: btrfsic_map_block("
921                                        "bytenr @%llu, mirror %d) failed!\n",
922                                        (unsigned long long)next_bytenr,
923                                        mirror_num);
924                                 brelse(bh);
925                                 return -1;
926                         }
927
928                         next_block = btrfsic_block_lookup_or_add(
929                                         state, &tmp_next_block_ctx,
930                                         additional_string, 1, 1, 0,
931                                         mirror_num, NULL);
932                         if (NULL == next_block) {
933                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
934                                 brelse(bh);
935                                 return -1;
936                         }
937
938                         next_block->disk_key = tmp_disk_key;
939                         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
940                         l = btrfsic_block_link_lookup_or_add(
941                                         state, &tmp_next_block_ctx,
942                                         next_block, superblock_tmp,
943                                         BTRFSIC_GENERATION_UNKNOWN);
944                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
945                         if (NULL == l) {
946                                 brelse(bh);
947                                 return -1;
948                         }
949                 }
950         }
951         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
952                 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
953
954         brelse(bh);
955         return 0;
956 }
957
958 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
959 {
960         struct btrfsic_stack_frame *sf;
961
962         sf = kzalloc(sizeof(*sf), GFP_NOFS);
963         if (NULL == sf)
964                 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
965         else
966                 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
967         return sf;
968 }
969
970 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
971 {
972         BUG_ON(!(NULL == sf ||
973                  BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
974         kfree(sf);
975 }
976
977 static int btrfsic_process_metablock(
978                 struct btrfsic_state *state,
979                 struct btrfsic_block *const first_block,
980                 struct btrfsic_block_data_ctx *const first_block_ctx,
981                 int first_limit_nesting, int force_iodone_flag)
982 {
983         struct btrfsic_stack_frame initial_stack_frame = { 0 };
984         struct btrfsic_stack_frame *sf;
985         struct btrfsic_stack_frame *next_stack;
986         struct btrfs_header *const first_hdr =
987                 (struct btrfs_header *)first_block_ctx->datav[0];
988
989         BUG_ON(!first_hdr);
990         sf = &initial_stack_frame;
991         sf->error = 0;
992         sf->i = -1;
993         sf->limit_nesting = first_limit_nesting;
994         sf->block = first_block;
995         sf->block_ctx = first_block_ctx;
996         sf->next_block = NULL;
997         sf->hdr = first_hdr;
998         sf->prev = NULL;
999
1000 continue_with_new_stack_frame:
1001         sf->block->generation = le64_to_cpu(sf->hdr->generation);
1002         if (0 == sf->hdr->level) {
1003                 struct btrfs_leaf *const leafhdr =
1004                     (struct btrfs_leaf *)sf->hdr;
1005
1006                 if (-1 == sf->i) {
1007                         sf->nr = le32_to_cpu(leafhdr->header.nritems);
1008
1009                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1010                                 printk(KERN_INFO
1011                                        "leaf %llu items %d generation %llu"
1012                                        " owner %llu\n",
1013                                        (unsigned long long)
1014                                        sf->block_ctx->start,
1015                                        sf->nr,
1016                                        (unsigned long long)
1017                                        le64_to_cpu(leafhdr->header.generation),
1018                                        (unsigned long long)
1019                                        le64_to_cpu(leafhdr->header.owner));
1020                 }
1021
1022 continue_with_current_leaf_stack_frame:
1023                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1024                         sf->i++;
1025                         sf->num_copies = 0;
1026                 }
1027
1028                 if (sf->i < sf->nr) {
1029                         struct btrfs_item disk_item;
1030                         u32 disk_item_offset =
1031                                 (uintptr_t)(leafhdr->items + sf->i) -
1032                                 (uintptr_t)leafhdr;
1033                         struct btrfs_disk_key *disk_key;
1034                         u8 type;
1035                         u32 item_offset;
1036                         u32 item_size;
1037
1038                         if (disk_item_offset + sizeof(struct btrfs_item) >
1039                             sf->block_ctx->len) {
1040 leaf_item_out_of_bounce_error:
1041                                 printk(KERN_INFO
1042                                        "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1043                                        sf->block_ctx->start,
1044                                        sf->block_ctx->dev->name);
1045                                 goto one_stack_frame_backwards;
1046                         }
1047                         btrfsic_read_from_block_data(sf->block_ctx,
1048                                                      &disk_item,
1049                                                      disk_item_offset,
1050                                                      sizeof(struct btrfs_item));
1051                         item_offset = le32_to_cpu(disk_item.offset);
1052                         item_size = le32_to_cpu(disk_item.size);
1053                         disk_key = &disk_item.key;
1054                         type = disk_key->type;
1055
1056                         if (BTRFS_ROOT_ITEM_KEY == type) {
1057                                 struct btrfs_root_item root_item;
1058                                 u32 root_item_offset;
1059                                 u64 next_bytenr;
1060
1061                                 root_item_offset = item_offset +
1062                                         offsetof(struct btrfs_leaf, items);
1063                                 if (root_item_offset + item_size >
1064                                     sf->block_ctx->len)
1065                                         goto leaf_item_out_of_bounce_error;
1066                                 btrfsic_read_from_block_data(
1067                                         sf->block_ctx, &root_item,
1068                                         root_item_offset,
1069                                         item_size);
1070                                 next_bytenr = le64_to_cpu(root_item.bytenr);
1071
1072                                 sf->error =
1073                                     btrfsic_create_link_to_next_block(
1074                                                 state,
1075                                                 sf->block,
1076                                                 sf->block_ctx,
1077                                                 next_bytenr,
1078                                                 sf->limit_nesting,
1079                                                 &sf->next_block_ctx,
1080                                                 &sf->next_block,
1081                                                 force_iodone_flag,
1082                                                 &sf->num_copies,
1083                                                 &sf->mirror_num,
1084                                                 disk_key,
1085                                                 le64_to_cpu(root_item.
1086                                                 generation));
1087                                 if (sf->error)
1088                                         goto one_stack_frame_backwards;
1089
1090                                 if (NULL != sf->next_block) {
1091                                         struct btrfs_header *const next_hdr =
1092                                             (struct btrfs_header *)
1093                                             sf->next_block_ctx.datav[0];
1094
1095                                         next_stack =
1096                                             btrfsic_stack_frame_alloc();
1097                                         if (NULL == next_stack) {
1098                                                 btrfsic_release_block_ctx(
1099                                                                 &sf->
1100                                                                 next_block_ctx);
1101                                                 goto one_stack_frame_backwards;
1102                                         }
1103
1104                                         next_stack->i = -1;
1105                                         next_stack->block = sf->next_block;
1106                                         next_stack->block_ctx =
1107                                             &sf->next_block_ctx;
1108                                         next_stack->next_block = NULL;
1109                                         next_stack->hdr = next_hdr;
1110                                         next_stack->limit_nesting =
1111                                             sf->limit_nesting - 1;
1112                                         next_stack->prev = sf;
1113                                         sf = next_stack;
1114                                         goto continue_with_new_stack_frame;
1115                                 }
1116                         } else if (BTRFS_EXTENT_DATA_KEY == type &&
1117                                    state->include_extent_data) {
1118                                 sf->error = btrfsic_handle_extent_data(
1119                                                 state,
1120                                                 sf->block,
1121                                                 sf->block_ctx,
1122                                                 item_offset,
1123                                                 force_iodone_flag);
1124                                 if (sf->error)
1125                                         goto one_stack_frame_backwards;
1126                         }
1127
1128                         goto continue_with_current_leaf_stack_frame;
1129                 }
1130         } else {
1131                 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1132
1133                 if (-1 == sf->i) {
1134                         sf->nr = le32_to_cpu(nodehdr->header.nritems);
1135
1136                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1137                                 printk(KERN_INFO "node %llu level %d items %d"
1138                                        " generation %llu owner %llu\n",
1139                                        (unsigned long long)
1140                                        sf->block_ctx->start,
1141                                        nodehdr->header.level, sf->nr,
1142                                        (unsigned long long)
1143                                        le64_to_cpu(nodehdr->header.generation),
1144                                        (unsigned long long)
1145                                        le64_to_cpu(nodehdr->header.owner));
1146                 }
1147
1148 continue_with_current_node_stack_frame:
1149                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1150                         sf->i++;
1151                         sf->num_copies = 0;
1152                 }
1153
1154                 if (sf->i < sf->nr) {
1155                         struct btrfs_key_ptr key_ptr;
1156                         u32 key_ptr_offset;
1157                         u64 next_bytenr;
1158
1159                         key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1160                                           (uintptr_t)nodehdr;
1161                         if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1162                             sf->block_ctx->len) {
1163                                 printk(KERN_INFO
1164                                        "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1165                                        sf->block_ctx->start,
1166                                        sf->block_ctx->dev->name);
1167                                 goto one_stack_frame_backwards;
1168                         }
1169                         btrfsic_read_from_block_data(
1170                                 sf->block_ctx, &key_ptr, key_ptr_offset,
1171                                 sizeof(struct btrfs_key_ptr));
1172                         next_bytenr = le64_to_cpu(key_ptr.blockptr);
1173
1174                         sf->error = btrfsic_create_link_to_next_block(
1175                                         state,
1176                                         sf->block,
1177                                         sf->block_ctx,
1178                                         next_bytenr,
1179                                         sf->limit_nesting,
1180                                         &sf->next_block_ctx,
1181                                         &sf->next_block,
1182                                         force_iodone_flag,
1183                                         &sf->num_copies,
1184                                         &sf->mirror_num,
1185                                         &key_ptr.key,
1186                                         le64_to_cpu(key_ptr.generation));
1187                         if (sf->error)
1188                                 goto one_stack_frame_backwards;
1189
1190                         if (NULL != sf->next_block) {
1191                                 struct btrfs_header *const next_hdr =
1192                                     (struct btrfs_header *)
1193                                     sf->next_block_ctx.datav[0];
1194
1195                                 next_stack = btrfsic_stack_frame_alloc();
1196                                 if (NULL == next_stack)
1197                                         goto one_stack_frame_backwards;
1198
1199                                 next_stack->i = -1;
1200                                 next_stack->block = sf->next_block;
1201                                 next_stack->block_ctx = &sf->next_block_ctx;
1202                                 next_stack->next_block = NULL;
1203                                 next_stack->hdr = next_hdr;
1204                                 next_stack->limit_nesting =
1205                                     sf->limit_nesting - 1;
1206                                 next_stack->prev = sf;
1207                                 sf = next_stack;
1208                                 goto continue_with_new_stack_frame;
1209                         }
1210
1211                         goto continue_with_current_node_stack_frame;
1212                 }
1213         }
1214
1215 one_stack_frame_backwards:
1216         if (NULL != sf->prev) {
1217                 struct btrfsic_stack_frame *const prev = sf->prev;
1218
1219                 /* the one for the initial block is freed in the caller */
1220                 btrfsic_release_block_ctx(sf->block_ctx);
1221
1222                 if (sf->error) {
1223                         prev->error = sf->error;
1224                         btrfsic_stack_frame_free(sf);
1225                         sf = prev;
1226                         goto one_stack_frame_backwards;
1227                 }
1228
1229                 btrfsic_stack_frame_free(sf);
1230                 sf = prev;
1231                 goto continue_with_new_stack_frame;
1232         } else {
1233                 BUG_ON(&initial_stack_frame != sf);
1234         }
1235
1236         return sf->error;
1237 }
1238
1239 static void btrfsic_read_from_block_data(
1240         struct btrfsic_block_data_ctx *block_ctx,
1241         void *dstv, u32 offset, size_t len)
1242 {
1243         size_t cur;
1244         size_t offset_in_page;
1245         char *kaddr;
1246         char *dst = (char *)dstv;
1247         size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1248         unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1249
1250         WARN_ON(offset + len > block_ctx->len);
1251         offset_in_page = (start_offset + offset) &
1252                          ((unsigned long)PAGE_CACHE_SIZE - 1);
1253
1254         while (len > 0) {
1255                 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1256                 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1257                             PAGE_CACHE_SHIFT);
1258                 kaddr = block_ctx->datav[i];
1259                 memcpy(dst, kaddr + offset_in_page, cur);
1260
1261                 dst += cur;
1262                 len -= cur;
1263                 offset_in_page = 0;
1264                 i++;
1265         }
1266 }
1267
1268 static int btrfsic_create_link_to_next_block(
1269                 struct btrfsic_state *state,
1270                 struct btrfsic_block *block,
1271                 struct btrfsic_block_data_ctx *block_ctx,
1272                 u64 next_bytenr,
1273                 int limit_nesting,
1274                 struct btrfsic_block_data_ctx *next_block_ctx,
1275                 struct btrfsic_block **next_blockp,
1276                 int force_iodone_flag,
1277                 int *num_copiesp, int *mirror_nump,
1278                 struct btrfs_disk_key *disk_key,
1279                 u64 parent_generation)
1280 {
1281         struct btrfsic_block *next_block = NULL;
1282         int ret;
1283         struct btrfsic_block_link *l;
1284         int did_alloc_block_link;
1285         int block_was_created;
1286
1287         *next_blockp = NULL;
1288         if (0 == *num_copiesp) {
1289                 *num_copiesp =
1290                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
1291                                      next_bytenr, state->metablock_size);
1292                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1293                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1294                                (unsigned long long)next_bytenr, *num_copiesp);
1295                 *mirror_nump = 1;
1296         }
1297
1298         if (*mirror_nump > *num_copiesp)
1299                 return 0;
1300
1301         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1302                 printk(KERN_INFO
1303                        "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1304                        *mirror_nump);
1305         ret = btrfsic_map_block(state, next_bytenr,
1306                                 state->metablock_size,
1307                                 next_block_ctx, *mirror_nump);
1308         if (ret) {
1309                 printk(KERN_INFO
1310                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1311                        (unsigned long long)next_bytenr, *mirror_nump);
1312                 btrfsic_release_block_ctx(next_block_ctx);
1313                 *next_blockp = NULL;
1314                 return -1;
1315         }
1316
1317         next_block = btrfsic_block_lookup_or_add(state,
1318                                                  next_block_ctx, "referenced ",
1319                                                  1, force_iodone_flag,
1320                                                  !force_iodone_flag,
1321                                                  *mirror_nump,
1322                                                  &block_was_created);
1323         if (NULL == next_block) {
1324                 btrfsic_release_block_ctx(next_block_ctx);
1325                 *next_blockp = NULL;
1326                 return -1;
1327         }
1328         if (block_was_created) {
1329                 l = NULL;
1330                 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1331         } else {
1332                 if (next_block->logical_bytenr != next_bytenr &&
1333                     !(!next_block->is_metadata &&
1334                       0 == next_block->logical_bytenr)) {
1335                         printk(KERN_INFO
1336                                "Referenced block @%llu (%s/%llu/%d)"
1337                                " found in hash table, %c,"
1338                                " bytenr mismatch (!= stored %llu).\n",
1339                                (unsigned long long)next_bytenr,
1340                                next_block_ctx->dev->name,
1341                                (unsigned long long)next_block_ctx->dev_bytenr,
1342                                *mirror_nump,
1343                                btrfsic_get_block_type(state, next_block),
1344                                (unsigned long long)next_block->logical_bytenr);
1345                 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1346                         printk(KERN_INFO
1347                                "Referenced block @%llu (%s/%llu/%d)"
1348                                " found in hash table, %c.\n",
1349                                (unsigned long long)next_bytenr,
1350                                next_block_ctx->dev->name,
1351                                (unsigned long long)next_block_ctx->dev_bytenr,
1352                                *mirror_nump,
1353                                btrfsic_get_block_type(state, next_block));
1354                 next_block->logical_bytenr = next_bytenr;
1355
1356                 next_block->mirror_num = *mirror_nump;
1357                 l = btrfsic_block_link_hashtable_lookup(
1358                                 next_block_ctx->dev->bdev,
1359                                 next_block_ctx->dev_bytenr,
1360                                 block_ctx->dev->bdev,
1361                                 block_ctx->dev_bytenr,
1362                                 &state->block_link_hashtable);
1363         }
1364
1365         next_block->disk_key = *disk_key;
1366         if (NULL == l) {
1367                 l = btrfsic_block_link_alloc();
1368                 if (NULL == l) {
1369                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1370                         btrfsic_release_block_ctx(next_block_ctx);
1371                         *next_blockp = NULL;
1372                         return -1;
1373                 }
1374
1375                 did_alloc_block_link = 1;
1376                 l->block_ref_to = next_block;
1377                 l->block_ref_from = block;
1378                 l->ref_cnt = 1;
1379                 l->parent_generation = parent_generation;
1380
1381                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1382                         btrfsic_print_add_link(state, l);
1383
1384                 list_add(&l->node_ref_to, &block->ref_to_list);
1385                 list_add(&l->node_ref_from, &next_block->ref_from_list);
1386
1387                 btrfsic_block_link_hashtable_add(l,
1388                                                  &state->block_link_hashtable);
1389         } else {
1390                 did_alloc_block_link = 0;
1391                 if (0 == limit_nesting) {
1392                         l->ref_cnt++;
1393                         l->parent_generation = parent_generation;
1394                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1395                                 btrfsic_print_add_link(state, l);
1396                 }
1397         }
1398
1399         if (limit_nesting > 0 && did_alloc_block_link) {
1400                 ret = btrfsic_read_block(state, next_block_ctx);
1401                 if (ret < (int)next_block_ctx->len) {
1402                         printk(KERN_INFO
1403                                "btrfsic: read block @logical %llu failed!\n",
1404                                (unsigned long long)next_bytenr);
1405                         btrfsic_release_block_ctx(next_block_ctx);
1406                         *next_blockp = NULL;
1407                         return -1;
1408                 }
1409
1410                 *next_blockp = next_block;
1411         } else {
1412                 *next_blockp = NULL;
1413         }
1414         (*mirror_nump)++;
1415
1416         return 0;
1417 }
1418
1419 static int btrfsic_handle_extent_data(
1420                 struct btrfsic_state *state,
1421                 struct btrfsic_block *block,
1422                 struct btrfsic_block_data_ctx *block_ctx,
1423                 u32 item_offset, int force_iodone_flag)
1424 {
1425         int ret;
1426         struct btrfs_file_extent_item file_extent_item;
1427         u64 file_extent_item_offset;
1428         u64 next_bytenr;
1429         u64 num_bytes;
1430         u64 generation;
1431         struct btrfsic_block_link *l;
1432
1433         file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1434                                   item_offset;
1435         if (file_extent_item_offset +
1436             offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1437             block_ctx->len) {
1438                 printk(KERN_INFO
1439                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1440                        block_ctx->start, block_ctx->dev->name);
1441                 return -1;
1442         }
1443
1444         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1445                 file_extent_item_offset,
1446                 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1447         if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1448             ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
1449                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1450                         printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1451                                file_extent_item.type,
1452                                (unsigned long long)
1453                                le64_to_cpu(file_extent_item.disk_bytenr));
1454                 return 0;
1455         }
1456
1457         if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1458             block_ctx->len) {
1459                 printk(KERN_INFO
1460                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1461                        block_ctx->start, block_ctx->dev->name);
1462                 return -1;
1463         }
1464         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1465                                      file_extent_item_offset,
1466                                      sizeof(struct btrfs_file_extent_item));
1467         next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1468                       le64_to_cpu(file_extent_item.offset);
1469         generation = le64_to_cpu(file_extent_item.generation);
1470         num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1471         generation = le64_to_cpu(file_extent_item.generation);
1472
1473         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1474                 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1475                        " offset = %llu, num_bytes = %llu\n",
1476                        file_extent_item.type,
1477                        (unsigned long long)
1478                        le64_to_cpu(file_extent_item.disk_bytenr),
1479                        (unsigned long long)le64_to_cpu(file_extent_item.offset),
1480                        (unsigned long long)num_bytes);
1481         while (num_bytes > 0) {
1482                 u32 chunk_len;
1483                 int num_copies;
1484                 int mirror_num;
1485
1486                 if (num_bytes > state->datablock_size)
1487                         chunk_len = state->datablock_size;
1488                 else
1489                         chunk_len = num_bytes;
1490
1491                 num_copies =
1492                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
1493                                      next_bytenr, state->datablock_size);
1494                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1495                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1496                                (unsigned long long)next_bytenr, num_copies);
1497                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1498                         struct btrfsic_block_data_ctx next_block_ctx;
1499                         struct btrfsic_block *next_block;
1500                         int block_was_created;
1501
1502                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1503                                 printk(KERN_INFO "btrfsic_handle_extent_data("
1504                                        "mirror_num=%d)\n", mirror_num);
1505                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1506                                 printk(KERN_INFO
1507                                        "\tdisk_bytenr = %llu, num_bytes %u\n",
1508                                        (unsigned long long)next_bytenr,
1509                                        chunk_len);
1510                         ret = btrfsic_map_block(state, next_bytenr,
1511                                                 chunk_len, &next_block_ctx,
1512                                                 mirror_num);
1513                         if (ret) {
1514                                 printk(KERN_INFO
1515                                        "btrfsic: btrfsic_map_block(@%llu,"
1516                                        " mirror=%d) failed!\n",
1517                                        (unsigned long long)next_bytenr,
1518                                        mirror_num);
1519                                 return -1;
1520                         }
1521
1522                         next_block = btrfsic_block_lookup_or_add(
1523                                         state,
1524                                         &next_block_ctx,
1525                                         "referenced ",
1526                                         0,
1527                                         force_iodone_flag,
1528                                         !force_iodone_flag,
1529                                         mirror_num,
1530                                         &block_was_created);
1531                         if (NULL == next_block) {
1532                                 printk(KERN_INFO
1533                                        "btrfsic: error, kmalloc failed!\n");
1534                                 btrfsic_release_block_ctx(&next_block_ctx);
1535                                 return -1;
1536                         }
1537                         if (!block_was_created) {
1538                                 if (next_block->logical_bytenr != next_bytenr &&
1539                                     !(!next_block->is_metadata &&
1540                                       0 == next_block->logical_bytenr)) {
1541                                         printk(KERN_INFO
1542                                                "Referenced block"
1543                                                " @%llu (%s/%llu/%d)"
1544                                                " found in hash table, D,"
1545                                                " bytenr mismatch"
1546                                                " (!= stored %llu).\n",
1547                                                (unsigned long long)next_bytenr,
1548                                                next_block_ctx.dev->name,
1549                                                (unsigned long long)
1550                                                next_block_ctx.dev_bytenr,
1551                                                mirror_num,
1552                                                (unsigned long long)
1553                                                next_block->logical_bytenr);
1554                                 }
1555                                 next_block->logical_bytenr = next_bytenr;
1556                                 next_block->mirror_num = mirror_num;
1557                         }
1558
1559                         l = btrfsic_block_link_lookup_or_add(state,
1560                                                              &next_block_ctx,
1561                                                              next_block, block,
1562                                                              generation);
1563                         btrfsic_release_block_ctx(&next_block_ctx);
1564                         if (NULL == l)
1565                                 return -1;
1566                 }
1567
1568                 next_bytenr += chunk_len;
1569                 num_bytes -= chunk_len;
1570         }
1571
1572         return 0;
1573 }
1574
1575 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1576                              struct btrfsic_block_data_ctx *block_ctx_out,
1577                              int mirror_num)
1578 {
1579         int ret;
1580         u64 length;
1581         struct btrfs_bio *multi = NULL;
1582         struct btrfs_device *device;
1583
1584         length = len;
1585         ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
1586                               bytenr, &length, &multi, mirror_num);
1587
1588         device = multi->stripes[0].dev;
1589         block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1590         block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1591         block_ctx_out->start = bytenr;
1592         block_ctx_out->len = len;
1593         block_ctx_out->datav = NULL;
1594         block_ctx_out->pagev = NULL;
1595         block_ctx_out->mem_to_free = NULL;
1596
1597         if (0 == ret)
1598                 kfree(multi);
1599         if (NULL == block_ctx_out->dev) {
1600                 ret = -ENXIO;
1601                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1602         }
1603
1604         return ret;
1605 }
1606
1607 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1608                                   u32 len, struct block_device *bdev,
1609                                   struct btrfsic_block_data_ctx *block_ctx_out)
1610 {
1611         block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1612         block_ctx_out->dev_bytenr = bytenr;
1613         block_ctx_out->start = bytenr;
1614         block_ctx_out->len = len;
1615         block_ctx_out->datav = NULL;
1616         block_ctx_out->pagev = NULL;
1617         block_ctx_out->mem_to_free = NULL;
1618         if (NULL != block_ctx_out->dev) {
1619                 return 0;
1620         } else {
1621                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1622                 return -ENXIO;
1623         }
1624 }
1625
1626 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1627 {
1628         if (block_ctx->mem_to_free) {
1629                 unsigned int num_pages;
1630
1631                 BUG_ON(!block_ctx->datav);
1632                 BUG_ON(!block_ctx->pagev);
1633                 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1634                             PAGE_CACHE_SHIFT;
1635                 while (num_pages > 0) {
1636                         num_pages--;
1637                         if (block_ctx->datav[num_pages]) {
1638                                 kunmap(block_ctx->pagev[num_pages]);
1639                                 block_ctx->datav[num_pages] = NULL;
1640                         }
1641                         if (block_ctx->pagev[num_pages]) {
1642                                 __free_page(block_ctx->pagev[num_pages]);
1643                                 block_ctx->pagev[num_pages] = NULL;
1644                         }
1645                 }
1646
1647                 kfree(block_ctx->mem_to_free);
1648                 block_ctx->mem_to_free = NULL;
1649                 block_ctx->pagev = NULL;
1650                 block_ctx->datav = NULL;
1651         }
1652 }
1653
1654 static int btrfsic_read_block(struct btrfsic_state *state,
1655                               struct btrfsic_block_data_ctx *block_ctx)
1656 {
1657         unsigned int num_pages;
1658         unsigned int i;
1659         u64 dev_bytenr;
1660         int ret;
1661
1662         BUG_ON(block_ctx->datav);
1663         BUG_ON(block_ctx->pagev);
1664         BUG_ON(block_ctx->mem_to_free);
1665         if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1666                 printk(KERN_INFO
1667                        "btrfsic: read_block() with unaligned bytenr %llu\n",
1668                        (unsigned long long)block_ctx->dev_bytenr);
1669                 return -1;
1670         }
1671
1672         num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1673                     PAGE_CACHE_SHIFT;
1674         block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1675                                           sizeof(*block_ctx->pagev)) *
1676                                          num_pages, GFP_NOFS);
1677         if (!block_ctx->mem_to_free)
1678                 return -1;
1679         block_ctx->datav = block_ctx->mem_to_free;
1680         block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1681         for (i = 0; i < num_pages; i++) {
1682                 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1683                 if (!block_ctx->pagev[i])
1684                         return -1;
1685         }
1686
1687         dev_bytenr = block_ctx->dev_bytenr;
1688         for (i = 0; i < num_pages;) {
1689                 struct bio *bio;
1690                 unsigned int j;
1691                 DECLARE_COMPLETION_ONSTACK(complete);
1692
1693                 bio = bio_alloc(GFP_NOFS, num_pages - i);
1694                 if (!bio) {
1695                         printk(KERN_INFO
1696                                "btrfsic: bio_alloc() for %u pages failed!\n",
1697                                num_pages - i);
1698                         return -1;
1699                 }
1700                 bio->bi_bdev = block_ctx->dev->bdev;
1701                 bio->bi_sector = dev_bytenr >> 9;
1702                 bio->bi_end_io = btrfsic_complete_bio_end_io;
1703                 bio->bi_private = &complete;
1704
1705                 for (j = i; j < num_pages; j++) {
1706                         ret = bio_add_page(bio, block_ctx->pagev[j],
1707                                            PAGE_CACHE_SIZE, 0);
1708                         if (PAGE_CACHE_SIZE != ret)
1709                                 break;
1710                 }
1711                 if (j == i) {
1712                         printk(KERN_INFO
1713                                "btrfsic: error, failed to add a single page!\n");
1714                         return -1;
1715                 }
1716                 submit_bio(READ, bio);
1717
1718                 /* this will also unplug the queue */
1719                 wait_for_completion(&complete);
1720
1721                 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1722                         printk(KERN_INFO
1723                                "btrfsic: read error at logical %llu dev %s!\n",
1724                                block_ctx->start, block_ctx->dev->name);
1725                         bio_put(bio);
1726                         return -1;
1727                 }
1728                 bio_put(bio);
1729                 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1730                 i = j;
1731         }
1732         for (i = 0; i < num_pages; i++) {
1733                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1734                 if (!block_ctx->datav[i]) {
1735                         printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1736                                block_ctx->dev->name);
1737                         return -1;
1738                 }
1739         }
1740
1741         return block_ctx->len;
1742 }
1743
1744 static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1745 {
1746         complete((struct completion *)bio->bi_private);
1747 }
1748
1749 static void btrfsic_dump_database(struct btrfsic_state *state)
1750 {
1751         struct list_head *elem_all;
1752
1753         BUG_ON(NULL == state);
1754
1755         printk(KERN_INFO "all_blocks_list:\n");
1756         list_for_each(elem_all, &state->all_blocks_list) {
1757                 const struct btrfsic_block *const b_all =
1758                     list_entry(elem_all, struct btrfsic_block,
1759                                all_blocks_node);
1760                 struct list_head *elem_ref_to;
1761                 struct list_head *elem_ref_from;
1762
1763                 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1764                        btrfsic_get_block_type(state, b_all),
1765                        (unsigned long long)b_all->logical_bytenr,
1766                        b_all->dev_state->name,
1767                        (unsigned long long)b_all->dev_bytenr,
1768                        b_all->mirror_num);
1769
1770                 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1771                         const struct btrfsic_block_link *const l =
1772                             list_entry(elem_ref_to,
1773                                        struct btrfsic_block_link,
1774                                        node_ref_to);
1775
1776                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1777                                " refers %u* to"
1778                                " %c @%llu (%s/%llu/%d)\n",
1779                                btrfsic_get_block_type(state, b_all),
1780                                (unsigned long long)b_all->logical_bytenr,
1781                                b_all->dev_state->name,
1782                                (unsigned long long)b_all->dev_bytenr,
1783                                b_all->mirror_num,
1784                                l->ref_cnt,
1785                                btrfsic_get_block_type(state, l->block_ref_to),
1786                                (unsigned long long)
1787                                l->block_ref_to->logical_bytenr,
1788                                l->block_ref_to->dev_state->name,
1789                                (unsigned long long)l->block_ref_to->dev_bytenr,
1790                                l->block_ref_to->mirror_num);
1791                 }
1792
1793                 list_for_each(elem_ref_from, &b_all->ref_from_list) {
1794                         const struct btrfsic_block_link *const l =
1795                             list_entry(elem_ref_from,
1796                                        struct btrfsic_block_link,
1797                                        node_ref_from);
1798
1799                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1800                                " is ref %u* from"
1801                                " %c @%llu (%s/%llu/%d)\n",
1802                                btrfsic_get_block_type(state, b_all),
1803                                (unsigned long long)b_all->logical_bytenr,
1804                                b_all->dev_state->name,
1805                                (unsigned long long)b_all->dev_bytenr,
1806                                b_all->mirror_num,
1807                                l->ref_cnt,
1808                                btrfsic_get_block_type(state, l->block_ref_from),
1809                                (unsigned long long)
1810                                l->block_ref_from->logical_bytenr,
1811                                l->block_ref_from->dev_state->name,
1812                                (unsigned long long)
1813                                l->block_ref_from->dev_bytenr,
1814                                l->block_ref_from->mirror_num);
1815                 }
1816
1817                 printk(KERN_INFO "\n");
1818         }
1819 }
1820
1821 /*
1822  * Test whether the disk block contains a tree block (leaf or node)
1823  * (note that this test fails for the super block)
1824  */
1825 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1826                                      char **datav, unsigned int num_pages)
1827 {
1828         struct btrfs_header *h;
1829         u8 csum[BTRFS_CSUM_SIZE];
1830         u32 crc = ~(u32)0;
1831         unsigned int i;
1832
1833         if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1834                 return 1; /* not metadata */
1835         num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1836         h = (struct btrfs_header *)datav[0];
1837
1838         if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1839                 return 1;
1840
1841         for (i = 0; i < num_pages; i++) {
1842                 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1843                 size_t sublen = i ? PAGE_CACHE_SIZE :
1844                                     (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1845
1846                 crc = crc32c(crc, data, sublen);
1847         }
1848         btrfs_csum_final(crc, csum);
1849         if (memcmp(csum, h->csum, state->csum_size))
1850                 return 1;
1851
1852         return 0; /* is metadata */
1853 }
1854
1855 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1856                                           u64 dev_bytenr, char **mapped_datav,
1857                                           unsigned int num_pages,
1858                                           struct bio *bio, int *bio_is_patched,
1859                                           struct buffer_head *bh,
1860                                           int submit_bio_bh_rw)
1861 {
1862         int is_metadata;
1863         struct btrfsic_block *block;
1864         struct btrfsic_block_data_ctx block_ctx;
1865         int ret;
1866         struct btrfsic_state *state = dev_state->state;
1867         struct block_device *bdev = dev_state->bdev;
1868         unsigned int processed_len;
1869
1870         if (NULL != bio_is_patched)
1871                 *bio_is_patched = 0;
1872
1873 again:
1874         if (num_pages == 0)
1875                 return;
1876
1877         processed_len = 0;
1878         is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1879                                                       num_pages));
1880
1881         block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1882                                                &state->block_hashtable);
1883         if (NULL != block) {
1884                 u64 bytenr = 0;
1885                 struct list_head *elem_ref_to;
1886                 struct list_head *tmp_ref_to;
1887
1888                 if (block->is_superblock) {
1889                         bytenr = le64_to_cpu(((struct btrfs_super_block *)
1890                                               mapped_datav[0])->bytenr);
1891                         if (num_pages * PAGE_CACHE_SIZE <
1892                             BTRFS_SUPER_INFO_SIZE) {
1893                                 printk(KERN_INFO
1894                                        "btrfsic: cannot work with too short bios!\n");
1895                                 return;
1896                         }
1897                         is_metadata = 1;
1898                         BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1899                         processed_len = BTRFS_SUPER_INFO_SIZE;
1900                         if (state->print_mask &
1901                             BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1902                                 printk(KERN_INFO
1903                                        "[before new superblock is written]:\n");
1904                                 btrfsic_dump_tree_sub(state, block, 0);
1905                         }
1906                 }
1907                 if (is_metadata) {
1908                         if (!block->is_superblock) {
1909                                 if (num_pages * PAGE_CACHE_SIZE <
1910                                     state->metablock_size) {
1911                                         printk(KERN_INFO
1912                                                "btrfsic: cannot work with too short bios!\n");
1913                                         return;
1914                                 }
1915                                 processed_len = state->metablock_size;
1916                                 bytenr = le64_to_cpu(((struct btrfs_header *)
1917                                                       mapped_datav[0])->bytenr);
1918                                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1919                                                                dev_state,
1920                                                                dev_bytenr);
1921                         }
1922                         if (block->logical_bytenr != bytenr) {
1923                                 printk(KERN_INFO
1924                                        "Written block @%llu (%s/%llu/%d)"
1925                                        " found in hash table, %c,"
1926                                        " bytenr mismatch"
1927                                        " (!= stored %llu).\n",
1928                                        (unsigned long long)bytenr,
1929                                        dev_state->name,
1930                                        (unsigned long long)dev_bytenr,
1931                                        block->mirror_num,
1932                                        btrfsic_get_block_type(state, block),
1933                                        (unsigned long long)
1934                                        block->logical_bytenr);
1935                                 block->logical_bytenr = bytenr;
1936                         } else if (state->print_mask &
1937                                    BTRFSIC_PRINT_MASK_VERBOSE)
1938                                 printk(KERN_INFO
1939                                        "Written block @%llu (%s/%llu/%d)"
1940                                        " found in hash table, %c.\n",
1941                                        (unsigned long long)bytenr,
1942                                        dev_state->name,
1943                                        (unsigned long long)dev_bytenr,
1944                                        block->mirror_num,
1945                                        btrfsic_get_block_type(state, block));
1946                 } else {
1947                         if (num_pages * PAGE_CACHE_SIZE <
1948                             state->datablock_size) {
1949                                 printk(KERN_INFO
1950                                        "btrfsic: cannot work with too short bios!\n");
1951                                 return;
1952                         }
1953                         processed_len = state->datablock_size;
1954                         bytenr = block->logical_bytenr;
1955                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1956                                 printk(KERN_INFO
1957                                        "Written block @%llu (%s/%llu/%d)"
1958                                        " found in hash table, %c.\n",
1959                                        (unsigned long long)bytenr,
1960                                        dev_state->name,
1961                                        (unsigned long long)dev_bytenr,
1962                                        block->mirror_num,
1963                                        btrfsic_get_block_type(state, block));
1964                 }
1965
1966                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1967                         printk(KERN_INFO
1968                                "ref_to_list: %cE, ref_from_list: %cE\n",
1969                                list_empty(&block->ref_to_list) ? ' ' : '!',
1970                                list_empty(&block->ref_from_list) ? ' ' : '!');
1971                 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1972                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1973                                " @%llu (%s/%llu/%d), old(gen=%llu,"
1974                                " objectid=%llu, type=%d, offset=%llu),"
1975                                " new(gen=%llu),"
1976                                " which is referenced by most recent superblock"
1977                                " (superblockgen=%llu)!\n",
1978                                btrfsic_get_block_type(state, block),
1979                                (unsigned long long)bytenr,
1980                                dev_state->name,
1981                                (unsigned long long)dev_bytenr,
1982                                block->mirror_num,
1983                                (unsigned long long)block->generation,
1984                                (unsigned long long)
1985                                le64_to_cpu(block->disk_key.objectid),
1986                                block->disk_key.type,
1987                                (unsigned long long)
1988                                le64_to_cpu(block->disk_key.offset),
1989                                (unsigned long long)
1990                                le64_to_cpu(((struct btrfs_header *)
1991                                             mapped_datav[0])->generation),
1992                                (unsigned long long)
1993                                state->max_superblock_generation);
1994                         btrfsic_dump_tree(state);
1995                 }
1996
1997                 if (!block->is_iodone && !block->never_written) {
1998                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1999                                " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
2000                                " which is not yet iodone!\n",
2001                                btrfsic_get_block_type(state, block),
2002                                (unsigned long long)bytenr,
2003                                dev_state->name,
2004                                (unsigned long long)dev_bytenr,
2005                                block->mirror_num,
2006                                (unsigned long long)block->generation,
2007                                (unsigned long long)
2008                                le64_to_cpu(((struct btrfs_header *)
2009                                             mapped_datav[0])->generation));
2010                         /* it would not be safe to go on */
2011                         btrfsic_dump_tree(state);
2012                         goto continue_loop;
2013                 }
2014
2015                 /*
2016                  * Clear all references of this block. Do not free
2017                  * the block itself even if is not referenced anymore
2018                  * because it still carries valueable information
2019                  * like whether it was ever written and IO completed.
2020                  */
2021                 list_for_each_safe(elem_ref_to, tmp_ref_to,
2022                                    &block->ref_to_list) {
2023                         struct btrfsic_block_link *const l =
2024                             list_entry(elem_ref_to,
2025                                        struct btrfsic_block_link,
2026                                        node_ref_to);
2027
2028                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2029                                 btrfsic_print_rem_link(state, l);
2030                         l->ref_cnt--;
2031                         if (0 == l->ref_cnt) {
2032                                 list_del(&l->node_ref_to);
2033                                 list_del(&l->node_ref_from);
2034                                 btrfsic_block_link_hashtable_remove(l);
2035                                 btrfsic_block_link_free(l);
2036                         }
2037                 }
2038
2039                 if (block->is_superblock)
2040                         ret = btrfsic_map_superblock(state, bytenr,
2041                                                      processed_len,
2042                                                      bdev, &block_ctx);
2043                 else
2044                         ret = btrfsic_map_block(state, bytenr, processed_len,
2045                                                 &block_ctx, 0);
2046                 if (ret) {
2047                         printk(KERN_INFO
2048                                "btrfsic: btrfsic_map_block(root @%llu)"
2049                                " failed!\n", (unsigned long long)bytenr);
2050                         goto continue_loop;
2051                 }
2052                 block_ctx.datav = mapped_datav;
2053                 /* the following is required in case of writes to mirrors,
2054                  * use the same that was used for the lookup */
2055                 block_ctx.dev = dev_state;
2056                 block_ctx.dev_bytenr = dev_bytenr;
2057
2058                 if (is_metadata || state->include_extent_data) {
2059                         block->never_written = 0;
2060                         block->iodone_w_error = 0;
2061                         if (NULL != bio) {
2062                                 block->is_iodone = 0;
2063                                 BUG_ON(NULL == bio_is_patched);
2064                                 if (!*bio_is_patched) {
2065                                         block->orig_bio_bh_private =
2066                                             bio->bi_private;
2067                                         block->orig_bio_bh_end_io.bio =
2068                                             bio->bi_end_io;
2069                                         block->next_in_same_bio = NULL;
2070                                         bio->bi_private = block;
2071                                         bio->bi_end_io = btrfsic_bio_end_io;
2072                                         *bio_is_patched = 1;
2073                                 } else {
2074                                         struct btrfsic_block *chained_block =
2075                                             (struct btrfsic_block *)
2076                                             bio->bi_private;
2077
2078                                         BUG_ON(NULL == chained_block);
2079                                         block->orig_bio_bh_private =
2080                                             chained_block->orig_bio_bh_private;
2081                                         block->orig_bio_bh_end_io.bio =
2082                                             chained_block->orig_bio_bh_end_io.
2083                                             bio;
2084                                         block->next_in_same_bio = chained_block;
2085                                         bio->bi_private = block;
2086                                 }
2087                         } else if (NULL != bh) {
2088                                 block->is_iodone = 0;
2089                                 block->orig_bio_bh_private = bh->b_private;
2090                                 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2091                                 block->next_in_same_bio = NULL;
2092                                 bh->b_private = block;
2093                                 bh->b_end_io = btrfsic_bh_end_io;
2094                         } else {
2095                                 block->is_iodone = 1;
2096                                 block->orig_bio_bh_private = NULL;
2097                                 block->orig_bio_bh_end_io.bio = NULL;
2098                                 block->next_in_same_bio = NULL;
2099                         }
2100                 }
2101
2102                 block->flush_gen = dev_state->last_flush_gen + 1;
2103                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2104                 if (is_metadata) {
2105                         block->logical_bytenr = bytenr;
2106                         block->is_metadata = 1;
2107                         if (block->is_superblock) {
2108                                 BUG_ON(PAGE_CACHE_SIZE !=
2109                                        BTRFS_SUPER_INFO_SIZE);
2110                                 ret = btrfsic_process_written_superblock(
2111                                                 state,
2112                                                 block,
2113                                                 (struct btrfs_super_block *)
2114                                                 mapped_datav[0]);
2115                                 if (state->print_mask &
2116                                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2117                                         printk(KERN_INFO
2118                                         "[after new superblock is written]:\n");
2119                                         btrfsic_dump_tree_sub(state, block, 0);
2120                                 }
2121                         } else {
2122                                 block->mirror_num = 0;  /* unknown */
2123                                 ret = btrfsic_process_metablock(
2124                                                 state,
2125                                                 block,
2126                                                 &block_ctx,
2127                                                 0, 0);
2128                         }
2129                         if (ret)
2130                                 printk(KERN_INFO
2131                                        "btrfsic: btrfsic_process_metablock"
2132                                        "(root @%llu) failed!\n",
2133                                        (unsigned long long)dev_bytenr);
2134                 } else {
2135                         block->is_metadata = 0;
2136                         block->mirror_num = 0;  /* unknown */
2137                         block->generation = BTRFSIC_GENERATION_UNKNOWN;
2138                         if (!state->include_extent_data
2139                             && list_empty(&block->ref_from_list)) {
2140                                 /*
2141                                  * disk block is overwritten with extent
2142                                  * data (not meta data) and we are configured
2143                                  * to not include extent data: take the
2144                                  * chance and free the block's memory
2145                                  */
2146                                 btrfsic_block_hashtable_remove(block);
2147                                 list_del(&block->all_blocks_node);
2148                                 btrfsic_block_free(block);
2149                         }
2150                 }
2151                 btrfsic_release_block_ctx(&block_ctx);
2152         } else {
2153                 /* block has not been found in hash table */
2154                 u64 bytenr;
2155
2156                 if (!is_metadata) {
2157                         processed_len = state->datablock_size;
2158                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2159                                 printk(KERN_INFO "Written block (%s/%llu/?)"
2160                                        " !found in hash table, D.\n",
2161                                        dev_state->name,
2162                                        (unsigned long long)dev_bytenr);
2163                         if (!state->include_extent_data) {
2164                                 /* ignore that written D block */
2165                                 goto continue_loop;
2166                         }
2167
2168                         /* this is getting ugly for the
2169                          * include_extent_data case... */
2170                         bytenr = 0;     /* unknown */
2171                         block_ctx.start = bytenr;
2172                         block_ctx.len = processed_len;
2173                         block_ctx.mem_to_free = NULL;
2174                         block_ctx.pagev = NULL;
2175                 } else {
2176                         processed_len = state->metablock_size;
2177                         bytenr = le64_to_cpu(((struct btrfs_header *)
2178                                               mapped_datav[0])->bytenr);
2179                         btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2180                                                        dev_bytenr);
2181                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2182                                 printk(KERN_INFO
2183                                        "Written block @%llu (%s/%llu/?)"
2184                                        " !found in hash table, M.\n",
2185                                        (unsigned long long)bytenr,
2186                                        dev_state->name,
2187                                        (unsigned long long)dev_bytenr);
2188
2189                         ret = btrfsic_map_block(state, bytenr, processed_len,
2190                                                 &block_ctx, 0);
2191                         if (ret) {
2192                                 printk(KERN_INFO
2193                                        "btrfsic: btrfsic_map_block(root @%llu)"
2194                                        " failed!\n",
2195                                        (unsigned long long)dev_bytenr);
2196                                 goto continue_loop;
2197                         }
2198                 }
2199                 block_ctx.datav = mapped_datav;
2200                 /* the following is required in case of writes to mirrors,
2201                  * use the same that was used for the lookup */
2202                 block_ctx.dev = dev_state;
2203                 block_ctx.dev_bytenr = dev_bytenr;
2204
2205                 block = btrfsic_block_alloc();
2206                 if (NULL == block) {
2207                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2208                         btrfsic_release_block_ctx(&block_ctx);
2209                         goto continue_loop;
2210                 }
2211                 block->dev_state = dev_state;
2212                 block->dev_bytenr = dev_bytenr;
2213                 block->logical_bytenr = bytenr;
2214                 block->is_metadata = is_metadata;
2215                 block->never_written = 0;
2216                 block->iodone_w_error = 0;
2217                 block->mirror_num = 0;  /* unknown */
2218                 block->flush_gen = dev_state->last_flush_gen + 1;
2219                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2220                 if (NULL != bio) {
2221                         block->is_iodone = 0;
2222                         BUG_ON(NULL == bio_is_patched);
2223                         if (!*bio_is_patched) {
2224                                 block->orig_bio_bh_private = bio->bi_private;
2225                                 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2226                                 block->next_in_same_bio = NULL;
2227                                 bio->bi_private = block;
2228                                 bio->bi_end_io = btrfsic_bio_end_io;
2229                                 *bio_is_patched = 1;
2230                         } else {
2231                                 struct btrfsic_block *chained_block =
2232                                     (struct btrfsic_block *)
2233                                     bio->bi_private;
2234
2235                                 BUG_ON(NULL == chained_block);
2236                                 block->orig_bio_bh_private =
2237                                     chained_block->orig_bio_bh_private;
2238                                 block->orig_bio_bh_end_io.bio =
2239                                     chained_block->orig_bio_bh_end_io.bio;
2240                                 block->next_in_same_bio = chained_block;
2241                                 bio->bi_private = block;
2242                         }
2243                 } else if (NULL != bh) {
2244                         block->is_iodone = 0;
2245                         block->orig_bio_bh_private = bh->b_private;
2246                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2247                         block->next_in_same_bio = NULL;
2248                         bh->b_private = block;
2249                         bh->b_end_io = btrfsic_bh_end_io;
2250                 } else {
2251                         block->is_iodone = 1;
2252                         block->orig_bio_bh_private = NULL;
2253                         block->orig_bio_bh_end_io.bio = NULL;
2254                         block->next_in_same_bio = NULL;
2255                 }
2256                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2257                         printk(KERN_INFO
2258                                "New written %c-block @%llu (%s/%llu/%d)\n",
2259                                is_metadata ? 'M' : 'D',
2260                                (unsigned long long)block->logical_bytenr,
2261                                block->dev_state->name,
2262                                (unsigned long long)block->dev_bytenr,
2263                                block->mirror_num);
2264                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2265                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2266
2267                 if (is_metadata) {
2268                         ret = btrfsic_process_metablock(state, block,
2269                                                         &block_ctx, 0, 0);
2270                         if (ret)
2271                                 printk(KERN_INFO
2272                                        "btrfsic: process_metablock(root @%llu)"
2273                                        " failed!\n",
2274                                        (unsigned long long)dev_bytenr);
2275                 }
2276                 btrfsic_release_block_ctx(&block_ctx);
2277         }
2278
2279 continue_loop:
2280         BUG_ON(!processed_len);
2281         dev_bytenr += processed_len;
2282         mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2283         num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2284         goto again;
2285 }
2286
2287 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2288 {
2289         struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2290         int iodone_w_error;
2291
2292         /* mutex is not held! This is not save if IO is not yet completed
2293          * on umount */
2294         iodone_w_error = 0;
2295         if (bio_error_status)
2296                 iodone_w_error = 1;
2297
2298         BUG_ON(NULL == block);
2299         bp->bi_private = block->orig_bio_bh_private;
2300         bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2301
2302         do {
2303                 struct btrfsic_block *next_block;
2304                 struct btrfsic_dev_state *const dev_state = block->dev_state;
2305
2306                 if ((dev_state->state->print_mask &
2307                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2308                         printk(KERN_INFO
2309                                "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2310                                bio_error_status,
2311                                btrfsic_get_block_type(dev_state->state, block),
2312                                (unsigned long long)block->logical_bytenr,
2313                                dev_state->name,
2314                                (unsigned long long)block->dev_bytenr,
2315                                block->mirror_num);
2316                 next_block = block->next_in_same_bio;
2317                 block->iodone_w_error = iodone_w_error;
2318                 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2319                         dev_state->last_flush_gen++;
2320                         if ((dev_state->state->print_mask &
2321                              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2322                                 printk(KERN_INFO
2323                                        "bio_end_io() new %s flush_gen=%llu\n",
2324                                        dev_state->name,
2325                                        (unsigned long long)
2326                                        dev_state->last_flush_gen);
2327                 }
2328                 if (block->submit_bio_bh_rw & REQ_FUA)
2329                         block->flush_gen = 0; /* FUA completed means block is
2330                                                * on disk */
2331                 block->is_iodone = 1; /* for FLUSH, this releases the block */
2332                 block = next_block;
2333         } while (NULL != block);
2334
2335         bp->bi_end_io(bp, bio_error_status);
2336 }
2337
2338 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2339 {
2340         struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2341         int iodone_w_error = !uptodate;
2342         struct btrfsic_dev_state *dev_state;
2343
2344         BUG_ON(NULL == block);
2345         dev_state = block->dev_state;
2346         if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2347                 printk(KERN_INFO
2348                        "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2349                        iodone_w_error,
2350                        btrfsic_get_block_type(dev_state->state, block),
2351                        (unsigned long long)block->logical_bytenr,
2352                        block->dev_state->name,
2353                        (unsigned long long)block->dev_bytenr,
2354                        block->mirror_num);
2355
2356         block->iodone_w_error = iodone_w_error;
2357         if (block->submit_bio_bh_rw & REQ_FLUSH) {
2358                 dev_state->last_flush_gen++;
2359                 if ((dev_state->state->print_mask &
2360                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2361                         printk(KERN_INFO
2362                                "bh_end_io() new %s flush_gen=%llu\n",
2363                                dev_state->name,
2364                                (unsigned long long)dev_state->last_flush_gen);
2365         }
2366         if (block->submit_bio_bh_rw & REQ_FUA)
2367                 block->flush_gen = 0; /* FUA completed means block is on disk */
2368
2369         bh->b_private = block->orig_bio_bh_private;
2370         bh->b_end_io = block->orig_bio_bh_end_io.bh;
2371         block->is_iodone = 1; /* for FLUSH, this releases the block */
2372         bh->b_end_io(bh, uptodate);
2373 }
2374
2375 static int btrfsic_process_written_superblock(
2376                 struct btrfsic_state *state,
2377                 struct btrfsic_block *const superblock,
2378                 struct btrfs_super_block *const super_hdr)
2379 {
2380         int pass;
2381
2382         superblock->generation = btrfs_super_generation(super_hdr);
2383         if (!(superblock->generation > state->max_superblock_generation ||
2384               0 == state->max_superblock_generation)) {
2385                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2386                         printk(KERN_INFO
2387                                "btrfsic: superblock @%llu (%s/%llu/%d)"
2388                                " with old gen %llu <= %llu\n",
2389                                (unsigned long long)superblock->logical_bytenr,
2390                                superblock->dev_state->name,
2391                                (unsigned long long)superblock->dev_bytenr,
2392                                superblock->mirror_num,
2393                                (unsigned long long)
2394                                btrfs_super_generation(super_hdr),
2395                                (unsigned long long)
2396                                state->max_superblock_generation);
2397         } else {
2398                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2399                         printk(KERN_INFO
2400                                "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2401                                " with new gen %llu > %llu\n",
2402                                (unsigned long long)superblock->logical_bytenr,
2403                                superblock->dev_state->name,
2404                                (unsigned long long)superblock->dev_bytenr,
2405                                superblock->mirror_num,
2406                                (unsigned long long)
2407                                btrfs_super_generation(super_hdr),
2408                                (unsigned long long)
2409                                state->max_superblock_generation);
2410
2411                 state->max_superblock_generation =
2412                     btrfs_super_generation(super_hdr);
2413                 state->latest_superblock = superblock;
2414         }
2415
2416         for (pass = 0; pass < 3; pass++) {
2417                 int ret;
2418                 u64 next_bytenr;
2419                 struct btrfsic_block *next_block;
2420                 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2421                 struct btrfsic_block_link *l;
2422                 int num_copies;
2423                 int mirror_num;
2424                 const char *additional_string = NULL;
2425                 struct btrfs_disk_key tmp_disk_key;
2426
2427                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
2428                 tmp_disk_key.offset = 0;
2429
2430                 switch (pass) {
2431                 case 0:
2432                         tmp_disk_key.objectid =
2433                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
2434                         additional_string = "root ";
2435                         next_bytenr = btrfs_super_root(super_hdr);
2436                         if (state->print_mask &
2437                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2438                                 printk(KERN_INFO "root@%llu\n",
2439                                        (unsigned long long)next_bytenr);
2440                         break;
2441                 case 1:
2442                         tmp_disk_key.objectid =
2443                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
2444                         additional_string = "chunk ";
2445                         next_bytenr = btrfs_super_chunk_root(super_hdr);
2446                         if (state->print_mask &
2447                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2448                                 printk(KERN_INFO "chunk@%llu\n",
2449                                        (unsigned long long)next_bytenr);
2450                         break;
2451                 case 2:
2452                         tmp_disk_key.objectid =
2453                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
2454                         additional_string = "log ";
2455                         next_bytenr = btrfs_super_log_root(super_hdr);
2456                         if (0 == next_bytenr)
2457                                 continue;
2458                         if (state->print_mask &
2459                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2460                                 printk(KERN_INFO "log@%llu\n",
2461                                        (unsigned long long)next_bytenr);
2462                         break;
2463                 }
2464
2465                 num_copies =
2466                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
2467                                      next_bytenr, BTRFS_SUPER_INFO_SIZE);
2468                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2469                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2470                                (unsigned long long)next_bytenr, num_copies);
2471                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2472                         int was_created;
2473
2474                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2475                                 printk(KERN_INFO
2476                                        "btrfsic_process_written_superblock("
2477                                        "mirror_num=%d)\n", mirror_num);
2478                         ret = btrfsic_map_block(state, next_bytenr,
2479                                                 BTRFS_SUPER_INFO_SIZE,
2480                                                 &tmp_next_block_ctx,
2481                                                 mirror_num);
2482                         if (ret) {
2483                                 printk(KERN_INFO
2484                                        "btrfsic: btrfsic_map_block(@%llu,"
2485                                        " mirror=%d) failed!\n",
2486                                        (unsigned long long)next_bytenr,
2487                                        mirror_num);
2488                                 return -1;
2489                         }
2490
2491                         next_block = btrfsic_block_lookup_or_add(
2492                                         state,
2493                                         &tmp_next_block_ctx,
2494                                         additional_string,
2495                                         1, 0, 1,
2496                                         mirror_num,
2497                                         &was_created);
2498                         if (NULL == next_block) {
2499                                 printk(KERN_INFO
2500                                        "btrfsic: error, kmalloc failed!\n");
2501                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2502                                 return -1;
2503                         }
2504
2505                         next_block->disk_key = tmp_disk_key;
2506                         if (was_created)
2507                                 next_block->generation =
2508                                     BTRFSIC_GENERATION_UNKNOWN;
2509                         l = btrfsic_block_link_lookup_or_add(
2510                                         state,
2511                                         &tmp_next_block_ctx,
2512                                         next_block,
2513                                         superblock,
2514                                         BTRFSIC_GENERATION_UNKNOWN);
2515                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
2516                         if (NULL == l)
2517                                 return -1;
2518                 }
2519         }
2520
2521         if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
2522                 WARN_ON(1);
2523                 btrfsic_dump_tree(state);
2524         }
2525
2526         return 0;
2527 }
2528
2529 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2530                                         struct btrfsic_block *const block,
2531                                         int recursion_level)
2532 {
2533         struct list_head *elem_ref_to;
2534         int ret = 0;
2535
2536         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2537                 /*
2538                  * Note that this situation can happen and does not
2539                  * indicate an error in regular cases. It happens
2540                  * when disk blocks are freed and later reused.
2541                  * The check-integrity module is not aware of any
2542                  * block free operations, it just recognizes block
2543                  * write operations. Therefore it keeps the linkage
2544                  * information for a block until a block is
2545                  * rewritten. This can temporarily cause incorrect
2546                  * and even circular linkage informations. This
2547                  * causes no harm unless such blocks are referenced
2548                  * by the most recent super block.
2549                  */
2550                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2551                         printk(KERN_INFO
2552                                "btrfsic: abort cyclic linkage (case 1).\n");
2553
2554                 return ret;
2555         }
2556
2557         /*
2558          * This algorithm is recursive because the amount of used stack
2559          * space is very small and the max recursion depth is limited.
2560          */
2561         list_for_each(elem_ref_to, &block->ref_to_list) {
2562                 const struct btrfsic_block_link *const l =
2563                     list_entry(elem_ref_to, struct btrfsic_block_link,
2564                                node_ref_to);
2565
2566                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2567                         printk(KERN_INFO
2568                                "rl=%d, %c @%llu (%s/%llu/%d)"
2569                                " %u* refers to %c @%llu (%s/%llu/%d)\n",
2570                                recursion_level,
2571                                btrfsic_get_block_type(state, block),
2572                                (unsigned long long)block->logical_bytenr,
2573                                block->dev_state->name,
2574                                (unsigned long long)block->dev_bytenr,
2575                                block->mirror_num,
2576                                l->ref_cnt,
2577                                btrfsic_get_block_type(state, l->block_ref_to),
2578                                (unsigned long long)
2579                                l->block_ref_to->logical_bytenr,
2580                                l->block_ref_to->dev_state->name,
2581                                (unsigned long long)l->block_ref_to->dev_bytenr,
2582                                l->block_ref_to->mirror_num);
2583                 if (l->block_ref_to->never_written) {
2584                         printk(KERN_INFO "btrfs: attempt to write superblock"
2585                                " which references block %c @%llu (%s/%llu/%d)"
2586                                " which is never written!\n",
2587                                btrfsic_get_block_type(state, l->block_ref_to),
2588                                (unsigned long long)
2589                                l->block_ref_to->logical_bytenr,
2590                                l->block_ref_to->dev_state->name,
2591                                (unsigned long long)l->block_ref_to->dev_bytenr,
2592                                l->block_ref_to->mirror_num);
2593                         ret = -1;
2594                 } else if (!l->block_ref_to->is_iodone) {
2595                         printk(KERN_INFO "btrfs: attempt to write superblock"
2596                                " which references block %c @%llu (%s/%llu/%d)"
2597                                " which is not yet iodone!\n",
2598                                btrfsic_get_block_type(state, l->block_ref_to),
2599                                (unsigned long long)
2600                                l->block_ref_to->logical_bytenr,
2601                                l->block_ref_to->dev_state->name,
2602                                (unsigned long long)l->block_ref_to->dev_bytenr,
2603                                l->block_ref_to->mirror_num);
2604                         ret = -1;
2605                 } else if (l->block_ref_to->iodone_w_error) {
2606                         printk(KERN_INFO "btrfs: attempt to write superblock"
2607                                " which references block %c @%llu (%s/%llu/%d)"
2608                                " which has write error!\n",
2609                                btrfsic_get_block_type(state, l->block_ref_to),
2610                                (unsigned long long)
2611                                l->block_ref_to->logical_bytenr,
2612                                l->block_ref_to->dev_state->name,
2613                                (unsigned long long)l->block_ref_to->dev_bytenr,
2614                                l->block_ref_to->mirror_num);
2615                         ret = -1;
2616                 } else if (l->parent_generation !=
2617                            l->block_ref_to->generation &&
2618                            BTRFSIC_GENERATION_UNKNOWN !=
2619                            l->parent_generation &&
2620                            BTRFSIC_GENERATION_UNKNOWN !=
2621                            l->block_ref_to->generation) {
2622                         printk(KERN_INFO "btrfs: attempt to write superblock"
2623                                " which references block %c @%llu (%s/%llu/%d)"
2624                                " with generation %llu !="
2625                                " parent generation %llu!\n",
2626                                btrfsic_get_block_type(state, l->block_ref_to),
2627                                (unsigned long long)
2628                                l->block_ref_to->logical_bytenr,
2629                                l->block_ref_to->dev_state->name,
2630                                (unsigned long long)l->block_ref_to->dev_bytenr,
2631                                l->block_ref_to->mirror_num,
2632                                (unsigned long long)l->block_ref_to->generation,
2633                                (unsigned long long)l->parent_generation);
2634                         ret = -1;
2635                 } else if (l->block_ref_to->flush_gen >
2636                            l->block_ref_to->dev_state->last_flush_gen) {
2637                         printk(KERN_INFO "btrfs: attempt to write superblock"
2638                                " which references block %c @%llu (%s/%llu/%d)"
2639                                " which is not flushed out of disk's write cache"
2640                                " (block flush_gen=%llu,"
2641                                " dev->flush_gen=%llu)!\n",
2642                                btrfsic_get_block_type(state, l->block_ref_to),
2643                                (unsigned long long)
2644                                l->block_ref_to->logical_bytenr,
2645                                l->block_ref_to->dev_state->name,
2646                                (unsigned long long)l->block_ref_to->dev_bytenr,
2647                                l->block_ref_to->mirror_num,
2648                                (unsigned long long)block->flush_gen,
2649                                (unsigned long long)
2650                                l->block_ref_to->dev_state->last_flush_gen);
2651                         ret = -1;
2652                 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2653                                                               l->block_ref_to,
2654                                                               recursion_level +
2655                                                               1)) {
2656                         ret = -1;
2657                 }
2658         }
2659
2660         return ret;
2661 }
2662
2663 static int btrfsic_is_block_ref_by_superblock(
2664                 const struct btrfsic_state *state,
2665                 const struct btrfsic_block *block,
2666                 int recursion_level)
2667 {
2668         struct list_head *elem_ref_from;
2669
2670         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2671                 /* refer to comment at "abort cyclic linkage (case 1)" */
2672                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2673                         printk(KERN_INFO
2674                                "btrfsic: abort cyclic linkage (case 2).\n");
2675
2676                 return 0;
2677         }
2678
2679         /*
2680          * This algorithm is recursive because the amount of used stack space
2681          * is very small and the max recursion depth is limited.
2682          */
2683         list_for_each(elem_ref_from, &block->ref_from_list) {
2684                 const struct btrfsic_block_link *const l =
2685                     list_entry(elem_ref_from, struct btrfsic_block_link,
2686                                node_ref_from);
2687
2688                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2689                         printk(KERN_INFO
2690                                "rl=%d, %c @%llu (%s/%llu/%d)"
2691                                " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2692                                recursion_level,
2693                                btrfsic_get_block_type(state, block),
2694                                (unsigned long long)block->logical_bytenr,
2695                                block->dev_state->name,
2696                                (unsigned long long)block->dev_bytenr,
2697                                block->mirror_num,
2698                                l->ref_cnt,
2699                                btrfsic_get_block_type(state, l->block_ref_from),
2700                                (unsigned long long)
2701                                l->block_ref_from->logical_bytenr,
2702                                l->block_ref_from->dev_state->name,
2703                                (unsigned long long)
2704                                l->block_ref_from->dev_bytenr,
2705                                l->block_ref_from->mirror_num);
2706                 if (l->block_ref_from->is_superblock &&
2707                     state->latest_superblock->dev_bytenr ==
2708                     l->block_ref_from->dev_bytenr &&
2709                     state->latest_superblock->dev_state->bdev ==
2710                     l->block_ref_from->dev_state->bdev)
2711                         return 1;
2712                 else if (btrfsic_is_block_ref_by_superblock(state,
2713                                                             l->block_ref_from,
2714                                                             recursion_level +
2715                                                             1))
2716                         return 1;
2717         }
2718
2719         return 0;
2720 }
2721
2722 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2723                                    const struct btrfsic_block_link *l)
2724 {
2725         printk(KERN_INFO
2726                "Add %u* link from %c @%llu (%s/%llu/%d)"
2727                " to %c @%llu (%s/%llu/%d).\n",
2728                l->ref_cnt,
2729                btrfsic_get_block_type(state, l->block_ref_from),
2730                (unsigned long long)l->block_ref_from->logical_bytenr,
2731                l->block_ref_from->dev_state->name,
2732                (unsigned long long)l->block_ref_from->dev_bytenr,
2733                l->block_ref_from->mirror_num,
2734                btrfsic_get_block_type(state, l->block_ref_to),
2735                (unsigned long long)l->block_ref_to->logical_bytenr,
2736                l->block_ref_to->dev_state->name,
2737                (unsigned long long)l->block_ref_to->dev_bytenr,
2738                l->block_ref_to->mirror_num);
2739 }
2740
2741 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2742                                    const struct btrfsic_block_link *l)
2743 {
2744         printk(KERN_INFO
2745                "Rem %u* link from %c @%llu (%s/%llu/%d)"
2746                " to %c @%llu (%s/%llu/%d).\n",
2747                l->ref_cnt,
2748                btrfsic_get_block_type(state, l->block_ref_from),
2749                (unsigned long long)l->block_ref_from->logical_bytenr,
2750                l->block_ref_from->dev_state->name,
2751                (unsigned long long)l->block_ref_from->dev_bytenr,
2752                l->block_ref_from->mirror_num,
2753                btrfsic_get_block_type(state, l->block_ref_to),
2754                (unsigned long long)l->block_ref_to->logical_bytenr,
2755                l->block_ref_to->dev_state->name,
2756                (unsigned long long)l->block_ref_to->dev_bytenr,
2757                l->block_ref_to->mirror_num);
2758 }
2759
2760 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2761                                    const struct btrfsic_block *block)
2762 {
2763         if (block->is_superblock &&
2764             state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2765             state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2766                 return 'S';
2767         else if (block->is_superblock)
2768                 return 's';
2769         else if (block->is_metadata)
2770                 return 'M';
2771         else
2772                 return 'D';
2773 }
2774
2775 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2776 {
2777         btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2778 }
2779
2780 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2781                                   const struct btrfsic_block *block,
2782                                   int indent_level)
2783 {
2784         struct list_head *elem_ref_to;
2785         int indent_add;
2786         static char buf[80];
2787         int cursor_position;
2788
2789         /*
2790          * Should better fill an on-stack buffer with a complete line and
2791          * dump it at once when it is time to print a newline character.
2792          */
2793
2794         /*
2795          * This algorithm is recursive because the amount of used stack space
2796          * is very small and the max recursion depth is limited.
2797          */
2798         indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2799                              btrfsic_get_block_type(state, block),
2800                              (unsigned long long)block->logical_bytenr,
2801                              block->dev_state->name,
2802                              (unsigned long long)block->dev_bytenr,
2803                              block->mirror_num);
2804         if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2805                 printk("[...]\n");
2806                 return;
2807         }
2808         printk(buf);
2809         indent_level += indent_add;
2810         if (list_empty(&block->ref_to_list)) {
2811                 printk("\n");
2812                 return;
2813         }
2814         if (block->mirror_num > 1 &&
2815             !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2816                 printk(" [...]\n");
2817                 return;
2818         }
2819
2820         cursor_position = indent_level;
2821         list_for_each(elem_ref_to, &block->ref_to_list) {
2822                 const struct btrfsic_block_link *const l =
2823                     list_entry(elem_ref_to, struct btrfsic_block_link,
2824                                node_ref_to);
2825
2826                 while (cursor_position < indent_level) {
2827                         printk(" ");
2828                         cursor_position++;
2829                 }
2830                 if (l->ref_cnt > 1)
2831                         indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2832                 else
2833                         indent_add = sprintf(buf, " --> ");
2834                 if (indent_level + indent_add >
2835                     BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2836                         printk("[...]\n");
2837                         cursor_position = 0;
2838                         continue;
2839                 }
2840
2841                 printk(buf);
2842
2843                 btrfsic_dump_tree_sub(state, l->block_ref_to,
2844                                       indent_level + indent_add);
2845                 cursor_position = 0;
2846         }
2847 }
2848
2849 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2850                 struct btrfsic_state *state,
2851                 struct btrfsic_block_data_ctx *next_block_ctx,
2852                 struct btrfsic_block *next_block,
2853                 struct btrfsic_block *from_block,
2854                 u64 parent_generation)
2855 {
2856         struct btrfsic_block_link *l;
2857
2858         l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2859                                                 next_block_ctx->dev_bytenr,
2860                                                 from_block->dev_state->bdev,
2861                                                 from_block->dev_bytenr,
2862                                                 &state->block_link_hashtable);
2863         if (NULL == l) {
2864                 l = btrfsic_block_link_alloc();
2865                 if (NULL == l) {
2866                         printk(KERN_INFO
2867                                "btrfsic: error, kmalloc" " failed!\n");
2868                         return NULL;
2869                 }
2870
2871                 l->block_ref_to = next_block;
2872                 l->block_ref_from = from_block;
2873                 l->ref_cnt = 1;
2874                 l->parent_generation = parent_generation;
2875
2876                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2877                         btrfsic_print_add_link(state, l);
2878
2879                 list_add(&l->node_ref_to, &from_block->ref_to_list);
2880                 list_add(&l->node_ref_from, &next_block->ref_from_list);
2881
2882                 btrfsic_block_link_hashtable_add(l,
2883                                                  &state->block_link_hashtable);
2884         } else {
2885                 l->ref_cnt++;
2886                 l->parent_generation = parent_generation;
2887                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2888                         btrfsic_print_add_link(state, l);
2889         }
2890
2891         return l;
2892 }
2893
2894 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2895                 struct btrfsic_state *state,
2896                 struct btrfsic_block_data_ctx *block_ctx,
2897                 const char *additional_string,
2898                 int is_metadata,
2899                 int is_iodone,
2900                 int never_written,
2901                 int mirror_num,
2902                 int *was_created)
2903 {
2904         struct btrfsic_block *block;
2905
2906         block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2907                                                block_ctx->dev_bytenr,
2908                                                &state->block_hashtable);
2909         if (NULL == block) {
2910                 struct btrfsic_dev_state *dev_state;
2911
2912                 block = btrfsic_block_alloc();
2913                 if (NULL == block) {
2914                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2915                         return NULL;
2916                 }
2917                 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2918                 if (NULL == dev_state) {
2919                         printk(KERN_INFO
2920                                "btrfsic: error, lookup dev_state failed!\n");
2921                         btrfsic_block_free(block);
2922                         return NULL;
2923                 }
2924                 block->dev_state = dev_state;
2925                 block->dev_bytenr = block_ctx->dev_bytenr;
2926                 block->logical_bytenr = block_ctx->start;
2927                 block->is_metadata = is_metadata;
2928                 block->is_iodone = is_iodone;
2929                 block->never_written = never_written;
2930                 block->mirror_num = mirror_num;
2931                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2932                         printk(KERN_INFO
2933                                "New %s%c-block @%llu (%s/%llu/%d)\n",
2934                                additional_string,
2935                                btrfsic_get_block_type(state, block),
2936                                (unsigned long long)block->logical_bytenr,
2937                                dev_state->name,
2938                                (unsigned long long)block->dev_bytenr,
2939                                mirror_num);
2940                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2941                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2942                 if (NULL != was_created)
2943                         *was_created = 1;
2944         } else {
2945                 if (NULL != was_created)
2946                         *was_created = 0;
2947         }
2948
2949         return block;
2950 }
2951
2952 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2953                                            u64 bytenr,
2954                                            struct btrfsic_dev_state *dev_state,
2955                                            u64 dev_bytenr)
2956 {
2957         int num_copies;
2958         int mirror_num;
2959         int ret;
2960         struct btrfsic_block_data_ctx block_ctx;
2961         int match = 0;
2962
2963         num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
2964                                       bytenr, state->metablock_size);
2965
2966         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2967                 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2968                                         &block_ctx, mirror_num);
2969                 if (ret) {
2970                         printk(KERN_INFO "btrfsic:"
2971                                " btrfsic_map_block(logical @%llu,"
2972                                " mirror %d) failed!\n",
2973                                (unsigned long long)bytenr, mirror_num);
2974                         continue;
2975                 }
2976
2977                 if (dev_state->bdev == block_ctx.dev->bdev &&
2978                     dev_bytenr == block_ctx.dev_bytenr) {
2979                         match++;
2980                         btrfsic_release_block_ctx(&block_ctx);
2981                         break;
2982                 }
2983                 btrfsic_release_block_ctx(&block_ctx);
2984         }
2985
2986         if (!match) {
2987                 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2988                        " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2989                        " phys_bytenr=%llu)!\n",
2990                        (unsigned long long)bytenr, dev_state->name,
2991                        (unsigned long long)dev_bytenr);
2992                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2993                         ret = btrfsic_map_block(state, bytenr,
2994                                                 state->metablock_size,
2995                                                 &block_ctx, mirror_num);
2996                         if (ret)
2997                                 continue;
2998
2999                         printk(KERN_INFO "Read logical bytenr @%llu maps to"
3000                                " (%s/%llu/%d)\n",
3001                                (unsigned long long)bytenr,
3002                                block_ctx.dev->name,
3003                                (unsigned long long)block_ctx.dev_bytenr,
3004                                mirror_num);
3005                 }
3006                 WARN_ON(1);
3007         }
3008 }
3009
3010 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
3011                 struct block_device *bdev)
3012 {
3013         struct btrfsic_dev_state *ds;
3014
3015         ds = btrfsic_dev_state_hashtable_lookup(bdev,
3016                                                 &btrfsic_dev_state_hashtable);
3017         return ds;
3018 }
3019
3020 int btrfsic_submit_bh(int rw, struct buffer_head *bh)
3021 {
3022         struct btrfsic_dev_state *dev_state;
3023
3024         if (!btrfsic_is_initialized)
3025                 return submit_bh(rw, bh);
3026
3027         mutex_lock(&btrfsic_mutex);
3028         /* since btrfsic_submit_bh() might also be called before
3029          * btrfsic_mount(), this might return NULL */
3030         dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
3031
3032         /* Only called to write the superblock (incl. FLUSH/FUA) */
3033         if (NULL != dev_state &&
3034             (rw & WRITE) && bh->b_size > 0) {
3035                 u64 dev_bytenr;
3036
3037                 dev_bytenr = 4096 * bh->b_blocknr;
3038                 if (dev_state->state->print_mask &
3039                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3040                         printk(KERN_INFO
3041                                "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
3042                                " size=%lu, data=%p, bdev=%p)\n",
3043                                rw, (unsigned long)bh->b_blocknr,
3044                                (unsigned long long)dev_bytenr,
3045                                (unsigned long)bh->b_size, bh->b_data,
3046                                bh->b_bdev);
3047                 btrfsic_process_written_block(dev_state, dev_bytenr,
3048                                               &bh->b_data, 1, NULL,
3049                                               NULL, bh, rw);
3050         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3051                 if (dev_state->state->print_mask &
3052                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3053                         printk(KERN_INFO
3054                                "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
3055                                rw, bh->b_bdev);
3056                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3057                         if ((dev_state->state->print_mask &
3058                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3059                               BTRFSIC_PRINT_MASK_VERBOSE)))
3060                                 printk(KERN_INFO
3061                                        "btrfsic_submit_bh(%s) with FLUSH"
3062                                        " but dummy block already in use"
3063                                        " (ignored)!\n",
3064                                        dev_state->name);
3065                 } else {
3066                         struct btrfsic_block *const block =
3067                                 &dev_state->dummy_block_for_bio_bh_flush;
3068
3069                         block->is_iodone = 0;
3070                         block->never_written = 0;
3071                         block->iodone_w_error = 0;
3072                         block->flush_gen = dev_state->last_flush_gen + 1;
3073                         block->submit_bio_bh_rw = rw;
3074                         block->orig_bio_bh_private = bh->b_private;
3075                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
3076                         block->next_in_same_bio = NULL;
3077                         bh->b_private = block;
3078                         bh->b_end_io = btrfsic_bh_end_io;
3079                 }
3080         }
3081         mutex_unlock(&btrfsic_mutex);
3082         return submit_bh(rw, bh);
3083 }
3084
3085 void btrfsic_submit_bio(int rw, struct bio *bio)
3086 {
3087         struct btrfsic_dev_state *dev_state;
3088
3089         if (!btrfsic_is_initialized) {
3090                 submit_bio(rw, bio);
3091                 return;
3092         }
3093
3094         mutex_lock(&btrfsic_mutex);
3095         /* since btrfsic_submit_bio() is also called before
3096          * btrfsic_mount(), this might return NULL */
3097         dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
3098         if (NULL != dev_state &&
3099             (rw & WRITE) && NULL != bio->bi_io_vec) {
3100                 unsigned int i;
3101                 u64 dev_bytenr;
3102                 int bio_is_patched;
3103                 char **mapped_datav;
3104
3105                 dev_bytenr = 512 * bio->bi_sector;
3106                 bio_is_patched = 0;
3107                 if (dev_state->state->print_mask &
3108                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3109                         printk(KERN_INFO
3110                                "submit_bio(rw=0x%x, bi_vcnt=%u,"
3111                                " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
3112                                rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
3113                                (unsigned long long)dev_bytenr,
3114                                bio->bi_bdev);
3115
3116                 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3117                                        GFP_NOFS);
3118                 if (!mapped_datav)
3119                         goto leave;
3120                 for (i = 0; i < bio->bi_vcnt; i++) {
3121                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
3122                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
3123                         if (!mapped_datav[i]) {
3124                                 while (i > 0) {
3125                                         i--;
3126                                         kunmap(bio->bi_io_vec[i].bv_page);
3127                                 }
3128                                 kfree(mapped_datav);
3129                                 goto leave;
3130                         }
3131                         if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3132                              BTRFSIC_PRINT_MASK_VERBOSE) ==
3133                             (dev_state->state->print_mask &
3134                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3135                               BTRFSIC_PRINT_MASK_VERBOSE)))
3136                                 printk(KERN_INFO
3137                                        "#%u: page=%p, len=%u, offset=%u\n",
3138                                        i, bio->bi_io_vec[i].bv_page,
3139                                        bio->bi_io_vec[i].bv_len,
3140                                        bio->bi_io_vec[i].bv_offset);
3141                 }
3142                 btrfsic_process_written_block(dev_state, dev_bytenr,
3143                                               mapped_datav, bio->bi_vcnt,
3144                                               bio, &bio_is_patched,
3145                                               NULL, rw);
3146                 while (i > 0) {
3147                         i--;
3148                         kunmap(bio->bi_io_vec[i].bv_page);
3149                 }
3150                 kfree(mapped_datav);
3151         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3152                 if (dev_state->state->print_mask &
3153                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3154                         printk(KERN_INFO
3155                                "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
3156                                rw, bio->bi_bdev);
3157                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3158                         if ((dev_state->state->print_mask &
3159                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3160                               BTRFSIC_PRINT_MASK_VERBOSE)))
3161                                 printk(KERN_INFO
3162                                        "btrfsic_submit_bio(%s) with FLUSH"
3163                                        " but dummy block already in use"
3164                                        " (ignored)!\n",
3165                                        dev_state->name);
3166                 } else {
3167                         struct btrfsic_block *const block =
3168                                 &dev_state->dummy_block_for_bio_bh_flush;
3169
3170                         block->is_iodone = 0;
3171                         block->never_written = 0;
3172                         block->iodone_w_error = 0;
3173                         block->flush_gen = dev_state->last_flush_gen + 1;
3174                         block->submit_bio_bh_rw = rw;
3175                         block->orig_bio_bh_private = bio->bi_private;
3176                         block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3177                         block->next_in_same_bio = NULL;
3178                         bio->bi_private = block;
3179                         bio->bi_end_io = btrfsic_bio_end_io;
3180                 }
3181         }
3182 leave:
3183         mutex_unlock(&btrfsic_mutex);
3184
3185         submit_bio(rw, bio);
3186 }
3187
3188 int btrfsic_mount(struct btrfs_root *root,
3189                   struct btrfs_fs_devices *fs_devices,
3190                   int including_extent_data, u32 print_mask)
3191 {
3192         int ret;
3193         struct btrfsic_state *state;
3194         struct list_head *dev_head = &fs_devices->devices;
3195         struct btrfs_device *device;
3196
3197         if (root->nodesize != root->leafsize) {
3198                 printk(KERN_INFO
3199                        "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3200                        root->nodesize, root->leafsize);
3201                 return -1;
3202         }
3203         if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3204                 printk(KERN_INFO
3205                        "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3206                        root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3207                 return -1;
3208         }
3209         if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3210                 printk(KERN_INFO
3211                        "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3212                        root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3213                 return -1;
3214         }
3215         if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3216                 printk(KERN_INFO
3217                        "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3218                        root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3219                 return -1;
3220         }
3221         state = kzalloc(sizeof(*state), GFP_NOFS);
3222         if (NULL == state) {
3223                 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3224                 return -1;
3225         }
3226
3227         if (!btrfsic_is_initialized) {
3228                 mutex_init(&btrfsic_mutex);
3229                 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3230                 btrfsic_is_initialized = 1;
3231         }
3232         mutex_lock(&btrfsic_mutex);
3233         state->root = root;
3234         state->print_mask = print_mask;
3235         state->include_extent_data = including_extent_data;
3236         state->csum_size = 0;
3237         state->metablock_size = root->nodesize;
3238         state->datablock_size = root->sectorsize;
3239         INIT_LIST_HEAD(&state->all_blocks_list);
3240         btrfsic_block_hashtable_init(&state->block_hashtable);
3241         btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3242         state->max_superblock_generation = 0;
3243         state->latest_superblock = NULL;
3244
3245         list_for_each_entry(device, dev_head, dev_list) {
3246                 struct btrfsic_dev_state *ds;
3247                 char *p;
3248
3249                 if (!device->bdev || !device->name)
3250                         continue;
3251
3252                 ds = btrfsic_dev_state_alloc();
3253                 if (NULL == ds) {
3254                         printk(KERN_INFO
3255                                "btrfs check-integrity: kmalloc() failed!\n");
3256                         mutex_unlock(&btrfsic_mutex);
3257                         return -1;
3258                 }
3259                 ds->bdev = device->bdev;
3260                 ds->state = state;
3261                 bdevname(ds->bdev, ds->name);
3262                 ds->name[BDEVNAME_SIZE - 1] = '\0';
3263                 for (p = ds->name; *p != '\0'; p++);
3264                 while (p > ds->name && *p != '/')
3265                         p--;
3266                 if (*p == '/')
3267                         p++;
3268                 strlcpy(ds->name, p, sizeof(ds->name));
3269                 btrfsic_dev_state_hashtable_add(ds,
3270                                                 &btrfsic_dev_state_hashtable);
3271         }
3272
3273         ret = btrfsic_process_superblock(state, fs_devices);
3274         if (0 != ret) {
3275                 mutex_unlock(&btrfsic_mutex);
3276                 btrfsic_unmount(root, fs_devices);
3277                 return ret;
3278         }
3279
3280         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3281                 btrfsic_dump_database(state);
3282         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3283                 btrfsic_dump_tree(state);
3284
3285         mutex_unlock(&btrfsic_mutex);
3286         return 0;
3287 }
3288
3289 void btrfsic_unmount(struct btrfs_root *root,
3290                      struct btrfs_fs_devices *fs_devices)
3291 {
3292         struct list_head *elem_all;
3293         struct list_head *tmp_all;
3294         struct btrfsic_state *state;
3295         struct list_head *dev_head = &fs_devices->devices;
3296         struct btrfs_device *device;
3297
3298         if (!btrfsic_is_initialized)
3299                 return;
3300
3301         mutex_lock(&btrfsic_mutex);
3302
3303         state = NULL;
3304         list_for_each_entry(device, dev_head, dev_list) {
3305                 struct btrfsic_dev_state *ds;
3306
3307                 if (!device->bdev || !device->name)
3308                         continue;
3309
3310                 ds = btrfsic_dev_state_hashtable_lookup(
3311                                 device->bdev,
3312                                 &btrfsic_dev_state_hashtable);
3313                 if (NULL != ds) {
3314                         state = ds->state;
3315                         btrfsic_dev_state_hashtable_remove(ds);
3316                         btrfsic_dev_state_free(ds);
3317                 }
3318         }
3319
3320         if (NULL == state) {
3321                 printk(KERN_INFO
3322                        "btrfsic: error, cannot find state information"
3323                        " on umount!\n");
3324                 mutex_unlock(&btrfsic_mutex);
3325                 return;
3326         }
3327
3328         /*
3329          * Don't care about keeping the lists' state up to date,
3330          * just free all memory that was allocated dynamically.
3331          * Free the blocks and the block_links.
3332          */
3333         list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
3334                 struct btrfsic_block *const b_all =
3335                     list_entry(elem_all, struct btrfsic_block,
3336                                all_blocks_node);
3337                 struct list_head *elem_ref_to;
3338                 struct list_head *tmp_ref_to;
3339
3340                 list_for_each_safe(elem_ref_to, tmp_ref_to,
3341                                    &b_all->ref_to_list) {
3342                         struct btrfsic_block_link *const l =
3343                             list_entry(elem_ref_to,
3344                                        struct btrfsic_block_link,
3345                                        node_ref_to);
3346
3347                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3348                                 btrfsic_print_rem_link(state, l);
3349
3350                         l->ref_cnt--;
3351                         if (0 == l->ref_cnt)
3352                                 btrfsic_block_link_free(l);
3353                 }
3354
3355                 if (b_all->is_iodone || b_all->never_written)
3356                         btrfsic_block_free(b_all);
3357                 else
3358                         printk(KERN_INFO "btrfs: attempt to free %c-block"
3359                                " @%llu (%s/%llu/%d) on umount which is"
3360                                " not yet iodone!\n",
3361                                btrfsic_get_block_type(state, b_all),
3362                                (unsigned long long)b_all->logical_bytenr,
3363                                b_all->dev_state->name,
3364                                (unsigned long long)b_all->dev_bytenr,
3365                                b_all->mirror_num);
3366         }
3367
3368         mutex_unlock(&btrfsic_mutex);
3369
3370         kfree(state);
3371 }