]> git.karo-electronics.de Git - mv-sheeva.git/blob - drivers/md/bitmap.c
04df18e8885fc129a1c9ec13f76665737ea71faa
[mv-sheeva.git] / drivers / md / bitmap.c
1 /*
2  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3  *
4  * bitmap_create  - sets up the bitmap structure
5  * bitmap_destroy - destroys the bitmap structure
6  *
7  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8  * - added disk storage for bitmap
9  * - changes to allow various bitmap chunk sizes
10  */
11
12 /*
13  * Still to do:
14  *
15  * flush after percent set rather than just time based. (maybe both).
16  */
17
18 #include <linux/blkdev.h>
19 #include <linux/module.h>
20 #include <linux/errno.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/timer.h>
24 #include <linux/sched.h>
25 #include <linux/list.h>
26 #include <linux/file.h>
27 #include <linux/mount.h>
28 #include <linux/buffer_head.h>
29 #include <linux/seq_file.h>
30 #include "md.h"
31 #include "bitmap.h"
32
33 static inline char *bmname(struct bitmap *bitmap)
34 {
35         return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
36 }
37
38 /*
39  * just a placeholder - calls kmalloc for bitmap pages
40  */
41 static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
42 {
43         unsigned char *page;
44
45         page = kzalloc(PAGE_SIZE, GFP_NOIO);
46         if (!page)
47                 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
48         else
49                 pr_debug("%s: bitmap_alloc_page: allocated page at %p\n",
50                          bmname(bitmap), page);
51         return page;
52 }
53
/*
 * counterpart of bitmap_alloc_page() -- releases one counter page.
 * Trace the pointer before handing it back to the allocator.
 */
static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
{
	pr_debug("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page);
	kfree(page);
}
62
/*
 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
 *
 * 1) check to see if this page is allocated, if it's not then try to alloc
 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
 *    page pointer directly as a counter
 *
 * if we find our page, we increment the page's refcount so that it stays
 * allocated while we're using it
 *
 * Returns 0 on success (page present, hijacked, or newly installed),
 * -EINVAL for an out-of-range page, -ENOENT if absent and !create.
 * Called with bitmap->lock held; may drop and re-take it (see below).
 */
static int bitmap_checkpage(struct bitmap *bitmap,
			    unsigned long page, int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	unsigned char *mappage;

	if (page >= bitmap->pages) {
		/* This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page.
		 * It is harmless.
		 */
		return -EINVAL;
	}

	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
		return 0;

	if (bitmap->bp[page].map) /* page is already allocated, just return */
		return 0;

	if (!create)
		return -ENOENT;

	/* this page has not been allocated yet */

	/* drop the lock across the allocation (it may sleep); another
	 * thread can install or hijack this slot meanwhile, hence the
	 * re-checks after re-acquiring the lock below */
	spin_unlock_irq(&bitmap->lock);
	mappage = bitmap_alloc_page(bitmap);
	spin_lock_irq(&bitmap->lock);

	if (mappage == NULL) {
		pr_debug("%s: bitmap map page allocation failed, hijacking\n",
			 bmname(bitmap));
		/* failed - set the hijacked flag so that we can use the
		 * pointer as a counter */
		if (!bitmap->bp[page].map)
			bitmap->bp[page].hijacked = 1;
	} else if (bitmap->bp[page].map ||
		   bitmap->bp[page].hijacked) {
		/* somebody beat us to getting the page */
		bitmap_free_page(bitmap, mappage);
		return 0;
	} else {

		/* no page was in place and we have one, so install it */

		bitmap->bp[page].map = mappage;
		bitmap->missing_pages--;
	}
	return 0;
}
124
125 /* if page is completely empty, put it back on the free list, or dealloc it */
126 /* if page was hijacked, unmark the flag so it might get alloced next time */
127 /* Note: lock should be held when calling this */
128 static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
129 {
130         char *ptr;
131
132         if (bitmap->bp[page].count) /* page is still busy */
133                 return;
134
135         /* page is no longer in use, it can be released */
136
137         if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
138                 bitmap->bp[page].hijacked = 0;
139                 bitmap->bp[page].map = NULL;
140         } else {
141                 /* normal case, free the page */
142                 ptr = bitmap->bp[page].map;
143                 bitmap->bp[page].map = NULL;
144                 bitmap->missing_pages++;
145                 bitmap_free_page(bitmap, ptr);
146         }
147 }
148
149 /*
150  * bitmap file handling - read and write the bitmap file and its superblock
151  */
152
153 /*
154  * basic page I/O operations
155  */
156
/* IO operations when bitmap is stored near all superblocks */
static struct page *read_sb_page(struct mddev *mddev, loff_t offset,
				 struct page *page,
				 unsigned long index, int size)
{
	/* choose a good rdev and read the page from there */

	struct md_rdev *rdev;
	sector_t target;
	int did_alloc = 0;

	if (!page) {
		/* caller supplied no page: allocate one; remember to drop
		 * the reference again if every device read fails */
		page = alloc_page(GFP_KERNEL);
		if (!page)
			return ERR_PTR(-ENOMEM);
		did_alloc = 1;
	}

	/* try each in-sync, non-faulty device until one read succeeds */
	rdev_for_each(rdev, mddev) {
		if (! test_bit(In_sync, &rdev->flags)
		    || test_bit(Faulty, &rdev->flags))
			continue;

		/* sector address of page 'index' within the bitmap area */
		target = offset + index * (PAGE_SIZE/512);

		/* round the read up to a whole logical block of the device */
		if (sync_page_io(rdev, target,
				 roundup(size, bdev_logical_block_size(rdev->bdev)),
				 page, READ, true)) {
			page->index = index;
			attach_page_buffers(page, NULL); /* so that free_buffer will
							  * quietly no-op */
			return page;
		}
	}
	/* no usable device could supply the page */
	if (did_alloc)
		put_page(page);
	return ERR_PTR(-EIO);

}
196
static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	/* Iterate the disks of an mddev, using rcu to protect access to the
	 * linked list, and raising the refcount of devices we return to ensure
	 * they don't disappear while in use.
	 * As devices are only added or removed when raid_disk is < 0 and
	 * nr_pending is 0 and In_sync is clear, the entries we return will
	 * still be in the same position on the list when we re-enter
	 * list_for_each_continue_rcu.
	 *
	 * Pass rdev == NULL to start; pass the previous return value to
	 * continue (its pending count is dropped here).  Returns NULL when
	 * the list is exhausted.
	 */
	struct list_head *pos;
	rcu_read_lock();
	if (rdev == NULL)
		/* start at the beginning */
		pos = &mddev->disks;
	else {
		/* release the previous rdev and start from there. */
		rdev_dec_pending(rdev, mddev);
		pos = &rdev->same_set;
	}
	list_for_each_continue_rcu(pos, &mddev->disks) {
		rdev = list_entry(pos, struct md_rdev, same_set);
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* this is a usable device */
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			return rdev;
		}
	}
	rcu_read_unlock();
	return NULL;
}
230
/* Write one bitmap page to its location on every active member device.
 * Returns 0 on success, -EINVAL if on some device the bitmap would
 * overlap the data or metadata regions. */
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct md_rdev *rdev = NULL;
	struct block_device *bdev;
	struct mddev *mddev = bitmap->mddev;

	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
		int size = PAGE_SIZE;
		loff_t offset = mddev->bitmap_info.offset;

		bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;

		/* the last page may be partial: shrink the write to the
		 * used bytes, rounded to a whole logical block */
		if (page->index == bitmap->file_pages-1)
			size = roundup(bitmap->last_page_size,
				       bdev_logical_block_size(bdev));
		/* Just make sure we aren't corrupting data or
		 * metadata
		 */
		if (mddev->external) {
			/* Bitmap could be anywhere. */
			if (rdev->sb_start + offset + (page->index
						       * (PAGE_SIZE/512))
			    > rdev->data_offset
			    &&
			    rdev->sb_start + offset
			    < (rdev->data_offset + mddev->dev_sectors
			     + (PAGE_SIZE/512)))
				goto bad_alignment;
		} else if (offset < 0) {
			/* DATA  BITMAP METADATA  */
			if (offset
			    + (long)(page->index * (PAGE_SIZE/512))
			    + size/512 > 0)
				/* bitmap runs in to metadata */
				goto bad_alignment;
			if (rdev->data_offset + mddev->dev_sectors
			    > rdev->sb_start + offset)
				/* data runs in to bitmap */
				goto bad_alignment;
		} else if (rdev->sb_start < rdev->data_offset) {
			/* METADATA BITMAP DATA */
			if (rdev->sb_start
			    + offset
			    + page->index*(PAGE_SIZE/512) + size/512
			    > rdev->data_offset)
				/* bitmap runs in to data */
				goto bad_alignment;
		} else {
			/* DATA METADATA BITMAP - no problems */
		}
		md_super_write(mddev, rdev,
			       rdev->sb_start + offset
			       + page->index * (PAGE_SIZE/512),
			       size,
			       page);
	}

	if (wait)
		md_super_wait(mddev);
	return 0;

 bad_alignment:
	return -EINVAL;
}
295
296 static void bitmap_file_kick(struct bitmap *bitmap);
/*
 * write out a page to a file
 *
 * Internal bitmaps go through write_sb_page(); file-backed bitmaps are
 * written by submitting the buffer_heads attached in read_page(),
 * completely bypassing the filesystem.  On any recorded write error the
 * bitmap file is kicked from the array.
 */
static void write_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct buffer_head *bh;

	if (bitmap->file == NULL) {
		/* internal bitmap, stored near the md superblocks */
		switch (write_sb_page(bitmap, page, wait)) {
		case -EINVAL:
			bitmap->flags |= BITMAP_WRITE_ERROR;
		}
	} else {

		bh = page_buffers(page);

		/* b_blocknr == 0 marks buffers past the mapped data
		 * (see read_page), so stop there */
		while (bh && bh->b_blocknr) {
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(WRITE | REQ_SYNC, bh);
			bh = bh->b_this_page;
		}

		if (wait)
			wait_event(bitmap->write_wait,
				   atomic_read(&bitmap->pending_writes)==0);
	}
	if (bitmap->flags & BITMAP_WRITE_ERROR)
		bitmap_file_kick(bitmap);
}
328
/* I/O completion callback for bitmap buffer_heads (both the read in
 * read_page() and the writes in write_page()).  Records failures under
 * the irq-safe bitmap lock and wakes waiters when the last I/O finishes. */
static void end_bitmap_write(struct buffer_head *bh, int uptodate)
{
	struct bitmap *bitmap = bh->b_private;
	unsigned long flags;

	if (!uptodate) {
		spin_lock_irqsave(&bitmap->lock, flags);
		bitmap->flags |= BITMAP_WRITE_ERROR;
		spin_unlock_irqrestore(&bitmap->lock, flags);
	}
	if (atomic_dec_and_test(&bitmap->pending_writes))
		wake_up(&bitmap->write_wait);
}
342
/* copied from buffer.c */
/* Detach the buffer list from a page and drop the reference that
 * PagePrivate held on it. */
static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}
351 static void free_buffers(struct page *page)
352 {
353         struct buffer_head *bh = page_buffers(page);
354
355         while (bh) {
356                 struct buffer_head *next = bh->b_this_page;
357                 free_buffer_head(bh);
358                 bh = next;
359         }
360         __clear_page_buffers(page);
361         put_page(page);
362 }
363
/* read a page from a file.
 * We both read the page, and attach buffers to the page to record the
 * address of each block (using bmap).  These addresses will be used
 * to write the block later, completely bypassing the filesystem.
 * This usage is similar to how swap files are handled, and allows us
 * to write to a file with no concerns of memory allocation failing.
 *
 * @count is the number of bytes of real data in the page; buffers past
 * it get b_blocknr == 0 so write_page() skips them.
 * Returns the page, or an ERR_PTR on failure.
 */
static struct page *read_page(struct file *file, unsigned long index,
			      struct bitmap *bitmap,
			      unsigned long count)
{
	struct page *page = NULL;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct buffer_head *bh;
	sector_t block;

	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
		 (unsigned long long)index << PAGE_SHIFT);

	page = alloc_page(GFP_KERNEL);
	if (!page)
		page = ERR_PTR(-ENOMEM);
	if (IS_ERR(page))
		goto out;

	/* one buffer per filesystem block within the page */
	bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
	if (!bh) {
		put_page(page);
		page = ERR_PTR(-ENOMEM);
		goto out;
	}
	attach_page_buffers(page, bh);
	/* first filesystem block covered by this page */
	block = index << (PAGE_SHIFT - inode->i_blkbits);
	while (bh) {
		if (count == 0)
			/* past the data: mark unused (write_page stops here) */
			bh->b_blocknr = 0;
		else {
			/* resolve the on-disk block so later writes can
			 * bypass the filesystem entirely */
			bh->b_blocknr = bmap(inode, block);
			if (bh->b_blocknr == 0) {
				/* Cannot use this file! */
				free_buffers(page);
				page = ERR_PTR(-EINVAL);
				goto out;
			}
			bh->b_bdev = inode->i_sb->s_bdev;
			if (count < (1<<inode->i_blkbits))
				count = 0;
			else
				count -= (1<<inode->i_blkbits);

			bh->b_end_io = end_bitmap_write;
			bh->b_private = bitmap;
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(READ, bh);
		}
		block++;
		bh = bh->b_this_page;
	}
	page->index = index;

	/* wait for all submitted reads to complete */
	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes)==0);
	if (bitmap->flags & BITMAP_WRITE_ERROR) {
		free_buffers(page);
		page = ERR_PTR(-EIO);
	}
out:
	if (IS_ERR(page))
		printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n",
			(int)PAGE_SIZE,
			(unsigned long long)index << PAGE_SHIFT,
			PTR_ERR(page));
	return page;
}
440
441 /*
442  * bitmap file superblock operations
443  */
444
/* update the event counter and sync the superblock to disk */
void bitmap_update_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;
	unsigned long flags;

	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
		return;
	if (bitmap->mddev->bitmap_info.external)
		return;		/* superblock is managed outside md */
	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* no superblock */
		spin_unlock_irqrestore(&bitmap->lock, flags);
		return;
	}
	/* NOTE(review): sb_page is dereferenced below after dropping the
	 * lock - presumably callers guarantee it cannot be torn down
	 * concurrently; confirm against bitmap_destroy paths */
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = kmap_atomic(bitmap->sb_page, KM_USER0);
	sb->events = cpu_to_le64(bitmap->mddev->events);
	if (bitmap->mddev->events < bitmap->events_cleared)
		/* rocking back to read-only */
		bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
	sb->state = cpu_to_le32(bitmap->flags);
	/* Just in case these have been changed via sysfs: */
	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
	kunmap_atomic(sb, KM_USER0);
	write_page(bitmap, bitmap->sb_page, 1);
}
474
/* print out the bitmap file superblock */
/* Debug helper: dumps every superblock field at KERN_DEBUG.  Safe to call
 * with a NULL bitmap or before the superblock page is read. */
void bitmap_print_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->sb_page)
		return;
	sb = kmap_atomic(bitmap->sb_page, KM_USER0);
	printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
	printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
	printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
	printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
					*(__u32 *)(sb->uuid+0),
					*(__u32 *)(sb->uuid+4),
					*(__u32 *)(sb->uuid+8),
					*(__u32 *)(sb->uuid+12));
	printk(KERN_DEBUG "        events: %llu\n",
			(unsigned long long) le64_to_cpu(sb->events));
	printk(KERN_DEBUG "events cleared: %llu\n",
			(unsigned long long) le64_to_cpu(sb->events_cleared));
	printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
	printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
	printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
	printk(KERN_DEBUG "     sync size: %llu KB\n",
			(unsigned long long)le64_to_cpu(sb->sync_size)/2);
	printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
	kunmap_atomic(sb, KM_USER0);
}
503
504 /*
505  * bitmap_new_disk_sb
506  * @bitmap
507  *
508  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
509  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
510  * This function verifies 'bitmap_info' and populates the on-disk bitmap
511  * structure, which is to be written to disk.
512  *
513  * Returns: 0 on success, -Exxx on error
514  */
515 static int bitmap_new_disk_sb(struct bitmap *bitmap)
516 {
517         bitmap_super_t *sb;
518         unsigned long chunksize, daemon_sleep, write_behind;
519         int err = -EINVAL;
520
521         bitmap->sb_page = alloc_page(GFP_KERNEL);
522         if (IS_ERR(bitmap->sb_page)) {
523                 err = PTR_ERR(bitmap->sb_page);
524                 bitmap->sb_page = NULL;
525                 return err;
526         }
527         bitmap->sb_page->index = 0;
528
529         sb = kmap_atomic(bitmap->sb_page, KM_USER0);
530
531         sb->magic = cpu_to_le32(BITMAP_MAGIC);
532         sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
533
534         chunksize = bitmap->mddev->bitmap_info.chunksize;
535         BUG_ON(!chunksize);
536         if (!is_power_of_2(chunksize)) {
537                 kunmap_atomic(sb, KM_USER0);
538                 printk(KERN_ERR "bitmap chunksize not a power of 2\n");
539                 return -EINVAL;
540         }
541         sb->chunksize = cpu_to_le32(chunksize);
542
543         daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
544         if (!daemon_sleep ||
545             (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
546                 printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
547                 daemon_sleep = 5 * HZ;
548         }
549         sb->daemon_sleep = cpu_to_le32(daemon_sleep);
550         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
551
552         /*
553          * FIXME: write_behind for RAID1.  If not specified, what
554          * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
555          */
556         write_behind = bitmap->mddev->bitmap_info.max_write_behind;
557         if (write_behind > COUNTER_MAX)
558                 write_behind = COUNTER_MAX / 2;
559         sb->write_behind = cpu_to_le32(write_behind);
560         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
561
562         /* keep the array size field of the bitmap superblock up to date */
563         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
564
565         memcpy(sb->uuid, bitmap->mddev->uuid, 16);
566
567         bitmap->flags |= BITMAP_STALE;
568         sb->state |= cpu_to_le32(BITMAP_STALE);
569         bitmap->events_cleared = bitmap->mddev->events;
570         sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
571
572         bitmap->flags |= BITMAP_HOSTENDIAN;
573         sb->version = cpu_to_le32(BITMAP_MAJOR_HOSTENDIAN);
574
575         kunmap_atomic(sb, KM_USER0);
576
577         return 0;
578 }
579
/* read the superblock from the bitmap file and initialize some bitmap fields */
/* Returns 0 on success and populates mddev->bitmap_info from the on-disk
 * superblock; returns -EINVAL (after printing the sb) on any validation
 * failure, or the read error from read_page/read_sb_page. */
static int bitmap_read_sb(struct bitmap *bitmap)
{
	char *reason = NULL;
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;
	unsigned long long events;
	int err = -EINVAL;

	/* page 0 is the superblock, read it... */
	if (bitmap->file) {
		/* file may be shorter than a page - read only what exists */
		loff_t isize = i_size_read(bitmap->file->f_mapping->host);
		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;

		bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes);
	} else {
		bitmap->sb_page = read_sb_page(bitmap->mddev,
					       bitmap->mddev->bitmap_info.offset,
					       NULL,
					       0, sizeof(bitmap_super_t));
	}
	if (IS_ERR(bitmap->sb_page)) {
		err = PTR_ERR(bitmap->sb_page);
		bitmap->sb_page = NULL;
		return err;
	}

	sb = kmap_atomic(bitmap->sb_page, KM_USER0);

	chunksize = le32_to_cpu(sb->chunksize);
	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
	write_behind = le32_to_cpu(sb->write_behind);

	/* verify that the bitmap-specific fields are valid */
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
		reason = "bad magic";
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
		reason = "unrecognized superblock version";
	else if (chunksize < 512)
		reason = "bitmap chunksize too small";
	else if (!is_power_of_2(chunksize))
		reason = "bitmap chunksize not a power of 2";
	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
		reason = "daemon sleep period out of range";
	else if (write_behind > COUNTER_MAX)
		reason = "write-behind limit out of range (0 - 16383)";
	if (reason) {
		printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
			bmname(bitmap), reason);
		goto out;
	}

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	if (!bitmap->mddev->persistent)
		goto success;

	/*
	 * if we have a persistent array superblock, compare the
	 * bitmap's UUID and event counter to the mddev's
	 */
	if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
		printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n",
			bmname(bitmap));
		goto out;
	}
	events = le64_to_cpu(sb->events);
	if (events < bitmap->mddev->events) {
		printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) "
			"-- forcing full recovery\n", bmname(bitmap), events,
			(unsigned long long) bitmap->mddev->events);
		sb->state |= cpu_to_le32(BITMAP_STALE);
	}
success:
	/* assign fields using values from superblock */
	bitmap->mddev->bitmap_info.chunksize = chunksize;
	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
	bitmap->flags |= le32_to_cpu(sb->state);
	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
		bitmap->flags |= BITMAP_HOSTENDIAN;
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
	if (bitmap->flags & BITMAP_STALE)
		/* stale bitmap: everything will be resynced anyway */
		bitmap->events_cleared = bitmap->mddev->events;
	err = 0;
out:
	kunmap_atomic(sb, KM_USER0);
	if (err)
		bitmap_print_sb(bitmap);
	return err;
}
673
/* operation selector for bitmap_mask_state(): set or clear state bits */
enum bitmap_mask_op {
	MASK_SET,
	MASK_UNSET
};
678
/* record the state of the bitmap in the superblock.  Return the old value */
/* The change is made both to sb->state (on-disk copy) and bitmap->flags
 * (in-core copy); returns the previous value of the requested bits, or 0
 * when there is no superblock page. */
static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
			     enum bitmap_mask_op op)
{
	bitmap_super_t *sb;
	unsigned long flags;
	int old;

	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* can't set the state */
		spin_unlock_irqrestore(&bitmap->lock, flags);
		return 0;
	}
	/* NOTE(review): sb_page is used below after dropping the lock -
	 * same pattern as bitmap_update_sb(); confirm callers serialize
	 * against teardown */
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = kmap_atomic(bitmap->sb_page, KM_USER0);
	old = le32_to_cpu(sb->state) & bits;
	switch (op) {
	case MASK_SET:
		sb->state |= cpu_to_le32(bits);
		bitmap->flags |= bits;
		break;
	case MASK_UNSET:
		sb->state &= cpu_to_le32(~bits);
		bitmap->flags &= ~bits;
		break;
	default:
		BUG();
	}
	kunmap_atomic(sb, KM_USER0);
	return old;
}
710
711 /*
712  * general bitmap file operations
713  */
714
715 /*
716  * on-disk bitmap:
717  *
718  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
719  * file a page at a time. There's a superblock at the start of the file.
720  */
721 /* calculate the index of the page that contains this bit */
722 static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
723 {
724         if (!bitmap->mddev->bitmap_info.external)
725                 chunk += sizeof(bitmap_super_t) << 3;
726         return chunk >> PAGE_BIT_SHIFT;
727 }
728
729 /* calculate the (bit) offset of this bit within a page */
730 static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
731 {
732         if (!bitmap->mddev->bitmap_info.external)
733                 chunk += sizeof(bitmap_super_t) << 3;
734         return chunk & (PAGE_BITS - 1);
735 }
736
737 /*
738  * return a pointer to the page in the filemap that contains the given bit
739  *
740  * this lookup is complicated by the fact that the bitmap sb might be exactly
741  * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
742  * 0 or page 1
743  */
744 static inline struct page *filemap_get_page(struct bitmap *bitmap,
745                                             unsigned long chunk)
746 {
747         if (file_page_index(bitmap, chunk) >= bitmap->file_pages)
748                 return NULL;
749         return bitmap->filemap[file_page_index(bitmap, chunk)
750                                - file_page_index(bitmap, 0)];
751 }
752
/* Tear down the in-core filemap: detach all pointers under the lock so
 * concurrent users see a consistent empty state, then free the pages and
 * attribute array outside the lock. */
static void bitmap_file_unmap(struct bitmap *bitmap)
{
	struct page **map, *sb_page;
	unsigned long *attr;
	int pages;
	unsigned long flags;

	/* snapshot-and-clear everything atomically */
	spin_lock_irqsave(&bitmap->lock, flags);
	map = bitmap->filemap;
	bitmap->filemap = NULL;
	attr = bitmap->filemap_attr;
	bitmap->filemap_attr = NULL;
	pages = bitmap->file_pages;
	bitmap->file_pages = 0;
	sb_page = bitmap->sb_page;
	bitmap->sb_page = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);

	while (pages--)
		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
			free_buffers(map[pages]);
	kfree(map);
	kfree(attr);

	if (sb_page)
		free_buffers(sb_page);
}
780
/* Detach and release the bitmap file: wait for in-flight writes, unmap
 * the filemap, then invalidate the file's page cache and drop the file
 * reference. */
static void bitmap_file_put(struct bitmap *bitmap)
{
	struct file *file;
	unsigned long flags;

	spin_lock_irqsave(&bitmap->lock, flags);
	file = bitmap->file;
	bitmap->file = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);

	/* let outstanding buffer I/O drain before freeing the pages */
	if (file)
		wait_event(bitmap->write_wait,
			   atomic_read(&bitmap->pending_writes)==0);
	bitmap_file_unmap(bitmap);

	if (file) {
		struct inode *inode = file->f_path.dentry->d_inode;
		/* drop any cached pages so a re-open sees fresh data */
		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		fput(file);
	}
}
802
803 /*
804  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
805  * then it is no longer reliable, so we stop using it and we mark the file
806  * as failed in the superblock
807  */
808 static void bitmap_file_kick(struct bitmap *bitmap)
809 {
810         char *path, *ptr = NULL;
811
812         if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) {
813                 bitmap_update_sb(bitmap);
814
815                 if (bitmap->file) {
816                         path = kmalloc(PAGE_SIZE, GFP_KERNEL);
817                         if (path)
818                                 ptr = d_path(&bitmap->file->f_path, path,
819                                              PAGE_SIZE);
820
821                         printk(KERN_ALERT
822                               "%s: kicking failed bitmap file %s from array!\n",
823                               bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
824
825                         kfree(path);
826                 } else
827                         printk(KERN_ALERT
828                                "%s: disabling internal bitmap due to errors\n",
829                                bmname(bitmap));
830         }
831
832         bitmap_file_put(bitmap);
833
834         return;
835 }
836
/*
 * Per-page state flags kept in bitmap->filemap_attr: each filemap page
 * owns 4 consecutive bits there (indexed by page->index << 2), of which
 * three are currently used.
 */
enum bitmap_page_attr {
        BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
        BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
                                    * i.e. counter is 1 or 2. */
        BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
};
843
844 static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
845                                 enum bitmap_page_attr attr)
846 {
847         __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
848 }
849
850 static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
851                                 enum bitmap_page_attr attr)
852 {
853         __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
854 }
855
856 static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
857                                            enum bitmap_page_attr attr)
858 {
859         return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
860 }
861
862 /*
863  * bitmap_file_set_bit -- called before performing a write to the md device
864  * to set (and eventually sync) a particular bit in the bitmap file
865  *
866  * we set the bit immediately, then we record the page number so that
867  * when an unplug occurs, we can flush the dirty pages out to disk
868  */
869 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
870 {
871         unsigned long bit;
872         struct page *page;
873         void *kaddr;
874         unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
875
876         if (!bitmap->filemap)
877                 return;
878
879         page = filemap_get_page(bitmap, chunk);
880         if (!page)
881                 return;
882         bit = file_page_offset(bitmap, chunk);
883
884         /* set the bit */
885         kaddr = kmap_atomic(page, KM_USER0);
886         if (bitmap->flags & BITMAP_HOSTENDIAN)
887                 set_bit(bit, kaddr);
888         else
889                 __set_bit_le(bit, kaddr);
890         kunmap_atomic(kaddr, KM_USER0);
891         pr_debug("set file bit %lu page %lu\n", bit, page->index);
892         /* record page number so it gets flushed to disk when unplug occurs */
893         set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
894 }
895
/* this gets called when the md device is ready to unplug its underlying
 * (slave) device queues -- before we let any writes go down, we need to
 * sync the dirty pages of the bitmap file to disk */
void bitmap_unplug(struct bitmap *bitmap)
{
        unsigned long i, flags;
        int dirty, need_write;
        struct page *page;
        int wait = 0;

        if (!bitmap)
                return;

        /* look at each page to see if there are any set bits that need to be
         * flushed out to disk */
        for (i = 0; i < bitmap->file_pages; i++) {
                /* lock is taken per iteration so bitmap_file_unmap() can
                 * run between pages; an empty filemap means shutdown */
                spin_lock_irqsave(&bitmap->lock, flags);
                if (!bitmap->filemap) {
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        return;
                }
                page = bitmap->filemap[i];
                dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
                need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
                clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
                clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
                /* only DIRTY pages (newly set bits) must reach stable
                 * storage before the data writes go down */
                if (dirty)
                        wait = 1;
                spin_unlock_irqrestore(&bitmap->lock, flags);

                if (dirty || need_write)
                        write_page(bitmap, page, 0);
        }
        if (wait) { /* if any writes were performed, we need to wait on them */
                if (bitmap->file)
                        wait_event(bitmap->write_wait,
                                   atomic_read(&bitmap->pending_writes)==0);
                else
                        md_super_wait(bitmap->mddev);
        }
        if (bitmap->flags & BITMAP_WRITE_ERROR)
                bitmap_file_kick(bitmap);
}
EXPORT_SYMBOL(bitmap_unplug);
940
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/*
 * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 * memory mapping of the bitmap file
 * Special cases:
 *   if there's no bitmap file, or if the bitmap file had been
 *   previously kicked from the array, we mark all the bits as
 *   1's in order to cause a full resync.
 *
 * We ignore all bits for sectors that end earlier than 'start'.
 * This is used when reading an out-of-date bitmap...
 */
static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
        unsigned long i, chunks, index, oldindex, bit;
        struct page *page = NULL, *oldpage = NULL;
        unsigned long num_pages, bit_cnt = 0;
        struct file *file;
        unsigned long bytes, offset;
        int outofdate;
        int ret = -ENOSPC;
        void *paddr;

        chunks = bitmap->chunks;
        file = bitmap->file;

        /* storage must be either a file or an in-superblock offset */
        BUG_ON(!file && !bitmap->mddev->bitmap_info.offset);

        outofdate = bitmap->flags & BITMAP_STALE;
        if (outofdate)
                printk(KERN_INFO "%s: bitmap file is out of date, doing full "
                        "recovery\n", bmname(bitmap));

        /* one bit per chunk, plus (for an internal bitmap) the superblock */
        bytes = DIV_ROUND_UP(bitmap->chunks, 8);
        if (!bitmap->mddev->bitmap_info.external)
                bytes += sizeof(bitmap_super_t);

        num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);

        if (file && i_size_read(file->f_mapping->host) < bytes) {
                printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
                        bmname(bitmap),
                        (unsigned long) i_size_read(file->f_mapping->host),
                        bytes);
                goto err;
        }

        ret = -ENOMEM;

        bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
        if (!bitmap->filemap)
                goto err;

        /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */
        bitmap->filemap_attr = kzalloc(
                roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
                GFP_KERNEL);
        if (!bitmap->filemap_attr)
                goto err;

        oldindex = ~0L;

        /* walk every chunk, reading each backing page once */
        for (i = 0; i < chunks; i++) {
                int b;
                index = file_page_index(bitmap, i);
                bit = file_page_offset(bitmap, i);
                if (index != oldindex) { /* this is a new page, read it in */
                        int count;
                        /* the last page may be partially used */
                        if (index == num_pages-1)
                                count = bytes - index * PAGE_SIZE;
                        else
                                count = PAGE_SIZE;
                        if (index == 0 && bitmap->sb_page) {
                                /*
                                 * if we're here then the superblock page
                                 * contains some bits (PAGE_SIZE != sizeof sb)
                                 * we've already read it in, so just use it
                                 */
                                page = bitmap->sb_page;
                                offset = sizeof(bitmap_super_t);
                                if (!file)
                                        page = read_sb_page(
                                                bitmap->mddev,
                                                bitmap->mddev->bitmap_info.offset,
                                                page,
                                                index, count);
                        } else if (file) {
                                page = read_page(file, index, bitmap, count);
                                offset = 0;
                        } else {
                                page = read_sb_page(bitmap->mddev,
                                                    bitmap->mddev->bitmap_info.offset,
                                                    NULL,
                                                    index, count);
                                offset = 0;
                        }
                        if (IS_ERR(page)) { /* read error */
                                ret = PTR_ERR(page);
                                goto err;
                        }

                        oldindex = index;
                        oldpage = page;

                        bitmap->filemap[bitmap->file_pages++] = page;
                        bitmap->last_page_size = count;

                        if (outofdate) {
                                /*
                                 * if bitmap is out of date, dirty the
                                 * whole page and write it out
                                 */
                                paddr = kmap_atomic(page, KM_USER0);
                                memset(paddr + offset, 0xff,
                                       PAGE_SIZE - offset);
                                kunmap_atomic(paddr, KM_USER0);
                                write_page(bitmap, page, 1);

                                ret = -EIO;
                                if (bitmap->flags & BITMAP_WRITE_ERROR)
                                        goto err;
                        }
                }
                /* test the on-disk bit for this chunk */
                paddr = kmap_atomic(page, KM_USER0);
                if (bitmap->flags & BITMAP_HOSTENDIAN)
                        b = test_bit(bit, paddr);
                else
                        b = test_bit_le(bit, paddr);
                kunmap_atomic(paddr, KM_USER0);
                if (b) {
                        /* if the disk bit is set, set the memory bit */
                        int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
                                      >= start);
                        bitmap_set_memory_bits(bitmap,
                                               (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
                                               needed);
                        bit_cnt++;
                }
        }

        /* everything went OK */
        ret = 0;
        bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);

        if (bit_cnt) { /* Kick recovery if any bits were set */
                set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
                md_wakeup_thread(bitmap->mddev->thread);
        }

        printk(KERN_INFO "%s: bitmap initialized from disk: "
               "read %lu/%lu pages, set %lu of %lu bits\n",
               bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, chunks);

        return 0;

 err:
        printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
               bmname(bitmap), ret);
        return ret;
}
1102
1103 void bitmap_write_all(struct bitmap *bitmap)
1104 {
1105         /* We don't actually write all bitmap blocks here,
1106          * just flag them as needing to be written
1107          */
1108         int i;
1109
1110         spin_lock_irq(&bitmap->lock);
1111         for (i = 0; i < bitmap->file_pages; i++)
1112                 set_page_attr(bitmap, bitmap->filemap[i],
1113                               BITMAP_PAGE_NEEDWRITE);
1114         bitmap->allclean = 0;
1115         spin_unlock_irq(&bitmap->lock);
1116 }
1117
1118 static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
1119 {
1120         sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
1121         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1122         bitmap->bp[page].count += inc;
1123         bitmap_checkfree(bitmap, page);
1124 }
1125 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
1126                                             sector_t offset, sector_t *blocks,
1127                                             int create);
1128
/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 *                      out to disk
 */

void bitmap_daemon_work(struct mddev *mddev)
{
        struct bitmap *bitmap;
        unsigned long j;
        unsigned long flags;
        struct page *page = NULL, *lastpage = NULL;
        sector_t blocks;
        void *paddr;

        /* Use a mutex to guard daemon_work against
         * bitmap_destroy.
         */
        mutex_lock(&mddev->bitmap_info.mutex);
        bitmap = mddev->bitmap;
        if (bitmap == NULL) {
                mutex_unlock(&mddev->bitmap_info.mutex);
                return;
        }
        if (time_before(jiffies, bitmap->daemon_lastrun
                        + mddev->bitmap_info.daemon_sleep))
                goto done;

        bitmap->daemon_lastrun = jiffies;
        if (bitmap->allclean) {
                /* nothing left to clean: sleep until new work arrives */
                mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
                goto done;
        }
        /* assume this pass cleans everything; cleared below if it doesn't */
        bitmap->allclean = 1;

        spin_lock_irqsave(&bitmap->lock, flags);
        for (j = 0; j < bitmap->chunks; j++) {
                bitmap_counter_t *bmc;
                if (!bitmap->filemap)
                        /* error or shutdown */
                        break;

                page = filemap_get_page(bitmap, j);

                if (page != lastpage) {
                        /* skip this page unless it's marked as needing cleaning */
                        if (!test_page_attr(bitmap, page, BITMAP_PAGE_PENDING)) {
                                int need_write = test_page_attr(bitmap, page,
                                                                BITMAP_PAGE_NEEDWRITE);
                                if (need_write)
                                        clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);

                                /* write_page may sleep: drop the lock */
                                spin_unlock_irqrestore(&bitmap->lock, flags);
                                if (need_write)
                                        write_page(bitmap, page, 0);
                                spin_lock_irqsave(&bitmap->lock, flags);
                                /* advance j to the last chunk of this page */
                                j |= (PAGE_BITS - 1);
                                continue;
                        }

                        /* grab the new page, sync and release the old */
                        if (lastpage != NULL) {
                                if (test_page_attr(bitmap, lastpage,
                                                   BITMAP_PAGE_NEEDWRITE)) {
                                        clear_page_attr(bitmap, lastpage,
                                                        BITMAP_PAGE_NEEDWRITE);
                                        spin_unlock_irqrestore(&bitmap->lock, flags);
                                        write_page(bitmap, lastpage, 0);
                                } else {
                                        /* defer the write one more pass */
                                        set_page_attr(bitmap, lastpage,
                                                      BITMAP_PAGE_NEEDWRITE);
                                        bitmap->allclean = 0;
                                        spin_unlock_irqrestore(&bitmap->lock, flags);
                                }
                        } else
                                spin_unlock_irqrestore(&bitmap->lock, flags);
                        lastpage = page;

                        /* We are possibly going to clear some bits, so make
                         * sure that events_cleared is up-to-date.
                         */
                        if (bitmap->need_sync &&
                            mddev->bitmap_info.external == 0) {
                                bitmap_super_t *sb;
                                bitmap->need_sync = 0;
                                sb = kmap_atomic(bitmap->sb_page, KM_USER0);
                                sb->events_cleared =
                                        cpu_to_le64(bitmap->events_cleared);
                                kunmap_atomic(sb, KM_USER0);
                                write_page(bitmap, bitmap->sb_page, 1);
                        }
                        spin_lock_irqsave(&bitmap->lock, flags);
                        if (!bitmap->need_sync)
                                clear_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
                        else
                                bitmap->allclean = 0;
                }
                bmc = bitmap_get_counter(bitmap,
                                         (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
                                         &blocks, 0);
                if (!bmc)
                        j |= PAGE_COUNTER_MASK;
                else if (*bmc) {
                        if (*bmc == 1 && !bitmap->need_sync) {
                                /* we can clear the bit */
                                *bmc = 0;
                                bitmap_count_page(bitmap,
                                                  (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
                                                  -1);

                                /* clear the bit */
                                paddr = kmap_atomic(page, KM_USER0);
                                if (bitmap->flags & BITMAP_HOSTENDIAN)
                                        clear_bit(file_page_offset(bitmap, j),
                                                  paddr);
                                else
                                        __clear_bit_le(
                                                file_page_offset(bitmap,
                                                                 j),
                                                paddr);
                                kunmap_atomic(paddr, KM_USER0);
                        } else if (*bmc <= 2) {
                                *bmc = 1; /* maybe clear the bit next time */
                                set_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
                                bitmap->allclean = 0;
                        }
                }
        }
        spin_unlock_irqrestore(&bitmap->lock, flags);

        /* now sync the final page */
        if (lastpage != NULL) {
                spin_lock_irqsave(&bitmap->lock, flags);
                if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
                        clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        write_page(bitmap, lastpage, 0);
                } else {
                        set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
                        bitmap->allclean = 0;
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                }
        }

 done:
        if (bitmap->allclean == 0)
                mddev->thread->timeout =
                        mddev->bitmap_info.daemon_sleep;
        mutex_unlock(&mddev->bitmap_info.mutex);
}
1278
/*
 * Return a pointer to the counter for the chunk containing @offset and
 * set *blocks to the number of blocks that answer covers.
 * @create: allocate the counter page if missing (may drop and re-take
 * bitmap->lock inside bitmap_checkpage()).
 * Returns NULL if the counter does not exist and could not be created.
 */
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
                                            sector_t offset, sector_t *blocks,
                                            int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
        /* If 'create', we might release the lock and reclaim it.
         * The lock must have been taken with interrupts enabled.
         * If !create, we don't release the lock.
         */
        sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
        unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
        unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
        sector_t csize;
        int err;

        err = bitmap_checkpage(bitmap, page, create);

        /* a hijacked or missing page packs a whole page's worth of
         * chunks into two counters, so each covers a larger span */
        if (bitmap->bp[page].hijacked ||
            bitmap->bp[page].map == NULL)
                csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
                                          PAGE_COUNTER_SHIFT - 1);
        else
                csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
        *blocks = csize - (offset & (csize - 1));

        if (err < 0)
                return NULL;

        /* now locked ... */

        if (bitmap->bp[page].hijacked) { /* hijacked pointer */
                /* should we use the first or second counter field
                 * of the hijacked pointer? */
                int hi = (pageoff > PAGE_COUNTER_MASK);
                return  &((bitmap_counter_t *)
                          &bitmap->bp[page].map)[hi];
        } else /* page is allocated */
                return (bitmap_counter_t *)
                        &(bitmap->bp[page].map[pageoff]);
}
1320
/*
 * bitmap_startwrite -- account for the start of a write of @sectors
 * blocks at @offset: set the on-disk bit for each chunk touched and
 * bump the in-memory counters.  May sleep when a counter is saturated.
 * @behind: this is a write-behind request; track it against the limit.
 * Always returns 0.
 */
int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
{
        if (!bitmap)
                return 0;

        if (behind) {
                int bw;
                atomic_inc(&bitmap->behind_writes);
                bw = atomic_read(&bitmap->behind_writes);
                /* remember the high-water mark for reporting */
                if (bw > bitmap->behind_writes_used)
                        bitmap->behind_writes_used = bw;

                pr_debug("inc write-behind count %d/%lu\n",
                         bw, bitmap->mddev->bitmap_info.max_write_behind);
        }

        while (sectors) {
                sector_t blocks;
                bitmap_counter_t *bmc;

                spin_lock_irq(&bitmap->lock);
                bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
                if (!bmc) {
                        spin_unlock_irq(&bitmap->lock);
                        return 0;
                }

                if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
                        DEFINE_WAIT(__wait);
                        /* note that it is safe to do the prepare_to_wait
                         * after the test as long as we do it before dropping
                         * the spinlock.
                         */
                        prepare_to_wait(&bitmap->overflow_wait, &__wait,
                                        TASK_UNINTERRUPTIBLE);
                        spin_unlock_irq(&bitmap->lock);
                        io_schedule();
                        finish_wait(&bitmap->overflow_wait, &__wait);
                        continue;
                }

                switch (*bmc) {
                case 0:
                        /* first writer to this chunk: dirty the file bit */
                        bitmap_file_set_bit(bitmap, offset);
                        bitmap_count_page(bitmap, offset, 1);
                        /* fall through */
                case 1:
                        /* 0 and 1 are "clean-ish" states; jump to 2 so the
                         * increment below yields an active write count */
                        *bmc = 2;
                }

                (*bmc)++;

                spin_unlock_irq(&bitmap->lock);

                offset += blocks;
                if (sectors > blocks)
                        sectors -= blocks;
                else
                        sectors = 0;
        }
        return 0;
}
EXPORT_SYMBOL(bitmap_startwrite);
1384
/*
 * bitmap_endwrite -- account for the completion of a write started with
 * bitmap_startwrite(): drop the counters and, on failure, flag the
 * chunks as needing resync.
 */
void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
                     int success, int behind)
{
        if (!bitmap)
                return;
        if (behind) {
                if (atomic_dec_and_test(&bitmap->behind_writes))
                        wake_up(&bitmap->behind_wait);
                pr_debug("dec write-behind count %d/%lu\n",
                         atomic_read(&bitmap->behind_writes),
                         bitmap->mddev->bitmap_info.max_write_behind);
        }

        while (sectors) {
                sector_t blocks;
                unsigned long flags;
                bitmap_counter_t *bmc;

                spin_lock_irqsave(&bitmap->lock, flags);
                bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
                if (!bmc) {
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        return;
                }

                /* a successful write on a non-degraded array lets the
                 * events_cleared watermark move forward */
                if (success && !bitmap->mddev->degraded &&
                    bitmap->events_cleared < bitmap->mddev->events) {
                        bitmap->events_cleared = bitmap->mddev->events;
                        bitmap->need_sync = 1;
                        sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
                }

                /* failed write: make sure this chunk gets resynced */
                if (!success && !NEEDED(*bmc))
                        *bmc |= NEEDED_MASK;

                /* a slot is opening up; wake writers stuck in startwrite */
                if (COUNTER(*bmc) == COUNTER_MAX)
                        wake_up(&bitmap->overflow_wait);

                (*bmc)--;
                if (*bmc <= 2) {
                        /* counter is (nearly) idle: queue page for cleaning */
                        set_page_attr(bitmap,
                                      filemap_get_page(
                                              bitmap,
                                              offset >> CHUNK_BLOCK_SHIFT(bitmap)),
                                      BITMAP_PAGE_PENDING);
                        bitmap->allclean = 0;
                }
                spin_unlock_irqrestore(&bitmap->lock, flags);
                offset += blocks;
                if (sectors > blocks)
                        sectors -= blocks;
                else
                        sectors = 0;
        }
}
EXPORT_SYMBOL(bitmap_endwrite);
1441
1442 static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1443                                int degraded)
1444 {
1445         bitmap_counter_t *bmc;
1446         int rv;
1447         if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1448                 *blocks = 1024;
1449                 return 1; /* always resync if no bitmap */
1450         }
1451         spin_lock_irq(&bitmap->lock);
1452         bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1453         rv = 0;
1454         if (bmc) {
1455                 /* locked */
1456                 if (RESYNC(*bmc))
1457                         rv = 1;
1458                 else if (NEEDED(*bmc)) {
1459                         rv = 1;
1460                         if (!degraded) { /* don't set/clear bits if degraded */
1461                                 *bmc |= RESYNC_MASK;
1462                                 *bmc &= ~NEEDED_MASK;
1463                         }
1464                 }
1465         }
1466         spin_unlock_irq(&bitmap->lock);
1467         return rv;
1468 }
1469
1470 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1471                       int degraded)
1472 {
1473         /* bitmap_start_sync must always report on multiples of whole
1474          * pages, otherwise resync (which is very PAGE_SIZE based) will
1475          * get confused.
1476          * So call __bitmap_start_sync repeatedly (if needed) until
1477          * At least PAGE_SIZE>>9 blocks are covered.
1478          * Return the 'or' of the result.
1479          */
1480         int rv = 0;
1481         sector_t blocks1;
1482
1483         *blocks = 0;
1484         while (*blocks < (PAGE_SIZE>>9)) {
1485                 rv |= __bitmap_start_sync(bitmap, offset,
1486                                           &blocks1, degraded);
1487                 offset += blocks1;
1488                 *blocks += blocks1;
1489         }
1490         return rv;
1491 }
1492 EXPORT_SYMBOL(bitmap_start_sync);
1493
1494 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
1495 {
1496         bitmap_counter_t *bmc;
1497         unsigned long flags;
1498
1499         if (bitmap == NULL) {
1500                 *blocks = 1024;
1501                 return;
1502         }
1503         spin_lock_irqsave(&bitmap->lock, flags);
1504         bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1505         if (bmc == NULL)
1506                 goto unlock;
1507         /* locked */
1508         if (RESYNC(*bmc)) {
1509                 *bmc &= ~RESYNC_MASK;
1510
1511                 if (!NEEDED(*bmc) && aborted)
1512                         *bmc |= NEEDED_MASK;
1513                 else {
1514                         if (*bmc <= 2) {
1515                                 set_page_attr(bitmap,
1516                                               filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1517                                               BITMAP_PAGE_PENDING);
1518                                 bitmap->allclean = 0;
1519                         }
1520                 }
1521         }
1522  unlock:
1523         spin_unlock_irqrestore(&bitmap->lock, flags);
1524 }
1525 EXPORT_SYMBOL(bitmap_end_sync);
1526
1527 void bitmap_close_sync(struct bitmap *bitmap)
1528 {
1529         /* Sync has finished, and any bitmap chunks that weren't synced
1530          * properly have been aborted.  It remains to us to clear the
1531          * RESYNC bit wherever it is still on
1532          */
1533         sector_t sector = 0;
1534         sector_t blocks;
1535         if (!bitmap)
1536                 return;
1537         while (sector < bitmap->mddev->resync_max_sectors) {
1538                 bitmap_end_sync(bitmap, sector, &blocks, 0);
1539                 sector += blocks;
1540         }
1541 }
1542 EXPORT_SYMBOL(bitmap_close_sync);
1543
/*
 * Periodically record resync progress: clear RESYNC state for all
 * chunks fully below 'sector' and advance curr_resync_completed, but
 * at most once per daemon_sleep interval.  Called with sector == 0 to
 * (re)arm the interval timer at the start of a resync pass.
 */
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
{
        sector_t s = 0;
        sector_t blocks;

        if (!bitmap)
                return;
        if (sector == 0) {
                /* resync is starting over: just record the time */
                bitmap->last_end_sync = jiffies;
                return;
        }
        /* rate-limit: do nothing until daemon_sleep jiffies have passed */
        if (time_before(jiffies, (bitmap->last_end_sync
                                  + bitmap->mddev->bitmap_info.daemon_sleep)))
                return;
        /* wait for in-flight resync requests to finish so everything
         * below 'sector' really is in-sync */
        wait_event(bitmap->mddev->recovery_wait,
                   atomic_read(&bitmap->mddev->recovery_active) == 0);

        bitmap->mddev->curr_resync_completed = sector;
        set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
        /* round down to a whole bitmap chunk */
        sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
        s = 0;
        while (s < sector && s < bitmap->mddev->resync_max_sectors) {
                bitmap_end_sync(bitmap, s, &blocks, 0);
                s += blocks;
        }
        bitmap->last_end_sync = jiffies;
        sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
}
EXPORT_SYMBOL(bitmap_cond_end_sync);
1573
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
        /* For each chunk covered by any of these sectors, set the
         * counter to 1 and set resync_needed.  They should all
         * be 0 at this point
         */

        sector_t secs;
        bitmap_counter_t *bmc;
        spin_lock_irq(&bitmap->lock);
        /* final argument 1: allocate the counter page if necessary */
        bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
        if (!bmc) {
                /* allocation failed; nothing we can record */
                spin_unlock_irq(&bitmap->lock);
                return;
        }
        if (!*bmc) {
                struct page *page;
                /* initialise the counter to 2, plus NEEDED if a resync
                 * of this chunk is required */
                *bmc = 2 | (needed ? NEEDED_MASK : 0);
                bitmap_count_page(bitmap, offset, 1);
                /* flag the page so the daemon writes the bit to disk */
                page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
                set_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
                bitmap->allclean = 0;
        }
        spin_unlock_irq(&bitmap->lock);
}
1599
/* dirty the memory and file bits for bitmap chunks "s" to "e" (inclusive) */
void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
{
        unsigned long chunk;

        for (chunk = s; chunk <= e; chunk++) {
                sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
                /* mark the chunk in the in-memory counters ... */
                bitmap_set_memory_bits(bitmap, sec, 1);
                /* ... and in the bitmap file pages (needs the lock) */
                spin_lock_irq(&bitmap->lock);
                bitmap_file_set_bit(bitmap, sec);
                spin_unlock_irq(&bitmap->lock);
                if (sec < bitmap->mddev->recovery_cp)
                        /* We are asserting that the array is dirty,
                         * so move the recovery_cp address back so
                         * that it is obvious that it is dirty
                         */
                        bitmap->mddev->recovery_cp = sec;
        }
}
1619
/*
 * flush out any pending updates
 */
void bitmap_flush(struct mddev *mddev)
{
        struct bitmap *bitmap = mddev->bitmap;
        long sleep;

        if (!bitmap) /* there was no bitmap */
                return;

        /* run the daemon_work three times to ensure everything is flushed
         * that can be
         */
        sleep = mddev->bitmap_info.daemon_sleep * 2;
        /* backdate lastrun before each call so the daemon believes it
         * is overdue and does a full pass */
        bitmap->daemon_lastrun -= sleep;
        bitmap_daemon_work(mddev);
        bitmap->daemon_lastrun -= sleep;
        bitmap_daemon_work(mddev);
        bitmap->daemon_lastrun -= sleep;
        bitmap_daemon_work(mddev);
        /* finally push out the superblock */
        bitmap_update_sb(bitmap);
}
1643
1644 /*
1645  * free memory that was allocated
1646  */
1647 static void bitmap_free(struct bitmap *bitmap)
1648 {
1649         unsigned long k, pages;
1650         struct bitmap_page *bp;
1651
1652         if (!bitmap) /* there was no bitmap */
1653                 return;
1654
1655         /* release the bitmap file and kill the daemon */
1656         bitmap_file_put(bitmap);
1657
1658         bp = bitmap->bp;
1659         pages = bitmap->pages;
1660
1661         /* free all allocated memory */
1662
1663         if (bp) /* deallocate the page memory */
1664                 for (k = 0; k < pages; k++)
1665                         if (bp[k].map && !bp[k].hijacked)
1666                                 kfree(bp[k].map);
1667         kfree(bp);
1668         kfree(bitmap);
1669 }
1670
/*
 * Tear down the bitmap attached to @mddev: detach it from the device,
 * stop the daemon's frequent wakeups, drop the sysfs handle and free
 * all memory.
 */
void bitmap_destroy(struct mddev *mddev)
{
        struct bitmap *bitmap = mddev->bitmap;

        if (!bitmap) /* there was no bitmap */
                return;

        /* detach under the mutex so concurrent users see either the
         * old bitmap pointer or NULL, never a half-freed structure */
        mutex_lock(&mddev->bitmap_info.mutex);
        mddev->bitmap = NULL; /* disconnect from the md device */
        mutex_unlock(&mddev->bitmap_info.mutex);
        if (mddev->thread)
                /* no bitmap -> the md thread need not wake frequently */
                mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;

        if (bitmap->sysfs_can_clear)
                sysfs_put(bitmap->sysfs_can_clear);

        bitmap_free(bitmap);
}
1689
/*
 * initialize the bitmap structure
 * if this returns an error, bitmap_destroy must be called to do clean up
 */
int bitmap_create(struct mddev *mddev)
{
        struct bitmap *bitmap;
        sector_t blocks = mddev->resync_max_sectors;
        unsigned long chunks;
        unsigned long pages;
        struct file *file = mddev->bitmap_info.file;
        int err;
        struct sysfs_dirent *bm = NULL;

        /* the on-disk superblock layout is fixed at 256 bytes */
        BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);

        if (!file
            && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
                return 0;

        /* a bitmap lives in a file OR at a device offset, never both */
        BUG_ON(file && mddev->bitmap_info.offset);

        bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
        if (!bitmap)
                return -ENOMEM;

        spin_lock_init(&bitmap->lock);
        atomic_set(&bitmap->pending_writes, 0);
        init_waitqueue_head(&bitmap->write_wait);
        init_waitqueue_head(&bitmap->overflow_wait);
        init_waitqueue_head(&bitmap->behind_wait);

        bitmap->mddev = mddev;

        /* cache the sysfs handle for bitmap/can_clear so it can be
         * notified later without another lookup */
        if (mddev->kobj.sd)
                bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap");
        if (bm) {
                bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear");
                sysfs_put(bm);
        } else
                bitmap->sysfs_can_clear = NULL;

        bitmap->file = file;
        if (file) {
                get_file(file);
                /* As future accesses to this file will use bmap,
                 * and bypass the page cache, we must sync the file
                 * first.
                 */
                vfs_fsync(file, 1);
        }
        /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
        if (!mddev->bitmap_info.external) {
                /*
                 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
                 * instructing us to create a new on-disk bitmap instance.
                 */
                if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
                        err = bitmap_new_disk_sb(bitmap);
                else
                        err = bitmap_read_sb(bitmap);
        } else {
                /* externally-managed metadata: caller must have set the
                 * geometry already */
                err = 0;
                if (mddev->bitmap_info.chunksize == 0 ||
                    mddev->bitmap_info.daemon_sleep == 0)
                        /* chunksize and time_base need to be
                         * set first. */
                        err = -EINVAL;
        }
        if (err)
                goto error;

        bitmap->daemon_lastrun = jiffies;
        /* chunksize is a power of two, so ffz(~x) yields log2(x) */
        bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize);

        /* now that chunksize and chunkshift are set, we can use these macros */
        chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >>
                        CHUNK_BLOCK_SHIFT(bitmap);
        pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;

        BUG_ON(!pages);

        bitmap->chunks = chunks;
        bitmap->pages = pages;
        bitmap->missing_pages = pages;

        /* only the page-descriptor array is allocated up front; the
         * counter pages themselves are allocated on demand */
        bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);

        err = -ENOMEM;
        if (!bitmap->bp)
                goto error;

        printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
                pages, bmname(bitmap));

        mddev->bitmap = bitmap;


        return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;

 error:
        bitmap_free(bitmap);
        return err;
}
1794
/*
 * Load an existing bitmap into an active array: discard stale
 * in-memory resync state, read the bits from disk, then arm the
 * bitmap daemon.  Must be called after a successful bitmap_create.
 */
int bitmap_load(struct mddev *mddev)
{
        int err = 0;
        sector_t start = 0;
        sector_t sector = 0;
        struct bitmap *bitmap = mddev->bitmap;

        if (!bitmap)
                goto out;

        /* Clear out old bitmap info first:  Either there is none, or we
         * are resuming after someone else has possibly changed things,
         * so we should forget old cached info.
         * All chunks should be clean, but some might need_sync.
         */
        while (sector < mddev->resync_max_sectors) {
                sector_t blocks;
                bitmap_start_sync(bitmap, sector, &blocks, 0);
                sector += blocks;
        }
        bitmap_close_sync(bitmap);

        if (mddev->degraded == 0
            || bitmap->events_cleared == mddev->events)
                /* no need to keep dirty bits to optimise a
                 * re-add of a missing device */
                start = mddev->recovery_cp;

        err = bitmap_init_from_disk(bitmap, start);

        if (err)
                goto out;

        /* arm the daemon to run every daemon_sleep jiffies */
        mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
        md_wakeup_thread(mddev->thread);

        bitmap_update_sb(bitmap);

        if (bitmap->flags & BITMAP_WRITE_ERROR)
                err = -EIO;
out:
        return err;
}
EXPORT_SYMBOL_GPL(bitmap_load);
1839
/*
 * Emit a one-line summary of the bitmap (page usage, chunk size and
 * backing file, if any) into @seq.
 */
void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
{
        unsigned long chunk_kb;
        unsigned long flags;

        if (!bitmap)
                return;

        /* hold the lock so the counts stay consistent while printing */
        spin_lock_irqsave(&bitmap->lock, flags);
        chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
        seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
                   "%lu%s chunk",
                   bitmap->pages - bitmap->missing_pages,
                   bitmap->pages,
                   (bitmap->pages - bitmap->missing_pages)
                   << (PAGE_SHIFT - 10),
                   chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
                   chunk_kb ? "KB" : "B");
        if (bitmap->file) {
                seq_printf(seq, ", file: ");
                seq_path(seq, &bitmap->file->f_path, " \t\n");
        }

        seq_printf(seq, "\n");
        spin_unlock_irqrestore(&bitmap->lock, flags);
}
1866
1867 static ssize_t
1868 location_show(struct mddev *mddev, char *page)
1869 {
1870         ssize_t len;
1871         if (mddev->bitmap_info.file)
1872                 len = sprintf(page, "file");
1873         else if (mddev->bitmap_info.offset)
1874                 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
1875         else
1876                 len = sprintf(page, "none");
1877         len += sprintf(page+len, "\n");
1878         return len;
1879 }
1880
/*
 * Set or clear the bitmap location.  "none" removes an existing
 * bitmap; a (signed) sector offset creates an internal bitmap there;
 * "file:" is recognised but not yet supported.  Refused while a
 * resync/recovery is running or the personality cannot quiesce.
 */
static ssize_t
location_store(struct mddev *mddev, const char *buf, size_t len)
{

        if (mddev->pers) {
                if (!mddev->pers->quiesce)
                        return -EBUSY;
                if (mddev->recovery || mddev->sync_thread)
                        return -EBUSY;
        }

        if (mddev->bitmap || mddev->bitmap_info.file ||
            mddev->bitmap_info.offset) {
                /* bitmap already configured.  Only option is to clear it */
                if (strncmp(buf, "none", 4) != 0)
                        return -EBUSY;
                if (mddev->pers) {
                        /* quiesce I/O while the bitmap is torn down */
                        mddev->pers->quiesce(mddev, 1);
                        bitmap_destroy(mddev);
                        mddev->pers->quiesce(mddev, 0);
                }
                mddev->bitmap_info.offset = 0;
                if (mddev->bitmap_info.file) {
                        struct file *f = mddev->bitmap_info.file;
                        mddev->bitmap_info.file = NULL;
                        restore_bitmap_write_access(f);
                        fput(f);
                }
        } else {
                /* No bitmap, OK to set a location */
                long long offset;
                if (strncmp(buf, "none", 4) == 0)
                        /* nothing to be done */;
                else if (strncmp(buf, "file:", 5) == 0) {
                        /* Not supported yet */
                        return -EINVAL;
                } else {
                        int rv;
                        /* accept an optional explicit '+' sign */
                        if (buf[0] == '+')
                                rv = strict_strtoll(buf+1, 10, &offset);
                        else
                                rv = strict_strtoll(buf, 10, &offset);
                        if (rv)
                                return rv;
                        if (offset == 0)
                                return -EINVAL;
                        /* v0.90 metadata only allows the default offset */
                        if (mddev->bitmap_info.external == 0 &&
                            mddev->major_version == 0 &&
                            offset != mddev->bitmap_info.default_offset)
                                return -EINVAL;
                        mddev->bitmap_info.offset = offset;
                        if (mddev->pers) {
                                /* create and load under quiesce; roll
                                 * back the offset on any failure */
                                mddev->pers->quiesce(mddev, 1);
                                rv = bitmap_create(mddev);
                                if (!rv)
                                        rv = bitmap_load(mddev);
                                if (rv) {
                                        bitmap_destroy(mddev);
                                        mddev->bitmap_info.offset = 0;
                                }
                                mddev->pers->quiesce(mddev, 0);
                                if (rv)
                                        return rv;
                        }
                }
        }
        if (!mddev->external) {
                /* Ensure new bitmap info is stored in
                 * metadata promptly.
                 */
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
        }
        return len;
}
1956
/* md/<dev>/bitmap/location: where the bitmap is stored */
static struct md_sysfs_entry bitmap_location =
__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
1959
1960 static ssize_t
1961 timeout_show(struct mddev *mddev, char *page)
1962 {
1963         ssize_t len;
1964         unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
1965         unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
1966
1967         len = sprintf(page, "%lu", secs);
1968         if (jifs)
1969                 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
1970         len += sprintf(page+len, "\n");
1971         return len;
1972 }
1973
/*
 * Set the bitmap daemon interval from a decimal seconds value with up
 * to 4 fractional digits (parsed scaled by 10^4, then converted to
 * jiffies).  Applied to the running thread unless the bitmap is idle.
 */
static ssize_t
timeout_store(struct mddev *mddev, const char *buf, size_t len)
{
        /* timeout can be set at any time */
        unsigned long timeout;
        int rv = strict_strtoul_scaled(buf, &timeout, 4);
        if (rv)
                return rv;

        /* just to make sure we don't overflow... */
        if (timeout >= LONG_MAX / HZ)
                return -EINVAL;

        /* undo the 10^4 input scaling while converting to jiffies */
        timeout = timeout * HZ / 10000;

        /* clamp to a schedulable, non-zero interval */
        if (timeout >= MAX_SCHEDULE_TIMEOUT)
                timeout = MAX_SCHEDULE_TIMEOUT-1;
        if (timeout < 1)
                timeout = 1;
        mddev->bitmap_info.daemon_sleep = timeout;
        if (mddev->thread) {
                /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
                 * the bitmap is all clean and we don't need to
                 * adjust the timeout right now
                 */
                if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
                        mddev->thread->timeout = timeout;
                        md_wakeup_thread(mddev->thread);
                }
        }
        return len;
}
2006
/* md/<dev>/bitmap/time_base: daemon flush interval in seconds */
static struct md_sysfs_entry bitmap_timeout =
__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2009
2010 static ssize_t
2011 backlog_show(struct mddev *mddev, char *page)
2012 {
2013         return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2014 }
2015
2016 static ssize_t
2017 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2018 {
2019         unsigned long backlog;
2020         int rv = strict_strtoul(buf, 10, &backlog);
2021         if (rv)
2022                 return rv;
2023         if (backlog > COUNTER_MAX)
2024                 return -EINVAL;
2025         mddev->bitmap_info.max_write_behind = backlog;
2026         return len;
2027 }
2028
/* md/<dev>/bitmap/backlog: max outstanding write-behind requests */
static struct md_sysfs_entry bitmap_backlog =
__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2031
2032 static ssize_t
2033 chunksize_show(struct mddev *mddev, char *page)
2034 {
2035         return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2036 }
2037
2038 static ssize_t
2039 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2040 {
2041         /* Can only be changed when no bitmap is active */
2042         int rv;
2043         unsigned long csize;
2044         if (mddev->bitmap)
2045                 return -EBUSY;
2046         rv = strict_strtoul(buf, 10, &csize);
2047         if (rv)
2048                 return rv;
2049         if (csize < 512 ||
2050             !is_power_of_2(csize))
2051                 return -EINVAL;
2052         mddev->bitmap_info.chunksize = csize;
2053         return len;
2054 }
2055
/* md/<dev>/bitmap/chunksize: bytes of array covered per bitmap bit */
static struct md_sysfs_entry bitmap_chunksize =
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2058
2059 static ssize_t metadata_show(struct mddev *mddev, char *page)
2060 {
2061         return sprintf(page, "%s\n", (mddev->bitmap_info.external
2062                                       ? "external" : "internal"));
2063 }
2064
2065 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2066 {
2067         if (mddev->bitmap ||
2068             mddev->bitmap_info.file ||
2069             mddev->bitmap_info.offset)
2070                 return -EBUSY;
2071         if (strncmp(buf, "external", 8) == 0)
2072                 mddev->bitmap_info.external = 1;
2073         else if (strncmp(buf, "internal", 8) == 0)
2074                 mddev->bitmap_info.external = 0;
2075         else
2076                 return -EINVAL;
2077         return len;
2078 }
2079
/* md/<dev>/bitmap/metadata: "external" or "internal" management */
static struct md_sysfs_entry bitmap_metadata =
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2082
2083 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2084 {
2085         int len;
2086         if (mddev->bitmap)
2087                 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
2088                                              "false" : "true"));
2089         else
2090                 len = sprintf(page, "\n");
2091         return len;
2092 }
2093
2094 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2095 {
2096         if (mddev->bitmap == NULL)
2097                 return -ENOENT;
2098         if (strncmp(buf, "false", 5) == 0)
2099                 mddev->bitmap->need_sync = 1;
2100         else if (strncmp(buf, "true", 4) == 0) {
2101                 if (mddev->degraded)
2102                         return -EBUSY;
2103                 mddev->bitmap->need_sync = 0;
2104         } else
2105                 return -EINVAL;
2106         return len;
2107 }
2108
/* md/<dev>/bitmap/can_clear: whether bits may currently be cleared */
static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2111
2112 static ssize_t
2113 behind_writes_used_show(struct mddev *mddev, char *page)
2114 {
2115         if (mddev->bitmap == NULL)
2116                 return sprintf(page, "0\n");
2117         return sprintf(page, "%lu\n",
2118                        mddev->bitmap->behind_writes_used);
2119 }
2120
2121 static ssize_t
2122 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2123 {
2124         if (mddev->bitmap)
2125                 mddev->bitmap->behind_writes_used = 0;
2126         return len;
2127 }
2128
/* md/<dev>/bitmap/max_backlog_used: peak write-behind usage (write resets) */
static struct md_sysfs_entry max_backlog_used =
__ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
       behind_writes_used_show, behind_writes_used_reset);
2132
/* attributes exposed under the md/<dev>/bitmap/ sysfs directory */
static struct attribute *md_bitmap_attrs[] = {
        &bitmap_location.attr,
        &bitmap_timeout.attr,
        &bitmap_backlog.attr,
        &bitmap_chunksize.attr,
        &bitmap_metadata.attr,
        &bitmap_can_clear.attr,
        &max_backlog_used.attr,
        NULL
};
/* attribute group registered by md core as the "bitmap" subdirectory */
struct attribute_group md_bitmap_group = {
        .name = "bitmap",
        .attrs = md_bitmap_attrs,
};
2147