]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/gfs2/log.c
Merge branch 'master'
[karo-tx-linux.git] / fs / gfs2 / log.c
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License v.2.
8  */
9
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <asm/semaphore.h>
17
18 #include "gfs2.h"
19 #include "lm_interface.h"
20 #include "incore.h"
21 #include "bmap.h"
22 #include "glock.h"
23 #include "log.h"
24 #include "lops.h"
25 #include "meta_io.h"
26 #include "util.h"
27
28 #define PULL 1
29
30 static void do_lock_wait(struct gfs2_sbd *sdp, wait_queue_head_t *wq,
31                          atomic_t *a)
32 {
33         wait_event(*wq, atomic_read(a) ? 0 : 1);
34 }
35
36 static void lock_for_trans(struct gfs2_sbd *sdp)
37 {
38         do_lock_wait(sdp, &sdp->sd_log_trans_wq, &sdp->sd_log_flush_count);
39         atomic_inc(&sdp->sd_log_trans_count);
40 }
41
42 static void unlock_from_trans(struct gfs2_sbd *sdp)
43 {
44         gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_trans_count));
45         if (atomic_dec_and_test(&sdp->sd_log_trans_count))
46                 wake_up(&sdp->sd_log_flush_wq);
47 }
48
49 static void gfs2_lock_for_flush(struct gfs2_sbd *sdp)
50 {
51         atomic_inc(&sdp->sd_log_flush_count);
52         do_lock_wait(sdp, &sdp->sd_log_flush_wq, &sdp->sd_log_trans_count);
53 }
54
55 static void gfs2_unlock_from_flush(struct gfs2_sbd *sdp)
56 {
57         gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_flush_count));
58         if (atomic_dec_and_test(&sdp->sd_log_flush_count))
59                 wake_up(&sdp->sd_log_trans_wq);
60 }
61
62 /**
63  * gfs2_struct2blk - compute stuff
64  * @sdp: the filesystem
65  * @nstruct: the number of structures
66  * @ssize: the size of the structures
67  *
68  * Compute the number of log descriptor blocks needed to hold a certain number
69  * of structures of a certain size.
70  *
71  * Returns: the number of blocks needed (minimum is always 1)
72  */
73
74 unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
75                              unsigned int ssize)
76 {
77         unsigned int blks;
78         unsigned int first, second;
79
80         blks = 1;
81         first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) /
82                 ssize;
83
84         if (nstruct > first) {
85                 second = (sdp->sd_sb.sb_bsize -
86                           sizeof(struct gfs2_meta_header)) / ssize;
87                 blks += DIV_ROUND_UP(nstruct - first, second);
88         }
89
90         return blks;
91 }
92
93 void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
94 {
95         struct list_head *head = &sdp->sd_ail1_list;
96         uint64_t sync_gen;
97         struct list_head *first, *tmp;
98         struct gfs2_ail *first_ai, *ai;
99
100         gfs2_log_lock(sdp);
101         if (list_empty(head)) {
102                 gfs2_log_unlock(sdp);
103                 return;
104         }
105         sync_gen = sdp->sd_ail_sync_gen++;
106
107         first = head->prev;
108         first_ai = list_entry(first, struct gfs2_ail, ai_list);
109         first_ai->ai_sync_gen = sync_gen;
110         gfs2_ail1_start_one(sdp, first_ai);
111
112         if (flags & DIO_ALL)
113                 first = NULL;
114
115         for (;;) {
116                 if (first &&
117                     (head->prev != first ||
118                      gfs2_ail1_empty_one(sdp, first_ai, 0)))
119                         break;
120
121                 for (tmp = head->prev; tmp != head; tmp = tmp->prev) {
122                         ai = list_entry(tmp, struct gfs2_ail, ai_list);
123                         if (ai->ai_sync_gen >= sync_gen)
124                                 continue;
125                         ai->ai_sync_gen = sync_gen;
126                         gfs2_ail1_start_one(sdp, ai);
127                         break;
128                 }
129
130                 if (tmp == head)
131                         break;
132         }
133
134         gfs2_log_unlock(sdp);
135 }
136
137 int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
138 {
139         struct gfs2_ail *ai, *s;
140         int ret;
141
142         gfs2_log_lock(sdp);
143
144         list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
145                 if (gfs2_ail1_empty_one(sdp, ai, flags))
146                         list_move(&ai->ai_list, &sdp->sd_ail2_list);
147                 else if (!(flags & DIO_ALL))
148                         break;
149         }
150
151         ret = list_empty(&sdp->sd_ail1_list);
152
153         gfs2_log_unlock(sdp);
154
155         return ret;
156 }
157
158 static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
159 {
160         struct gfs2_ail *ai, *safe;
161         unsigned int old_tail = sdp->sd_log_tail;
162         int wrap = (new_tail < old_tail);
163         int a, b, rm;
164
165         gfs2_log_lock(sdp);
166
167         list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
168                 a = (old_tail <= ai->ai_first);
169                 b = (ai->ai_first < new_tail);
170                 rm = (wrap) ? (a || b) : (a && b);
171                 if (!rm)
172                         continue;
173
174                 gfs2_ail2_empty_one(sdp, ai);
175                 list_del(&ai->ai_list);
176                 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
177                 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
178                 kfree(ai);
179         }
180
181         gfs2_log_unlock(sdp);
182 }
183
184 /**
185  * gfs2_log_reserve - Make a log reservation
186  * @sdp: The GFS2 superblock
187  * @blks: The number of blocks to reserve
188  *
189  * Returns: errno
190  */
191
192 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
193 {
194         LIST_HEAD(list);
195         unsigned int try = 0;
196
197         if (gfs2_assert_warn(sdp, blks) ||
198             gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
199                 return -EINVAL;
200
201         for (;;) {
202                 gfs2_log_lock(sdp);
203                 if (list_empty(&list)) {
204                         list_add_tail(&list, &sdp->sd_log_blks_list);
205                         while (sdp->sd_log_blks_list.next != &list) {
206                                 DECLARE_WAITQUEUE(__wait_chan, current);
207                                 set_current_state(TASK_UNINTERRUPTIBLE);
208                                 add_wait_queue(&sdp->sd_log_blks_wait,
209                                                &__wait_chan);
210                                 gfs2_log_unlock(sdp);
211                                 schedule();
212                                 gfs2_log_lock(sdp);
213                                 remove_wait_queue(&sdp->sd_log_blks_wait,
214                                                   &__wait_chan);
215                                 set_current_state(TASK_RUNNING);
216                         }
217                 }
218                 /* Never give away the last block so we can
219                    always pull the tail if we need to. */
220                 if (sdp->sd_log_blks_free > blks) {
221                         sdp->sd_log_blks_free -= blks;
222                         list_del(&list);
223                         gfs2_log_unlock(sdp);
224                         wake_up(&sdp->sd_log_blks_wait);
225                         break;
226                 }
227
228                 gfs2_log_unlock(sdp);
229                 gfs2_ail1_empty(sdp, 0);
230                 gfs2_log_flush(sdp);
231
232                 if (try++)
233                         gfs2_ail1_start(sdp, 0);
234         }
235         lock_for_trans(sdp);
236
237         return 0;
238 }
239
240 /**
241  * gfs2_log_release - Release a given number of log blocks
242  * @sdp: The GFS2 superblock
243  * @blks: The number of blocks
244  *
245  */
246
247 void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
248 {
249         unlock_from_trans(sdp);
250
251         gfs2_log_lock(sdp);
252         sdp->sd_log_blks_free += blks;
253         gfs2_assert_withdraw(sdp,
254                              sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
255         gfs2_log_unlock(sdp);
256 }
257
258 static uint64_t log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
259 {
260         int new = 0;
261         uint64_t dbn;
262         int error;
263
264         error = gfs2_block_map(sdp->sd_jdesc->jd_inode->u.generic_ip,
265                                lbn, &new, &dbn, NULL);
266         gfs2_assert_withdraw(sdp, !error && dbn);
267
268         return dbn;
269 }
270
271 /**
272  * log_distance - Compute distance between two journal blocks
273  * @sdp: The GFS2 superblock
274  * @newer: The most recent journal block of the pair
275  * @older: The older journal block of the pair
276  *
277  *   Compute the distance (in the journal direction) between two
278  *   blocks in the journal
279  *
280  * Returns: the distance in blocks
281  */
282
283 static inline unsigned int log_distance(struct gfs2_sbd *sdp,
284                                         unsigned int newer,
285                                         unsigned int older)
286 {
287         int dist;
288
289         dist = newer - older;
290         if (dist < 0)
291                 dist += sdp->sd_jdesc->jd_blocks;
292
293         return dist;
294 }
295
296 static unsigned int current_tail(struct gfs2_sbd *sdp)
297 {
298         struct gfs2_ail *ai;
299         unsigned int tail;
300
301         gfs2_log_lock(sdp);
302
303         if (list_empty(&sdp->sd_ail1_list))
304                 tail = sdp->sd_log_head;
305         else {
306                 ai = list_entry(sdp->sd_ail1_list.prev,
307                                 struct gfs2_ail, ai_list);
308                 tail = ai->ai_first;
309         }
310
311         gfs2_log_unlock(sdp);
312
313         return tail;
314 }
315
316 static inline void log_incr_head(struct gfs2_sbd *sdp)
317 {
318         if (sdp->sd_log_flush_head == sdp->sd_log_tail)
319                 gfs2_assert_withdraw(sdp,
320                                 sdp->sd_log_flush_head == sdp->sd_log_head);
321
322         if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
323                 sdp->sd_log_flush_head = 0;
324                 sdp->sd_log_flush_wrapped = 1;
325         }
326 }
327
328 /**
329  * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
330  * @sdp: The GFS2 superblock
331  *
332  * Returns: the buffer_head
333  */
334
335 struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
336 {
337         uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
338         struct gfs2_log_buf *lb;
339         struct buffer_head *bh;
340
341         lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
342         list_add(&lb->lb_list, &sdp->sd_log_flush_list);
343
344         bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
345         lock_buffer(bh);
346         memset(bh->b_data, 0, bh->b_size);
347         set_buffer_uptodate(bh);
348         clear_buffer_dirty(bh);
349         unlock_buffer(bh);
350
351         log_incr_head(sdp);
352
353         return bh;
354 }
355
356 /**
357  * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
358  * @sdp: the filesystem
359  * @data: the data the buffer_head should point to
360  *
361  * Returns: the log buffer descriptor
362  */
363
364 struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
365                                       struct buffer_head *real)
366 {
367         uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
368         struct gfs2_log_buf *lb;
369         struct buffer_head *bh;
370
371         lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
372         list_add(&lb->lb_list, &sdp->sd_log_flush_list);
373         lb->lb_real = real;
374
375         bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
376         atomic_set(&bh->b_count, 1);
377         bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
378         set_bh_page(bh, real->b_page, bh_offset(real));
379         bh->b_blocknr = blkno;
380         bh->b_size = sdp->sd_sb.sb_bsize;
381         bh->b_bdev = sdp->sd_vfs->s_bdev;
382
383         log_incr_head(sdp);
384
385         return bh;
386 }
387
388 static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
389 {
390         unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
391
392         ail2_empty(sdp, new_tail);
393
394         gfs2_log_lock(sdp);
395         sdp->sd_log_blks_free += dist - ((pull) ? 1 : 0);
396         gfs2_assert_withdraw(sdp,
397                              sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
398         gfs2_log_unlock(sdp);
399
400         sdp->sd_log_tail = new_tail;
401 }
402
403 /**
404  * log_write_header - Get and initialize a journal header buffer
405  * @sdp: The GFS2 superblock
406  *
407  * Returns: the initialized log buffer descriptor
408  */
409
410 static void log_write_header(struct gfs2_sbd *sdp, uint32_t flags, int pull)
411 {
412         uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
413         struct buffer_head *bh;
414         struct gfs2_log_header *lh;
415         unsigned int tail;
416         uint32_t hash;
417
418         bh = sb_getblk(sdp->sd_vfs, blkno);
419         lock_buffer(bh);
420         memset(bh->b_data, 0, bh->b_size);
421         set_buffer_uptodate(bh);
422         clear_buffer_dirty(bh);
423         unlock_buffer(bh);
424
425         gfs2_ail1_empty(sdp, 0);
426         tail = current_tail(sdp);
427
428         lh = (struct gfs2_log_header *)bh->b_data;
429         memset(lh, 0, sizeof(struct gfs2_log_header));
430         lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
431         lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
432         lh->lh_header.mh_format = cpu_to_be16(GFS2_FORMAT_LH);
433         lh->lh_sequence = be64_to_cpu(sdp->sd_log_sequence++);
434         lh->lh_flags = be32_to_cpu(flags);
435         lh->lh_tail = be32_to_cpu(tail);
436         lh->lh_blkno = be32_to_cpu(sdp->sd_log_flush_head);
437         hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
438         lh->lh_hash = cpu_to_be32(hash);
439
440         set_buffer_dirty(bh);
441         if (sync_dirty_buffer(bh))
442                 gfs2_io_error_bh(sdp, bh);
443         brelse(bh);
444
445         if (sdp->sd_log_tail != tail)
446                 log_pull_tail(sdp, tail, pull);
447         else
448                 gfs2_assert_withdraw(sdp, !pull);
449
450         sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
451         log_incr_head(sdp);
452 }
453
454 static void log_flush_commit(struct gfs2_sbd *sdp)
455 {
456         struct list_head *head = &sdp->sd_log_flush_list;
457         struct gfs2_log_buf *lb;
458         struct buffer_head *bh;
459         unsigned int d;
460
461         d = log_distance(sdp, sdp->sd_log_flush_head, sdp->sd_log_head);
462
463         gfs2_assert_withdraw(sdp, d + 1 == sdp->sd_log_blks_reserved);
464
465         while (!list_empty(head)) {
466                 lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
467                 list_del(&lb->lb_list);
468                 bh = lb->lb_bh;
469
470                 wait_on_buffer(bh);
471                 if (!buffer_uptodate(bh))
472                         gfs2_io_error_bh(sdp, bh);
473                 if (lb->lb_real) {
474                         while (atomic_read(&bh->b_count) != 1)  /* Grrrr... */
475                                 schedule();
476                         free_buffer_head(bh);
477                 } else
478                         brelse(bh);
479                 kfree(lb);
480         }
481
482         log_write_header(sdp, 0, 0);
483 }
484
485 /**
486  * gfs2_log_flush_i - flush incore transaction(s)
487  * @sdp: the filesystem
488  * @gl: The glock structure to flush.  If NULL, flush the whole incore log
489  *
490  */
491
492 void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
493 {
494         struct gfs2_ail *ai;
495
496         ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
497         INIT_LIST_HEAD(&ai->ai_ail1_list);
498         INIT_LIST_HEAD(&ai->ai_ail2_list);
499         gfs2_lock_for_flush(sdp);
500
501         if (gl) {
502                 gfs2_log_lock(sdp);
503                 if (list_empty(&gl->gl_le.le_list)) {
504                         gfs2_log_unlock(sdp);
505                         gfs2_unlock_from_flush(sdp);
506                         kfree(ai);
507                         return;
508                 }
509                 gfs2_log_unlock(sdp);
510         }
511
512         mutex_lock(&sdp->sd_log_flush_lock);
513
514         gfs2_assert_withdraw(sdp,
515                         sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
516         gfs2_assert_withdraw(sdp,
517                         sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
518
519         sdp->sd_log_flush_head = sdp->sd_log_head;
520         sdp->sd_log_flush_wrapped = 0;
521         ai->ai_first = sdp->sd_log_flush_head;
522
523         lops_before_commit(sdp);
524         if (!list_empty(&sdp->sd_log_flush_list))
525                 log_flush_commit(sdp);
526         else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
527                 log_write_header(sdp, 0, PULL);
528         lops_after_commit(sdp, ai);
529         sdp->sd_log_head = sdp->sd_log_flush_head;
530         if (sdp->sd_log_flush_wrapped)
531                 sdp->sd_log_wraps++;
532
533         sdp->sd_log_blks_reserved =
534                 sdp->sd_log_commited_buf =
535                 sdp->sd_log_commited_revoke = 0;
536
537         gfs2_log_lock(sdp);
538         if (!list_empty(&ai->ai_ail1_list)) {
539                 list_add(&ai->ai_list, &sdp->sd_ail1_list);
540                 ai = NULL;
541         }
542         gfs2_log_unlock(sdp);
543
544         mutex_unlock(&sdp->sd_log_flush_lock);
545         sdp->sd_vfs->s_dirt = 0;
546         gfs2_unlock_from_flush(sdp);
547
548         kfree(ai);
549 }
550
551 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
552 {
553         unsigned int reserved = 1;
554         unsigned int old;
555
556         gfs2_log_lock(sdp);
557
558         sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
559         gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
560         sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
561         gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
562
563         if (sdp->sd_log_commited_buf)
564                 reserved += 1 + sdp->sd_log_commited_buf +
565                             sdp->sd_log_commited_buf/503;
566         if (sdp->sd_log_commited_revoke)
567                 reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
568                                             sizeof(uint64_t));
569
570         old = sdp->sd_log_blks_free;
571         sdp->sd_log_blks_free += tr->tr_reserved -
572                                  (reserved - sdp->sd_log_blks_reserved);
573
574         gfs2_assert_withdraw(sdp,
575                              sdp->sd_log_blks_free >= old);
576         gfs2_assert_withdraw(sdp,
577                              sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
578
579         sdp->sd_log_blks_reserved = reserved;
580
581         gfs2_log_unlock(sdp);
582 }
583
584 /**
585  * gfs2_log_commit - Commit a transaction to the log
586  * @sdp: the filesystem
587  * @tr: the transaction
588  *
589  * Returns: errno
590  */
591
592 void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
593 {
594         log_refund(sdp, tr);
595         lops_incore_commit(sdp, tr);
596
597         sdp->sd_vfs->s_dirt = 1;
598         unlock_from_trans(sdp);
599
600         gfs2_log_lock(sdp);
601         if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
602                 gfs2_log_unlock(sdp);
603                 gfs2_log_flush(sdp);
604         } else
605                 gfs2_log_unlock(sdp);
606 }
607
608 /**
609  * gfs2_log_shutdown - write a shutdown header into a journal
610  * @sdp: the filesystem
611  *
612  */
613
614 void gfs2_log_shutdown(struct gfs2_sbd *sdp)
615 {
616         mutex_lock(&sdp->sd_log_flush_lock);
617
618         gfs2_assert_withdraw(sdp, !atomic_read(&sdp->sd_log_trans_count));
619         gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
620         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
621         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
622         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
623         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
624         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
625         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
626         gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
627
628         sdp->sd_log_flush_head = sdp->sd_log_head;
629         sdp->sd_log_flush_wrapped = 0;
630
631         log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
632
633         gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free ==
634                              sdp->sd_jdesc->jd_blocks);
635         gfs2_assert_withdraw(sdp, sdp->sd_log_head == sdp->sd_log_tail);
636         gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail2_list));
637
638         sdp->sd_log_head = sdp->sd_log_flush_head;
639         if (sdp->sd_log_flush_wrapped)
640                 sdp->sd_log_wraps++;
641         sdp->sd_log_tail = sdp->sd_log_head;
642
643         mutex_unlock(&sdp->sd_log_flush_lock);
644 }
645