git.karo-electronics.de Git - mv-sheeva.git/blob - fs/btrfs/transaction.c
Btrfs: Let some locks go during defrag and snapshot dropping
[mv-sheeva.git] / fs / btrfs / transaction.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/fs.h>
20 #include <linux/sched.h>
21 #include "ctree.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24
/*
 * Number of btrfs_transaction structures currently allocated: incremented in
 * join_transaction() when a new one is created and decremented in
 * put_transaction() when the last reference is dropped.
 */
static int total_trans;

/* slab caches for handles and transactions; defined outside this file */
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

/* workqueue on which the delayed fs_info->trans_work item is queued */
static struct workqueue_struct *trans_wq;

/*
 * Tags applied to entries of fs_info->fs_roots_radix.  Both are set by
 * record_root_in_trans(); add_dirty_roots() consumes the TRANS tag and
 * btrfs_defrag_dirty_roots() consumes the DEFRAG tag.
 */
#define BTRFS_ROOT_TRANS_TAG 0
#define BTRFS_ROOT_DEFRAG_TAG 1
33
34 static void put_transaction(struct btrfs_transaction *transaction)
35 {
36         WARN_ON(transaction->use_count == 0);
37         transaction->use_count--;
38         if (transaction->use_count == 0) {
39                 WARN_ON(total_trans == 0);
40                 total_trans--;
41                 list_del_init(&transaction->list);
42                 memset(transaction, 0, sizeof(*transaction));
43                 kmem_cache_free(btrfs_transaction_cachep, transaction);
44         }
45 }
46
47 static int join_transaction(struct btrfs_root *root)
48 {
49         struct btrfs_transaction *cur_trans;
50         cur_trans = root->fs_info->running_transaction;
51         if (!cur_trans) {
52                 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
53                                              GFP_NOFS);
54                 total_trans++;
55                 BUG_ON(!cur_trans);
56                 root->fs_info->generation++;
57                 root->fs_info->running_transaction = cur_trans;
58                 cur_trans->num_writers = 0;
59                 cur_trans->transid = root->fs_info->generation;
60                 init_waitqueue_head(&cur_trans->writer_wait);
61                 init_waitqueue_head(&cur_trans->commit_wait);
62                 cur_trans->in_commit = 0;
63                 cur_trans->use_count = 1;
64                 cur_trans->commit_done = 0;
65                 cur_trans->start_time = get_seconds();
66                 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
67                 init_bit_radix(&cur_trans->dirty_pages);
68         }
69         cur_trans->num_writers++;
70         return 0;
71 }
72
73 static int record_root_in_trans(struct btrfs_root *root)
74 {
75         u64 running_trans_id = root->fs_info->running_transaction->transid;
76         if (root->ref_cows && root->last_trans < running_trans_id) {
77                 WARN_ON(root == root->fs_info->extent_root);
78                 if (root->root_item.refs != 0) {
79                         radix_tree_tag_set(&root->fs_info->fs_roots_radix,
80                                    (unsigned long)root->root_key.objectid,
81                                    BTRFS_ROOT_TRANS_TAG);
82                         radix_tree_tag_set(&root->fs_info->fs_roots_radix,
83                                    (unsigned long)root->root_key.objectid,
84                                    BTRFS_ROOT_DEFRAG_TAG);
85                         root->commit_root = root->node;
86                         get_bh(root->node);
87                 } else {
88                         WARN_ON(1);
89                 }
90                 root->last_trans = running_trans_id;
91         }
92         return 0;
93 }
94
95 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
96                                                    int num_blocks)
97 {
98         struct btrfs_trans_handle *h =
99                 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
100         int ret;
101
102         mutex_lock(&root->fs_info->trans_mutex);
103         ret = join_transaction(root);
104         BUG_ON(ret);
105
106         record_root_in_trans(root);
107         h->transid = root->fs_info->running_transaction->transid;
108         h->transaction = root->fs_info->running_transaction;
109         h->blocks_reserved = num_blocks;
110         h->blocks_used = 0;
111         h->block_group = NULL;
112         root->fs_info->running_transaction->use_count++;
113         mutex_unlock(&root->fs_info->trans_mutex);
114         return h;
115 }
116
117 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
118                           struct btrfs_root *root)
119 {
120         struct btrfs_transaction *cur_trans;
121
122         mutex_lock(&root->fs_info->trans_mutex);
123         cur_trans = root->fs_info->running_transaction;
124         WARN_ON(cur_trans != trans->transaction);
125         WARN_ON(cur_trans->num_writers < 1);
126         cur_trans->num_writers--;
127         if (waitqueue_active(&cur_trans->writer_wait))
128                 wake_up(&cur_trans->writer_wait);
129         put_transaction(cur_trans);
130         mutex_unlock(&root->fs_info->trans_mutex);
131         memset(trans, 0, sizeof(*trans));
132         kmem_cache_free(btrfs_trans_handle_cachep, trans);
133         return 0;
134 }
135
136
137 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
138                                      struct btrfs_root *root)
139 {
140         unsigned long gang[16];
141         int ret;
142         int i;
143         int err;
144         int werr = 0;
145         struct page *page;
146         struct radix_tree_root *dirty_pages;
147         struct inode *btree_inode = root->fs_info->btree_inode;
148
149         if (!trans || !trans->transaction) {
150                 return filemap_write_and_wait(btree_inode->i_mapping);
151         }
152         dirty_pages = &trans->transaction->dirty_pages;
153         while(1) {
154                 ret = find_first_radix_bit(dirty_pages, gang,
155                                            0, ARRAY_SIZE(gang));
156                 if (!ret)
157                         break;
158                 for (i = 0; i < ret; i++) {
159                         /* FIXME EIO */
160                         clear_radix_bit(dirty_pages, gang[i]);
161                         page = find_lock_page(btree_inode->i_mapping,
162                                               gang[i]);
163                         if (!page)
164                                 continue;
165                         if (PageWriteback(page)) {
166                                 if (PageDirty(page))
167                                         wait_on_page_writeback(page);
168                                 else {
169                                         unlock_page(page);
170                                         page_cache_release(page);
171                                         continue;
172                                 }
173                         }
174                         err = write_one_page(page, 0);
175                         if (err)
176                                 werr = err;
177                         page_cache_release(page);
178                 }
179         }
180         err = filemap_fdatawait(btree_inode->i_mapping);
181         if (err)
182                 werr = err;
183         return werr;
184 }
185
186 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
187                             struct btrfs_root *root)
188 {
189         int ret;
190         u64 old_extent_block;
191         struct btrfs_fs_info *fs_info = root->fs_info;
192         struct btrfs_root *tree_root = fs_info->tree_root;
193         struct btrfs_root *extent_root = fs_info->extent_root;
194
195         btrfs_write_dirty_block_groups(trans, extent_root);
196         while(1) {
197                 old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
198                 if (old_extent_block == bh_blocknr(extent_root->node))
199                         break;
200                 btrfs_set_root_blocknr(&extent_root->root_item,
201                                        bh_blocknr(extent_root->node));
202                 ret = btrfs_update_root(trans, tree_root,
203                                         &extent_root->root_key,
204                                         &extent_root->root_item);
205                 BUG_ON(ret);
206                 btrfs_write_dirty_block_groups(trans, extent_root);
207         }
208         return 0;
209 }
210
211 static int wait_for_commit(struct btrfs_root *root,
212                            struct btrfs_transaction *commit)
213 {
214         DEFINE_WAIT(wait);
215         mutex_lock(&root->fs_info->trans_mutex);
216         while(!commit->commit_done) {
217                 prepare_to_wait(&commit->commit_wait, &wait,
218                                 TASK_UNINTERRUPTIBLE);
219                 if (commit->commit_done)
220                         break;
221                 mutex_unlock(&root->fs_info->trans_mutex);
222                 schedule();
223                 mutex_lock(&root->fs_info->trans_mutex);
224         }
225         mutex_unlock(&root->fs_info->trans_mutex);
226         finish_wait(&commit->commit_wait, &wait);
227         return 0;
228 }
229
230 struct dirty_root {
231         struct list_head list;
232         struct btrfs_root *root;
233 };
234
235 int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list)
236 {
237         struct dirty_root *dirty;
238
239         dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
240         if (!dirty)
241                 return -ENOMEM;
242         dirty->root = root;
243         list_add(&dirty->list, dead_list);
244         return 0;
245 }
246
247 static int add_dirty_roots(struct btrfs_trans_handle *trans,
248                            struct radix_tree_root *radix,
249                            struct list_head *list)
250 {
251         struct dirty_root *dirty;
252         struct btrfs_root *gang[8];
253         struct btrfs_root *root;
254         int i;
255         int ret;
256         int err = 0;
257         u32 refs;
258
259         while(1) {
260                 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
261                                                  ARRAY_SIZE(gang),
262                                                  BTRFS_ROOT_TRANS_TAG);
263                 if (ret == 0)
264                         break;
265                 for (i = 0; i < ret; i++) {
266                         root = gang[i];
267                         radix_tree_tag_clear(radix,
268                                      (unsigned long)root->root_key.objectid,
269                                      BTRFS_ROOT_TRANS_TAG);
270                         if (root->commit_root == root->node) {
271                                 WARN_ON(bh_blocknr(root->node) !=
272                                         btrfs_root_blocknr(&root->root_item));
273                                 brelse(root->commit_root);
274                                 root->commit_root = NULL;
275                                 continue;
276                         }
277                         dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
278                         BUG_ON(!dirty);
279                         dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
280                         BUG_ON(!dirty->root);
281
282                         memset(&root->root_item.drop_progress, 0,
283                                sizeof(struct btrfs_disk_key));
284                         root->root_item.drop_level = 0;
285
286                         memcpy(dirty->root, root, sizeof(*root));
287                         dirty->root->node = root->commit_root;
288                         root->commit_root = NULL;
289
290                         root->root_key.offset = root->fs_info->generation;
291                         btrfs_set_root_blocknr(&root->root_item,
292                                                bh_blocknr(root->node));
293                         err = btrfs_insert_root(trans, root->fs_info->tree_root,
294                                                 &root->root_key,
295                                                 &root->root_item);
296                         if (err)
297                                 break;
298
299                         refs = btrfs_root_refs(&dirty->root->root_item);
300                         btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
301                         err = btrfs_update_root(trans, root->fs_info->tree_root,
302                                                 &dirty->root->root_key,
303                                                 &dirty->root->root_item);
304
305                         BUG_ON(err);
306                         if (refs == 1) {
307                                 list_add(&dirty->list, list);
308                         } else {
309                                 WARN_ON(1);
310                                 kfree(dirty->root);
311                                 kfree(dirty);
312                         }
313                 }
314         }
315         return err;
316 }
317
318 int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
319 {
320         struct btrfs_root *gang[1];
321         struct btrfs_root *root;
322         struct btrfs_root *tree_root = info->tree_root;
323         struct btrfs_trans_handle *trans;
324         int i;
325         int ret;
326         int err = 0;
327         u64 last = 0;
328
329         trans = btrfs_start_transaction(tree_root, 1);
330         while(1) {
331                 ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
332                                                  (void **)gang, last,
333                                                  ARRAY_SIZE(gang),
334                                                  BTRFS_ROOT_DEFRAG_TAG);
335                 if (ret == 0)
336                         break;
337                 for (i = 0; i < ret; i++) {
338                         root = gang[i];
339                         last = root->root_key.objectid + 1;
340                         radix_tree_tag_clear(&info->fs_roots_radix,
341                                      (unsigned long)root->root_key.objectid,
342                                      BTRFS_ROOT_DEFRAG_TAG);
343                         if (root->defrag_running)
344                                 continue;
345
346                         while (1) {
347                                 mutex_lock(&root->fs_info->trans_mutex);
348                                 record_root_in_trans(root);
349                                 mutex_unlock(&root->fs_info->trans_mutex);
350
351                                 root->defrag_running = 1;
352                                 err = btrfs_defrag_leaves(trans, root, 1);
353                                 btrfs_end_transaction(trans, tree_root);
354                                 mutex_unlock(&info->fs_mutex);
355
356                                 btrfs_btree_balance_dirty(root);
357                                 cond_resched();
358
359                                 mutex_lock(&info->fs_mutex);
360                                 trans = btrfs_start_transaction(tree_root, 1);
361                                 if (err != -EAGAIN)
362                                         break;
363                         }
364                         root->defrag_running = 0;
365                 }
366         }
367         btrfs_end_transaction(trans, tree_root);
368         return err;
369 }
370
371 static int drop_dirty_roots(struct btrfs_root *tree_root,
372                             struct list_head *list)
373 {
374         struct dirty_root *dirty;
375         struct btrfs_trans_handle *trans;
376         int ret = 0;
377         int err;
378
379         while(!list_empty(list)) {
380                 mutex_lock(&tree_root->fs_info->fs_mutex);
381                 dirty = list_entry(list->next, struct dirty_root, list);
382                 list_del_init(&dirty->list);
383
384                 while(1) {
385                         trans = btrfs_start_transaction(tree_root, 1);
386                         ret = btrfs_drop_snapshot(trans, dirty->root);
387                         if (ret != -EAGAIN) {
388                                 break;
389                         }
390                         err = btrfs_update_root(trans,
391                                         tree_root,
392                                         &dirty->root->root_key,
393                                         &dirty->root->root_item);
394                         if (err)
395                                 ret = err;
396                         ret = btrfs_end_transaction(trans, tree_root);
397                         BUG_ON(ret);
398                         mutex_unlock(&tree_root->fs_info->fs_mutex);
399
400                         btrfs_btree_balance_dirty(tree_root);
401                         schedule();
402
403                         mutex_lock(&tree_root->fs_info->fs_mutex);
404                 }
405                 BUG_ON(ret);
406                 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
407                 if (ret)
408                         break;
409                 ret = btrfs_end_transaction(trans, tree_root);
410                 BUG_ON(ret);
411
412                 kfree(dirty->root);
413                 kfree(dirty);
414                 mutex_unlock(&tree_root->fs_info->fs_mutex);
415                 btrfs_btree_balance_dirty(tree_root);
416                 schedule();
417         }
418         return ret;
419 }
420
421 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
422                              struct btrfs_root *root)
423 {
424         int ret = 0;
425         struct btrfs_transaction *cur_trans;
426         struct btrfs_transaction *prev_trans = NULL;
427         struct list_head dirty_fs_roots;
428         struct radix_tree_root pinned_copy;
429         DEFINE_WAIT(wait);
430
431         init_bit_radix(&pinned_copy);
432         INIT_LIST_HEAD(&dirty_fs_roots);
433
434         mutex_lock(&root->fs_info->trans_mutex);
435         if (trans->transaction->in_commit) {
436                 cur_trans = trans->transaction;
437                 trans->transaction->use_count++;
438                 mutex_unlock(&root->fs_info->trans_mutex);
439                 btrfs_end_transaction(trans, root);
440
441                 mutex_unlock(&root->fs_info->fs_mutex);
442                 ret = wait_for_commit(root, cur_trans);
443                 BUG_ON(ret);
444                 put_transaction(cur_trans);
445                 mutex_lock(&root->fs_info->fs_mutex);
446                 return 0;
447         }
448         trans->transaction->in_commit = 1;
449         cur_trans = trans->transaction;
450         if (cur_trans->list.prev != &root->fs_info->trans_list) {
451                 prev_trans = list_entry(cur_trans->list.prev,
452                                         struct btrfs_transaction, list);
453                 if (!prev_trans->commit_done) {
454                         prev_trans->use_count++;
455                         mutex_unlock(&root->fs_info->fs_mutex);
456                         mutex_unlock(&root->fs_info->trans_mutex);
457
458                         wait_for_commit(root, prev_trans);
459                         put_transaction(prev_trans);
460
461                         mutex_lock(&root->fs_info->fs_mutex);
462                         mutex_lock(&root->fs_info->trans_mutex);
463                 }
464         }
465         while (trans->transaction->num_writers > 1) {
466                 WARN_ON(cur_trans != trans->transaction);
467                 prepare_to_wait(&trans->transaction->writer_wait, &wait,
468                                 TASK_UNINTERRUPTIBLE);
469                 if (trans->transaction->num_writers <= 1)
470                         break;
471                 mutex_unlock(&root->fs_info->fs_mutex);
472                 mutex_unlock(&root->fs_info->trans_mutex);
473                 schedule();
474                 mutex_lock(&root->fs_info->fs_mutex);
475                 mutex_lock(&root->fs_info->trans_mutex);
476                 finish_wait(&trans->transaction->writer_wait, &wait);
477         }
478         finish_wait(&trans->transaction->writer_wait, &wait);
479         WARN_ON(cur_trans != trans->transaction);
480         ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
481                               &dirty_fs_roots);
482         BUG_ON(ret);
483
484         ret = btrfs_commit_tree_roots(trans, root);
485         BUG_ON(ret);
486
487         cur_trans = root->fs_info->running_transaction;
488         root->fs_info->running_transaction = NULL;
489         btrfs_set_super_generation(&root->fs_info->super_copy,
490                                    cur_trans->transid);
491         btrfs_set_super_root(&root->fs_info->super_copy,
492                              bh_blocknr(root->fs_info->tree_root->node));
493         memcpy(root->fs_info->disk_super, &root->fs_info->super_copy,
494                sizeof(root->fs_info->super_copy));
495
496         btrfs_copy_pinned(root, &pinned_copy);
497
498         mutex_unlock(&root->fs_info->trans_mutex);
499         mutex_unlock(&root->fs_info->fs_mutex);
500         ret = btrfs_write_and_wait_transaction(trans, root);
501         BUG_ON(ret);
502         write_ctree_super(trans, root);
503         mutex_lock(&root->fs_info->fs_mutex);
504         btrfs_finish_extent_commit(trans, root, &pinned_copy);
505         mutex_lock(&root->fs_info->trans_mutex);
506         cur_trans->commit_done = 1;
507         wake_up(&cur_trans->commit_wait);
508         put_transaction(cur_trans);
509         put_transaction(cur_trans);
510         if (root->fs_info->closing)
511                 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
512         else
513                 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
514         mutex_unlock(&root->fs_info->trans_mutex);
515         kmem_cache_free(btrfs_trans_handle_cachep, trans);
516
517         if (root->fs_info->closing) {
518                 mutex_unlock(&root->fs_info->fs_mutex);
519                 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
520                 mutex_lock(&root->fs_info->fs_mutex);
521         }
522         return ret;
523 }
524
525 void btrfs_transaction_cleaner(struct work_struct *work)
526 {
527         struct btrfs_fs_info *fs_info = container_of(work,
528                                                      struct btrfs_fs_info,
529                                                      trans_work.work);
530
531         struct btrfs_root *root = fs_info->tree_root;
532         struct btrfs_transaction *cur;
533         struct btrfs_trans_handle *trans;
534         struct list_head dirty_roots;
535         unsigned long now;
536         unsigned long delay = HZ * 30;
537         int ret;
538
539         INIT_LIST_HEAD(&dirty_roots);
540         mutex_lock(&root->fs_info->fs_mutex);
541         mutex_lock(&root->fs_info->trans_mutex);
542         cur = root->fs_info->running_transaction;
543         if (!cur) {
544                 mutex_unlock(&root->fs_info->trans_mutex);
545                 goto out;
546         }
547         now = get_seconds();
548         if (now < cur->start_time || now - cur->start_time < 30) {
549                 mutex_unlock(&root->fs_info->trans_mutex);
550                 delay = HZ * 5;
551                 goto out;
552         }
553         mutex_unlock(&root->fs_info->trans_mutex);
554         btrfs_defrag_dirty_roots(root->fs_info);
555         trans = btrfs_start_transaction(root, 1);
556         ret = btrfs_commit_transaction(trans, root);
557 out:
558         mutex_unlock(&root->fs_info->fs_mutex);
559
560         mutex_lock(&root->fs_info->trans_mutex);
561         list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
562         mutex_unlock(&root->fs_info->trans_mutex);
563
564         if (!list_empty(&dirty_roots)) {
565                 drop_dirty_roots(root, &dirty_roots);
566         }
567         btrfs_transaction_queue_work(root, delay);
568 }
569
570 void btrfs_transaction_queue_work(struct btrfs_root *root, int delay)
571 {
572         queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay);
573 }
574
575 void btrfs_transaction_flush_work(struct btrfs_root *root)
576 {
577         cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work);
578         flush_workqueue(trans_wq);
579 }
580
581 void __init btrfs_init_transaction_sys(void)
582 {
583         trans_wq = create_workqueue("btrfs");
584 }
585
586 void __exit btrfs_exit_transaction_sys(void)
587 {
588         destroy_workqueue(trans_wq);
589 }
590