]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/btrfs/qgroup.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rkuo/linux...
[karo-tx-linux.git] / fs / btrfs / qgroup.c
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35
36 /* TODO XXX FIXME
37  *  - subvol delete -> delete when ref goes to 0? delete limits also?
38  *  - reorganize keys
39  *  - compressed
40  *  - sync
41  *  - copy also limits on subvol creation
42  *  - limit
 *  - caches for ulists
44  *  - performance benchmarks
45  *  - check all ioctl parameters
46  */
47
48 /*
49  * one struct for each qgroup, organized in fs_info->qgroup_tree.
50  */
51 struct btrfs_qgroup {
52         u64 qgroupid;
53
54         /*
55          * state
56          */
57         u64 rfer;       /* referenced */
58         u64 rfer_cmpr;  /* referenced compressed */
59         u64 excl;       /* exclusive */
60         u64 excl_cmpr;  /* exclusive compressed */
61
62         /*
63          * limits
64          */
65         u64 lim_flags;  /* which limits are set */
66         u64 max_rfer;
67         u64 max_excl;
68         u64 rsv_rfer;
69         u64 rsv_excl;
70
71         /*
72          * reservation tracking
73          */
74         u64 reserved;
75
76         /*
77          * lists
78          */
79         struct list_head groups;  /* groups this group is member of */
80         struct list_head members; /* groups that are members of this group */
81         struct list_head dirty;   /* dirty groups */
82         struct rb_node node;      /* tree of qgroups */
83
84         /*
85          * temp variables for accounting operations
86          */
87         u64 tag;
88         u64 refcnt;
89 };
90
91 /*
92  * glue structure to represent the relations between qgroups.
93  */
94 struct btrfs_qgroup_list {
95         struct list_head next_group;
96         struct list_head next_member;
97         struct btrfs_qgroup *group;
98         struct btrfs_qgroup *member;
99 };
100
101 static int
102 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
103                    int init_flags);
104 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
105
106 /* must be called with qgroup_ioctl_lock held */
107 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
108                                            u64 qgroupid)
109 {
110         struct rb_node *n = fs_info->qgroup_tree.rb_node;
111         struct btrfs_qgroup *qgroup;
112
113         while (n) {
114                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
115                 if (qgroup->qgroupid < qgroupid)
116                         n = n->rb_left;
117                 else if (qgroup->qgroupid > qgroupid)
118                         n = n->rb_right;
119                 else
120                         return qgroup;
121         }
122         return NULL;
123 }
124
125 /* must be called with qgroup_lock held */
126 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
127                                           u64 qgroupid)
128 {
129         struct rb_node **p = &fs_info->qgroup_tree.rb_node;
130         struct rb_node *parent = NULL;
131         struct btrfs_qgroup *qgroup;
132
133         while (*p) {
134                 parent = *p;
135                 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
136
137                 if (qgroup->qgroupid < qgroupid)
138                         p = &(*p)->rb_left;
139                 else if (qgroup->qgroupid > qgroupid)
140                         p = &(*p)->rb_right;
141                 else
142                         return qgroup;
143         }
144
145         qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
146         if (!qgroup)
147                 return ERR_PTR(-ENOMEM);
148
149         qgroup->qgroupid = qgroupid;
150         INIT_LIST_HEAD(&qgroup->groups);
151         INIT_LIST_HEAD(&qgroup->members);
152         INIT_LIST_HEAD(&qgroup->dirty);
153
154         rb_link_node(&qgroup->node, parent, p);
155         rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
156
157         return qgroup;
158 }
159
160 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
161 {
162         struct btrfs_qgroup_list *list;
163
164         list_del(&qgroup->dirty);
165         while (!list_empty(&qgroup->groups)) {
166                 list = list_first_entry(&qgroup->groups,
167                                         struct btrfs_qgroup_list, next_group);
168                 list_del(&list->next_group);
169                 list_del(&list->next_member);
170                 kfree(list);
171         }
172
173         while (!list_empty(&qgroup->members)) {
174                 list = list_first_entry(&qgroup->members,
175                                         struct btrfs_qgroup_list, next_member);
176                 list_del(&list->next_group);
177                 list_del(&list->next_member);
178                 kfree(list);
179         }
180         kfree(qgroup);
181 }
182
183 /* must be called with qgroup_lock held */
184 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185 {
186         struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
187
188         if (!qgroup)
189                 return -ENOENT;
190
191         rb_erase(&qgroup->node, &fs_info->qgroup_tree);
192         __del_qgroup_rb(qgroup);
193         return 0;
194 }
195
196 /* must be called with qgroup_lock held */
197 static int add_relation_rb(struct btrfs_fs_info *fs_info,
198                            u64 memberid, u64 parentid)
199 {
200         struct btrfs_qgroup *member;
201         struct btrfs_qgroup *parent;
202         struct btrfs_qgroup_list *list;
203
204         member = find_qgroup_rb(fs_info, memberid);
205         parent = find_qgroup_rb(fs_info, parentid);
206         if (!member || !parent)
207                 return -ENOENT;
208
209         list = kzalloc(sizeof(*list), GFP_ATOMIC);
210         if (!list)
211                 return -ENOMEM;
212
213         list->group = parent;
214         list->member = member;
215         list_add_tail(&list->next_group, &member->groups);
216         list_add_tail(&list->next_member, &parent->members);
217
218         return 0;
219 }
220
221 /* must be called with qgroup_lock held */
222 static int del_relation_rb(struct btrfs_fs_info *fs_info,
223                            u64 memberid, u64 parentid)
224 {
225         struct btrfs_qgroup *member;
226         struct btrfs_qgroup *parent;
227         struct btrfs_qgroup_list *list;
228
229         member = find_qgroup_rb(fs_info, memberid);
230         parent = find_qgroup_rb(fs_info, parentid);
231         if (!member || !parent)
232                 return -ENOENT;
233
234         list_for_each_entry(list, &member->groups, next_group) {
235                 if (list->group == parent) {
236                         list_del(&list->next_group);
237                         list_del(&list->next_member);
238                         kfree(list);
239                         return 0;
240                 }
241         }
242         return -ENOENT;
243 }
244
245 /*
246  * The full config is read in one go, only called from open_ctree()
247  * It doesn't use any locking, as at this point we're still single-threaded
248  */
249 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
250 {
251         struct btrfs_key key;
252         struct btrfs_key found_key;
253         struct btrfs_root *quota_root = fs_info->quota_root;
254         struct btrfs_path *path = NULL;
255         struct extent_buffer *l;
256         int slot;
257         int ret = 0;
258         u64 flags = 0;
259         u64 rescan_progress = 0;
260
261         if (!fs_info->quota_enabled)
262                 return 0;
263
264         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
265         if (!fs_info->qgroup_ulist) {
266                 ret = -ENOMEM;
267                 goto out;
268         }
269
270         path = btrfs_alloc_path();
271         if (!path) {
272                 ret = -ENOMEM;
273                 goto out;
274         }
275
276         /* default this to quota off, in case no status key is found */
277         fs_info->qgroup_flags = 0;
278
279         /*
280          * pass 1: read status, all qgroup infos and limits
281          */
282         key.objectid = 0;
283         key.type = 0;
284         key.offset = 0;
285         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
286         if (ret)
287                 goto out;
288
289         while (1) {
290                 struct btrfs_qgroup *qgroup;
291
292                 slot = path->slots[0];
293                 l = path->nodes[0];
294                 btrfs_item_key_to_cpu(l, &found_key, slot);
295
296                 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
297                         struct btrfs_qgroup_status_item *ptr;
298
299                         ptr = btrfs_item_ptr(l, slot,
300                                              struct btrfs_qgroup_status_item);
301
302                         if (btrfs_qgroup_status_version(l, ptr) !=
303                             BTRFS_QGROUP_STATUS_VERSION) {
304                                 btrfs_err(fs_info,
305                                  "old qgroup version, quota disabled");
306                                 goto out;
307                         }
308                         if (btrfs_qgroup_status_generation(l, ptr) !=
309                             fs_info->generation) {
310                                 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
311                                 btrfs_err(fs_info,
312                                         "qgroup generation mismatch, "
313                                         "marked as inconsistent");
314                         }
315                         fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
316                                                                           ptr);
317                         rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
318                         goto next1;
319                 }
320
321                 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
322                     found_key.type != BTRFS_QGROUP_LIMIT_KEY)
323                         goto next1;
324
325                 qgroup = find_qgroup_rb(fs_info, found_key.offset);
326                 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
327                     (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
328                         btrfs_err(fs_info, "inconsitent qgroup config");
329                         flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330                 }
331                 if (!qgroup) {
332                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
333                         if (IS_ERR(qgroup)) {
334                                 ret = PTR_ERR(qgroup);
335                                 goto out;
336                         }
337                 }
338                 switch (found_key.type) {
339                 case BTRFS_QGROUP_INFO_KEY: {
340                         struct btrfs_qgroup_info_item *ptr;
341
342                         ptr = btrfs_item_ptr(l, slot,
343                                              struct btrfs_qgroup_info_item);
344                         qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
345                         qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
346                         qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
347                         qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
348                         /* generation currently unused */
349                         break;
350                 }
351                 case BTRFS_QGROUP_LIMIT_KEY: {
352                         struct btrfs_qgroup_limit_item *ptr;
353
354                         ptr = btrfs_item_ptr(l, slot,
355                                              struct btrfs_qgroup_limit_item);
356                         qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
357                         qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
358                         qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
359                         qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
360                         qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
361                         break;
362                 }
363                 }
364 next1:
365                 ret = btrfs_next_item(quota_root, path);
366                 if (ret < 0)
367                         goto out;
368                 if (ret)
369                         break;
370         }
371         btrfs_release_path(path);
372
373         /*
374          * pass 2: read all qgroup relations
375          */
376         key.objectid = 0;
377         key.type = BTRFS_QGROUP_RELATION_KEY;
378         key.offset = 0;
379         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
380         if (ret)
381                 goto out;
382         while (1) {
383                 slot = path->slots[0];
384                 l = path->nodes[0];
385                 btrfs_item_key_to_cpu(l, &found_key, slot);
386
387                 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
388                         goto next2;
389
390                 if (found_key.objectid > found_key.offset) {
391                         /* parent <- member, not needed to build config */
392                         /* FIXME should we omit the key completely? */
393                         goto next2;
394                 }
395
396                 ret = add_relation_rb(fs_info, found_key.objectid,
397                                       found_key.offset);
398                 if (ret == -ENOENT) {
399                         btrfs_warn(fs_info,
400                                 "orphan qgroup relation 0x%llx->0x%llx",
401                                 found_key.objectid, found_key.offset);
402                         ret = 0;        /* ignore the error */
403                 }
404                 if (ret)
405                         goto out;
406 next2:
407                 ret = btrfs_next_item(quota_root, path);
408                 if (ret < 0)
409                         goto out;
410                 if (ret)
411                         break;
412         }
413 out:
414         fs_info->qgroup_flags |= flags;
415         if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
416                 fs_info->quota_enabled = 0;
417                 fs_info->pending_quota_state = 0;
418         } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
419                    ret >= 0) {
420                 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
421         }
422         btrfs_free_path(path);
423
424         if (ret < 0) {
425                 ulist_free(fs_info->qgroup_ulist);
426                 fs_info->qgroup_ulist = NULL;
427                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
428         }
429
430         return ret < 0 ? ret : 0;
431 }
432
433 /*
434  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
435  * first two are in single-threaded paths.And for the third one, we have set
436  * quota_root to be null with qgroup_lock held before, so it is safe to clean
437  * up the in-memory structures without qgroup_lock held.
438  */
439 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
440 {
441         struct rb_node *n;
442         struct btrfs_qgroup *qgroup;
443
444         while ((n = rb_first(&fs_info->qgroup_tree))) {
445                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
446                 rb_erase(n, &fs_info->qgroup_tree);
447                 __del_qgroup_rb(qgroup);
448         }
449         /*
450          * we call btrfs_free_qgroup_config() when umounting
451          * filesystem and disabling quota, so we set qgroup_ulit
452          * to be null here to avoid double free.
453          */
454         ulist_free(fs_info->qgroup_ulist);
455         fs_info->qgroup_ulist = NULL;
456 }
457
458 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
459                                     struct btrfs_root *quota_root,
460                                     u64 src, u64 dst)
461 {
462         int ret;
463         struct btrfs_path *path;
464         struct btrfs_key key;
465
466         path = btrfs_alloc_path();
467         if (!path)
468                 return -ENOMEM;
469
470         key.objectid = src;
471         key.type = BTRFS_QGROUP_RELATION_KEY;
472         key.offset = dst;
473
474         ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
475
476         btrfs_mark_buffer_dirty(path->nodes[0]);
477
478         btrfs_free_path(path);
479         return ret;
480 }
481
482 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
483                                     struct btrfs_root *quota_root,
484                                     u64 src, u64 dst)
485 {
486         int ret;
487         struct btrfs_path *path;
488         struct btrfs_key key;
489
490         path = btrfs_alloc_path();
491         if (!path)
492                 return -ENOMEM;
493
494         key.objectid = src;
495         key.type = BTRFS_QGROUP_RELATION_KEY;
496         key.offset = dst;
497
498         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
499         if (ret < 0)
500                 goto out;
501
502         if (ret > 0) {
503                 ret = -ENOENT;
504                 goto out;
505         }
506
507         ret = btrfs_del_item(trans, quota_root, path);
508 out:
509         btrfs_free_path(path);
510         return ret;
511 }
512
513 static int add_qgroup_item(struct btrfs_trans_handle *trans,
514                            struct btrfs_root *quota_root, u64 qgroupid)
515 {
516         int ret;
517         struct btrfs_path *path;
518         struct btrfs_qgroup_info_item *qgroup_info;
519         struct btrfs_qgroup_limit_item *qgroup_limit;
520         struct extent_buffer *leaf;
521         struct btrfs_key key;
522
523         path = btrfs_alloc_path();
524         if (!path)
525                 return -ENOMEM;
526
527         key.objectid = 0;
528         key.type = BTRFS_QGROUP_INFO_KEY;
529         key.offset = qgroupid;
530
531         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
532                                       sizeof(*qgroup_info));
533         if (ret)
534                 goto out;
535
536         leaf = path->nodes[0];
537         qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
538                                  struct btrfs_qgroup_info_item);
539         btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
540         btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
541         btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
542         btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
543         btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
544
545         btrfs_mark_buffer_dirty(leaf);
546
547         btrfs_release_path(path);
548
549         key.type = BTRFS_QGROUP_LIMIT_KEY;
550         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
551                                       sizeof(*qgroup_limit));
552         if (ret)
553                 goto out;
554
555         leaf = path->nodes[0];
556         qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
557                                   struct btrfs_qgroup_limit_item);
558         btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
559         btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
560         btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
561         btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
562         btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
563
564         btrfs_mark_buffer_dirty(leaf);
565
566         ret = 0;
567 out:
568         btrfs_free_path(path);
569         return ret;
570 }
571
572 static int del_qgroup_item(struct btrfs_trans_handle *trans,
573                            struct btrfs_root *quota_root, u64 qgroupid)
574 {
575         int ret;
576         struct btrfs_path *path;
577         struct btrfs_key key;
578
579         path = btrfs_alloc_path();
580         if (!path)
581                 return -ENOMEM;
582
583         key.objectid = 0;
584         key.type = BTRFS_QGROUP_INFO_KEY;
585         key.offset = qgroupid;
586         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
587         if (ret < 0)
588                 goto out;
589
590         if (ret > 0) {
591                 ret = -ENOENT;
592                 goto out;
593         }
594
595         ret = btrfs_del_item(trans, quota_root, path);
596         if (ret)
597                 goto out;
598
599         btrfs_release_path(path);
600
601         key.type = BTRFS_QGROUP_LIMIT_KEY;
602         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
603         if (ret < 0)
604                 goto out;
605
606         if (ret > 0) {
607                 ret = -ENOENT;
608                 goto out;
609         }
610
611         ret = btrfs_del_item(trans, quota_root, path);
612
613 out:
614         btrfs_free_path(path);
615         return ret;
616 }
617
618 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
619                                     struct btrfs_root *root, u64 qgroupid,
620                                     u64 flags, u64 max_rfer, u64 max_excl,
621                                     u64 rsv_rfer, u64 rsv_excl)
622 {
623         struct btrfs_path *path;
624         struct btrfs_key key;
625         struct extent_buffer *l;
626         struct btrfs_qgroup_limit_item *qgroup_limit;
627         int ret;
628         int slot;
629
630         key.objectid = 0;
631         key.type = BTRFS_QGROUP_LIMIT_KEY;
632         key.offset = qgroupid;
633
634         path = btrfs_alloc_path();
635         if (!path)
636                 return -ENOMEM;
637
638         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
639         if (ret > 0)
640                 ret = -ENOENT;
641
642         if (ret)
643                 goto out;
644
645         l = path->nodes[0];
646         slot = path->slots[0];
647         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
648         btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
649         btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
650         btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
651         btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
652         btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
653
654         btrfs_mark_buffer_dirty(l);
655
656 out:
657         btrfs_free_path(path);
658         return ret;
659 }
660
661 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
662                                    struct btrfs_root *root,
663                                    struct btrfs_qgroup *qgroup)
664 {
665         struct btrfs_path *path;
666         struct btrfs_key key;
667         struct extent_buffer *l;
668         struct btrfs_qgroup_info_item *qgroup_info;
669         int ret;
670         int slot;
671
672         key.objectid = 0;
673         key.type = BTRFS_QGROUP_INFO_KEY;
674         key.offset = qgroup->qgroupid;
675
676         path = btrfs_alloc_path();
677         if (!path)
678                 return -ENOMEM;
679
680         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
681         if (ret > 0)
682                 ret = -ENOENT;
683
684         if (ret)
685                 goto out;
686
687         l = path->nodes[0];
688         slot = path->slots[0];
689         qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
690         btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
691         btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
692         btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
693         btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
694         btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
695
696         btrfs_mark_buffer_dirty(l);
697
698 out:
699         btrfs_free_path(path);
700         return ret;
701 }
702
703 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
704                                      struct btrfs_fs_info *fs_info,
705                                     struct btrfs_root *root)
706 {
707         struct btrfs_path *path;
708         struct btrfs_key key;
709         struct extent_buffer *l;
710         struct btrfs_qgroup_status_item *ptr;
711         int ret;
712         int slot;
713
714         key.objectid = 0;
715         key.type = BTRFS_QGROUP_STATUS_KEY;
716         key.offset = 0;
717
718         path = btrfs_alloc_path();
719         if (!path)
720                 return -ENOMEM;
721
722         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
723         if (ret > 0)
724                 ret = -ENOENT;
725
726         if (ret)
727                 goto out;
728
729         l = path->nodes[0];
730         slot = path->slots[0];
731         ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
732         btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
733         btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
734         btrfs_set_qgroup_status_rescan(l, ptr,
735                                 fs_info->qgroup_rescan_progress.objectid);
736
737         btrfs_mark_buffer_dirty(l);
738
739 out:
740         btrfs_free_path(path);
741         return ret;
742 }
743
744 /*
745  * called with qgroup_lock held
746  */
747 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
748                                   struct btrfs_root *root)
749 {
750         struct btrfs_path *path;
751         struct btrfs_key key;
752         struct extent_buffer *leaf = NULL;
753         int ret;
754         int nr = 0;
755
756         path = btrfs_alloc_path();
757         if (!path)
758                 return -ENOMEM;
759
760         path->leave_spinning = 1;
761
762         key.objectid = 0;
763         key.offset = 0;
764         key.type = 0;
765
766         while (1) {
767                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
768                 if (ret < 0)
769                         goto out;
770                 leaf = path->nodes[0];
771                 nr = btrfs_header_nritems(leaf);
772                 if (!nr)
773                         break;
774                 /*
775                  * delete the leaf one by one
776                  * since the whole tree is going
777                  * to be deleted.
778                  */
779                 path->slots[0] = 0;
780                 ret = btrfs_del_items(trans, root, path, 0, nr);
781                 if (ret)
782                         goto out;
783
784                 btrfs_release_path(path);
785         }
786         ret = 0;
787 out:
788         root->fs_info->pending_quota_state = 0;
789         btrfs_free_path(path);
790         return ret;
791 }
792
793 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
794                        struct btrfs_fs_info *fs_info)
795 {
796         struct btrfs_root *quota_root;
797         struct btrfs_root *tree_root = fs_info->tree_root;
798         struct btrfs_path *path = NULL;
799         struct btrfs_qgroup_status_item *ptr;
800         struct extent_buffer *leaf;
801         struct btrfs_key key;
802         struct btrfs_key found_key;
803         struct btrfs_qgroup *qgroup = NULL;
804         int ret = 0;
805         int slot;
806
807         mutex_lock(&fs_info->qgroup_ioctl_lock);
808         if (fs_info->quota_root) {
809                 fs_info->pending_quota_state = 1;
810                 goto out;
811         }
812
813         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
814         if (!fs_info->qgroup_ulist) {
815                 ret = -ENOMEM;
816                 goto out;
817         }
818
819         /*
820          * initially create the quota tree
821          */
822         quota_root = btrfs_create_tree(trans, fs_info,
823                                        BTRFS_QUOTA_TREE_OBJECTID);
824         if (IS_ERR(quota_root)) {
825                 ret =  PTR_ERR(quota_root);
826                 goto out;
827         }
828
829         path = btrfs_alloc_path();
830         if (!path) {
831                 ret = -ENOMEM;
832                 goto out_free_root;
833         }
834
835         key.objectid = 0;
836         key.type = BTRFS_QGROUP_STATUS_KEY;
837         key.offset = 0;
838
839         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
840                                       sizeof(*ptr));
841         if (ret)
842                 goto out_free_path;
843
844         leaf = path->nodes[0];
845         ptr = btrfs_item_ptr(leaf, path->slots[0],
846                                  struct btrfs_qgroup_status_item);
847         btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
848         btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
849         fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
850                                 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
851         btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
852         btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
853
854         btrfs_mark_buffer_dirty(leaf);
855
856         key.objectid = 0;
857         key.type = BTRFS_ROOT_REF_KEY;
858         key.offset = 0;
859
860         btrfs_release_path(path);
861         ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
862         if (ret > 0)
863                 goto out_add_root;
864         if (ret < 0)
865                 goto out_free_path;
866
867
868         while (1) {
869                 slot = path->slots[0];
870                 leaf = path->nodes[0];
871                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
872
873                 if (found_key.type == BTRFS_ROOT_REF_KEY) {
874                         ret = add_qgroup_item(trans, quota_root,
875                                               found_key.offset);
876                         if (ret)
877                                 goto out_free_path;
878
879                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
880                         if (IS_ERR(qgroup)) {
881                                 ret = PTR_ERR(qgroup);
882                                 goto out_free_path;
883                         }
884                 }
885                 ret = btrfs_next_item(tree_root, path);
886                 if (ret < 0)
887                         goto out_free_path;
888                 if (ret)
889                         break;
890         }
891
892 out_add_root:
893         btrfs_release_path(path);
894         ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
895         if (ret)
896                 goto out_free_path;
897
898         qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
899         if (IS_ERR(qgroup)) {
900                 ret = PTR_ERR(qgroup);
901                 goto out_free_path;
902         }
903         spin_lock(&fs_info->qgroup_lock);
904         fs_info->quota_root = quota_root;
905         fs_info->pending_quota_state = 1;
906         spin_unlock(&fs_info->qgroup_lock);
907 out_free_path:
908         btrfs_free_path(path);
909 out_free_root:
910         if (ret) {
911                 free_extent_buffer(quota_root->node);
912                 free_extent_buffer(quota_root->commit_root);
913                 kfree(quota_root);
914         }
915 out:
916         if (ret) {
917                 ulist_free(fs_info->qgroup_ulist);
918                 fs_info->qgroup_ulist = NULL;
919         }
920         mutex_unlock(&fs_info->qgroup_ioctl_lock);
921         return ret;
922 }
923
/*
 * Turn quotas off: detach the quota root from fs_info, free the in-memory
 * qgroup configuration and delete the quota tree on disk.
 *
 * Returns 0 on success (or if quotas were not enabled), otherwise a
 * negative errno from the tree operations.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	/* nothing to do if quotas were never enabled */
	if (!fs_info->quota_root)
		goto out;
	/*
	 * Detach the quota root under qgroup_lock first, so concurrent
	 * accounting sees quotas as disabled before the teardown starts.
	 */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	spin_unlock(&fs_info->qgroup_lock);

	/* drop the in-memory qgroup tree and relation lists */
	btrfs_free_qgroup_config(fs_info);

	/* empty the quota tree ... */
	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	/* ... then delete the root item that references it */
	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* release the (now empty) tree block backing the quota root */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
965
966 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
967                          struct btrfs_qgroup *qgroup)
968 {
969         if (list_empty(&qgroup->dirty))
970                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
971 }
972
973 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
974                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
975 {
976         struct btrfs_root *quota_root;
977         struct btrfs_qgroup *parent;
978         struct btrfs_qgroup *member;
979         struct btrfs_qgroup_list *list;
980         int ret = 0;
981
982         mutex_lock(&fs_info->qgroup_ioctl_lock);
983         quota_root = fs_info->quota_root;
984         if (!quota_root) {
985                 ret = -EINVAL;
986                 goto out;
987         }
988         member = find_qgroup_rb(fs_info, src);
989         parent = find_qgroup_rb(fs_info, dst);
990         if (!member || !parent) {
991                 ret = -EINVAL;
992                 goto out;
993         }
994
995         /* check if such qgroup relation exist firstly */
996         list_for_each_entry(list, &member->groups, next_group) {
997                 if (list->group == parent) {
998                         ret = -EEXIST;
999                         goto out;
1000                 }
1001         }
1002
1003         ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1004         if (ret)
1005                 goto out;
1006
1007         ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1008         if (ret) {
1009                 del_qgroup_relation_item(trans, quota_root, src, dst);
1010                 goto out;
1011         }
1012
1013         spin_lock(&fs_info->qgroup_lock);
1014         ret = add_relation_rb(quota_root->fs_info, src, dst);
1015         spin_unlock(&fs_info->qgroup_lock);
1016 out:
1017         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1018         return ret;
1019 }
1020
1021 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1022                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1023 {
1024         struct btrfs_root *quota_root;
1025         struct btrfs_qgroup *parent;
1026         struct btrfs_qgroup *member;
1027         struct btrfs_qgroup_list *list;
1028         int ret = 0;
1029         int err;
1030
1031         mutex_lock(&fs_info->qgroup_ioctl_lock);
1032         quota_root = fs_info->quota_root;
1033         if (!quota_root) {
1034                 ret = -EINVAL;
1035                 goto out;
1036         }
1037
1038         member = find_qgroup_rb(fs_info, src);
1039         parent = find_qgroup_rb(fs_info, dst);
1040         if (!member || !parent) {
1041                 ret = -EINVAL;
1042                 goto out;
1043         }
1044
1045         /* check if such qgroup relation exist firstly */
1046         list_for_each_entry(list, &member->groups, next_group) {
1047                 if (list->group == parent)
1048                         goto exist;
1049         }
1050         ret = -ENOENT;
1051         goto out;
1052 exist:
1053         ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1054         err = del_qgroup_relation_item(trans, quota_root, dst, src);
1055         if (err && !ret)
1056                 ret = err;
1057
1058         spin_lock(&fs_info->qgroup_lock);
1059         del_relation_rb(fs_info, src, dst);
1060         spin_unlock(&fs_info->qgroup_lock);
1061 out:
1062         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1063         return ret;
1064 }
1065
1066 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1067                         struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1068 {
1069         struct btrfs_root *quota_root;
1070         struct btrfs_qgroup *qgroup;
1071         int ret = 0;
1072
1073         mutex_lock(&fs_info->qgroup_ioctl_lock);
1074         quota_root = fs_info->quota_root;
1075         if (!quota_root) {
1076                 ret = -EINVAL;
1077                 goto out;
1078         }
1079         qgroup = find_qgroup_rb(fs_info, qgroupid);
1080         if (qgroup) {
1081                 ret = -EEXIST;
1082                 goto out;
1083         }
1084
1085         ret = add_qgroup_item(trans, quota_root, qgroupid);
1086         if (ret)
1087                 goto out;
1088
1089         spin_lock(&fs_info->qgroup_lock);
1090         qgroup = add_qgroup_rb(fs_info, qgroupid);
1091         spin_unlock(&fs_info->qgroup_lock);
1092
1093         if (IS_ERR(qgroup))
1094                 ret = PTR_ERR(qgroup);
1095 out:
1096         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1097         return ret;
1098 }
1099
1100 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1101                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1102 {
1103         struct btrfs_root *quota_root;
1104         struct btrfs_qgroup *qgroup;
1105         int ret = 0;
1106
1107         mutex_lock(&fs_info->qgroup_ioctl_lock);
1108         quota_root = fs_info->quota_root;
1109         if (!quota_root) {
1110                 ret = -EINVAL;
1111                 goto out;
1112         }
1113
1114         qgroup = find_qgroup_rb(fs_info, qgroupid);
1115         if (!qgroup) {
1116                 ret = -ENOENT;
1117                 goto out;
1118         } else {
1119                 /* check if there are no relations to this qgroup */
1120                 if (!list_empty(&qgroup->groups) ||
1121                     !list_empty(&qgroup->members)) {
1122                         ret = -EBUSY;
1123                         goto out;
1124                 }
1125         }
1126         ret = del_qgroup_item(trans, quota_root, qgroupid);
1127
1128         spin_lock(&fs_info->qgroup_lock);
1129         del_qgroup_rb(quota_root->fs_info, qgroupid);
1130         spin_unlock(&fs_info->qgroup_lock);
1131 out:
1132         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1133         return ret;
1134 }
1135
1136 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1137                        struct btrfs_fs_info *fs_info, u64 qgroupid,
1138                        struct btrfs_qgroup_limit *limit)
1139 {
1140         struct btrfs_root *quota_root;
1141         struct btrfs_qgroup *qgroup;
1142         int ret = 0;
1143
1144         mutex_lock(&fs_info->qgroup_ioctl_lock);
1145         quota_root = fs_info->quota_root;
1146         if (!quota_root) {
1147                 ret = -EINVAL;
1148                 goto out;
1149         }
1150
1151         qgroup = find_qgroup_rb(fs_info, qgroupid);
1152         if (!qgroup) {
1153                 ret = -ENOENT;
1154                 goto out;
1155         }
1156         ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1157                                        limit->flags, limit->max_rfer,
1158                                        limit->max_excl, limit->rsv_rfer,
1159                                        limit->rsv_excl);
1160         if (ret) {
1161                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1162                 btrfs_info(fs_info, "unable to update quota limit for %llu",
1163                        qgroupid);
1164         }
1165
1166         spin_lock(&fs_info->qgroup_lock);
1167         qgroup->lim_flags = limit->flags;
1168         qgroup->max_rfer = limit->max_rfer;
1169         qgroup->max_excl = limit->max_excl;
1170         qgroup->rsv_rfer = limit->rsv_rfer;
1171         qgroup->rsv_excl = limit->rsv_excl;
1172         spin_unlock(&fs_info->qgroup_lock);
1173 out:
1174         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1175         return ret;
1176 }
1177
1178 /*
1179  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1180  * the modification into a list that's later used by btrfs_end_transaction to
1181  * pass the recorded modifications on to btrfs_qgroup_account_ref.
1182  */
1183 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1184                             struct btrfs_delayed_ref_node *node,
1185                             struct btrfs_delayed_extent_op *extent_op)
1186 {
1187         struct qgroup_update *u;
1188
1189         BUG_ON(!trans->delayed_ref_elem.seq);
1190         u = kmalloc(sizeof(*u), GFP_NOFS);
1191         if (!u)
1192                 return -ENOMEM;
1193
1194         u->node = node;
1195         u->extent_op = extent_op;
1196         list_add_tail(&u->list, &trans->qgroup_ref_list);
1197
1198         return 0;
1199 }
1200
/*
 * Step 1 of qgroup accounting: for every root that referenced the extent
 * before the current operation (@roots), walk that root's qgroup and all
 * of its ancestor groups and raise their refcnt once per walk.  After
 * this, qg->refcnt - seq tells how many of the old roots reach a given
 * qgroup (used by step 3).  @tmp is a scratch ulist owned by the caller.
 */
static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		/* tmp collects this root's qgroup plus all its ancestors */
		ulist_reinit(tmp);
						/* XXX id not needed */
		ret = ulist_add(tmp, qg->qgroupid,
				(u64)(uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/*
			 * refcnt values below seq are stale leftovers from
			 * earlier accounting rounds; restart at seq + 1.
			 */
			if (qg->refcnt < seq)
				qg->refcnt = seq + 1;
			else
				++qg->refcnt;

			/* queue the parent groups for the same treatment */
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(u64)(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1246
/*
 * Step 2 of qgroup accounting: starting from @qgroup (the group of the
 * root the changed ref belongs to), walk it and all of its ancestors.
 * Groups not visited in step 1 (refcnt still below @seq) did not see the
 * extent through any of the old roots, so their referenced count changes
 * by sgn * num_bytes; if no old root remains (roots->nnodes == 0), the
 * exclusive count changes too.  Every group visited here is tagged with
 * @seq so step 3 can skip it.
 */
static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes,
				    struct btrfs_qgroup *qgroup)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_list *glist;
	int ret;

	ulist_reinit(tmp);
	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
		if (qg->refcnt < seq) {
			/* not visited by step 1 */
			qg->rfer += sgn * num_bytes;
			qg->rfer_cmpr += sgn * num_bytes;
			if (roots->nnodes == 0) {
				qg->excl += sgn * num_bytes;
				qg->excl_cmpr += sgn * num_bytes;
			}
			qgroup_dirty(fs_info, qg);
		}
		WARN_ON(qg->tag >= seq);
		qg->tag = seq;

		/* continue the walk with the parent groups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}

	return 0;
}
1289
/*
 * Step 3 of qgroup accounting: walk the old roots' qgroup hierarchies once
 * more.  Groups tagged with @seq in step 2 are skipped.  A group whose
 * refcnt was raised by every old root (refcnt - seq == roots->nnodes) is
 * reachable from all remaining roots, so its exclusive count is adjusted
 * against the operation's sign.
 */
static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		/* tmp collects this root's qgroup plus all its ancestors */
		ulist_reinit(tmp);
		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;

		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/* already handled in step 2 */
			if (qg->tag == seq)
				continue;

			/* reached by all old roots -> exclusive there */
			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl -= sgn * num_bytes;
				qg->excl_cmpr -= sgn * num_bytes;
				qgroup_dirty(fs_info, qg);
			}

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1338
1339 /*
1340  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1341  * from the fs. First, all roots referencing the extent are searched, and
1342  * then the space is accounted accordingly to the different roots. The
1343  * accounting algorithm works in 3 steps documented inline.
1344  */
1345 int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1346                              struct btrfs_fs_info *fs_info,
1347                              struct btrfs_delayed_ref_node *node,
1348                              struct btrfs_delayed_extent_op *extent_op)
1349 {
1350         struct btrfs_root *quota_root;
1351         u64 ref_root;
1352         struct btrfs_qgroup *qgroup;
1353         struct ulist *roots = NULL;
1354         u64 seq;
1355         int ret = 0;
1356         int sgn;
1357
1358         if (!fs_info->quota_enabled)
1359                 return 0;
1360
1361         BUG_ON(!fs_info->quota_root);
1362
1363         if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1364             node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1365                 struct btrfs_delayed_tree_ref *ref;
1366                 ref = btrfs_delayed_node_to_tree_ref(node);
1367                 ref_root = ref->root;
1368         } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1369                    node->type == BTRFS_SHARED_DATA_REF_KEY) {
1370                 struct btrfs_delayed_data_ref *ref;
1371                 ref = btrfs_delayed_node_to_data_ref(node);
1372                 ref_root = ref->root;
1373         } else {
1374                 BUG();
1375         }
1376
1377         if (!is_fstree(ref_root)) {
1378                 /*
1379                  * non-fs-trees are not being accounted
1380                  */
1381                 return 0;
1382         }
1383
1384         switch (node->action) {
1385         case BTRFS_ADD_DELAYED_REF:
1386         case BTRFS_ADD_DELAYED_EXTENT:
1387                 sgn = 1;
1388                 seq = btrfs_tree_mod_seq_prev(node->seq);
1389                 break;
1390         case BTRFS_DROP_DELAYED_REF:
1391                 sgn = -1;
1392                 seq = node->seq;
1393                 break;
1394         case BTRFS_UPDATE_DELAYED_HEAD:
1395                 return 0;
1396         default:
1397                 BUG();
1398         }
1399
1400         mutex_lock(&fs_info->qgroup_rescan_lock);
1401         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1402                 if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1403                         mutex_unlock(&fs_info->qgroup_rescan_lock);
1404                         return 0;
1405                 }
1406         }
1407         mutex_unlock(&fs_info->qgroup_rescan_lock);
1408
1409         /*
1410          * the delayed ref sequence number we pass depends on the direction of
1411          * the operation. for add operations, we pass
1412          * tree_mod_log_prev_seq(node->seq) to skip
1413          * the delayed ref's current sequence number, because we need the state
1414          * of the tree before the add operation. for delete operations, we pass
1415          * (node->seq) to include the delayed ref's current sequence number,
1416          * because we need the state of the tree after the delete operation.
1417          */
1418         ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
1419         if (ret < 0)
1420                 return ret;
1421
1422         spin_lock(&fs_info->qgroup_lock);
1423
1424         quota_root = fs_info->quota_root;
1425         if (!quota_root)
1426                 goto unlock;
1427
1428         qgroup = find_qgroup_rb(fs_info, ref_root);
1429         if (!qgroup)
1430                 goto unlock;
1431
1432         /*
1433          * step 1: for each old ref, visit all nodes once and inc refcnt
1434          */
1435         ulist_reinit(fs_info->qgroup_ulist);
1436         seq = fs_info->qgroup_seq;
1437         fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1438
1439         ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
1440                                        seq);
1441         if (ret)
1442                 goto unlock;
1443
1444         /*
1445          * step 2: walk from the new root
1446          */
1447         ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
1448                                        seq, sgn, node->num_bytes, qgroup);
1449         if (ret)
1450                 goto unlock;
1451
1452         /*
1453          * step 3: walk again from old refs
1454          */
1455         ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
1456                                        seq, sgn, node->num_bytes);
1457         if (ret)
1458                 goto unlock;
1459
1460 unlock:
1461         spin_unlock(&fs_info->qgroup_lock);
1462         ulist_free(roots);
1463
1464         return ret;
1465 }
1466
/*
 * Called from commit_transaction.  Writes all changed qgroups and the
 * status item to disk and, when quotas have just been switched on, kicks
 * off the initial rescan worker.
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;
	int start_rescan_worker = 0;

	if (!quota_root)
		goto out;

	/* quota was just enabled in this transaction -> schedule a rescan */
	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
		start_rescan_worker = 1;

	fs_info->quota_enabled = fs_info->pending_quota_state;

	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		/*
		 * Drop the spinlock around the tree update (which can
		 * block); the list may change meanwhile, so each pass
		 * re-reads the head after retaking the lock.
		 */
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (fs_info->quota_enabled)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	if (!ret && start_rescan_worker) {
		/*
		 * A rescan-init failure must not fail the commit, so ret
		 * is cleared unconditionally after the attempt.
		 */
		ret = qgroup_rescan_init(fs_info, 0, 1);
		if (!ret) {
			qgroup_rescan_zero_tracking(fs_info);
			btrfs_queue_work(fs_info->qgroup_rescan_workers,
					 &fs_info->qgroup_rescan_work);
		}
		ret = 0;
	}

out:

	return ret;
}
1522
/*
 * Copy the accounting information between qgroups.  This is necessary when
 * a snapshot or a subvolume is created: @objectid is the new subvol's id,
 * @srcid the snapshotted source subvol (0 for a fresh subvolume).  The
 * optional @inherit structure names qgroups the new subvol joins and
 * source/destination pairs whose rfer/excl values are copied; the u64 id
 * array follows the struct in memory.
 */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	int i;
	u64 *i_qgroups;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;
	u32 level_size = 0;
	u64 nums;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_enabled)
		goto out;

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	/* validate up front that every qgroup id in @inherit exists */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		for (i = 0; i < nums; ++i) {
			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
			if (!srcgroup) {
				ret = -EINVAL;
				goto out;
			}
			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	/* apply the limits the caller asked to inherit, if any */
	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		ret = update_qgroup_limit_item(trans, quota_root, objectid,
					       inherit->lim.flags,
					       inherit->lim.max_rfer,
					       inherit->lim.max_excl,
					       inherit->lim.rsv_rfer,
					       inherit->lim.rsv_excl);
		if (ret)
			goto out;
	}

	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;
		int srcroot_level;

		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		/*
		 * level_size is the byte size of one tree block at the
		 * source root's level; used below as the size of the new
		 * snapshot's own (unshared) root block.
		 */
		rcu_read_lock();
		srcroot_level = btrfs_header_level(srcroot->node);
		level_size = btrfs_level_size(srcroot, srcroot_level);
		rcu_read_unlock();
	}

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i) {
			ret = add_qgroup_relation_item(trans, quota_root,
						       objectid, *i_qgroups);
			if (ret)
				goto out;
			ret = add_qgroup_relation_item(trans, quota_root,
						       *i_qgroups, objectid);
			if (ret)
				goto out;
			++i_qgroups;
		}
	}


	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		if (!srcgroup)
			goto unlock;
		/*
		 * The snapshot shares everything with its source except its
		 * new root block, so the source is left with exactly one
		 * block's worth of exclusive space.
		 */
		dstgroup->rfer = srcgroup->rfer - level_size;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;
		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	/* mirror the inherited group memberships in memory */
	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		ret = add_relation_rb(quota_root->fs_info, objectid,
				      *i_qgroups);
		if (ret)
			goto unlock;
		++i_qgroups;
	}

	/* copy rfer values for the (src, dst) id pairs that follow */
	for (i = 0; i <  inherit->num_ref_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
		i_qgroups += 2;
	}
	/* copy excl values for the (src, dst) id pairs that follow */
	for (i = 0; i <  inherit->num_excl_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
		i_qgroups += 2;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1691
1692 /*
1693  * reserve some space for a qgroup and all its parents. The reservation takes
1694  * place with start_transaction or dealloc_reserve, similar to ENOSPC
1695  * accounting. If not enough space is available, EDQUOT is returned.
1696  * We assume that the requested space is new for all qgroups.
1697  */
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 ref_root = root->root_key.objectid;
        int ret = 0;
        struct ulist_node *unode;
        struct ulist_iterator uiter;

        /* only fs trees (subvolumes) are subject to qgroup accounting */
        if (!is_fstree(ref_root))
                return 0;

        if (num_bytes == 0)
                return 0;

        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
        /* quotas not enabled: nothing to reserve, report success */
        if (!quota_root)
                goto out;

        qgroup = find_qgroup_rb(fs_info, ref_root);
        if (!qgroup)
                goto out;

        /*
         * in a first step, we check all affected qgroups if any limits would
         * be exceeded
         */
        ulist_reinit(fs_info->qgroup_ulist);
        ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
                        (uintptr_t)qgroup, GFP_ATOMIC);
        if (ret < 0)
                goto out;
        ULIST_ITER_INIT(&uiter);
        /*
         * walk this qgroup and all of its ancestors; parent groups are
         * appended to the ulist inside the loop, so the walk proceeds
         * upwards through the whole hierarchy
         */
        while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
                struct btrfs_qgroup *qg;
                struct btrfs_qgroup_list *glist;

                qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

                /* would this reservation exceed the referenced-bytes limit? */
                if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
                    qg->reserved + (s64)qg->rfer + num_bytes >
                    qg->max_rfer) {
                        ret = -EDQUOT;
                        goto out;
                }

                /* would this reservation exceed the exclusive-bytes limit? */
                if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
                    qg->reserved + (s64)qg->excl + num_bytes >
                    qg->max_excl) {
                        ret = -EDQUOT;
                        goto out;
                }

                /* queue all parent groups for the same limit checks */
                list_for_each_entry(glist, &qg->groups, next_group) {
                        ret = ulist_add(fs_info->qgroup_ulist,
                                        glist->group->qgroupid,
                                        (uintptr_t)glist->group, GFP_ATOMIC);
                        if (ret < 0)
                                goto out;
                }
        }
        ret = 0;
        /*
         * no limits exceeded, now record the reservation into all qgroups
         */
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
                struct btrfs_qgroup *qg;

                qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

                qg->reserved += num_bytes;
        }

out:
        spin_unlock(&fs_info->qgroup_lock);
        return ret;
}
1778
/*
 * Release a reservation previously taken with btrfs_qgroup_reserve():
 * walk the qgroup of @root and all of its ancestor groups and subtract
 * @num_bytes from each group's reserved counter.
 */
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        u64 ref_root = root->root_key.objectid;
        int ret = 0;

        /* only fs trees (subvolumes) are subject to qgroup accounting */
        if (!is_fstree(ref_root))
                return;

        if (num_bytes == 0)
                return;

        spin_lock(&fs_info->qgroup_lock);

        /* quotas not enabled: nothing was reserved, nothing to free */
        quota_root = fs_info->quota_root;
        if (!quota_root)
                goto out;

        qgroup = find_qgroup_rb(fs_info, ref_root);
        if (!qgroup)
                goto out;

        ulist_reinit(fs_info->qgroup_ulist);
        ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
                        (uintptr_t)qgroup, GFP_ATOMIC);
        if (ret < 0)
                goto out;
        ULIST_ITER_INIT(&uiter);
        /* parents are appended inside the loop, walking up the hierarchy */
        while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
                struct btrfs_qgroup *qg;
                struct btrfs_qgroup_list *glist;

                qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

                qg->reserved -= num_bytes;

                list_for_each_entry(glist, &qg->groups, next_group) {
                        ret = ulist_add(fs_info->qgroup_ulist,
                                        glist->group->qgroupid,
                                        (uintptr_t)glist->group, GFP_ATOMIC);
                        if (ret < 0)
                                goto out;
                }
        }

out:
        spin_unlock(&fs_info->qgroup_lock);
}
1831
1832 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1833 {
1834         if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1835                 return;
1836         btrfs_err(trans->root->fs_info,
1837                 "qgroups not uptodate in trans handle %p:  list is%s empty, "
1838                 "seq is %#x.%x",
1839                 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1840                 (u32)(trans->delayed_ref_elem.seq >> 32),
1841                 (u32)trans->delayed_ref_elem.seq);
1842         BUG();
1843 }
1844
1845 /*
1846  * returns < 0 on error, 0 when more leafs are to be scanned.
1847  * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1848  */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
                   struct btrfs_trans_handle *trans, struct ulist *tmp,
                   struct extent_buffer *scratch_leaf)
{
        struct btrfs_key found;
        struct ulist *roots = NULL;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct seq_list tree_mod_seq_elem = {};
        u64 seq;
        int slot;
        int ret;

        path->leave_spinning = 1;
        mutex_lock(&fs_info->qgroup_rescan_lock);
        /* resume the scan at the saved progress key in the extent tree */
        ret = btrfs_search_slot_for_read(fs_info->extent_root,
                                         &fs_info->qgroup_rescan_progress,
                                         path, 1, 0);

        pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
                 fs_info->qgroup_rescan_progress.objectid,
                 fs_info->qgroup_rescan_progress.type,
                 fs_info->qgroup_rescan_progress.offset, ret);

        if (ret) {
                /*
                 * The rescan is about to end, we will not be scanning any
                 * further blocks. We cannot unset the RESCAN flag here, because
                 * we want to commit the transaction if everything went well.
                 * To make the live accounting work in this phase, we set our
                 * scan progress pointer such that every real extent objectid
                 * will be smaller.
                 */
                fs_info->qgroup_rescan_progress.objectid = (u64)-1;
                btrfs_release_path(path);
                mutex_unlock(&fs_info->qgroup_rescan_lock);
                return ret;
        }

        /* advance the progress pointer past the leaf we're about to process */
        btrfs_item_key_to_cpu(path->nodes[0], &found,
                              btrfs_header_nritems(path->nodes[0]) - 1);
        fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

        btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
        /*
         * Copy the leaf into scratch space so the path (and its locks)
         * can be dropped before the accounting work below.
         * NOTE(review): this is a shallow struct copy of the extent_buffer;
         * presumably safe because only item keys are read from the copy —
         * confirm against extent_io's buffer lifetime rules.
         */
        memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
        slot = path->slots[0];
        btrfs_release_path(path);
        mutex_unlock(&fs_info->qgroup_rescan_lock);

        /* account every extent/metadata item in the remainder of this leaf */
        for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
                u64 num_bytes;

                btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
                if (found.type != BTRFS_EXTENT_ITEM_KEY &&
                    found.type != BTRFS_METADATA_ITEM_KEY)
                        continue;
                /* metadata items are tree blocks: size is the leaf size,
                 * not the key offset */
                if (found.type == BTRFS_METADATA_ITEM_KEY)
                        num_bytes = fs_info->extent_root->leafsize;
                else
                        num_bytes = found.offset;

                /* find all subvolume roots referencing this extent */
                ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
                                           tree_mod_seq_elem.seq, &roots);
                if (ret < 0)
                        goto out;
                spin_lock(&fs_info->qgroup_lock);
                seq = fs_info->qgroup_seq;
                fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

                ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
                if (ret) {
                        spin_unlock(&fs_info->qgroup_lock);
                        ulist_free(roots);
                        goto out;
                }

                /*
                 * step2 of btrfs_qgroup_account_ref works from a single root,
                 * we're doing all at once here.
                 */
                ulist_reinit(tmp);
                ULIST_ITER_INIT(&uiter);
                /* seed tmp with the qgroup of each referencing root */
                while ((unode = ulist_next(roots, &uiter))) {
                        struct btrfs_qgroup *qg;

                        qg = find_qgroup_rb(fs_info, unode->val);
                        if (!qg)
                                continue;

                        ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
                                        GFP_ATOMIC);
                        if (ret < 0) {
                                spin_unlock(&fs_info->qgroup_lock);
                                ulist_free(roots);
                                goto out;
                        }
                }

                /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
                ULIST_ITER_INIT(&uiter);
                while ((unode = ulist_next(tmp, &uiter))) {
                        struct btrfs_qgroup *qg;
                        struct btrfs_qgroup_list *glist;

                        qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
                        qg->rfer += num_bytes;
                        qg->rfer_cmpr += num_bytes;
                        WARN_ON(qg->tag >= seq);
                        /*
                         * a group reached by every referencing root holds
                         * this extent exclusively
                         */
                        if (qg->refcnt - seq == roots->nnodes) {
                                qg->excl += num_bytes;
                                qg->excl_cmpr += num_bytes;
                        }
                        qgroup_dirty(fs_info, qg);

                        /* propagate the accounting to all parent groups */
                        list_for_each_entry(glist, &qg->groups, next_group) {
                                ret = ulist_add(tmp, glist->group->qgroupid,
                                                (uintptr_t)glist->group,
                                                GFP_ATOMIC);
                                if (ret < 0) {
                                        spin_unlock(&fs_info->qgroup_lock);
                                        ulist_free(roots);
                                        goto out;
                                }
                        }
                }

                spin_unlock(&fs_info->qgroup_lock);
                ulist_free(roots);
                ret = 0;
        }

out:
        btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

        return ret;
}
1986
/*
 * Rescan worker: repeatedly scans one extent-tree leaf per transaction
 * until qgroup_rescan_leaf() reports completion (> 0) or failure (< 0),
 * then updates the qgroup status flags and wakes waiters.
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
        struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
                                                     qgroup_rescan_work);
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans = NULL;
        struct ulist *tmp = NULL;
        struct extent_buffer *scratch_leaf = NULL;
        int err = -ENOMEM;

        path = btrfs_alloc_path();
        if (!path)
                goto out;
        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp)
                goto out;
        /* scratch copy target for the leaf being scanned, reused each pass */
        scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
        if (!scratch_leaf)
                goto out;

        err = 0;
        while (!err) {
                /* one transaction per scanned leaf keeps commits small */
                trans = btrfs_start_transaction(fs_info->fs_root, 0);
                if (IS_ERR(trans)) {
                        err = PTR_ERR(trans);
                        break;
                }
                if (!fs_info->quota_enabled) {
                        /* quotas were disabled meanwhile, abort the rescan */
                        err = -EINTR;
                } else {
                        err = qgroup_rescan_leaf(fs_info, path, trans,
                                                 tmp, scratch_leaf);
                }
                /* commit only the final, successful iteration */
                if (err > 0)
                        btrfs_commit_transaction(trans, fs_info->fs_root);
                else
                        btrfs_end_transaction(trans, fs_info->fs_root);
        }

out:
        kfree(scratch_leaf);
        ulist_free(tmp);
        btrfs_free_path(path);

        mutex_lock(&fs_info->qgroup_rescan_lock);
        fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

        /* err == 2: full tree scanned, the counters can be trusted again */
        if (err == 2 &&
            fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        } else if (err < 0) {
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        }
        mutex_unlock(&fs_info->qgroup_rescan_lock);

        if (err >= 0) {
                btrfs_info(fs_info, "qgroup scan completed%s",
                        err == 2 ? " (inconsistency flag cleared)" : "");
        } else {
                btrfs_err(fs_info, "qgroup scan failed with %d", err);
        }

        /* wake anyone blocked in btrfs_qgroup_wait_for_completion() */
        complete_all(&fs_info->qgroup_rescan_completion);
}
2051
2052 /*
2053  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2054  * memory required for the rescan context.
2055  */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                   int init_flags)
{
        int ret = 0;

        /*
         * init_flags == 0 means we're resuming a previously interrupted
         * rescan (from mount); in that case the RESCAN flag must already
         * be set and quota must be on.
         */
        if (!init_flags &&
            (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
             !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
                ret = -EINVAL;
                goto err;
        }

        mutex_lock(&fs_info->qgroup_rescan_lock);
        spin_lock(&fs_info->qgroup_lock);

        if (init_flags) {
                /* fresh rescan request: refuse if one is already running */
                if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
                        ret = -EINPROGRESS;
                else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
                        ret = -EINVAL;

                if (ret) {
                        spin_unlock(&fs_info->qgroup_lock);
                        mutex_unlock(&fs_info->qgroup_rescan_lock);
                        goto err;
                }

                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
        }

        memset(&fs_info->qgroup_rescan_progress, 0,
                sizeof(fs_info->qgroup_rescan_progress));
        fs_info->qgroup_rescan_progress.objectid = progress_objectid;

        spin_unlock(&fs_info->qgroup_lock);
        mutex_unlock(&fs_info->qgroup_rescan_lock);

        init_completion(&fs_info->qgroup_rescan_completion);

        memset(&fs_info->qgroup_rescan_work, 0,
               sizeof(fs_info->qgroup_rescan_work));
        btrfs_init_work(&fs_info->qgroup_rescan_work,
                        btrfs_qgroup_rescan_worker, NULL, NULL);

        /*
         * ret is always 0 on the fall-through path here; the err label is
         * placed inside this branch and is only reachable via the gotos
         * above, which carry a nonzero ret.
         */
        if (ret) {
err:
                btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
                return ret;
        }

        return 0;
}
2109
2110 static void
2111 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2112 {
2113         struct rb_node *n;
2114         struct btrfs_qgroup *qgroup;
2115
2116         spin_lock(&fs_info->qgroup_lock);
2117         /* clear all current qgroup tracking information */
2118         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2119                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2120                 qgroup->rfer = 0;
2121                 qgroup->rfer_cmpr = 0;
2122                 qgroup->excl = 0;
2123                 qgroup->excl_cmpr = 0;
2124         }
2125         spin_unlock(&fs_info->qgroup_lock);
2126 }
2127
int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
        int ret = 0;
        struct btrfs_trans_handle *trans;

        /* sets the RESCAN flag and resets the progress key to objectid 0 */
        ret = qgroup_rescan_init(fs_info, 0, 1);
        if (ret)
                return ret;

        /*
         * We have set the rescan_progress to 0, which means no more
         * delayed refs will be accounted by btrfs_qgroup_account_ref.
         * However, btrfs_qgroup_account_ref may be right after its call
         * to btrfs_find_all_roots, in which case it would still do the
         * accounting.
         * To solve this, we're committing the transaction, which will
         * ensure we run all delayed refs and only after that, we are
         * going to clear all tracking information for a clean start.
         */

        trans = btrfs_join_transaction(fs_info->fs_root);
        if (IS_ERR(trans)) {
                /*
                 * NOTE(review): the RESCAN flag is cleared here (and below)
                 * without holding qgroup_rescan_lock, unlike in the worker —
                 * confirm this cannot race with a concurrent rescan request.
                 */
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
                return PTR_ERR(trans);
        }
        ret = btrfs_commit_transaction(trans, fs_info->fs_root);
        if (ret) {
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
                return ret;
        }

        qgroup_rescan_zero_tracking(fs_info);

        /* the actual scan runs asynchronously in the rescan worker */
        btrfs_queue_work(fs_info->qgroup_rescan_workers,
                         &fs_info->qgroup_rescan_work);

        return 0;
}
2167
2168 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2169 {
2170         int running;
2171         int ret = 0;
2172
2173         mutex_lock(&fs_info->qgroup_rescan_lock);
2174         spin_lock(&fs_info->qgroup_lock);
2175         running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2176         spin_unlock(&fs_info->qgroup_lock);
2177         mutex_unlock(&fs_info->qgroup_rescan_lock);
2178
2179         if (running)
2180                 ret = wait_for_completion_interruptible(
2181                                         &fs_info->qgroup_rescan_completion);
2182
2183         return ret;
2184 }
2185
2186 /*
2187  * this is only called from open_ctree where we're still single threaded, thus
2188  * locking is omitted here.
2189  */
2190 void
2191 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2192 {
2193         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2194                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
2195                                  &fs_info->qgroup_rescan_work);
2196 }