]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/xfs/quota/xfs_qm.c
Merge git://git.infradead.org/users/dwmw2/mtd-2.6.38
[mv-sheeva.git] / fs / xfs / quota / xfs_qm.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_alloc.h"
27 #include "xfs_quota.h"
28 #include "xfs_mount.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_ialloc_btree.h"
31 #include "xfs_dinode.h"
32 #include "xfs_inode.h"
33 #include "xfs_ialloc.h"
34 #include "xfs_itable.h"
35 #include "xfs_rtalloc.h"
36 #include "xfs_error.h"
37 #include "xfs_bmap.h"
38 #include "xfs_attr.h"
39 #include "xfs_buf_item.h"
40 #include "xfs_trans_space.h"
41 #include "xfs_utils.h"
42 #include "xfs_qm.h"
43 #include "xfs_trace.h"
44
45 /*
46  * The global quota manager. There is only one of these for the entire
47  * system, _not_ one per file system. XQM keeps track of the overall
48  * quota functionality, including maintaining the freelist and hash
49  * tables of dquots.
50  */
/* Held while xfs_Gqm is created, referenced, dereferenced or destroyed. */
51 struct mutex    xfs_Gqm_lock;
/* The one global quota manager; set up on first hold, cleared on last rele. */
52 struct xfs_qm   *xfs_Gqm;
/* Set by xfs_Gqm_init() to the number of hash buckets shifted left by 8. */
53 uint            ndquot;
54
/*
 * Allocation zones for dquots and for the dquot portion of transactions.
 * xfs_Gqm_init() creates them only if they are still NULL, so they are
 * reused when the quota manager is re-initialized.
 */
55 kmem_zone_t     *qm_dqzone;
56 kmem_zone_t     *qm_dqtrxzone;
57
58 STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
59 STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);
60
61 STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
62 STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
63 STATIC int      xfs_qm_shake(struct shrinker *, int, gfp_t);
64
/*
 * Low-memory callback; registered in xfs_Gqm_init() and unregistered in
 * xfs_qm_destroy().
 */
65 static struct shrinker xfs_qm_shaker = {
66         .shrink = xfs_qm_shake,
67         .seeks = DEFAULT_SEEKS,
68 };
69
/* Defined elsewhere; initialized/destroyed together with the quota manager. */
70 #ifdef DEBUG
71 extern struct mutex     qcheck_lock;
72 #endif
73
#ifdef QUOTADEBUG
/*
 * Debug helper: print one line per incore dquot found on the per-mount
 * dquot list (id, type, block count, inode count and reference count).
 *
 * The list head is qi_dqlist and dquots are linked through q_mplist, the
 * same pair every other walker in this file uses; qi_dqlist_lock is the
 * mutex protecting that list, not the list itself.
 *
 * NOTE(review): the list lock is not taken here - confirm that callers
 * either hold qi_dqlist_lock or are otherwise serialized.
 *
 * Without QUOTADEBUG this compiles to an empty stub.
 */
static void
xfs_qm_dquot_list_print(
	struct xfs_mount *mp)
{
	xfs_dquot_t	*dqp;
	int		i = 0;

	list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist, q_mplist) {
		cmn_err(CE_DEBUG, "   %d. \"%d (%s)\"   "
				  "bcnt = %lld, icnt = %lld, refs = %d",
			i++, be32_to_cpu(dqp->q_core.d_id),
			DQFLAGTO_TYPESTR(dqp),
			(long long)be64_to_cpu(dqp->q_core.d_bcount),
			(long long)be64_to_cpu(dqp->q_core.d_icount),
			dqp->q_nrefs);
	}
}
#else
static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
#endif
95
96 /*
97  * Initialize the XQM structure.
98  * Note that there is not one quota manager per file system.
99  */
100 STATIC struct xfs_qm *
101 xfs_Gqm_init(void)
102 {
103         xfs_dqhash_t    *udqhash, *gdqhash;
104         xfs_qm_t        *xqm;
105         size_t          hsize;
106         uint            i;
107
108         /*
109          * Initialize the dquot hash tables.
110          */
111         udqhash = kmem_zalloc_greedy(&hsize,
112                                      XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
113                                      XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
114         if (!udqhash)
115                 goto out;
116
117         gdqhash = kmem_zalloc_large(hsize);
118         if (!gdqhash)
119                 goto out_free_udqhash;
120
121         hsize /= sizeof(xfs_dqhash_t);
122         ndquot = hsize << 8;
123
124         xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
125         xqm->qm_dqhashmask = hsize - 1;
126         xqm->qm_usr_dqhtable = udqhash;
127         xqm->qm_grp_dqhtable = gdqhash;
128         ASSERT(xqm->qm_usr_dqhtable != NULL);
129         ASSERT(xqm->qm_grp_dqhtable != NULL);
130
131         for (i = 0; i < hsize; i++) {
132                 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
133                 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
134         }
135
136         /*
137          * Freelist of all dquots of all file systems
138          */
139         INIT_LIST_HEAD(&xqm->qm_dqfrlist);
140         xqm->qm_dqfrlist_cnt = 0;
141         mutex_init(&xqm->qm_dqfrlist_lock);
142
143         /*
144          * dquot zone. we register our own low-memory callback.
145          */
146         if (!qm_dqzone) {
147                 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
148                                                 "xfs_dquots");
149                 qm_dqzone = xqm->qm_dqzone;
150         } else
151                 xqm->qm_dqzone = qm_dqzone;
152
153         register_shrinker(&xfs_qm_shaker);
154
155         /*
156          * The t_dqinfo portion of transactions.
157          */
158         if (!qm_dqtrxzone) {
159                 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
160                                                    "xfs_dqtrx");
161                 qm_dqtrxzone = xqm->qm_dqtrxzone;
162         } else
163                 xqm->qm_dqtrxzone = qm_dqtrxzone;
164
165         atomic_set(&xqm->qm_totaldquots, 0);
166         xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
167         xqm->qm_nrefs = 0;
168 #ifdef DEBUG
169         mutex_init(&qcheck_lock);
170 #endif
171         return xqm;
172
173  out_free_udqhash:
174         kmem_free_large(udqhash);
175  out:
176         return NULL;
177 }
178
179 /*
180  * Destroy the global quota manager when its reference count goes to zero.
181  */
182 STATIC void
183 xfs_qm_destroy(
184         struct xfs_qm   *xqm)
185 {
186         struct xfs_dquot *dqp, *n;
187         int             hsize, i;
188
189         ASSERT(xqm != NULL);
190         ASSERT(xqm->qm_nrefs == 0);
191         unregister_shrinker(&xfs_qm_shaker);
192         hsize = xqm->qm_dqhashmask + 1;
193         for (i = 0; i < hsize; i++) {
194                 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
195                 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
196         }
197         kmem_free_large(xqm->qm_usr_dqhtable);
198         kmem_free_large(xqm->qm_grp_dqhtable);
199         xqm->qm_usr_dqhtable = NULL;
200         xqm->qm_grp_dqhtable = NULL;
201         xqm->qm_dqhashmask = 0;
202
203         /* frlist cleanup */
204         mutex_lock(&xqm->qm_dqfrlist_lock);
205         list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
206                 xfs_dqlock(dqp);
207 #ifdef QUOTADEBUG
208                 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
209 #endif
210                 list_del_init(&dqp->q_freelist);
211                 xfs_Gqm->qm_dqfrlist_cnt--;
212                 xfs_dqunlock(dqp);
213                 xfs_qm_dqdestroy(dqp);
214         }
215         mutex_unlock(&xqm->qm_dqfrlist_lock);
216         mutex_destroy(&xqm->qm_dqfrlist_lock);
217 #ifdef DEBUG
218         mutex_destroy(&qcheck_lock);
219 #endif
220         kmem_free(xqm);
221 }
222
223 /*
224  * Called at mount time to let XQM know that another file system is
225  * starting quotas. This isn't crucial information as the individual mount
226  * structures are pretty independent, but it helps the XQM keep a
227  * global view of what's going on.
228  */
229 /* ARGSUSED */
230 STATIC int
231 xfs_qm_hold_quotafs_ref(
232         struct xfs_mount *mp)
233 {
234         /*
235          * Need to lock the xfs_Gqm structure for things like this. For example,
236          * the structure could disappear between the entry to this routine and
237          * a HOLD operation if not locked.
238          */
239         mutex_lock(&xfs_Gqm_lock);
240
241         if (!xfs_Gqm) {
242                 xfs_Gqm = xfs_Gqm_init();
243                 if (!xfs_Gqm) {
244                         mutex_unlock(&xfs_Gqm_lock);
245                         return ENOMEM;
246                 }
247         }
248
249         /*
250          * We can keep a list of all filesystems with quotas mounted for
251          * debugging and statistical purposes, but ...
252          * Just take a reference and get out.
253          */
254         xfs_Gqm->qm_nrefs++;
255         mutex_unlock(&xfs_Gqm_lock);
256
257         return 0;
258 }
259
260
261 /*
262  * Release the reference that a filesystem took at mount time,
263  * so that we know when we need to destroy the entire quota manager.
264  */
265 /* ARGSUSED */
266 STATIC void
267 xfs_qm_rele_quotafs_ref(
268         struct xfs_mount *mp)
269 {
270         xfs_dquot_t     *dqp, *n;
271
272         ASSERT(xfs_Gqm);
273         ASSERT(xfs_Gqm->qm_nrefs > 0);
274
275         /*
276          * Go thru the freelist and destroy all inactive dquots.
277          */
278         mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
279
280         list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
281                 xfs_dqlock(dqp);
282                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
283                         ASSERT(dqp->q_mount == NULL);
284                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
285                         ASSERT(list_empty(&dqp->q_hashlist));
286                         ASSERT(list_empty(&dqp->q_mplist));
287                         list_del_init(&dqp->q_freelist);
288                         xfs_Gqm->qm_dqfrlist_cnt--;
289                         xfs_dqunlock(dqp);
290                         xfs_qm_dqdestroy(dqp);
291                 } else {
292                         xfs_dqunlock(dqp);
293                 }
294         }
295         mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
296
297         /*
298          * Destroy the entire XQM. If somebody mounts with quotaon, this'll
299          * be restarted.
300          */
301         mutex_lock(&xfs_Gqm_lock);
302         if (--xfs_Gqm->qm_nrefs == 0) {
303                 xfs_qm_destroy(xfs_Gqm);
304                 xfs_Gqm = NULL;
305         }
306         mutex_unlock(&xfs_Gqm_lock);
307 }
308
309 /*
310  * Just destroy the quotainfo structure.
311  */
312 void
313 xfs_qm_unmount(
314         struct xfs_mount        *mp)
315 {
316         if (mp->m_quotainfo) {
317                 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
318                 xfs_qm_destroy_quotainfo(mp);
319         }
320 }
321
322
323 /*
324  * This is called from xfs_mountfs to start quotas and initialize all
325  * necessary data structures like quotainfo.  This is also responsible for
326  * running a quotacheck as necessary.  We are guaranteed that the superblock
327  * is consistently read in at this point.
328  *
329  * If we fail here, the mount will continue with quota turned off. We don't
330  * need to inidicate success or failure at all.
331  */
332 void
333 xfs_qm_mount_quotas(
334         xfs_mount_t     *mp)
335 {
336         int             error = 0;
337         uint            sbf;
338
339         /*
340          * If quotas on realtime volumes is not supported, we disable
341          * quotas immediately.
342          */
343         if (mp->m_sb.sb_rextents) {
344                 cmn_err(CE_NOTE,
345                         "Cannot turn on quotas for realtime filesystem %s",
346                         mp->m_fsname);
347                 mp->m_qflags = 0;
348                 goto write_changes;
349         }
350
351         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
352
353         /*
354          * Allocate the quotainfo structure inside the mount struct, and
355          * create quotainode(s), and change/rev superblock if necessary.
356          */
357         error = xfs_qm_init_quotainfo(mp);
358         if (error) {
359                 /*
360                  * We must turn off quotas.
361                  */
362                 ASSERT(mp->m_quotainfo == NULL);
363                 mp->m_qflags = 0;
364                 goto write_changes;
365         }
366         /*
367          * If any of the quotas are not consistent, do a quotacheck.
368          */
369         if (XFS_QM_NEED_QUOTACHECK(mp)) {
370                 error = xfs_qm_quotacheck(mp);
371                 if (error) {
372                         /* Quotacheck failed and disabled quotas. */
373                         return;
374                 }
375         }
376         /* 
377          * If one type of quotas is off, then it will lose its
378          * quotachecked status, since we won't be doing accounting for
379          * that type anymore.
380          */
381         if (!XFS_IS_UQUOTA_ON(mp))
382                 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
383         if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
384                 mp->m_qflags &= ~XFS_OQUOTA_CHKD;
385
386  write_changes:
387         /*
388          * We actually don't have to acquire the m_sb_lock at all.
389          * This can only be called from mount, and that's single threaded. XXX
390          */
391         spin_lock(&mp->m_sb_lock);
392         sbf = mp->m_sb.sb_qflags;
393         mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
394         spin_unlock(&mp->m_sb_lock);
395
396         if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
397                 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
398                         /*
399                          * We could only have been turning quotas off.
400                          * We aren't in very good shape actually because
401                          * the incore structures are convinced that quotas are
402                          * off, but the on disk superblock doesn't know that !
403                          */
404                         ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
405                         xfs_fs_cmn_err(CE_ALERT, mp,
406                                 "XFS mount_quotas: Superblock update failed!");
407                 }
408         }
409
410         if (error) {
411                 xfs_fs_cmn_err(CE_WARN, mp,
412                         "Failed to initialize disk quotas.");
413                 return;
414         }
415
416 #ifdef QUOTADEBUG
417         if (XFS_IS_QUOTA_ON(mp))
418                 xfs_qm_internalqcheck(mp);
419 #endif
420 }
421
422 /*
423  * Called from the vfsops layer.
424  */
425 void
426 xfs_qm_unmount_quotas(
427         xfs_mount_t     *mp)
428 {
429         /*
430          * Release the dquots that root inode, et al might be holding,
431          * before we flush quotas and blow away the quotainfo structure.
432          */
433         ASSERT(mp->m_rootip);
434         xfs_qm_dqdetach(mp->m_rootip);
435         if (mp->m_rbmip)
436                 xfs_qm_dqdetach(mp->m_rbmip);
437         if (mp->m_rsumip)
438                 xfs_qm_dqdetach(mp->m_rsumip);
439
440         /*
441          * Release the quota inodes.
442          */
443         if (mp->m_quotainfo) {
444                 if (mp->m_quotainfo->qi_uquotaip) {
445                         IRELE(mp->m_quotainfo->qi_uquotaip);
446                         mp->m_quotainfo->qi_uquotaip = NULL;
447                 }
448                 if (mp->m_quotainfo->qi_gquotaip) {
449                         IRELE(mp->m_quotainfo->qi_gquotaip);
450                         mp->m_quotainfo->qi_gquotaip = NULL;
451                 }
452         }
453 }
454
455 /*
456  * Flush all dquots of the given file system to disk. The dquots are
457  * _not_ purged from memory here, just their data written to disk.
458  */
459 STATIC int
460 xfs_qm_dqflush_all(
461         struct xfs_mount        *mp,
462         int                     sync_mode)
463 {
464         struct xfs_quotainfo    *q = mp->m_quotainfo;
465         int                     recl;
466         struct xfs_dquot        *dqp;
467         int                     niters;
468         int                     error;
469
470         if (!q)
471                 return 0;
472         niters = 0;
473 again:
474         mutex_lock(&q->qi_dqlist_lock);
475         list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
476                 xfs_dqlock(dqp);
477                 if (! XFS_DQ_IS_DIRTY(dqp)) {
478                         xfs_dqunlock(dqp);
479                         continue;
480                 }
481
482                 /* XXX a sentinel would be better */
483                 recl = q->qi_dqreclaims;
484                 if (!xfs_dqflock_nowait(dqp)) {
485                         /*
486                          * If we can't grab the flush lock then check
487                          * to see if the dquot has been flushed delayed
488                          * write.  If so, grab its buffer and send it
489                          * out immediately.  We'll be able to acquire
490                          * the flush lock when the I/O completes.
491                          */
492                         xfs_qm_dqflock_pushbuf_wait(dqp);
493                 }
494                 /*
495                  * Let go of the mplist lock. We don't want to hold it
496                  * across a disk write.
497                  */
498                 mutex_unlock(&q->qi_dqlist_lock);
499                 error = xfs_qm_dqflush(dqp, sync_mode);
500                 xfs_dqunlock(dqp);
501                 if (error)
502                         return error;
503
504                 mutex_lock(&q->qi_dqlist_lock);
505                 if (recl != q->qi_dqreclaims) {
506                         mutex_unlock(&q->qi_dqlist_lock);
507                         /* XXX restart limit */
508                         goto again;
509                 }
510         }
511
512         mutex_unlock(&q->qi_dqlist_lock);
513         /* return ! busy */
514         return 0;
515 }
516 /*
517  * Release the group dquot pointers the user dquots may be
518  * carrying around as a hint. mplist is locked on entry and exit.
519  */
520 STATIC void
521 xfs_qm_detach_gdquots(
522         struct xfs_mount        *mp)
523 {
524         struct xfs_quotainfo    *q = mp->m_quotainfo;
525         struct xfs_dquot        *dqp, *gdqp;
526         int                     nrecl;
527
528  again:
529         ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
530         list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
531                 xfs_dqlock(dqp);
532                 if ((gdqp = dqp->q_gdquot)) {
533                         xfs_dqlock(gdqp);
534                         dqp->q_gdquot = NULL;
535                 }
536                 xfs_dqunlock(dqp);
537
538                 if (gdqp) {
539                         /*
540                          * Can't hold the mplist lock across a dqput.
541                          * XXXmust convert to marker based iterations here.
542                          */
543                         nrecl = q->qi_dqreclaims;
544                         mutex_unlock(&q->qi_dqlist_lock);
545                         xfs_qm_dqput(gdqp);
546
547                         mutex_lock(&q->qi_dqlist_lock);
548                         if (nrecl != q->qi_dqreclaims)
549                                 goto again;
550                 }
551         }
552 }
553
554 /*
555  * Go through all the incore dquots of this file system and take them
556  * off the mplist and hashlist, if the dquot type matches one of the
557  * types selected by flags. This is used when turning off quota accounting
558  * for users and/or groups, as well as when the filesystem is unmounting.
 *
 * mp:    mount whose per-mount dquot list is purged
 * flags: XFS_QMOPT_UQUOTA / XFS_QMOPT_PQUOTA / XFS_QMOPT_GQUOTA select
 *        which dquot types are purged
 *
 * Returns 0 if the mount has no quotainfo; otherwise the sum of the
 * xfs_qm_dqpurge() return values accumulated since the walk was last
 * (re)started. xfs_qm_dqpurge_all() keeps calling until this is zero.
559  */
560 STATIC int
561 xfs_qm_dqpurge_int(
562         struct xfs_mount        *mp,
563         uint                    flags)
564 {
565         struct xfs_quotainfo    *q = mp->m_quotainfo;
566         struct xfs_dquot        *dqp, *n;
567         uint                    dqtype;
568         int                     nrecl;
569         int                     nmisses;
570
571         if (!q)
572                 return 0;
573
        /* Translate the XFS_QMOPT_* selection into a mask of dquot flags. */
574         dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
575         dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
576         dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
577
578         mutex_lock(&q->qi_dqlist_lock);
579
580         /*
581          * In the first pass through all incore dquots of this filesystem,
582          * we release the group dquot pointers the user dquots may be
583          * carrying around as a hint. We need to do this irrespective of
584          * what's being turned off.
585          */
586         xfs_qm_detach_gdquots(mp);
587
588       again:
        /* The miss count starts from scratch on every restart of the walk. */
589         nmisses = 0;
590         ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
591         /*
592          * Try to get rid of all of the unwanted dquots. The idea is to
593          * get them off mplist and hashlist, but leave them on freelist.
594          */
595         list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
596                 /*
597                  * It's OK to look at the type without taking dqlock here.
598                  * We're holding the mplist lock here, and that's needed for
599                  * a dqreclaim.
600                  */
601                 if ((dqp->dq_flags & dqtype) == 0)
602                         continue;
603
                /*
                 * The hash lock is normally taken before the mplist lock,
                 * which we already hold, so only a trylock is safe here.
                 * On failure, drop the mplist lock, take both locks in
                 * the blocking order and check whether the list changed.
                 */
604                 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
605                         nrecl = q->qi_dqreclaims;
606                         mutex_unlock(&q->qi_dqlist_lock);
607                         mutex_lock(&dqp->q_hash->qh_lock);
608                         mutex_lock(&q->qi_dqlist_lock);
609
610                         /*
611                          * XXXTheoretically, we can get into a very long
612                          * ping pong game here.
613                          * No one can be adding dquots to the mplist at
614                          * this point, but somebody might be taking things off.
615                          */
616                         if (nrecl != q->qi_dqreclaims) {
617                                 mutex_unlock(&dqp->q_hash->qh_lock);
618                                 goto again;
619                         }
620                 }
621
622                 /*
623                  * Take the dquot off the mplist and hashlist. It may remain on
624                  * freelist in INACTIVE state.
                 * NOTE(review): the hash lock taken above is not released
                 * in this function; presumably xfs_qm_dqpurge() drops it -
                 * confirm against its definition.
625                  */
626                 nmisses += xfs_qm_dqpurge(dqp);
627         }
628         mutex_unlock(&q->qi_dqlist_lock);
629         return nmisses;
630 }
631
632 int
633 xfs_qm_dqpurge_all(
634         xfs_mount_t     *mp,
635         uint            flags)
636 {
637         int             ndquots;
638
639         /*
640          * Purge the dquot cache.
641          * None of the dquots should really be busy at this point.
642          */
643         if (mp->m_quotainfo) {
644                 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
645                         delay(ndquots * 10);
646                 }
647         }
648         return 0;
649 }
650
651 STATIC int
652 xfs_qm_dqattach_one(
653         xfs_inode_t     *ip,
654         xfs_dqid_t      id,
655         uint            type,
656         uint            doalloc,
657         xfs_dquot_t     *udqhint, /* hint */
658         xfs_dquot_t     **IO_idqpp)
659 {
660         xfs_dquot_t     *dqp;
661         int             error;
662
663         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
664         error = 0;
665
666         /*
667          * See if we already have it in the inode itself. IO_idqpp is
668          * &i_udquot or &i_gdquot. This made the code look weird, but
669          * made the logic a lot simpler.
670          */
671         dqp = *IO_idqpp;
672         if (dqp) {
673                 trace_xfs_dqattach_found(dqp);
674                 return 0;
675         }
676
677         /*
678          * udqhint is the i_udquot field in inode, and is non-NULL only
679          * when the type arg is group/project. Its purpose is to save a
680          * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
681          * the user dquot.
682          */
683         if (udqhint) {
684                 ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
685                 xfs_dqlock(udqhint);
686
687                 /*
688                  * No need to take dqlock to look at the id.
689                  *
690                  * The ID can't change until it gets reclaimed, and it won't
691                  * be reclaimed as long as we have a ref from inode and we
692                  * hold the ilock.
693                  */
694                 dqp = udqhint->q_gdquot;
695                 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
696                         xfs_dqlock(dqp);
697                         XFS_DQHOLD(dqp);
698                         ASSERT(*IO_idqpp == NULL);
699                         *IO_idqpp = dqp;
700
701                         xfs_dqunlock(dqp);
702                         xfs_dqunlock(udqhint);
703                         return 0;
704                 }
705
706                 /*
707                  * We can't hold a dquot lock when we call the dqget code.
708                  * We'll deadlock in no time, because of (not conforming to)
709                  * lock ordering - the inodelock comes before any dquot lock,
710                  * and we may drop and reacquire the ilock in xfs_qm_dqget().
711                  */
712                 xfs_dqunlock(udqhint);
713         }
714
715         /*
716          * Find the dquot from somewhere. This bumps the
717          * reference count of dquot and returns it locked.
718          * This can return ENOENT if dquot didn't exist on
719          * disk and we didn't ask it to allocate;
720          * ESRCH if quotas got turned off suddenly.
721          */
722         error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
723         if (error)
724                 return error;
725
726         trace_xfs_dqattach_get(dqp);
727
728         /*
729          * dqget may have dropped and re-acquired the ilock, but it guarantees
730          * that the dquot returned is the one that should go in the inode.
731          */
732         *IO_idqpp = dqp;
733         xfs_dqunlock(dqp);
734         return 0;
735 }
736
737
738 /*
739  * Given a udquot and gdquot, attach a ptr to the group dquot in the
740  * udquot as a hint for future lookups. The idea sounds simple, but the
741  * execution isn't, because the udquot might have a group dquot attached
742  * already and getting rid of that gets us into lock ordering constraints.
743  * The process is complicated more by the fact that the dquots may or may not
744  * be locked on entry.
745  */
746 STATIC void
747 xfs_qm_dqattach_grouphint(
748         xfs_dquot_t     *udq,
749         xfs_dquot_t     *gdq)
750 {
751         xfs_dquot_t     *tmp;
752
753         xfs_dqlock(udq);
754
755         if ((tmp = udq->q_gdquot)) {
756                 if (tmp == gdq) {
757                         xfs_dqunlock(udq);
758                         return;
759                 }
760
761                 udq->q_gdquot = NULL;
762                 /*
763                  * We can't keep any dqlocks when calling dqrele,
764                  * because the freelist lock comes before dqlocks.
765                  */
766                 xfs_dqunlock(udq);
767                 /*
768                  * we took a hard reference once upon a time in dqget,
769                  * so give it back when the udquot no longer points at it
770                  * dqput() does the unlocking of the dquot.
771                  */
772                 xfs_qm_dqrele(tmp);
773
774                 xfs_dqlock(udq);
775                 xfs_dqlock(gdq);
776
777         } else {
778                 ASSERT(XFS_DQ_IS_LOCKED(udq));
779                 xfs_dqlock(gdq);
780         }
781
782         ASSERT(XFS_DQ_IS_LOCKED(udq));
783         ASSERT(XFS_DQ_IS_LOCKED(gdq));
784         /*
785          * Somebody could have attached a gdquot here,
786          * when we dropped the uqlock. If so, just do nothing.
787          */
788         if (udq->q_gdquot == NULL) {
789                 XFS_DQHOLD(gdq);
790                 udq->q_gdquot = gdq;
791         }
792
793         xfs_dqunlock(gdq);
794         xfs_dqunlock(udq);
795 }
796
797
798 /*
799  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
800  * into account.
801  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
802  * Inode may get unlocked and relocked in here, and the caller must deal with
803  * the consequences.
804  */
805 int
806 xfs_qm_dqattach_locked(
807         xfs_inode_t     *ip,
808         uint            flags)
809 {
810         xfs_mount_t     *mp = ip->i_mount;
811         uint            nquotas = 0;
812         int             error = 0;
813
814         if (!XFS_IS_QUOTA_RUNNING(mp) ||
815             !XFS_IS_QUOTA_ON(mp) ||
816             !XFS_NOT_DQATTACHED(mp, ip) ||
817             ip->i_ino == mp->m_sb.sb_uquotino ||
818             ip->i_ino == mp->m_sb.sb_gquotino)
819                 return 0;
820
821         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
822
823         if (XFS_IS_UQUOTA_ON(mp)) {
824                 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
825                                                 flags & XFS_QMOPT_DQALLOC,
826                                                 NULL, &ip->i_udquot);
827                 if (error)
828                         goto done;
829                 nquotas++;
830         }
831
832         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
833         if (XFS_IS_OQUOTA_ON(mp)) {
834                 error = XFS_IS_GQUOTA_ON(mp) ?
835                         xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
836                                                 flags & XFS_QMOPT_DQALLOC,
837                                                 ip->i_udquot, &ip->i_gdquot) :
838                         xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
839                                                 flags & XFS_QMOPT_DQALLOC,
840                                                 ip->i_udquot, &ip->i_gdquot);
841                 /*
842                  * Don't worry about the udquot that we may have
843                  * attached above. It'll get detached, if not already.
844                  */
845                 if (error)
846                         goto done;
847                 nquotas++;
848         }
849
850         /*
851          * Attach this group quota to the user quota as a hint.
852          * This WON'T, in general, result in a thrash.
853          */
854         if (nquotas == 2) {
855                 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
856                 ASSERT(ip->i_udquot);
857                 ASSERT(ip->i_gdquot);
858
859                 /*
860                  * We may or may not have the i_udquot locked at this point,
861                  * but this check is OK since we don't depend on the i_gdquot to
862                  * be accurate 100% all the time. It is just a hint, and this
863                  * will succeed in general.
864                  */
865                 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
866                         goto done;
867                 /*
868                  * Attach i_gdquot to the gdquot hint inside the i_udquot.
869                  */
870                 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
871         }
872
873  done:
874 #ifdef QUOTADEBUG
875         if (! error) {
876                 if (XFS_IS_UQUOTA_ON(mp))
877                         ASSERT(ip->i_udquot);
878                 if (XFS_IS_OQUOTA_ON(mp))
879                         ASSERT(ip->i_gdquot);
880         }
881         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
882 #endif
883         return error;
884 }
885
/*
 * Attach dquots to an inode that the caller has not locked.
 *
 * Takes XFS_ILOCK_EXCL on ip, calls xfs_qm_dqattach_locked(ip, flags) and
 * drops the lock again.  Returns whatever xfs_qm_dqattach_locked() returned.
 */
886 int
887 xfs_qm_dqattach(
888         struct xfs_inode        *ip,
889         uint                    flags)
890 {
891         int                     error;
892
893         xfs_ilock(ip, XFS_ILOCK_EXCL);
894         error = xfs_qm_dqattach_locked(ip, flags);
895         xfs_iunlock(ip, XFS_ILOCK_EXCL);
896
897         return error;
898 }
899
900 /*
901  * Release dquots (and their references) if any.
902  * The inode should be locked EXCL except when this is called by
903  * xfs_ireclaim.
 *
 * Does nothing when neither ip->i_udquot nor ip->i_gdquot is set.
 * Otherwise each attached dquot is passed to xfs_qm_dqrele() and the
 * corresponding inode pointer is reset to NULL.
904  */
905 void
906 xfs_qm_dqdetach(
907         xfs_inode_t     *ip)
908 {
909         if (!(ip->i_udquot || ip->i_gdquot))
910                 return;
911
912         trace_xfs_dquot_dqdetach(ip);
913
            /*
             * An inode that has dquots attached must not be one of the
             * quota inodes recorded in the superblock.
             */
914         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
915         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
916         if (ip->i_udquot) {
917                 xfs_qm_dqrele(ip->i_udquot);
918                 ip->i_udquot = NULL;
919         }
920         if (ip->i_gdquot) {
921                 xfs_qm_dqrele(ip->i_gdquot);
922                 ip->i_gdquot = NULL;
923         }
924 }
925
/*
 * Write back the dirty dquots found on the mount's qi_dqlist.
 *
 * Returns 0 straight away if quotas are not running or not on for mp.
 * Dquots that turn out not to be dirty are skipped.  With SYNC_TRYLOCK set
 * in flags a dquot is also skipped when its dquot lock or flush lock cannot
 * be taken without blocking; without it both locks are waited for.
 *
 * qi_dqlist_lock is dropped around xfs_qm_dqflush().  qi_dqreclaims is
 * sampled before the drop; if it has changed by the time the list lock is
 * retaken the walk restarts from the head of the list, and it gives up once
 * the restart count reaches XFS_QM_SYNC_MAX_RESTARTS.
 *
 * Returns 0, or the error from xfs_qm_dqflush().  A flush error on a
 * forcibly shut down filesystem is reported as 0.
 */
926 int
927 xfs_qm_sync(
928         struct xfs_mount        *mp,
929         int                     flags)
930 {
931         struct xfs_quotainfo    *q = mp->m_quotainfo;
932         int                     recl, restarts;
933         struct xfs_dquot        *dqp;
934         int                     error;
935
936         if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
937                 return 0;
938
939         restarts = 0;
940
941   again:
942         mutex_lock(&q->qi_dqlist_lock);
943         /*
944          * dqpurge_all() also takes the mplist lock and iterates through all
945          * dquots in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
946          * when we have the mplist lock, we know that dquots will be consistent
947          * as long as we have it locked.
948          */
949         if (!XFS_IS_QUOTA_ON(mp)) {
950                 mutex_unlock(&q->qi_dqlist_lock);
951                 return 0;
952         }
953         ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
954         list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
955                 /*
956                  * If this is vfs_sync calling, then skip the dquots that
957                  * don't 'seem' to be dirty. ie. don't acquire dqlock.
958                  * This is very similar to what xfs_sync does with inodes.
959                  */
960                 if (flags & SYNC_TRYLOCK) {
961                         if (!XFS_DQ_IS_DIRTY(dqp))
962                                 continue;
963                         if (!xfs_qm_dqlock_nowait(dqp))
964                                 continue;
965                 } else {
966                         xfs_dqlock(dqp);
967                 }
968
969                 /*
970                  * Now, find out for sure if this dquot is dirty or not.
971                  */
972                 if (! XFS_DQ_IS_DIRTY(dqp)) {
973                         xfs_dqunlock(dqp);
974                         continue;
975                 }
976
977                 /* XXX a sentinel would be better */
978                 recl = q->qi_dqreclaims;
979                 if (!xfs_dqflock_nowait(dqp)) {
980                         if (flags & SYNC_TRYLOCK) {
981                                 xfs_dqunlock(dqp);
982                                 continue;
983                         }
984                         /*
985                          * We couldn't grab the flush lock, but the caller
986                          * really wants us to give this our best shot, so
987                          * see if we can give a push to the buffer before we wait
988                          * on the flush lock. At this point, we know that
989                          * even though the dquot is being flushed,
990                          * it has (new) dirty data.
991                          */
992                         xfs_qm_dqflock_pushbuf_wait(dqp);
993                 }
994                 /*
995                  * Let go of the mplist lock. We don't want to hold it
996                  * across a disk write
997                  */
998                 mutex_unlock(&q->qi_dqlist_lock);
999                 error = xfs_qm_dqflush(dqp, flags);
1000                 xfs_dqunlock(dqp);
                     /* The list lock is not held here, so returning is safe. */
1001                 if (error && XFS_FORCED_SHUTDOWN(mp))
1002                         return 0;       /* Need to prevent umount failure */
1003                 else if (error)
1004                         return error;
1005
1006                 mutex_lock(&q->qi_dqlist_lock);
                     /*
                      * qi_dqreclaims moved while the list lock was dropped:
                      * stop trusting our position in the list and either
                      * restart the walk or, after too many restarts, stop.
                      */
1007                 if (recl != q->qi_dqreclaims) {
1008                         if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
1009                                 break;
1010
1011                         mutex_unlock(&q->qi_dqlist_lock);
1012                         goto again;
1013                 }
1014         }
1015
1016         mutex_unlock(&q->qi_dqlist_lock);
1017         return 0;
1018 }
1019
1020 /*
1021  * The hash chains and the mplist use the same xfs_dqhash structure as
1022  * their list head, but we can take the mplist qh_lock and one of the
1023  * hash qh_locks at the same time without any problem as they aren't
1024  * related.
 *
 * This key is what xfs_qm_init_quotainfo() passes to lockdep_set_class()
 * for the per-mount qi_dqlist_lock.
1025  */
1026 static struct lock_class_key xfs_quota_mplist_class;
1027
1028 /*
1029  * This initializes all the quota information that's kept in the
1030  * mount structure
 *
 * Takes a reference on the global quota manager, allocates
 * mp->m_quotainfo, has xfs_qm_init_quotainos() set up the quota inodes,
 * initialises the dquot list and its locks, precomputes the dquot chunk
 * geometry and loads the default limits and grace periods from the id 0
 * dquot (user if user quota is running, else group, else project).
 *
 * Returns 0 on success, or the error from xfs_qm_hold_quotafs_ref() or
 * xfs_qm_init_quotainos().  On the latter failure the quota manager
 * reference is released again and mp->m_quotainfo is left NULL.  Failing
 * to get the id 0 dquot is not an error: the XFS_QM_* defaults are used.
1031  */
1032 STATIC int
1033 xfs_qm_init_quotainfo(
1034         xfs_mount_t     *mp)
1035 {
1036         xfs_quotainfo_t *qinf;
1037         int             error;
1038         xfs_dquot_t     *dqp;
1039
1040         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1041
1042         /*
1043          * Tell XQM that we exist as soon as possible.
1044          */
1045         if ((error = xfs_qm_hold_quotafs_ref(mp))) {
1046                 return error;
1047         }
1048
1049         qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
1050
1051         /*
1052          * See if quotainodes are setup, and if not, allocate them,
1053          * and change the superblock accordingly.
1054          */
1055         if ((error = xfs_qm_init_quotainos(mp))) {
                     /*
                      * Drop the reference taken by xfs_qm_hold_quotafs_ref()
                      * above.  xfs_qm_destroy_quotainfo() expects a non-NULL
                      * m_quotainfo, so it cannot do this for us later.
                      */
                     xfs_qm_rele_quotafs_ref(mp);
1056                 kmem_free(qinf);
1057                 mp->m_quotainfo = NULL;
1058                 return error;
1059         }
1060
1061         INIT_LIST_HEAD(&qinf->qi_dqlist);
1062         mutex_init(&qinf->qi_dqlist_lock);
1063         lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
1064
1065         qinf->qi_dqreclaims = 0;
1066
1067         /* mutex used to serialize quotaoffs */
1068         mutex_init(&qinf->qi_quotaofflock);
1069
1070         /* Precalc some constants */
1071         qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1072         ASSERT(qinf->qi_dqchunklen);
             /* dquots per chunk = chunk size in bytes / on-disk dquot block size */
1073         qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
1074         do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
1075
1076         mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
1077
1078         /*
1079          * We try to get the limits from the superuser's limits fields.
1080          * This is quite hacky, but it is standard quota practice.
1081          * We look at the USR dquot with id == 0 first, but if user quotas
1082          * are not enabled we goto the GRP dquot with id == 0.
1083          * We don't really care to keep separate default limits for user
1084          * and group quotas, at least not at this point.
1085          */
1086         error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
1087                              XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
1088                              (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1089                                 XFS_DQ_PROJ),
1090                              XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
1091                              &dqp);
1092         if (! error) {
1093                 xfs_disk_dquot_t        *ddqp = &dqp->q_core;
1094
1095                 /*
1096                  * The warnings and timers set the grace period given to
1097                  * a user or group before he or she can not perform any
1098                  * more writing. If it is zero, a default is used.
1099                  */
1100                 qinf->qi_btimelimit = ddqp->d_btimer ?
1101                         be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
1102                 qinf->qi_itimelimit = ddqp->d_itimer ?
1103                         be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
1104                 qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
1105                         be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
1106                 qinf->qi_bwarnlimit = ddqp->d_bwarns ?
1107                         be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
1108                 qinf->qi_iwarnlimit = ddqp->d_iwarns ?
1109                         be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
1110                 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
1111                         be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
1112                 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
1113                 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
1114                 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
1115                 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
1116                 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1117                 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1118  
1119                 /*
1120                  * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1121                  * we don't want this dquot cached. We haven't done a
1122                  * quotacheck yet, and quotacheck doesn't like incore dquots.
1123                  */
1124                 xfs_qm_dqdestroy(dqp);
1125         } else {
                     /*
                      * No id 0 dquot available: fall back to the default
                      * grace periods and warning limits.  The hard and soft
                      * limits keep the zero they got from kmem_zalloc().
                      */
1126                 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
1127                 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
1128                 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
1129                 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
1130                 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
1131                 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1132         }
1133
1134         return 0;
1135 }
1136
1137
1138 /*
1139  * Gets called when unmounting a filesystem or when all quotas get
1140  * turned off.
1141  * This purges the quota inodes, destroys locks and frees itself.
 *
 * In order: drops the mount's reference on the global quota manager,
 * destroys qi_dqlist_lock, releases the user and group quota inodes if
 * present, destroys qi_quotaofflock, frees the xfs_quotainfo_t and sets
 * mp->m_quotainfo to NULL.  The dquot list is expected to be empty.
1142  */
1143 void
1144 xfs_qm_destroy_quotainfo(
1145         xfs_mount_t     *mp)
1146 {
1147         xfs_quotainfo_t *qi;
1148
1149         qi = mp->m_quotainfo;
1150         ASSERT(qi != NULL);
1151         ASSERT(xfs_Gqm != NULL);
1152
1153         /*
1154          * Release the reference that XQM kept, so that we know
1155          * when the XQM structure should be freed. We cannot assume
1156          * that xfs_Gqm is non-null after this point.
1157          */
1158         xfs_qm_rele_quotafs_ref(mp);
1159
1160         ASSERT(list_empty(&qi->qi_dqlist));
1161         mutex_destroy(&qi->qi_dqlist_lock);
1162
1163         if (qi->qi_uquotaip) {
1164                 IRELE(qi->qi_uquotaip);
1165                 qi->qi_uquotaip = NULL; /* paranoia */
1166         }
1167         if (qi->qi_gquotaip) {
1168                 IRELE(qi->qi_gquotaip);
1169                 qi->qi_gquotaip = NULL;
1170         }
1171         mutex_destroy(&qi->qi_quotaofflock);
1172         kmem_free(qi);
1173         mp->m_quotainfo = NULL;
1174 }
1175
1176
1177
1178 /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1179
1180 /* ARGSUSED */
/*
 * Initialise a dquot list head: set up its mutex and its (empty) list, and
 * zero the version and element counters.  The str and n arguments are not
 * used by this function.
 */
1181 STATIC void
1182 xfs_qm_list_init(
1183         xfs_dqlist_t    *list,
1184         char            *str,
1185         int             n)
1186 {
1187         mutex_init(&list->qh_lock);
1188         INIT_LIST_HEAD(&list->qh_list);
1189         list->qh_version = 0;
1190         list->qh_nelems = 0;
1191 }
1192
/*
 * Tear down a dquot list head set up by xfs_qm_list_init().  Only the
 * qh_lock mutex is destroyed; the list itself is not touched here.
 */
1193 STATIC void
1194 xfs_qm_list_destroy(
1195         xfs_dqlist_t    *list)
1196 {
1197         mutex_destroy(&(list->qh_lock));
1198 }
1199
1200 /*
1201  * Create an inode and return with a reference already taken, but unlocked
1202  * This is how we create quota inodes
 *
 * mp        - mount the quota inode is created on
 * ip        - out: the newly allocated inode
 * sbfields  - superblock field mask handed to xfs_mod_sb()
 * flags     - XFS_QMOPT_SBVERSION additionally adds the quota feature to the
 *             superblock version and resets both quota inode numbers and
 *             sb_qflags; XFS_QMOPT_UQUOTA records the new inode as the user
 *             quota inode, otherwise it is recorded as sb_gquotino.
 *
 * Returns 0, or the error from xfs_trans_reserve(), xfs_dir_ialloc() or
 * xfs_trans_commit().
1203  */
1204 STATIC int
1205 xfs_qm_qino_alloc(
1206         xfs_mount_t     *mp,
1207         xfs_inode_t     **ip,
1208         __int64_t       sbfields,
1209         uint            flags)
1210 {
1211         xfs_trans_t     *tp;
1212         int             error;
1213         int             committed;
1214
1215         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1216         if ((error = xfs_trans_reserve(tp,
1217                                       XFS_QM_QINOCREATE_SPACE_RES(mp),
1218                                       XFS_CREATE_LOG_RES(mp), 0,
1219                                       XFS_TRANS_PERM_LOG_RES,
1220                                       XFS_CREATE_LOG_COUNT))) {
1221                 xfs_trans_cancel(tp, 0);
1222                 return error;
1223         }
1224
1225         error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
1226         if (error) {
1227                 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1228                                  XFS_TRANS_ABORT);
1229                 return error;
1230         }
1231
1232         /*
1233          * Keep an extra reference to this quota inode. This inode is
1234          * locked exclusively and joined to the transaction already.
1235          */
1236         ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1237         IHOLD(*ip);
1238
1239         /*
1240          * Make the changes in the superblock, and log those too.
1241          * sbfields arg may contain fields other than *QUOTINO;
1242          * VERSIONNUM for example.
1243          */
1244         spin_lock(&mp->m_sb_lock);
1245         if (flags & XFS_QMOPT_SBVERSION) {
1246                 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
                     /* The caller must be logging all four quota sb fields. */
1247                 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1248                                    XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1249                        (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1250                         XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1251
1252                 xfs_sb_version_addquota(&mp->m_sb);
1253                 mp->m_sb.sb_uquotino = NULLFSINO;
1254                 mp->m_sb.sb_gquotino = NULLFSINO;
1255
1256                 /* qflags will get updated _after_ quotacheck */
1257                 mp->m_sb.sb_qflags = 0;
1258         }
1259         if (flags & XFS_QMOPT_UQUOTA)
1260                 mp->m_sb.sb_uquotino = (*ip)->i_ino;
1261         else
1262                 mp->m_sb.sb_gquotino = (*ip)->i_ino;
1263         spin_unlock(&mp->m_sb_lock);
1264         xfs_mod_sb(tp, sbfields);
1265
1266         if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1267                 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1268                 return error;
1269         }
1270         return 0;
1271 }
1272
1273
1274 STATIC void
1275 xfs_qm_reset_dqcounts(
1276         xfs_mount_t     *mp,
1277         xfs_buf_t       *bp,
1278         xfs_dqid_t      id,
1279         uint            type)
1280 {
1281         xfs_disk_dquot_t        *ddq;
1282         int                     j;
1283
1284         trace_xfs_reset_dqcounts(bp, _RET_IP_);
1285
1286         /*
1287          * Reset all counters and timers. They'll be
1288          * started afresh by xfs_qm_quotacheck.
1289          */
1290 #ifdef DEBUG
1291         j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1292         do_div(j, sizeof(xfs_dqblk_t));
1293         ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
1294 #endif
1295         ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1296         for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
1297                 /*
1298                  * Do a sanity check, and if needed, repair the dqblk. Don't
1299                  * output any warnings because it's perfectly possible to
1300                  * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1301                  */
1302                 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1303                                       "xfs_quotacheck");
1304                 ddq->d_bcount = 0;
1305                 ddq->d_icount = 0;
1306                 ddq->d_rtbcount = 0;
1307                 ddq->d_btimer = 0;
1308                 ddq->d_itimer = 0;
1309                 ddq->d_rtbtimer = 0;
1310                 ddq->d_bwarns = 0;
1311                 ddq->d_iwarns = 0;
1312                 ddq->d_rtbwarns = 0;
1313                 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1314         }
1315 }
1316
/*
 * Reset the dquot counters in blkcnt consecutive filesystem blocks.
 *
 * mp       - mount
 * firstid  - id of the first dquot in the first block; advanced by
 *            qi_dqperchunk for every block processed
 * bno      - first filesystem block to process
 * blkcnt   - number of blocks to process, must be > 0
 * flags    - XFS_QMOPT_UQUOTA selects user dquots, XFS_QMOPT_PQUOTA project
 *            dquots, anything else group dquots
 *
 * Each block is read with xfs_trans_read_buf(), passed to
 * xfs_qm_reset_dqcounts() and then handed to xfs_bdwrite().  Returns 0, or
 * the first error from xfs_trans_read_buf(), at which point the loop stops.
 */
1317 STATIC int
1318 xfs_qm_dqiter_bufs(
1319         xfs_mount_t     *mp,
1320         xfs_dqid_t      firstid,
1321         xfs_fsblock_t   bno,
1322         xfs_filblks_t   blkcnt,
1323         uint            flags)
1324 {
1325         xfs_buf_t       *bp;
1326         int             error;
1329         int             type;
1330
1331         ASSERT(blkcnt > 0);
1335         type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1336                 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1337         error = 0;
1338
1339         /*
1340          * Blkcnt arg can be a very big number, and might even be
1341          * larger than the log itself. So, we have to break it up into
1342          * manageable-sized transactions.
1343          * Note that we don't start a permanent transaction here; we might
1344          * not be able to get a log reservation for the whole thing up front,
1345          * and we don't really care to either, because we just discard
1346          * everything if we were to crash in the middle of this loop.
              *
              * As written, the buffers are read with a NULL transaction
              * pointer, one block at a time, so no transaction is involved
              * in this loop at all.
1347          */
1348         while (blkcnt--) {
1349                 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1350                               XFS_FSB_TO_DADDR(mp, bno),
1351                               mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1352                 if (error)
1353                         break;
1354
1355                 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1356                 xfs_bdwrite(mp, bp);
1357                 /*
1358                  * goto the next block.
1359                  */
1360                 bno++;
1361                 firstid += mp->m_quotainfo->qi_dqperchunk;
1362         }
1363         return error;
1364 }
1365
1366 /*
1367  * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling
1368  * xfs_qm_dqiter_bufs() for every mapped extent of dquots that we find.
 *
 * mp     - mount
 * qip    - quota inode whose data blocks hold the dquots
 * flags  - passed through unchanged to xfs_qm_dqiter_bufs()
 *
 * Returns 0 if the quota inode has no blocks or everything was processed,
 * otherwise the first error from xfs_bmapi() or xfs_qm_dqiter_bufs().
1369  */
1370 STATIC int
1371 xfs_qm_dqiterate(
1372         xfs_mount_t     *mp,
1373         xfs_inode_t     *qip,
1374         uint            flags)
1375 {
1376         xfs_bmbt_irec_t         *map;
1377         int                     i, nmaps;       /* number of map entries */
1378         int                     error;          /* return value */
1379         xfs_fileoff_t           lblkno;
1380         xfs_filblks_t           maxlblkcnt;
1381         xfs_dqid_t              firstid;
1382         xfs_fsblock_t           rablkno;
1383         xfs_filblks_t           rablkcnt;
1384
1385         error = 0;
1386         /*
1387          * This looks racy, but we can't keep an inode lock across a
1388          * trans_reserve. But, this gets called during quotacheck, and that
1389          * happens only at mount time which is single threaded.
1390          */
1391         if (qip->i_d.di_nblocks == 0)
1392                 return 0;
1393
1394         map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1395
1396         lblkno = 0;
1397         maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
             /*
              * Map the file XFS_DQITER_MAP_SIZE extents at a time, starting
              * at lblkno, until xfs_bmapi() returns no more mappings.
              */
1398         do {
1399                 nmaps = XFS_DQITER_MAP_SIZE;
1400                 /*
1401                  * We aren't changing the inode itself. Just changing
1402                  * some of its data. No new blocks are added here, and
1403                  * the inode is never added to the transaction.
1404                  */
1405                 xfs_ilock(qip, XFS_ILOCK_SHARED);
1406                 error = xfs_bmapi(NULL, qip, lblkno,
1407                                   maxlblkcnt - lblkno,
1408                                   XFS_BMAPI_METADATA,
1409                                   NULL,
1410                                   0, map, &nmaps, NULL);
1411                 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1412                 if (error)
1413                         break;
1414
1415                 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1416                 for (i = 0; i < nmaps; i++) {
1417                         ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1418                         ASSERT(map[i].br_blockcount);
1419
1420
                             /*
                              * Advance past this mapping before the hole
                              * check so that holes still move lblkno forward.
                              */
1421                         lblkno += map[i].br_blockcount;
1422
1423                         if (map[i].br_startblock == HOLESTARTBLOCK)
1424                                 continue;
1425
                             /* File offset in blocks times dquots per block. */
1426                         firstid = (xfs_dqid_t) map[i].br_startoff *
1427                                 mp->m_quotainfo->qi_dqperchunk;
1428                         /*
1429                          * Do a read-ahead on the next extent.
1430                          */
1431                         if ((i+1 < nmaps) &&
1432                             (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1433                                 rablkcnt =  map[i+1].br_blockcount;
1434                                 rablkno = map[i+1].br_startblock;
1435                                 while (rablkcnt--) {
1436                                         xfs_buf_readahead(mp->m_ddev_targp,
1437                                                XFS_FSB_TO_DADDR(mp, rablkno),
1438                                                mp->m_quotainfo->qi_dqchunklen);
1439                                         rablkno++;
1440                                 }
1441                         }
1442                         /*
1443                          * Iterate thru all the blks in the extent and
1444                          * reset the counters of all the dquots inside them.
1445                          */
1446                         if ((error = xfs_qm_dqiter_bufs(mp,
1447                                                        firstid,
1448                                                        map[i].br_startblock,
1449                                                        map[i].br_blockcount,
1450                                                        flags))) {
1451                                 break;
1452                         }
1453                 }
1454
1455                 if (error)
1456                         break;
1457         } while (nmaps > 0);
1458
1459         kmem_free(map);
1460
1461         return error;
1462 }
1463
1464 /*
1465  * Called by dqusage_adjust in doing a quotacheck.
1466  *
1467  * Given the inode, and a dquot id this updates both the incore dquot as well
1468  * as the buffer copy. This is so that once the quotacheck is done, we can
1469  * just log all the buffers, as opposed to logging numerous updates to
1470  * individual dquots.
 *
 * ip      - inode whose usage is being accounted
 * id      - dquot id to charge
 * type    - dquot type passed to xfs_qm_dqget()
 * nblks   - number of regular blocks to add to the dquot
 * rtblks  - number of realtime blocks to add to the dquot
 *
 * The dquot is looked up with XFS_QMOPT_DQALLOC, charged one inode plus the
 * given block counts (both the d_* counters in q_core and the q_res_*
 * counters), marked XFS_DQ_DIRTY and handed back with xfs_qm_dqput().
 * Returns 0, or the error from xfs_qm_dqget().
1471  */
1472 STATIC int
1473 xfs_qm_quotacheck_dqadjust(
1474         struct xfs_inode        *ip,
1475         xfs_dqid_t              id,
1476         uint                    type,
1477         xfs_qcnt_t              nblks,
1478         xfs_qcnt_t              rtblks)
1479 {
1480         struct xfs_mount        *mp = ip->i_mount;
1481         struct xfs_dquot        *dqp;
1482         int                     error;
1483
1484         error = xfs_qm_dqget(mp, ip, id, type,
1485                              XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
1486         if (error) {
1487                 /*
1488                  * Shouldn't be able to turn off quotas here.
1489                  */
1490                 ASSERT(error != ESRCH);
1491                 ASSERT(error != ENOENT);
1492                 return error;
1493         }
1494
1495         trace_xfs_dqadjust(dqp);
1496
1497         /*
1498          * Adjust the inode count and the block count to reflect this inode's
1499          * resource usage.
1500          */
1501         be64_add_cpu(&dqp->q_core.d_icount, 1);
1502         dqp->q_res_icount++;
1503         if (nblks) {
1504                 be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1505                 dqp->q_res_bcount += nblks;
1506         }
1507         if (rtblks) {
1508                 be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1509                 dqp->q_res_rtbcount += rtblks;
1510         }
1511
1512         /*
1513          * Set default limits, adjust timers (since we changed usages)
1514          *
1515          * There are no timers for the default values set in the root dquot.
1516          */
1517         if (dqp->q_core.d_id) {
1518                 xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
1519                 xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1520         }
1521
1522         dqp->dq_flags |= XFS_DQ_DIRTY;
1523         xfs_qm_dqput(dqp);
1524         return 0;
1525 }
1526
/*
 * Count the blocks mapped by the data fork of a realtime inode.
 *
 * ip        - inode; asserted to be a realtime inode
 * O_rtblks  - out: sum of the block counts of all data fork extents
 *
 * If the extent list is not yet in core (XFS_IFEXTENTS clear) it is read in
 * first with xfs_iread_extents().  Returns 0, or the error from that read,
 * in which case *O_rtblks is not written.
 */
1527 STATIC int
1528 xfs_qm_get_rtblks(
1529         xfs_inode_t     *ip,
1530         xfs_qcnt_t      *O_rtblks)
1531 {
1532         xfs_filblks_t   rtblks;                 /* total rt blks */
1533         xfs_extnum_t    idx;                    /* extent record index */
1534         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1535         xfs_extnum_t    nextents;               /* number of extent entries */
1536         int             error;
1537
1538         ASSERT(XFS_IS_REALTIME_INODE(ip));
1539         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1540         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1541                 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1542                         return error;
1543         }
1544         rtblks = 0;
             /* The in-core fork holds if_bytes worth of xfs_bmbt_rec_t records. */
1545         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1546         for (idx = 0; idx < nextents; idx++)
1547                 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1548         *O_rtblks = (xfs_qcnt_t)rtblks;
1549         return 0;
1550 }
1551
1552 /*
1553  * callback routine supplied to bulkstat(). Given an inumber, find its
1554  * dquots and update them to account for resources taken by that inode.
 *
 * Outcomes, as reported through the return value and *res:
 *   - ino is the user or group quota inode: EINVAL, BULKSTAT_RV_NOTHING
 *   - xfs_iget() fails: that error, BULKSTAT_RV_NOTHING
 *   - counting realtime blocks or adjusting a dquot fails: that error,
 *     BULKSTAT_RV_GIVEUP
 *   - otherwise: 0, BULKSTAT_RV_DIDONE
 *
 * The inode is held with XFS_ILOCK_EXCL while its usage is charged to the
 * user, group and project dquots (each only if that quota type is on), and
 * is unlocked and released on every path after a successful xfs_iget().
1555  */
1556 /* ARGSUSED */
1557 STATIC int
1558 xfs_qm_dqusage_adjust(
1559         xfs_mount_t     *mp,            /* mount point for filesystem */
1560         xfs_ino_t       ino,            /* inode number to get data for */
1561         void            __user *buffer, /* not used */
1562         int             ubsize,         /* not used */
1563         int             *ubused,        /* not used */
1564         int             *res)           /* result code value */
1565 {
1566         xfs_inode_t     *ip;
1567         xfs_qcnt_t      nblks, rtblks = 0;
1568         int             error;
1569
1570         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1571
1572         /*
1573          * rootino must have its resources accounted for, not so with the quota
1574          * inodes.
1575          */
1576         if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1577                 *res = BULKSTAT_RV_NOTHING;
1578                 return XFS_ERROR(EINVAL);
1579         }
1580
1581         /*
1582          * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1583          * interface expects the inode to be exclusively locked because that's
1584          * the case in all other instances. It's OK that we do this because
1585          * quotacheck is done only at mount time.
1586          */
1587         error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
1588         if (error) {
1589                 *res = BULKSTAT_RV_NOTHING;
1590                 return error;
1591         }
1592
1593         ASSERT(ip->i_delayed_blks == 0);
1594
1595         if (XFS_IS_REALTIME_INODE(ip)) {
1596                 /*
1597                  * Walk thru the extent list and count the realtime blocks.
1598                  */
1599                 error = xfs_qm_get_rtblks(ip, &rtblks);
1600                 if (error)
1601                         goto error0;
1602         }
1603
             /* Whatever is not a realtime block is charged as a regular block. */
1604         nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1605
1606         /*
1607          * Add the (disk blocks and inode) resources occupied by this
1608          * inode to its dquots. We do this adjustment in the incore dquot,
1609          * and also copy the changes to its buffer.
1610          * We don't care about putting these changes in a transaction
1611          * envelope because if we crash in the middle of a 'quotacheck'
1612          * we have to start from the beginning anyway.
1613          * Once we're done, we'll log all the dquot bufs.
1614          *
1615          * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1616          * and quotaoffs don't race. (Quotachecks happen at mount time only).
1617          */
1618         if (XFS_IS_UQUOTA_ON(mp)) {
1619                 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
1620                                                    XFS_DQ_USER, nblks, rtblks);
1621                 if (error)
1622                         goto error0;
1623         }
1624
1625         if (XFS_IS_GQUOTA_ON(mp)) {
1626                 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
1627                                                    XFS_DQ_GROUP, nblks, rtblks);
1628                 if (error)
1629                         goto error0;
1630         }
1631
1632         if (XFS_IS_PQUOTA_ON(mp)) {
1633                 error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
1634                                                    XFS_DQ_PROJ, nblks, rtblks);
1635                 if (error)
1636                         goto error0;
1637         }
1638
1639         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1640         IRELE(ip);
1641         *res = BULKSTAT_RV_DIDONE;
1642         return 0;
1643
1644 error0:
1645         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1646         IRELE(ip);
1647         *res = BULKSTAT_RV_GIVEUP;
1648         return error;
1649 }
1650
1651 /*
1652  * Walk thru all the filesystem inodes and construct a consistent view
1653  * of the disk quota world. If the quotacheck fails, disable quotas.
1654  */
1655 int
1656 xfs_qm_quotacheck(
1657         xfs_mount_t     *mp)
1658 {
1659         int             done, count, error;
1660         xfs_ino_t       lastino;
1661         size_t          structsz;
1662         xfs_inode_t     *uip, *gip;
1663         uint            flags;
1664
1665         count = INT_MAX;
1666         structsz = 1;
1667         lastino = 0;
1668         flags = 0;
1669
1670         ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1671         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1672
1673         /*
1674          * There should be no cached dquots. The (simplistic) quotacheck
1675          * algorithm doesn't like that.
1676          */
1677         ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1678
1679         cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1680
1681         /*
1682          * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1683          * their counters to zero. We need a clean slate.
1684          * We don't log our changes till later.
1685          */
1686         uip = mp->m_quotainfo->qi_uquotaip;
1687         if (uip) {
1688                 error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
1689                 if (error)
1690                         goto error_return;
1691                 flags |= XFS_UQUOTA_CHKD;
1692         }
1693
1694         gip = mp->m_quotainfo->qi_gquotaip;
1695         if (gip) {
1696                 error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1697                                         XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1698                 if (error)
1699                         goto error_return;
1700                 flags |= XFS_OQUOTA_CHKD;
1701         }
1702
1703         do {
1704                 /*
1705                  * Iterate thru all the inodes in the file system,
1706                  * adjusting the corresponding dquot counters in core.
1707                  */
1708                 error = xfs_bulkstat(mp, &lastino, &count,
1709                                      xfs_qm_dqusage_adjust,
1710                                      structsz, NULL, &done);
1711                 if (error)
1712                         break;
1713
1714         } while (!done);
1715
1716         /*
1717          * We've made all the changes that we need to make incore.
1718          * Flush them down to disk buffers if everything was updated
1719          * successfully.
1720          */
1721         if (!error)
1722                 error = xfs_qm_dqflush_all(mp, 0);
1723
1724         /*
1725          * We can get this error if we couldn't do a dquot allocation inside
1726          * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1727          * dirty dquots that might be cached, we just want to get rid of them
1728          * and turn quotaoff. The dquots won't be attached to any of the inodes
1729          * at this point (because we intentionally didn't in dqget_noattach).
1730          */
1731         if (error) {
1732                 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
1733                 goto error_return;
1734         }
1735
1736         /*
1737          * We didn't log anything, because if we crashed, we'll have to
1738          * start the quotacheck from scratch anyway. However, we must make
1739          * sure that our dquot changes are secure before we put the
1740          * quotacheck'd stamp on the superblock. So, here we do a synchronous
1741          * flush.
1742          */
1743         XFS_bflush(mp->m_ddev_targp);
1744
1745         /*
1746          * If one type of quotas is off, then it will lose its
1747          * quotachecked status, since we won't be doing accounting for
1748          * that type anymore.
1749          */
1750         mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1751         mp->m_qflags |= flags;
1752
1753         xfs_qm_dquot_list_print(mp);
1754
1755  error_return:
1756         if (error) {
1757                 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
1758                         "Disabling quotas.",
1759                         mp->m_fsname, error);
1760                 /*
1761                  * We must turn off quotas.
1762                  */
1763                 ASSERT(mp->m_quotainfo != NULL);
1764                 ASSERT(xfs_Gqm != NULL);
1765                 xfs_qm_destroy_quotainfo(mp);
1766                 if (xfs_mount_reset_sbqflags(mp)) {
1767                         cmn_err(CE_WARN, "XFS quotacheck %s: "
1768                                 "Failed to reset quota flags.", mp->m_fsname);
1769                 }
1770         } else {
1771                 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
1772         }
1773         return (error);
1774 }
1775
1776 /*
1777  * This is called after the superblock has been read in and we're ready to
1778  * iget the quota inodes.
1779  */
1780 STATIC int
1781 xfs_qm_init_quotainos(
1782         xfs_mount_t     *mp)
1783 {
1784         xfs_inode_t     *uip, *gip;
1785         int             error;
1786         __int64_t       sbflags;
1787         uint            flags;
1788
1789         ASSERT(mp->m_quotainfo);
1790         uip = gip = NULL;
1791         sbflags = 0;
1792         flags = 0;
1793
1794         /*
1795          * Get the uquota and gquota inodes
1796          */
1797         if (xfs_sb_version_hasquota(&mp->m_sb)) {
1798                 if (XFS_IS_UQUOTA_ON(mp) &&
1799                     mp->m_sb.sb_uquotino != NULLFSINO) {
1800                         ASSERT(mp->m_sb.sb_uquotino > 0);
1801                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1802                                              0, 0, &uip)))
1803                                 return XFS_ERROR(error);
1804                 }
1805                 if (XFS_IS_OQUOTA_ON(mp) &&
1806                     mp->m_sb.sb_gquotino != NULLFSINO) {
1807                         ASSERT(mp->m_sb.sb_gquotino > 0);
1808                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1809                                              0, 0, &gip))) {
1810                                 if (uip)
1811                                         IRELE(uip);
1812                                 return XFS_ERROR(error);
1813                         }
1814                 }
1815         } else {
1816                 flags |= XFS_QMOPT_SBVERSION;
1817                 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1818                             XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1819         }
1820
1821         /*
1822          * Create the two inodes, if they don't exist already. The changes
1823          * made above will get added to a transaction and logged in one of
1824          * the qino_alloc calls below.  If the device is readonly,
1825          * temporarily switch to read-write to do this.
1826          */
1827         if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1828                 if ((error = xfs_qm_qino_alloc(mp, &uip,
1829                                               sbflags | XFS_SB_UQUOTINO,
1830                                               flags | XFS_QMOPT_UQUOTA)))
1831                         return XFS_ERROR(error);
1832
1833                 flags &= ~XFS_QMOPT_SBVERSION;
1834         }
1835         if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1836                 flags |= (XFS_IS_GQUOTA_ON(mp) ?
1837                                 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1838                 error = xfs_qm_qino_alloc(mp, &gip,
1839                                           sbflags | XFS_SB_GQUOTINO, flags);
1840                 if (error) {
1841                         if (uip)
1842                                 IRELE(uip);
1843
1844                         return XFS_ERROR(error);
1845                 }
1846         }
1847
1848         mp->m_quotainfo->qi_uquotaip = uip;
1849         mp->m_quotainfo->qi_gquotaip = gip;
1850
1851         return 0;
1852 }
1853
1854
1855
1856 /*
1857  * Just pop the least recently used dquot off the freelist and
1858  * recycle it. The returned dquot is locked.
1859  */
1860 STATIC xfs_dquot_t *
1861 xfs_qm_dqreclaim_one(void)
1862 {
1863         xfs_dquot_t     *dqpout;
1864         xfs_dquot_t     *dqp;
1865         int             restarts;
1866         int             startagain;
1867
1868         restarts = 0;
1869         dqpout = NULL;
1870
1871         /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
1872 again:
1873         startagain = 0;
1874         mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1875
1876         list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
1877                 struct xfs_mount *mp = dqp->q_mount;
1878                 xfs_dqlock(dqp);
1879
1880                 /*
1881                  * We are racing with dqlookup here. Naturally we don't
1882                  * want to reclaim a dquot that lookup wants. We release the
1883                  * freelist lock and start over, so that lookup will grab
1884                  * both the dquot and the freelistlock.
1885                  */
1886                 if (dqp->dq_flags & XFS_DQ_WANT) {
1887                         ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
1888
1889                         trace_xfs_dqreclaim_want(dqp);
1890                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1891                         restarts++;
1892                         startagain = 1;
1893                         goto dqunlock;
1894                 }
1895
1896                 /*
1897                  * If the dquot is inactive, we are assured that it is
1898                  * not on the mplist or the hashlist, and that makes our
1899                  * life easier.
1900                  */
1901                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1902                         ASSERT(mp == NULL);
1903                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1904                         ASSERT(list_empty(&dqp->q_hashlist));
1905                         ASSERT(list_empty(&dqp->q_mplist));
1906                         list_del_init(&dqp->q_freelist);
1907                         xfs_Gqm->qm_dqfrlist_cnt--;
1908                         dqpout = dqp;
1909                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1910                         goto dqunlock;
1911                 }
1912
1913                 ASSERT(dqp->q_hash);
1914                 ASSERT(!list_empty(&dqp->q_mplist));
1915
1916                 /*
1917                  * Try to grab the flush lock. If this dquot is in the process
1918                  * of getting flushed to disk, we don't want to reclaim it.
1919                  */
1920                 if (!xfs_dqflock_nowait(dqp))
1921                         goto dqunlock;
1922
1923                 /*
1924                  * We have the flush lock so we know that this is not in the
1925                  * process of being flushed. So, if this is dirty, flush it
1926                  * DELWRI so that we don't get a freelist infested with
1927                  * dirty dquots.
1928                  */
1929                 if (XFS_DQ_IS_DIRTY(dqp)) {
1930                         int     error;
1931
1932                         trace_xfs_dqreclaim_dirty(dqp);
1933
1934                         /*
1935                          * We flush it delayed write, so don't bother
1936                          * releasing the freelist lock.
1937                          */
1938                         error = xfs_qm_dqflush(dqp, 0);
1939                         if (error) {
1940                                 xfs_fs_cmn_err(CE_WARN, mp,
1941                         "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
1942                         }
1943                         goto dqunlock;
1944                 }
1945
1946                 /*
1947                  * We're trying to get the hashlock out of order. This races
1948                  * with dqlookup; so, we giveup and goto the next dquot if
1949                  * we couldn't get the hashlock. This way, we won't starve
1950                  * a dqlookup process that holds the hashlock that is
1951                  * waiting for the freelist lock.
1952                  */
1953                 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
1954                         restarts++;
1955                         goto dqfunlock;
1956                 }
1957
1958                 /*
1959                  * This races with dquot allocation code as well as dqflush_all
1960                  * and reclaim code. So, if we failed to grab the mplist lock,
1961                  * giveup everything and start over.
1962                  */
1963                 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
1964                         restarts++;
1965                         startagain = 1;
1966                         goto qhunlock;
1967                 }
1968
1969                 ASSERT(dqp->q_nrefs == 0);
1970                 list_del_init(&dqp->q_mplist);
1971                 mp->m_quotainfo->qi_dquots--;
1972                 mp->m_quotainfo->qi_dqreclaims++;
1973                 list_del_init(&dqp->q_hashlist);
1974                 dqp->q_hash->qh_version++;
1975                 list_del_init(&dqp->q_freelist);
1976                 xfs_Gqm->qm_dqfrlist_cnt--;
1977                 dqpout = dqp;
1978                 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1979 qhunlock:
1980                 mutex_unlock(&dqp->q_hash->qh_lock);
1981 dqfunlock:
1982                 xfs_dqfunlock(dqp);
1983 dqunlock:
1984                 xfs_dqunlock(dqp);
1985                 if (dqpout)
1986                         break;
1987                 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1988                         break;
1989                 if (startagain) {
1990                         mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1991                         goto again;
1992                 }
1993         }
1994         mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1995         return dqpout;
1996 }
1997
1998 /*
1999  * Traverse the freelist of dquots and attempt to reclaim a maximum of
2000  * 'howmany' dquots. This operation races with dqlookup(), and attempts to
2001  * favor the lookup function ...
2002  */
2003 STATIC int
2004 xfs_qm_shake_freelist(
2005         int     howmany)
2006 {
2007         int             nreclaimed = 0;
2008         xfs_dquot_t     *dqp;
2009
2010         if (howmany <= 0)
2011                 return 0;
2012
2013         while (nreclaimed < howmany) {
2014                 dqp = xfs_qm_dqreclaim_one();
2015                 if (!dqp)
2016                         return nreclaimed;
2017                 xfs_qm_dqdestroy(dqp);
2018                 nreclaimed++;
2019         }
2020         return nreclaimed;
2021 }
2022
2023 /*
2024  * The kmem_shake interface is invoked when memory is running low.
2025  */
2026 /* ARGSUSED */
2027 STATIC int
2028 xfs_qm_shake(
2029         struct shrinker *shrink,
2030         int             nr_to_scan,
2031         gfp_t           gfp_mask)
2032 {
2033         int     ndqused, nfree, n;
2034
2035         if (!kmem_shake_allow(gfp_mask))
2036                 return 0;
2037         if (!xfs_Gqm)
2038                 return 0;
2039
2040         nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
2041         /* incore dquots in all f/s's */
2042         ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2043
2044         ASSERT(ndqused >= 0);
2045
2046         if (nfree <= ndqused && nfree < ndquot)
2047                 return 0;
2048
2049         ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
2050         n = nfree - ndqused - ndquot;           /* # over target */
2051
2052         return xfs_qm_shake_freelist(MAX(nfree, n));
2053 }
2054
2055
2056 /*------------------------------------------------------------------*/
2057
2058 /*
2059  * Return a new incore dquot. Depending on the number of
2060  * dquots in the system, we either allocate a new one on the kernel heap,
2061  * or reclaim a free one.
2062  * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
2063  * to reclaim an existing one from the freelist.
2064  */
2065 boolean_t
2066 xfs_qm_dqalloc_incore(
2067         xfs_dquot_t **O_dqpp)
2068 {
2069         xfs_dquot_t     *dqp;
2070
2071         /*
2072          * Check against high water mark to see if we want to pop
2073          * a nincompoop dquot off the freelist.
2074          */
2075         if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
2076                 /*
2077                  * Try to recycle a dquot from the freelist.
2078                  */
2079                 if ((dqp = xfs_qm_dqreclaim_one())) {
2080                         XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
2081                         /*
2082                          * Just zero the core here. The rest will get
2083                          * reinitialized by caller. XXX we shouldn't even
2084                          * do this zero ...
2085                          */
2086                         memset(&dqp->q_core, 0, sizeof(dqp->q_core));
2087                         *O_dqpp = dqp;
2088                         return B_FALSE;
2089                 }
2090                 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2091         }
2092
2093         /*
2094          * Allocate a brand new dquot on the kernel heap and return it
2095          * to the caller to initialize.
2096          */
2097         ASSERT(xfs_Gqm->qm_dqzone != NULL);
2098         *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2099         atomic_inc(&xfs_Gqm->qm_totaldquots);
2100
2101         return B_TRUE;
2102 }
2103
2104
2105 /*
2106  * Start a transaction and write the incore superblock changes to
2107  * disk. flags parameter indicates which fields have changed.
2108  */
2109 int
2110 xfs_qm_write_sb_changes(
2111         xfs_mount_t     *mp,
2112         __int64_t       flags)
2113 {
2114         xfs_trans_t     *tp;
2115         int             error;
2116
2117 #ifdef QUOTADEBUG
2118         cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
2119 #endif
2120         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2121         if ((error = xfs_trans_reserve(tp, 0,
2122                                       mp->m_sb.sb_sectsize + 128, 0,
2123                                       0,
2124                                       XFS_DEFAULT_LOG_COUNT))) {
2125                 xfs_trans_cancel(tp, 0);
2126                 return error;
2127         }
2128
2129         xfs_mod_sb(tp, flags);
2130         error = xfs_trans_commit(tp, 0);
2131
2132         return error;
2133 }
2134
2135
2136 /* --------------- utility functions for vnodeops ---------------- */
2137
2138
2139 /*
2140  * Given an inode, a uid, gid and prid make sure that we have
2141  * allocated relevant dquot(s) on disk, and that we won't exceed inode
2142  * quotas by creating this file.
2143  * This also attaches dquot(s) to the given inode after locking it,
2144  * and returns the dquots corresponding to the uid and/or gid.
2145  *
2146  * in   : inode (unlocked)
2147  * out  : udquot, gdquot with references taken and unlocked
2148  */
2149 int
2150 xfs_qm_vop_dqalloc(
2151         struct xfs_inode        *ip,
2152         uid_t                   uid,
2153         gid_t                   gid,
2154         prid_t                  prid,
2155         uint                    flags,
2156         struct xfs_dquot        **O_udqpp,
2157         struct xfs_dquot        **O_gdqpp)
2158 {
2159         struct xfs_mount        *mp = ip->i_mount;
2160         struct xfs_dquot        *uq, *gq;
2161         int                     error;
2162         uint                    lockflags;
2163
2164         if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2165                 return 0;
2166
2167         lockflags = XFS_ILOCK_EXCL;
2168         xfs_ilock(ip, lockflags);
2169
2170         if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
2171                 gid = ip->i_d.di_gid;
2172
2173         /*
2174          * Attach the dquot(s) to this inode, doing a dquot allocation
2175          * if necessary. The dquot(s) will not be locked.
2176          */
2177         if (XFS_NOT_DQATTACHED(mp, ip)) {
2178                 error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
2179                 if (error) {
2180                         xfs_iunlock(ip, lockflags);
2181                         return error;
2182                 }
2183         }
2184
2185         uq = gq = NULL;
2186         if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
2187                 if (ip->i_d.di_uid != uid) {
2188                         /*
2189                          * What we need is the dquot that has this uid, and
2190                          * if we send the inode to dqget, the uid of the inode
2191                          * takes priority over what's sent in the uid argument.
2192                          * We must unlock inode here before calling dqget if
2193                          * we're not sending the inode, because otherwise
2194                          * we'll deadlock by doing trans_reserve while
2195                          * holding ilock.
2196                          */
2197                         xfs_iunlock(ip, lockflags);
2198                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
2199                                                  XFS_DQ_USER,
2200                                                  XFS_QMOPT_DQALLOC |
2201                                                  XFS_QMOPT_DOWARN,
2202                                                  &uq))) {
2203                                 ASSERT(error != ENOENT);
2204                                 return error;
2205                         }
2206                         /*
2207                          * Get the ilock in the right order.
2208                          */
2209                         xfs_dqunlock(uq);
2210                         lockflags = XFS_ILOCK_SHARED;
2211                         xfs_ilock(ip, lockflags);
2212                 } else {
2213                         /*
2214                          * Take an extra reference, because we'll return
2215                          * this to caller
2216                          */
2217                         ASSERT(ip->i_udquot);
2218                         uq = ip->i_udquot;
2219                         xfs_dqlock(uq);
2220                         XFS_DQHOLD(uq);
2221                         xfs_dqunlock(uq);
2222                 }
2223         }
2224         if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
2225                 if (ip->i_d.di_gid != gid) {
2226                         xfs_iunlock(ip, lockflags);
2227                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2228                                                  XFS_DQ_GROUP,
2229                                                  XFS_QMOPT_DQALLOC |
2230                                                  XFS_QMOPT_DOWARN,
2231                                                  &gq))) {
2232                                 if (uq)
2233                                         xfs_qm_dqrele(uq);
2234                                 ASSERT(error != ENOENT);
2235                                 return error;
2236                         }
2237                         xfs_dqunlock(gq);
2238                         lockflags = XFS_ILOCK_SHARED;
2239                         xfs_ilock(ip, lockflags);
2240                 } else {
2241                         ASSERT(ip->i_gdquot);
2242                         gq = ip->i_gdquot;
2243                         xfs_dqlock(gq);
2244                         XFS_DQHOLD(gq);
2245                         xfs_dqunlock(gq);
2246                 }
2247         } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2248                 if (xfs_get_projid(ip) != prid) {
2249                         xfs_iunlock(ip, lockflags);
2250                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2251                                                  XFS_DQ_PROJ,
2252                                                  XFS_QMOPT_DQALLOC |
2253                                                  XFS_QMOPT_DOWARN,
2254                                                  &gq))) {
2255                                 if (uq)
2256                                         xfs_qm_dqrele(uq);
2257                                 ASSERT(error != ENOENT);
2258                                 return (error);
2259                         }
2260                         xfs_dqunlock(gq);
2261                         lockflags = XFS_ILOCK_SHARED;
2262                         xfs_ilock(ip, lockflags);
2263                 } else {
2264                         ASSERT(ip->i_gdquot);
2265                         gq = ip->i_gdquot;
2266                         xfs_dqlock(gq);
2267                         XFS_DQHOLD(gq);
2268                         xfs_dqunlock(gq);
2269                 }
2270         }
2271         if (uq)
2272                 trace_xfs_dquot_dqalloc(ip);
2273
2274         xfs_iunlock(ip, lockflags);
2275         if (O_udqpp)
2276                 *O_udqpp = uq;
2277         else if (uq)
2278                 xfs_qm_dqrele(uq);
2279         if (O_gdqpp)
2280                 *O_gdqpp = gq;
2281         else if (gq)
2282                 xfs_qm_dqrele(gq);
2283         return 0;
2284 }
2285
2286 /*
2287  * Actually transfer ownership, and do dquot modifications.
2288  * These were already reserved.
2289  */
2290 xfs_dquot_t *
2291 xfs_qm_vop_chown(
2292         xfs_trans_t     *tp,
2293         xfs_inode_t     *ip,
2294         xfs_dquot_t     **IO_olddq,
2295         xfs_dquot_t     *newdq)
2296 {
2297         xfs_dquot_t     *prevdq;
2298         uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
2299                                  XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2300
2301
2302         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2303         ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2304
2305         /* old dquot */
2306         prevdq = *IO_olddq;
2307         ASSERT(prevdq);
2308         ASSERT(prevdq != newdq);
2309
2310         xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2311         xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2312
2313         /* the sparkling new dquot */
2314         xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2315         xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2316
2317         /*
2318          * Take an extra reference, because the inode
2319          * is going to keep this dquot pointer even
2320          * after the trans_commit.
2321          */
2322         xfs_dqlock(newdq);
2323         XFS_DQHOLD(newdq);
2324         xfs_dqunlock(newdq);
2325         *IO_olddq = newdq;
2326
2327         return prevdq;
2328 }
2329
2330 /*
2331  * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2332  */
2333 int
2334 xfs_qm_vop_chown_reserve(
2335         xfs_trans_t     *tp,
2336         xfs_inode_t     *ip,
2337         xfs_dquot_t     *udqp,
2338         xfs_dquot_t     *gdqp,
2339         uint            flags)
2340 {
2341         xfs_mount_t     *mp = ip->i_mount;
2342         uint            delblks, blkflags, prjflags = 0;
2343         xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2344         int             error;
2345
2346
2347         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2348         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2349
2350         delblks = ip->i_delayed_blks;
2351         delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2352         blkflags = XFS_IS_REALTIME_INODE(ip) ?
2353                         XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2354
2355         if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2356             ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
2357                 delblksudq = udqp;
2358                 /*
2359                  * If there are delayed allocation blocks, then we have to
2360                  * unreserve those from the old dquot, and add them to the
2361                  * new dquot.
2362                  */
2363                 if (delblks) {
2364                         ASSERT(ip->i_udquot);
2365                         unresudq = ip->i_udquot;
2366                 }
2367         }
2368         if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2369                 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2370                      xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
2371                         prjflags = XFS_QMOPT_ENOSPC;
2372
2373                 if (prjflags ||
2374                     (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2375                      ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
2376                         delblksgdq = gdqp;
2377                         if (delblks) {
2378                                 ASSERT(ip->i_gdquot);
2379                                 unresgdq = ip->i_gdquot;
2380                         }
2381                 }
2382         }
2383
2384         if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2385                                 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2386                                 flags | blkflags | prjflags)))
2387                 return (error);
2388
2389         /*
2390          * Do the delayed blks reservations/unreservations now. Since, these
2391          * are done without the help of a transaction, if a reservation fails
2392          * its previous reservations won't be automatically undone by trans
2393          * code. So, we have to do it manually here.
2394          */
2395         if (delblks) {
2396                 /*
2397                  * Do the reservations first. Unreservation can't fail.
2398                  */
2399                 ASSERT(delblksudq || delblksgdq);
2400                 ASSERT(unresudq || unresgdq);
2401                 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2402                                 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2403                                 flags | blkflags | prjflags)))
2404                         return (error);
2405                 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2406                                 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2407                                 blkflags);
2408         }
2409
2410         return (0);
2411 }
2412
2413 int
2414 xfs_qm_vop_rename_dqattach(
2415         struct xfs_inode        **i_tab)
2416 {
2417         struct xfs_mount        *mp = i_tab[0]->i_mount;
2418         int                     i;
2419
2420         if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2421                 return 0;
2422
2423         for (i = 0; (i < 4 && i_tab[i]); i++) {
2424                 struct xfs_inode        *ip = i_tab[i];
2425                 int                     error;
2426
2427                 /*
2428                  * Watch out for duplicate entries in the table.
2429                  */
2430                 if (i == 0 || ip != i_tab[i-1]) {
2431                         if (XFS_NOT_DQATTACHED(mp, ip)) {
2432                                 error = xfs_qm_dqattach(ip, 0);
2433                                 if (error)
2434                                         return error;
2435                         }
2436                 }
2437         }
2438         return 0;
2439 }
2440
2441 void
2442 xfs_qm_vop_create_dqattach(
2443         struct xfs_trans        *tp,
2444         struct xfs_inode        *ip,
2445         struct xfs_dquot        *udqp,
2446         struct xfs_dquot        *gdqp)
2447 {
2448         struct xfs_mount        *mp = tp->t_mountp;
2449
2450         if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2451                 return;
2452
2453         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2454         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2455
2456         if (udqp) {
2457                 xfs_dqlock(udqp);
2458                 XFS_DQHOLD(udqp);
2459                 xfs_dqunlock(udqp);
2460                 ASSERT(ip->i_udquot == NULL);
2461                 ip->i_udquot = udqp;
2462                 ASSERT(XFS_IS_UQUOTA_ON(mp));
2463                 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2464                 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2465         }
2466         if (gdqp) {
2467                 xfs_dqlock(gdqp);
2468                 XFS_DQHOLD(gdqp);
2469                 xfs_dqunlock(gdqp);
2470                 ASSERT(ip->i_gdquot == NULL);
2471                 ip->i_gdquot = gdqp;
2472                 ASSERT(XFS_IS_OQUOTA_ON(mp));
2473                 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2474                         ip->i_d.di_gid : xfs_get_projid(ip)) ==
2475                                 be32_to_cpu(gdqp->q_core.d_id));
2476                 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2477         }
2478 }
2479