xref: /linux/fs/xfs/xfs_qm.c (revision b43ab901d671e3e3cad425ea5e9a3c74e266dcdd)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_alloc.h"
27 #include "xfs_quota.h"
28 #include "xfs_mount.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_ialloc_btree.h"
31 #include "xfs_dinode.h"
32 #include "xfs_inode.h"
33 #include "xfs_ialloc.h"
34 #include "xfs_itable.h"
35 #include "xfs_rtalloc.h"
36 #include "xfs_error.h"
37 #include "xfs_bmap.h"
38 #include "xfs_attr.h"
39 #include "xfs_buf_item.h"
40 #include "xfs_trans_space.h"
41 #include "xfs_utils.h"
42 #include "xfs_qm.h"
43 #include "xfs_trace.h"
44 
45 /*
46  * The global quota manager. There is only one of these for the entire
47  * system, _not_ one per file system. XQM keeps track of the overall
48  * quota functionality, including maintaining the freelist and hash
49  * tables of dquots.
50  */
51 struct mutex	xfs_Gqm_lock;
52 struct xfs_qm	*xfs_Gqm;
53 uint		ndquot;
54 
55 kmem_zone_t	*qm_dqzone;
56 kmem_zone_t	*qm_dqtrxzone;
57 
58 STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
59 STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
60 
61 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
62 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
63 STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
64 
65 static struct shrinker xfs_qm_shaker = {
66 	.shrink = xfs_qm_shake,
67 	.seeks = DEFAULT_SEEKS,
68 };
69 
70 /*
71  * Initialize the XQM structure.
72  * Note that there is not one quota manager per file system.
73  */
74 STATIC struct xfs_qm *
75 xfs_Gqm_init(void)
76 {
77 	xfs_dqhash_t	*udqhash, *gdqhash;
78 	xfs_qm_t	*xqm;
79 	size_t		hsize;
80 	uint		i;
81 
82 	/*
83 	 * Initialize the dquot hash tables.
84 	 */
85 	udqhash = kmem_zalloc_greedy(&hsize,
86 				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
87 				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
88 	if (!udqhash)
89 		goto out;
90 
91 	gdqhash = kmem_zalloc_large(hsize);
92 	if (!gdqhash)
93 		goto out_free_udqhash;
94 
95 	hsize /= sizeof(xfs_dqhash_t);
96 	ndquot = hsize << 8;
97 
98 	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
99 	xqm->qm_dqhashmask = hsize - 1;
100 	xqm->qm_usr_dqhtable = udqhash;
101 	xqm->qm_grp_dqhtable = gdqhash;
102 	ASSERT(xqm->qm_usr_dqhtable != NULL);
103 	ASSERT(xqm->qm_grp_dqhtable != NULL);
104 
105 	for (i = 0; i < hsize; i++) {
106 		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
107 		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
108 	}
109 
110 	/*
111 	 * Freelist of all dquots of all file systems
112 	 */
113 	INIT_LIST_HEAD(&xqm->qm_dqfrlist);
114 	xqm->qm_dqfrlist_cnt = 0;
115 	mutex_init(&xqm->qm_dqfrlist_lock);
116 
117 	/*
118 	 * dquot zone. we register our own low-memory callback.
119 	 */
120 	if (!qm_dqzone) {
121 		xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
122 						"xfs_dquots");
123 		qm_dqzone = xqm->qm_dqzone;
124 	} else
125 		xqm->qm_dqzone = qm_dqzone;
126 
127 	register_shrinker(&xfs_qm_shaker);
128 
129 	/*
130 	 * The t_dqinfo portion of transactions.
131 	 */
132 	if (!qm_dqtrxzone) {
133 		xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
134 						   "xfs_dqtrx");
135 		qm_dqtrxzone = xqm->qm_dqtrxzone;
136 	} else
137 		xqm->qm_dqtrxzone = qm_dqtrxzone;
138 
139 	atomic_set(&xqm->qm_totaldquots, 0);
140 	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
141 	xqm->qm_nrefs = 0;
142 	return xqm;
143 
144  out_free_udqhash:
145 	kmem_free_large(udqhash);
146  out:
147 	return NULL;
148 }
149 
150 /*
151  * Destroy the global quota manager when its reference count goes to zero.
152  */
153 STATIC void
154 xfs_qm_destroy(
155 	struct xfs_qm	*xqm)
156 {
157 	int		hsize, i;
158 
159 	ASSERT(xqm != NULL);
160 	ASSERT(xqm->qm_nrefs == 0);
161 
162 	unregister_shrinker(&xfs_qm_shaker);
163 
164 	mutex_lock(&xqm->qm_dqfrlist_lock);
165 	ASSERT(list_empty(&xqm->qm_dqfrlist));
166 	mutex_unlock(&xqm->qm_dqfrlist_lock);
167 
168 	hsize = xqm->qm_dqhashmask + 1;
169 	for (i = 0; i < hsize; i++) {
170 		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
171 		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
172 	}
173 	kmem_free_large(xqm->qm_usr_dqhtable);
174 	kmem_free_large(xqm->qm_grp_dqhtable);
175 	xqm->qm_usr_dqhtable = NULL;
176 	xqm->qm_grp_dqhtable = NULL;
177 	xqm->qm_dqhashmask = 0;
178 
179 	kmem_free(xqm);
180 }
181 
182 /*
183  * Called at mount time to let XQM know that another file system is
184  * starting quotas. This isn't crucial information as the individual mount
185  * structures are pretty independent, but it helps the XQM keep a
186  * global view of what's going on.
187  */
188 /* ARGSUSED */
189 STATIC int
190 xfs_qm_hold_quotafs_ref(
191 	struct xfs_mount *mp)
192 {
193 	/*
194 	 * Need to lock the xfs_Gqm structure for things like this. For example,
195 	 * the structure could disappear between the entry to this routine and
196 	 * a HOLD operation if not locked.
197 	 */
198 	mutex_lock(&xfs_Gqm_lock);
199 
200 	if (!xfs_Gqm) {
201 		xfs_Gqm = xfs_Gqm_init();
202 		if (!xfs_Gqm) {
203 			mutex_unlock(&xfs_Gqm_lock);
204 			return ENOMEM;
205 		}
206 	}
207 
208 	/*
209 	 * We can keep a list of all filesystems with quotas mounted for
210 	 * debugging and statistical purposes, but ...
211 	 * Just take a reference and get out.
212 	 */
213 	xfs_Gqm->qm_nrefs++;
214 	mutex_unlock(&xfs_Gqm_lock);
215 
216 	return 0;
217 }
218 
219 
220 /*
221  * Release the reference that a filesystem took at mount time,
222  * so that we know when we need to destroy the entire quota manager.
223  */
224 /* ARGSUSED */
225 STATIC void
226 xfs_qm_rele_quotafs_ref(
227 	struct xfs_mount *mp)
228 {
229 	ASSERT(xfs_Gqm);
230 	ASSERT(xfs_Gqm->qm_nrefs > 0);
231 
232 	/*
233 	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
234 	 * be restarted.
235 	 */
236 	mutex_lock(&xfs_Gqm_lock);
237 	if (--xfs_Gqm->qm_nrefs == 0) {
238 		xfs_qm_destroy(xfs_Gqm);
239 		xfs_Gqm = NULL;
240 	}
241 	mutex_unlock(&xfs_Gqm_lock);
242 }
243 
244 /*
245  * Just destroy the quotainfo structure.
246  */
247 void
248 xfs_qm_unmount(
249 	struct xfs_mount	*mp)
250 {
251 	if (mp->m_quotainfo) {
252 		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
253 		xfs_qm_destroy_quotainfo(mp);
254 	}
255 }
256 
257 
258 /*
259  * This is called from xfs_mountfs to start quotas and initialize all
260  * necessary data structures like quotainfo.  This is also responsible for
261  * running a quotacheck as necessary.  We are guaranteed that the superblock
262  * is consistently read in at this point.
263  *
264  * If we fail here, the mount will continue with quota turned off. We don't
265  * need to inidicate success or failure at all.
266  */
267 void
268 xfs_qm_mount_quotas(
269 	xfs_mount_t	*mp)
270 {
271 	int		error = 0;
272 	uint		sbf;
273 
274 	/*
275 	 * If quotas on realtime volumes is not supported, we disable
276 	 * quotas immediately.
277 	 */
278 	if (mp->m_sb.sb_rextents) {
279 		xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
280 		mp->m_qflags = 0;
281 		goto write_changes;
282 	}
283 
284 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
285 
286 	/*
287 	 * Allocate the quotainfo structure inside the mount struct, and
288 	 * create quotainode(s), and change/rev superblock if necessary.
289 	 */
290 	error = xfs_qm_init_quotainfo(mp);
291 	if (error) {
292 		/*
293 		 * We must turn off quotas.
294 		 */
295 		ASSERT(mp->m_quotainfo == NULL);
296 		mp->m_qflags = 0;
297 		goto write_changes;
298 	}
299 	/*
300 	 * If any of the quotas are not consistent, do a quotacheck.
301 	 */
302 	if (XFS_QM_NEED_QUOTACHECK(mp)) {
303 		error = xfs_qm_quotacheck(mp);
304 		if (error) {
305 			/* Quotacheck failed and disabled quotas. */
306 			return;
307 		}
308 	}
309 	/*
310 	 * If one type of quotas is off, then it will lose its
311 	 * quotachecked status, since we won't be doing accounting for
312 	 * that type anymore.
313 	 */
314 	if (!XFS_IS_UQUOTA_ON(mp))
315 		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
316 	if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
317 		mp->m_qflags &= ~XFS_OQUOTA_CHKD;
318 
319  write_changes:
320 	/*
321 	 * We actually don't have to acquire the m_sb_lock at all.
322 	 * This can only be called from mount, and that's single threaded. XXX
323 	 */
324 	spin_lock(&mp->m_sb_lock);
325 	sbf = mp->m_sb.sb_qflags;
326 	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
327 	spin_unlock(&mp->m_sb_lock);
328 
329 	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
330 		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
331 			/*
332 			 * We could only have been turning quotas off.
333 			 * We aren't in very good shape actually because
334 			 * the incore structures are convinced that quotas are
335 			 * off, but the on disk superblock doesn't know that !
336 			 */
337 			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
338 			xfs_alert(mp, "%s: Superblock update failed!",
339 				__func__);
340 		}
341 	}
342 
343 	if (error) {
344 		xfs_warn(mp, "Failed to initialize disk quotas.");
345 		return;
346 	}
347 }
348 
349 /*
350  * Called from the vfsops layer.
351  */
352 void
353 xfs_qm_unmount_quotas(
354 	xfs_mount_t	*mp)
355 {
356 	/*
357 	 * Release the dquots that root inode, et al might be holding,
358 	 * before we flush quotas and blow away the quotainfo structure.
359 	 */
360 	ASSERT(mp->m_rootip);
361 	xfs_qm_dqdetach(mp->m_rootip);
362 	if (mp->m_rbmip)
363 		xfs_qm_dqdetach(mp->m_rbmip);
364 	if (mp->m_rsumip)
365 		xfs_qm_dqdetach(mp->m_rsumip);
366 
367 	/*
368 	 * Release the quota inodes.
369 	 */
370 	if (mp->m_quotainfo) {
371 		if (mp->m_quotainfo->qi_uquotaip) {
372 			IRELE(mp->m_quotainfo->qi_uquotaip);
373 			mp->m_quotainfo->qi_uquotaip = NULL;
374 		}
375 		if (mp->m_quotainfo->qi_gquotaip) {
376 			IRELE(mp->m_quotainfo->qi_gquotaip);
377 			mp->m_quotainfo->qi_gquotaip = NULL;
378 		}
379 	}
380 }
381 
382 /*
383  * Flush all dquots of the given file system to disk. The dquots are
384  * _not_ purged from memory here, just their data written to disk.
385  */
386 STATIC int
387 xfs_qm_dqflush_all(
388 	struct xfs_mount	*mp)
389 {
390 	struct xfs_quotainfo	*q = mp->m_quotainfo;
391 	int			recl;
392 	struct xfs_dquot	*dqp;
393 	int			error;
394 
395 	if (!q)
396 		return 0;
397 again:
398 	mutex_lock(&q->qi_dqlist_lock);
399 	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
400 		xfs_dqlock(dqp);
401 		if ((dqp->dq_flags & XFS_DQ_FREEING) ||
402 		    !XFS_DQ_IS_DIRTY(dqp)) {
403 			xfs_dqunlock(dqp);
404 			continue;
405 		}
406 
407 		/* XXX a sentinel would be better */
408 		recl = q->qi_dqreclaims;
409 		if (!xfs_dqflock_nowait(dqp)) {
410 			/*
411 			 * If we can't grab the flush lock then check
412 			 * to see if the dquot has been flushed delayed
413 			 * write.  If so, grab its buffer and send it
414 			 * out immediately.  We'll be able to acquire
415 			 * the flush lock when the I/O completes.
416 			 */
417 			xfs_dqflock_pushbuf_wait(dqp);
418 		}
419 		/*
420 		 * Let go of the mplist lock. We don't want to hold it
421 		 * across a disk write.
422 		 */
423 		mutex_unlock(&q->qi_dqlist_lock);
424 		error = xfs_qm_dqflush(dqp, 0);
425 		xfs_dqunlock(dqp);
426 		if (error)
427 			return error;
428 
429 		mutex_lock(&q->qi_dqlist_lock);
430 		if (recl != q->qi_dqreclaims) {
431 			mutex_unlock(&q->qi_dqlist_lock);
432 			/* XXX restart limit */
433 			goto again;
434 		}
435 	}
436 
437 	mutex_unlock(&q->qi_dqlist_lock);
438 	/* return ! busy */
439 	return 0;
440 }
441 
442 /*
443  * Release the group dquot pointers the user dquots may be
444  * carrying around as a hint. mplist is locked on entry and exit.
445  */
446 STATIC void
447 xfs_qm_detach_gdquots(
448 	struct xfs_mount	*mp)
449 {
450 	struct xfs_quotainfo	*q = mp->m_quotainfo;
451 	struct xfs_dquot	*dqp, *gdqp;
452 
453  again:
454 	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
455 	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
456 		xfs_dqlock(dqp);
457 		if (dqp->dq_flags & XFS_DQ_FREEING) {
458 			xfs_dqunlock(dqp);
459 			mutex_unlock(&q->qi_dqlist_lock);
460 			delay(1);
461 			mutex_lock(&q->qi_dqlist_lock);
462 			goto again;
463 		}
464 
465 		gdqp = dqp->q_gdquot;
466 		if (gdqp)
467 			dqp->q_gdquot = NULL;
468 		xfs_dqunlock(dqp);
469 
470 		if (gdqp)
471 			xfs_qm_dqrele(gdqp);
472 	}
473 }
474 
475 /*
476  * Go through all the incore dquots of this file system and take them
477  * off the mplist and hashlist, if the dquot type matches the dqtype
478  * parameter. This is used when turning off quota accounting for
479  * users and/or groups, as well as when the filesystem is unmounting.
480  */
481 STATIC int
482 xfs_qm_dqpurge_int(
483 	struct xfs_mount	*mp,
484 	uint			flags)
485 {
486 	struct xfs_quotainfo	*q = mp->m_quotainfo;
487 	struct xfs_dquot	*dqp, *n;
488 	uint			dqtype;
489 	int			nmisses = 0;
490 	LIST_HEAD		(dispose_list);
491 
492 	if (!q)
493 		return 0;
494 
495 	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
496 	dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
497 	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
498 
499 	mutex_lock(&q->qi_dqlist_lock);
500 
501 	/*
502 	 * In the first pass through all incore dquots of this filesystem,
503 	 * we release the group dquot pointers the user dquots may be
504 	 * carrying around as a hint. We need to do this irrespective of
505 	 * what's being turned off.
506 	 */
507 	xfs_qm_detach_gdquots(mp);
508 
509 	/*
510 	 * Try to get rid of all of the unwanted dquots.
511 	 */
512 	list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
513 		xfs_dqlock(dqp);
514 		if ((dqp->dq_flags & dqtype) != 0 &&
515 		    !(dqp->dq_flags & XFS_DQ_FREEING)) {
516 			if (dqp->q_nrefs == 0) {
517 				dqp->dq_flags |= XFS_DQ_FREEING;
518 				list_move_tail(&dqp->q_mplist, &dispose_list);
519 			} else
520 				nmisses++;
521 		}
522 		xfs_dqunlock(dqp);
523 	}
524 	mutex_unlock(&q->qi_dqlist_lock);
525 
526 	list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
527 		xfs_qm_dqpurge(dqp);
528 
529 	return nmisses;
530 }
531 
532 int
533 xfs_qm_dqpurge_all(
534 	xfs_mount_t	*mp,
535 	uint		flags)
536 {
537 	int		ndquots;
538 
539 	/*
540 	 * Purge the dquot cache.
541 	 * None of the dquots should really be busy at this point.
542 	 */
543 	if (mp->m_quotainfo) {
544 		while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
545 			delay(ndquots * 10);
546 		}
547 	}
548 	return 0;
549 }
550 
551 STATIC int
552 xfs_qm_dqattach_one(
553 	xfs_inode_t	*ip,
554 	xfs_dqid_t	id,
555 	uint		type,
556 	uint		doalloc,
557 	xfs_dquot_t	*udqhint, /* hint */
558 	xfs_dquot_t	**IO_idqpp)
559 {
560 	xfs_dquot_t	*dqp;
561 	int		error;
562 
563 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
564 	error = 0;
565 
566 	/*
567 	 * See if we already have it in the inode itself. IO_idqpp is
568 	 * &i_udquot or &i_gdquot. This made the code look weird, but
569 	 * made the logic a lot simpler.
570 	 */
571 	dqp = *IO_idqpp;
572 	if (dqp) {
573 		trace_xfs_dqattach_found(dqp);
574 		return 0;
575 	}
576 
577 	/*
578 	 * udqhint is the i_udquot field in inode, and is non-NULL only
579 	 * when the type arg is group/project. Its purpose is to save a
580 	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
581 	 * the user dquot.
582 	 */
583 	if (udqhint) {
584 		ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
585 		xfs_dqlock(udqhint);
586 
587 		/*
588 		 * No need to take dqlock to look at the id.
589 		 *
590 		 * The ID can't change until it gets reclaimed, and it won't
591 		 * be reclaimed as long as we have a ref from inode and we
592 		 * hold the ilock.
593 		 */
594 		dqp = udqhint->q_gdquot;
595 		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
596 			ASSERT(*IO_idqpp == NULL);
597 
598 			*IO_idqpp = xfs_qm_dqhold(dqp);
599 			xfs_dqunlock(udqhint);
600 			return 0;
601 		}
602 
603 		/*
604 		 * We can't hold a dquot lock when we call the dqget code.
605 		 * We'll deadlock in no time, because of (not conforming to)
606 		 * lock ordering - the inodelock comes before any dquot lock,
607 		 * and we may drop and reacquire the ilock in xfs_qm_dqget().
608 		 */
609 		xfs_dqunlock(udqhint);
610 	}
611 
612 	/*
613 	 * Find the dquot from somewhere. This bumps the
614 	 * reference count of dquot and returns it locked.
615 	 * This can return ENOENT if dquot didn't exist on
616 	 * disk and we didn't ask it to allocate;
617 	 * ESRCH if quotas got turned off suddenly.
618 	 */
619 	error = xfs_qm_dqget(ip->i_mount, ip, id, type,
620 			     doalloc | XFS_QMOPT_DOWARN, &dqp);
621 	if (error)
622 		return error;
623 
624 	trace_xfs_dqattach_get(dqp);
625 
626 	/*
627 	 * dqget may have dropped and re-acquired the ilock, but it guarantees
628 	 * that the dquot returned is the one that should go in the inode.
629 	 */
630 	*IO_idqpp = dqp;
631 	xfs_dqunlock(dqp);
632 	return 0;
633 }
634 
635 
636 /*
637  * Given a udquot and gdquot, attach a ptr to the group dquot in the
638  * udquot as a hint for future lookups.
639  */
640 STATIC void
641 xfs_qm_dqattach_grouphint(
642 	xfs_dquot_t	*udq,
643 	xfs_dquot_t	*gdq)
644 {
645 	xfs_dquot_t	*tmp;
646 
647 	xfs_dqlock(udq);
648 
649 	tmp = udq->q_gdquot;
650 	if (tmp) {
651 		if (tmp == gdq)
652 			goto done;
653 
654 		udq->q_gdquot = NULL;
655 		xfs_qm_dqrele(tmp);
656 	}
657 
658 	udq->q_gdquot = xfs_qm_dqhold(gdq);
659 done:
660 	xfs_dqunlock(udq);
661 }
662 
663 
664 /*
665  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
666  * into account.
667  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
668  * Inode may get unlocked and relocked in here, and the caller must deal with
669  * the consequences.
670  */
671 int
672 xfs_qm_dqattach_locked(
673 	xfs_inode_t	*ip,
674 	uint		flags)
675 {
676 	xfs_mount_t	*mp = ip->i_mount;
677 	uint		nquotas = 0;
678 	int		error = 0;
679 
680 	if (!XFS_IS_QUOTA_RUNNING(mp) ||
681 	    !XFS_IS_QUOTA_ON(mp) ||
682 	    !XFS_NOT_DQATTACHED(mp, ip) ||
683 	    ip->i_ino == mp->m_sb.sb_uquotino ||
684 	    ip->i_ino == mp->m_sb.sb_gquotino)
685 		return 0;
686 
687 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
688 
689 	if (XFS_IS_UQUOTA_ON(mp)) {
690 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
691 						flags & XFS_QMOPT_DQALLOC,
692 						NULL, &ip->i_udquot);
693 		if (error)
694 			goto done;
695 		nquotas++;
696 	}
697 
698 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
699 	if (XFS_IS_OQUOTA_ON(mp)) {
700 		error = XFS_IS_GQUOTA_ON(mp) ?
701 			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
702 						flags & XFS_QMOPT_DQALLOC,
703 						ip->i_udquot, &ip->i_gdquot) :
704 			xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
705 						flags & XFS_QMOPT_DQALLOC,
706 						ip->i_udquot, &ip->i_gdquot);
707 		/*
708 		 * Don't worry about the udquot that we may have
709 		 * attached above. It'll get detached, if not already.
710 		 */
711 		if (error)
712 			goto done;
713 		nquotas++;
714 	}
715 
716 	/*
717 	 * Attach this group quota to the user quota as a hint.
718 	 * This WON'T, in general, result in a thrash.
719 	 */
720 	if (nquotas == 2) {
721 		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
722 		ASSERT(ip->i_udquot);
723 		ASSERT(ip->i_gdquot);
724 
725 		/*
726 		 * We do not have i_udquot locked at this point, but this check
727 		 * is OK since we don't depend on the i_gdquot to be accurate
728 		 * 100% all the time. It is just a hint, and this will
729 		 * succeed in general.
730 		 */
731 		if (ip->i_udquot->q_gdquot != ip->i_gdquot)
732 			xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
733 	}
734 
735  done:
736 #ifdef DEBUG
737 	if (!error) {
738 		if (XFS_IS_UQUOTA_ON(mp))
739 			ASSERT(ip->i_udquot);
740 		if (XFS_IS_OQUOTA_ON(mp))
741 			ASSERT(ip->i_gdquot);
742 	}
743 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
744 #endif
745 	return error;
746 }
747 
748 int
749 xfs_qm_dqattach(
750 	struct xfs_inode	*ip,
751 	uint			flags)
752 {
753 	int			error;
754 
755 	xfs_ilock(ip, XFS_ILOCK_EXCL);
756 	error = xfs_qm_dqattach_locked(ip, flags);
757 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
758 
759 	return error;
760 }
761 
762 /*
763  * Release dquots (and their references) if any.
764  * The inode should be locked EXCL except when this's called by
765  * xfs_ireclaim.
766  */
767 void
768 xfs_qm_dqdetach(
769 	xfs_inode_t	*ip)
770 {
771 	if (!(ip->i_udquot || ip->i_gdquot))
772 		return;
773 
774 	trace_xfs_dquot_dqdetach(ip);
775 
776 	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
777 	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
778 	if (ip->i_udquot) {
779 		xfs_qm_dqrele(ip->i_udquot);
780 		ip->i_udquot = NULL;
781 	}
782 	if (ip->i_gdquot) {
783 		xfs_qm_dqrele(ip->i_gdquot);
784 		ip->i_gdquot = NULL;
785 	}
786 }
787 
788 /*
789  * The hash chains and the mplist use the same xfs_dqhash structure as
790  * their list head, but we can take the mplist qh_lock and one of the
791  * hash qh_locks at the same time without any problem as they aren't
792  * related.
793  */
794 static struct lock_class_key xfs_quota_mplist_class;
795 
796 /*
797  * This initializes all the quota information that's kept in the
798  * mount structure
799  */
800 STATIC int
801 xfs_qm_init_quotainfo(
802 	xfs_mount_t	*mp)
803 {
804 	xfs_quotainfo_t *qinf;
805 	int		error;
806 	xfs_dquot_t	*dqp;
807 
808 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
809 
810 	/*
811 	 * Tell XQM that we exist as soon as possible.
812 	 */
813 	if ((error = xfs_qm_hold_quotafs_ref(mp))) {
814 		return error;
815 	}
816 
817 	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
818 
819 	/*
820 	 * See if quotainodes are setup, and if not, allocate them,
821 	 * and change the superblock accordingly.
822 	 */
823 	if ((error = xfs_qm_init_quotainos(mp))) {
824 		kmem_free(qinf);
825 		mp->m_quotainfo = NULL;
826 		return error;
827 	}
828 
829 	INIT_LIST_HEAD(&qinf->qi_dqlist);
830 	mutex_init(&qinf->qi_dqlist_lock);
831 	lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
832 
833 	qinf->qi_dqreclaims = 0;
834 
835 	/* mutex used to serialize quotaoffs */
836 	mutex_init(&qinf->qi_quotaofflock);
837 
838 	/* Precalc some constants */
839 	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
840 	ASSERT(qinf->qi_dqchunklen);
841 	qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
842 	do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
843 
844 	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
845 
846 	/*
847 	 * We try to get the limits from the superuser's limits fields.
848 	 * This is quite hacky, but it is standard quota practice.
849 	 *
850 	 * We look at the USR dquot with id == 0 first, but if user quotas
851 	 * are not enabled we goto the GRP dquot with id == 0.
852 	 * We don't really care to keep separate default limits for user
853 	 * and group quotas, at least not at this point.
854 	 *
855 	 * Since we may not have done a quotacheck by this point, just read
856 	 * the dquot without attaching it to any hashtables or lists.
857 	 */
858 	error = xfs_qm_dqread(mp, 0,
859 			XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
860 			 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
861 			  XFS_DQ_PROJ),
862 			XFS_QMOPT_DOWARN, &dqp);
863 	if (!error) {
864 		xfs_disk_dquot_t	*ddqp = &dqp->q_core;
865 
866 		/*
867 		 * The warnings and timers set the grace period given to
868 		 * a user or group before he or she can not perform any
869 		 * more writing. If it is zero, a default is used.
870 		 */
871 		qinf->qi_btimelimit = ddqp->d_btimer ?
872 			be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
873 		qinf->qi_itimelimit = ddqp->d_itimer ?
874 			be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
875 		qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
876 			be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
877 		qinf->qi_bwarnlimit = ddqp->d_bwarns ?
878 			be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
879 		qinf->qi_iwarnlimit = ddqp->d_iwarns ?
880 			be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
881 		qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
882 			be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
883 		qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
884 		qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
885 		qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
886 		qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
887 		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
888 		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
889 
890 		xfs_qm_dqdestroy(dqp);
891 	} else {
892 		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
893 		qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
894 		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
895 		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
896 		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
897 		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
898 	}
899 
900 	return 0;
901 }
902 
903 
904 /*
905  * Gets called when unmounting a filesystem or when all quotas get
906  * turned off.
907  * This purges the quota inodes, destroys locks and frees itself.
908  */
909 void
910 xfs_qm_destroy_quotainfo(
911 	xfs_mount_t	*mp)
912 {
913 	xfs_quotainfo_t *qi;
914 
915 	qi = mp->m_quotainfo;
916 	ASSERT(qi != NULL);
917 	ASSERT(xfs_Gqm != NULL);
918 
919 	/*
920 	 * Release the reference that XQM kept, so that we know
921 	 * when the XQM structure should be freed. We cannot assume
922 	 * that xfs_Gqm is non-null after this point.
923 	 */
924 	xfs_qm_rele_quotafs_ref(mp);
925 
926 	ASSERT(list_empty(&qi->qi_dqlist));
927 	mutex_destroy(&qi->qi_dqlist_lock);
928 
929 	if (qi->qi_uquotaip) {
930 		IRELE(qi->qi_uquotaip);
931 		qi->qi_uquotaip = NULL; /* paranoia */
932 	}
933 	if (qi->qi_gquotaip) {
934 		IRELE(qi->qi_gquotaip);
935 		qi->qi_gquotaip = NULL;
936 	}
937 	mutex_destroy(&qi->qi_quotaofflock);
938 	kmem_free(qi);
939 	mp->m_quotainfo = NULL;
940 }
941 
942 
943 
944 /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
945 
946 /* ARGSUSED */
947 STATIC void
948 xfs_qm_list_init(
949 	xfs_dqlist_t	*list,
950 	char		*str,
951 	int		n)
952 {
953 	mutex_init(&list->qh_lock);
954 	INIT_LIST_HEAD(&list->qh_list);
955 	list->qh_version = 0;
956 	list->qh_nelems = 0;
957 }
958 
959 STATIC void
960 xfs_qm_list_destroy(
961 	xfs_dqlist_t	*list)
962 {
963 	mutex_destroy(&(list->qh_lock));
964 }
965 
966 /*
967  * Create an inode and return with a reference already taken, but unlocked
968  * This is how we create quota inodes
969  */
970 STATIC int
971 xfs_qm_qino_alloc(
972 	xfs_mount_t	*mp,
973 	xfs_inode_t	**ip,
974 	__int64_t	sbfields,
975 	uint		flags)
976 {
977 	xfs_trans_t	*tp;
978 	int		error;
979 	int		committed;
980 
981 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
982 	if ((error = xfs_trans_reserve(tp,
983 				      XFS_QM_QINOCREATE_SPACE_RES(mp),
984 				      XFS_CREATE_LOG_RES(mp), 0,
985 				      XFS_TRANS_PERM_LOG_RES,
986 				      XFS_CREATE_LOG_COUNT))) {
987 		xfs_trans_cancel(tp, 0);
988 		return error;
989 	}
990 
991 	error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
992 	if (error) {
993 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
994 				 XFS_TRANS_ABORT);
995 		return error;
996 	}
997 
998 	/*
999 	 * Make the changes in the superblock, and log those too.
1000 	 * sbfields arg may contain fields other than *QUOTINO;
1001 	 * VERSIONNUM for example.
1002 	 */
1003 	spin_lock(&mp->m_sb_lock);
1004 	if (flags & XFS_QMOPT_SBVERSION) {
1005 		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1006 		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1007 				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1008 		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1009 			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1010 
1011 		xfs_sb_version_addquota(&mp->m_sb);
1012 		mp->m_sb.sb_uquotino = NULLFSINO;
1013 		mp->m_sb.sb_gquotino = NULLFSINO;
1014 
1015 		/* qflags will get updated _after_ quotacheck */
1016 		mp->m_sb.sb_qflags = 0;
1017 	}
1018 	if (flags & XFS_QMOPT_UQUOTA)
1019 		mp->m_sb.sb_uquotino = (*ip)->i_ino;
1020 	else
1021 		mp->m_sb.sb_gquotino = (*ip)->i_ino;
1022 	spin_unlock(&mp->m_sb_lock);
1023 	xfs_mod_sb(tp, sbfields);
1024 
1025 	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1026 		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
1027 		return error;
1028 	}
1029 	return 0;
1030 }
1031 
1032 
1033 STATIC void
1034 xfs_qm_reset_dqcounts(
1035 	xfs_mount_t	*mp,
1036 	xfs_buf_t	*bp,
1037 	xfs_dqid_t	id,
1038 	uint		type)
1039 {
1040 	xfs_disk_dquot_t	*ddq;
1041 	int			j;
1042 
1043 	trace_xfs_reset_dqcounts(bp, _RET_IP_);
1044 
1045 	/*
1046 	 * Reset all counters and timers. They'll be
1047 	 * started afresh by xfs_qm_quotacheck.
1048 	 */
1049 #ifdef DEBUG
1050 	j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1051 	do_div(j, sizeof(xfs_dqblk_t));
1052 	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
1053 #endif
1054 	ddq = bp->b_addr;
1055 	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
1056 		/*
1057 		 * Do a sanity check, and if needed, repair the dqblk. Don't
1058 		 * output any warnings because it's perfectly possible to
1059 		 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1060 		 */
1061 		(void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1062 				      "xfs_quotacheck");
1063 		ddq->d_bcount = 0;
1064 		ddq->d_icount = 0;
1065 		ddq->d_rtbcount = 0;
1066 		ddq->d_btimer = 0;
1067 		ddq->d_itimer = 0;
1068 		ddq->d_rtbtimer = 0;
1069 		ddq->d_bwarns = 0;
1070 		ddq->d_iwarns = 0;
1071 		ddq->d_rtbwarns = 0;
1072 		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1073 	}
1074 }
1075 
1076 STATIC int
1077 xfs_qm_dqiter_bufs(
1078 	xfs_mount_t	*mp,
1079 	xfs_dqid_t	firstid,
1080 	xfs_fsblock_t	bno,
1081 	xfs_filblks_t	blkcnt,
1082 	uint		flags)
1083 {
1084 	xfs_buf_t	*bp;
1085 	int		error;
1086 	int		type;
1087 
1088 	ASSERT(blkcnt > 0);
1089 	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1090 		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1091 	error = 0;
1092 
1093 	/*
1094 	 * Blkcnt arg can be a very big number, and might even be
1095 	 * larger than the log itself. So, we have to break it up into
1096 	 * manageable-sized transactions.
1097 	 * Note that we don't start a permanent transaction here; we might
1098 	 * not be able to get a log reservation for the whole thing up front,
1099 	 * and we don't really care to either, because we just discard
1100 	 * everything if we were to crash in the middle of this loop.
1101 	 */
1102 	while (blkcnt--) {
1103 		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1104 			      XFS_FSB_TO_DADDR(mp, bno),
1105 			      mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1106 		if (error)
1107 			break;
1108 
1109 		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1110 		xfs_buf_delwri_queue(bp);
1111 		xfs_buf_relse(bp);
1112 		/*
1113 		 * goto the next block.
1114 		 */
1115 		bno++;
1116 		firstid += mp->m_quotainfo->qi_dqperchunk;
1117 	}
1118 	return error;
1119 }
1120 
1121 /*
1122  * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1123  * caller supplied function for every chunk of dquots that we find.
1124  */
1125 STATIC int
1126 xfs_qm_dqiterate(
1127 	xfs_mount_t	*mp,
1128 	xfs_inode_t	*qip,
1129 	uint		flags)
1130 {
1131 	xfs_bmbt_irec_t		*map;
1132 	int			i, nmaps;	/* number of map entries */
1133 	int			error;		/* return value */
1134 	xfs_fileoff_t		lblkno;
1135 	xfs_filblks_t		maxlblkcnt;
1136 	xfs_dqid_t		firstid;
1137 	xfs_fsblock_t		rablkno;
1138 	xfs_filblks_t		rablkcnt;
1139 
1140 	error = 0;
1141 	/*
1142 	 * This looks racy, but we can't keep an inode lock across a
1143 	 * trans_reserve. But, this gets called during quotacheck, and that
1144 	 * happens only at mount time which is single threaded.
1145 	 */
1146 	if (qip->i_d.di_nblocks == 0)
1147 		return 0;
1148 
1149 	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1150 
1151 	lblkno = 0;
1152 	maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1153 	do {
1154 		nmaps = XFS_DQITER_MAP_SIZE;
1155 		/*
1156 		 * We aren't changing the inode itself. Just changing
1157 		 * some of its data. No new blocks are added here, and
1158 		 * the inode is never added to the transaction.
1159 		 */
1160 		xfs_ilock(qip, XFS_ILOCK_SHARED);
1161 		error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
1162 				       map, &nmaps, 0);
1163 		xfs_iunlock(qip, XFS_ILOCK_SHARED);
1164 		if (error)
1165 			break;
1166 
1167 		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1168 		for (i = 0; i < nmaps; i++) {
1169 			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1170 			ASSERT(map[i].br_blockcount);
1171 
1172 
1173 			lblkno += map[i].br_blockcount;
1174 
1175 			if (map[i].br_startblock == HOLESTARTBLOCK)
1176 				continue;
1177 
1178 			firstid = (xfs_dqid_t) map[i].br_startoff *
1179 				mp->m_quotainfo->qi_dqperchunk;
1180 			/*
1181 			 * Do a read-ahead on the next extent.
1182 			 */
1183 			if ((i+1 < nmaps) &&
1184 			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1185 				rablkcnt =  map[i+1].br_blockcount;
1186 				rablkno = map[i+1].br_startblock;
1187 				while (rablkcnt--) {
1188 					xfs_buf_readahead(mp->m_ddev_targp,
1189 					       XFS_FSB_TO_DADDR(mp, rablkno),
1190 					       mp->m_quotainfo->qi_dqchunklen);
1191 					rablkno++;
1192 				}
1193 			}
1194 			/*
1195 			 * Iterate thru all the blks in the extent and
1196 			 * reset the counters of all the dquots inside them.
1197 			 */
1198 			if ((error = xfs_qm_dqiter_bufs(mp,
1199 						       firstid,
1200 						       map[i].br_startblock,
1201 						       map[i].br_blockcount,
1202 						       flags))) {
1203 				break;
1204 			}
1205 		}
1206 
1207 		if (error)
1208 			break;
1209 	} while (nmaps > 0);
1210 
1211 	kmem_free(map);
1212 
1213 	return error;
1214 }
1215 
1216 /*
1217  * Called by dqusage_adjust in doing a quotacheck.
1218  *
1219  * Given the inode, and a dquot id this updates both the incore dqout as well
1220  * as the buffer copy. This is so that once the quotacheck is done, we can
1221  * just log all the buffers, as opposed to logging numerous updates to
1222  * individual dquots.
1223  */
1224 STATIC int
1225 xfs_qm_quotacheck_dqadjust(
1226 	struct xfs_inode	*ip,
1227 	xfs_dqid_t		id,
1228 	uint			type,
1229 	xfs_qcnt_t		nblks,
1230 	xfs_qcnt_t		rtblks)
1231 {
1232 	struct xfs_mount	*mp = ip->i_mount;
1233 	struct xfs_dquot	*dqp;
1234 	int			error;
1235 
1236 	error = xfs_qm_dqget(mp, ip, id, type,
1237 			     XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
1238 	if (error) {
1239 		/*
1240 		 * Shouldn't be able to turn off quotas here.
1241 		 */
1242 		ASSERT(error != ESRCH);
1243 		ASSERT(error != ENOENT);
1244 		return error;
1245 	}
1246 
1247 	trace_xfs_dqadjust(dqp);
1248 
1249 	/*
1250 	 * Adjust the inode count and the block count to reflect this inode's
1251 	 * resource usage.
1252 	 */
1253 	be64_add_cpu(&dqp->q_core.d_icount, 1);
1254 	dqp->q_res_icount++;
1255 	if (nblks) {
1256 		be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1257 		dqp->q_res_bcount += nblks;
1258 	}
1259 	if (rtblks) {
1260 		be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1261 		dqp->q_res_rtbcount += rtblks;
1262 	}
1263 
1264 	/*
1265 	 * Set default limits, adjust timers (since we changed usages)
1266 	 *
1267 	 * There are no timers for the default values set in the root dquot.
1268 	 */
1269 	if (dqp->q_core.d_id) {
1270 		xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
1271 		xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1272 	}
1273 
1274 	dqp->dq_flags |= XFS_DQ_DIRTY;
1275 	xfs_qm_dqput(dqp);
1276 	return 0;
1277 }
1278 
1279 STATIC int
1280 xfs_qm_get_rtblks(
1281 	xfs_inode_t	*ip,
1282 	xfs_qcnt_t	*O_rtblks)
1283 {
1284 	xfs_filblks_t	rtblks;			/* total rt blks */
1285 	xfs_extnum_t	idx;			/* extent record index */
1286 	xfs_ifork_t	*ifp;			/* inode fork pointer */
1287 	xfs_extnum_t	nextents;		/* number of extent entries */
1288 	int		error;
1289 
1290 	ASSERT(XFS_IS_REALTIME_INODE(ip));
1291 	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1292 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1293 		if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1294 			return error;
1295 	}
1296 	rtblks = 0;
1297 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1298 	for (idx = 0; idx < nextents; idx++)
1299 		rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1300 	*O_rtblks = (xfs_qcnt_t)rtblks;
1301 	return 0;
1302 }
1303 
1304 /*
1305  * callback routine supplied to bulkstat(). Given an inumber, find its
1306  * dquots and update them to account for resources taken by that inode.
1307  */
1308 /* ARGSUSED */
1309 STATIC int
1310 xfs_qm_dqusage_adjust(
1311 	xfs_mount_t	*mp,		/* mount point for filesystem */
1312 	xfs_ino_t	ino,		/* inode number to get data for */
1313 	void		__user *buffer,	/* not used */
1314 	int		ubsize,		/* not used */
1315 	int		*ubused,	/* not used */
1316 	int		*res)		/* result code value */
1317 {
1318 	xfs_inode_t	*ip;
1319 	xfs_qcnt_t	nblks, rtblks = 0;
1320 	int		error;
1321 
1322 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1323 
1324 	/*
1325 	 * rootino must have its resources accounted for, not so with the quota
1326 	 * inodes.
1327 	 */
1328 	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1329 		*res = BULKSTAT_RV_NOTHING;
1330 		return XFS_ERROR(EINVAL);
1331 	}
1332 
1333 	/*
1334 	 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1335 	 * interface expects the inode to be exclusively locked because that's
1336 	 * the case in all other instances. It's OK that we do this because
1337 	 * quotacheck is done only at mount time.
1338 	 */
1339 	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
1340 	if (error) {
1341 		*res = BULKSTAT_RV_NOTHING;
1342 		return error;
1343 	}
1344 
1345 	ASSERT(ip->i_delayed_blks == 0);
1346 
1347 	if (XFS_IS_REALTIME_INODE(ip)) {
1348 		/*
1349 		 * Walk thru the extent list and count the realtime blocks.
1350 		 */
1351 		error = xfs_qm_get_rtblks(ip, &rtblks);
1352 		if (error)
1353 			goto error0;
1354 	}
1355 
1356 	nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1357 
1358 	/*
1359 	 * Add the (disk blocks and inode) resources occupied by this
1360 	 * inode to its dquots. We do this adjustment in the incore dquot,
1361 	 * and also copy the changes to its buffer.
1362 	 * We don't care about putting these changes in a transaction
1363 	 * envelope because if we crash in the middle of a 'quotacheck'
1364 	 * we have to start from the beginning anyway.
1365 	 * Once we're done, we'll log all the dquot bufs.
1366 	 *
1367 	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1368 	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1369 	 */
1370 	if (XFS_IS_UQUOTA_ON(mp)) {
1371 		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
1372 						   XFS_DQ_USER, nblks, rtblks);
1373 		if (error)
1374 			goto error0;
1375 	}
1376 
1377 	if (XFS_IS_GQUOTA_ON(mp)) {
1378 		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
1379 						   XFS_DQ_GROUP, nblks, rtblks);
1380 		if (error)
1381 			goto error0;
1382 	}
1383 
1384 	if (XFS_IS_PQUOTA_ON(mp)) {
1385 		error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
1386 						   XFS_DQ_PROJ, nblks, rtblks);
1387 		if (error)
1388 			goto error0;
1389 	}
1390 
1391 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1392 	IRELE(ip);
1393 	*res = BULKSTAT_RV_DIDONE;
1394 	return 0;
1395 
1396 error0:
1397 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1398 	IRELE(ip);
1399 	*res = BULKSTAT_RV_GIVEUP;
1400 	return error;
1401 }
1402 
1403 /*
1404  * Walk thru all the filesystem inodes and construct a consistent view
1405  * of the disk quota world. If the quotacheck fails, disable quotas.
1406  */
1407 int
1408 xfs_qm_quotacheck(
1409 	xfs_mount_t	*mp)
1410 {
1411 	int		done, count, error;
1412 	xfs_ino_t	lastino;
1413 	size_t		structsz;
1414 	xfs_inode_t	*uip, *gip;
1415 	uint		flags;
1416 
1417 	count = INT_MAX;
1418 	structsz = 1;
1419 	lastino = 0;
1420 	flags = 0;
1421 
1422 	ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1423 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1424 
1425 	/*
1426 	 * There should be no cached dquots. The (simplistic) quotacheck
1427 	 * algorithm doesn't like that.
1428 	 */
1429 	ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1430 
1431 	xfs_notice(mp, "Quotacheck needed: Please wait.");
1432 
1433 	/*
1434 	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1435 	 * their counters to zero. We need a clean slate.
1436 	 * We don't log our changes till later.
1437 	 */
1438 	uip = mp->m_quotainfo->qi_uquotaip;
1439 	if (uip) {
1440 		error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
1441 		if (error)
1442 			goto error_return;
1443 		flags |= XFS_UQUOTA_CHKD;
1444 	}
1445 
1446 	gip = mp->m_quotainfo->qi_gquotaip;
1447 	if (gip) {
1448 		error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1449 					XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1450 		if (error)
1451 			goto error_return;
1452 		flags |= XFS_OQUOTA_CHKD;
1453 	}
1454 
1455 	do {
1456 		/*
1457 		 * Iterate thru all the inodes in the file system,
1458 		 * adjusting the corresponding dquot counters in core.
1459 		 */
1460 		error = xfs_bulkstat(mp, &lastino, &count,
1461 				     xfs_qm_dqusage_adjust,
1462 				     structsz, NULL, &done);
1463 		if (error)
1464 			break;
1465 
1466 	} while (!done);
1467 
1468 	/*
1469 	 * We've made all the changes that we need to make incore.
1470 	 * Flush them down to disk buffers if everything was updated
1471 	 * successfully.
1472 	 */
1473 	if (!error)
1474 		error = xfs_qm_dqflush_all(mp);
1475 
1476 	/*
1477 	 * We can get this error if we couldn't do a dquot allocation inside
1478 	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1479 	 * dirty dquots that might be cached, we just want to get rid of them
1480 	 * and turn quotaoff. The dquots won't be attached to any of the inodes
1481 	 * at this point (because we intentionally didn't in dqget_noattach).
1482 	 */
1483 	if (error) {
1484 		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
1485 		goto error_return;
1486 	}
1487 
1488 	/*
1489 	 * We didn't log anything, because if we crashed, we'll have to
1490 	 * start the quotacheck from scratch anyway. However, we must make
1491 	 * sure that our dquot changes are secure before we put the
1492 	 * quotacheck'd stamp on the superblock. So, here we do a synchronous
1493 	 * flush.
1494 	 */
1495 	xfs_flush_buftarg(mp->m_ddev_targp, 1);
1496 
1497 	/*
1498 	 * If one type of quotas is off, then it will lose its
1499 	 * quotachecked status, since we won't be doing accounting for
1500 	 * that type anymore.
1501 	 */
1502 	mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1503 	mp->m_qflags |= flags;
1504 
1505  error_return:
1506 	if (error) {
1507 		xfs_warn(mp,
1508 	"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1509 			error);
1510 		/*
1511 		 * We must turn off quotas.
1512 		 */
1513 		ASSERT(mp->m_quotainfo != NULL);
1514 		ASSERT(xfs_Gqm != NULL);
1515 		xfs_qm_destroy_quotainfo(mp);
1516 		if (xfs_mount_reset_sbqflags(mp)) {
1517 			xfs_warn(mp,
1518 				"Quotacheck: Failed to reset quota flags.");
1519 		}
1520 	} else
1521 		xfs_notice(mp, "Quotacheck: Done.");
1522 	return (error);
1523 }
1524 
1525 /*
1526  * This is called after the superblock has been read in and we're ready to
1527  * iget the quota inodes.
1528  */
1529 STATIC int
1530 xfs_qm_init_quotainos(
1531 	xfs_mount_t	*mp)
1532 {
1533 	xfs_inode_t	*uip, *gip;
1534 	int		error;
1535 	__int64_t	sbflags;
1536 	uint		flags;
1537 
1538 	ASSERT(mp->m_quotainfo);
1539 	uip = gip = NULL;
1540 	sbflags = 0;
1541 	flags = 0;
1542 
1543 	/*
1544 	 * Get the uquota and gquota inodes
1545 	 */
1546 	if (xfs_sb_version_hasquota(&mp->m_sb)) {
1547 		if (XFS_IS_UQUOTA_ON(mp) &&
1548 		    mp->m_sb.sb_uquotino != NULLFSINO) {
1549 			ASSERT(mp->m_sb.sb_uquotino > 0);
1550 			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1551 					     0, 0, &uip)))
1552 				return XFS_ERROR(error);
1553 		}
1554 		if (XFS_IS_OQUOTA_ON(mp) &&
1555 		    mp->m_sb.sb_gquotino != NULLFSINO) {
1556 			ASSERT(mp->m_sb.sb_gquotino > 0);
1557 			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1558 					     0, 0, &gip))) {
1559 				if (uip)
1560 					IRELE(uip);
1561 				return XFS_ERROR(error);
1562 			}
1563 		}
1564 	} else {
1565 		flags |= XFS_QMOPT_SBVERSION;
1566 		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1567 			    XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1568 	}
1569 
1570 	/*
1571 	 * Create the two inodes, if they don't exist already. The changes
1572 	 * made above will get added to a transaction and logged in one of
1573 	 * the qino_alloc calls below.  If the device is readonly,
1574 	 * temporarily switch to read-write to do this.
1575 	 */
1576 	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1577 		if ((error = xfs_qm_qino_alloc(mp, &uip,
1578 					      sbflags | XFS_SB_UQUOTINO,
1579 					      flags | XFS_QMOPT_UQUOTA)))
1580 			return XFS_ERROR(error);
1581 
1582 		flags &= ~XFS_QMOPT_SBVERSION;
1583 	}
1584 	if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1585 		flags |= (XFS_IS_GQUOTA_ON(mp) ?
1586 				XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1587 		error = xfs_qm_qino_alloc(mp, &gip,
1588 					  sbflags | XFS_SB_GQUOTINO, flags);
1589 		if (error) {
1590 			if (uip)
1591 				IRELE(uip);
1592 
1593 			return XFS_ERROR(error);
1594 		}
1595 	}
1596 
1597 	mp->m_quotainfo->qi_uquotaip = uip;
1598 	mp->m_quotainfo->qi_gquotaip = gip;
1599 
1600 	return 0;
1601 }
1602 
1603 
1604 
1605 /*
1606  * Pop the least recently used dquot off the freelist and recycle it.
1607  */
1608 STATIC struct xfs_dquot *
1609 xfs_qm_dqreclaim_one(void)
1610 {
1611 	struct xfs_dquot	*dqp;
1612 	int			restarts = 0;
1613 
1614 	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1615 restart:
1616 	list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
1617 		struct xfs_mount *mp = dqp->q_mount;
1618 
1619 		if (!xfs_dqlock_nowait(dqp))
1620 			continue;
1621 
1622 		/*
1623 		 * This dquot has already been grabbed by dqlookup.
1624 		 * Remove it from the freelist and try again.
1625 		 */
1626 		if (dqp->q_nrefs) {
1627 			trace_xfs_dqreclaim_want(dqp);
1628 			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1629 
1630 			list_del_init(&dqp->q_freelist);
1631 			xfs_Gqm->qm_dqfrlist_cnt--;
1632 			restarts++;
1633 			goto dqunlock;
1634 		}
1635 
1636 		ASSERT(dqp->q_hash);
1637 		ASSERT(!list_empty(&dqp->q_mplist));
1638 
1639 		/*
1640 		 * Try to grab the flush lock. If this dquot is in the process
1641 		 * of getting flushed to disk, we don't want to reclaim it.
1642 		 */
1643 		if (!xfs_dqflock_nowait(dqp))
1644 			goto dqunlock;
1645 
1646 		/*
1647 		 * We have the flush lock so we know that this is not in the
1648 		 * process of being flushed. So, if this is dirty, flush it
1649 		 * DELWRI so that we don't get a freelist infested with
1650 		 * dirty dquots.
1651 		 */
1652 		if (XFS_DQ_IS_DIRTY(dqp)) {
1653 			int	error;
1654 
1655 			trace_xfs_dqreclaim_dirty(dqp);
1656 
1657 			/*
1658 			 * We flush it delayed write, so don't bother
1659 			 * releasing the freelist lock.
1660 			 */
1661 			error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
1662 			if (error) {
1663 				xfs_warn(mp, "%s: dquot %p flush failed",
1664 					__func__, dqp);
1665 			}
1666 			goto dqunlock;
1667 		}
1668 		xfs_dqfunlock(dqp);
1669 
1670 		/*
1671 		 * Prevent lookup now that we are going to reclaim the dquot.
1672 		 * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
1673 		 * thus we can drop the lock now.
1674 		 */
1675 		dqp->dq_flags |= XFS_DQ_FREEING;
1676 		xfs_dqunlock(dqp);
1677 
1678 		mutex_lock(&dqp->q_hash->qh_lock);
1679 		list_del_init(&dqp->q_hashlist);
1680 		dqp->q_hash->qh_version++;
1681 		mutex_unlock(&dqp->q_hash->qh_lock);
1682 
1683 		mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1684 		list_del_init(&dqp->q_mplist);
1685 		mp->m_quotainfo->qi_dquots--;
1686 		mp->m_quotainfo->qi_dqreclaims++;
1687 		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1688 
1689 		ASSERT(dqp->q_nrefs == 0);
1690 		list_del_init(&dqp->q_freelist);
1691 		xfs_Gqm->qm_dqfrlist_cnt--;
1692 
1693 		mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1694 		return dqp;
1695 dqunlock:
1696 		xfs_dqunlock(dqp);
1697 		if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1698 			break;
1699 		goto restart;
1700 	}
1701 
1702 	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1703 	return NULL;
1704 }
1705 
1706 /*
1707  * Traverse the freelist of dquots and attempt to reclaim a maximum of
1708  * 'howmany' dquots. This operation races with dqlookup(), and attempts to
1709  * favor the lookup function ...
1710  */
1711 STATIC int
1712 xfs_qm_shake_freelist(
1713 	int	howmany)
1714 {
1715 	int		nreclaimed = 0;
1716 	xfs_dquot_t	*dqp;
1717 
1718 	if (howmany <= 0)
1719 		return 0;
1720 
1721 	while (nreclaimed < howmany) {
1722 		dqp = xfs_qm_dqreclaim_one();
1723 		if (!dqp)
1724 			return nreclaimed;
1725 		xfs_qm_dqdestroy(dqp);
1726 		nreclaimed++;
1727 	}
1728 	return nreclaimed;
1729 }
1730 
1731 /*
1732  * The kmem_shake interface is invoked when memory is running low.
1733  */
1734 /* ARGSUSED */
1735 STATIC int
1736 xfs_qm_shake(
1737 	struct shrinker	*shrink,
1738 	struct shrink_control *sc)
1739 {
1740 	int	ndqused, nfree, n;
1741 	gfp_t gfp_mask = sc->gfp_mask;
1742 
1743 	if (!kmem_shake_allow(gfp_mask))
1744 		return 0;
1745 	if (!xfs_Gqm)
1746 		return 0;
1747 
1748 	nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
1749 	/* incore dquots in all f/s's */
1750 	ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
1751 
1752 	ASSERT(ndqused >= 0);
1753 
1754 	if (nfree <= ndqused && nfree < ndquot)
1755 		return 0;
1756 
1757 	ndqused *= xfs_Gqm->qm_dqfree_ratio;	/* target # of free dquots */
1758 	n = nfree - ndqused - ndquot;		/* # over target */
1759 
1760 	return xfs_qm_shake_freelist(MAX(nfree, n));
1761 }
1762 
1763 
1764 /*------------------------------------------------------------------*/
1765 
1766 /*
1767  * Return a new incore dquot. Depending on the number of
1768  * dquots in the system, we either allocate a new one on the kernel heap,
1769  * or reclaim a free one.
1770  * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
1771  * to reclaim an existing one from the freelist.
1772  */
1773 boolean_t
1774 xfs_qm_dqalloc_incore(
1775 	xfs_dquot_t **O_dqpp)
1776 {
1777 	xfs_dquot_t	*dqp;
1778 
1779 	/*
1780 	 * Check against high water mark to see if we want to pop
1781 	 * a nincompoop dquot off the freelist.
1782 	 */
1783 	if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
1784 		/*
1785 		 * Try to recycle a dquot from the freelist.
1786 		 */
1787 		if ((dqp = xfs_qm_dqreclaim_one())) {
1788 			XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
1789 			/*
1790 			 * Just zero the core here. The rest will get
1791 			 * reinitialized by caller. XXX we shouldn't even
1792 			 * do this zero ...
1793 			 */
1794 			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1795 			*O_dqpp = dqp;
1796 			return B_FALSE;
1797 		}
1798 		XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
1799 	}
1800 
1801 	/*
1802 	 * Allocate a brand new dquot on the kernel heap and return it
1803 	 * to the caller to initialize.
1804 	 */
1805 	ASSERT(xfs_Gqm->qm_dqzone != NULL);
1806 	*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
1807 	atomic_inc(&xfs_Gqm->qm_totaldquots);
1808 
1809 	return B_TRUE;
1810 }
1811 
1812 
1813 /*
1814  * Start a transaction and write the incore superblock changes to
1815  * disk. flags parameter indicates which fields have changed.
1816  */
1817 int
1818 xfs_qm_write_sb_changes(
1819 	xfs_mount_t	*mp,
1820 	__int64_t	flags)
1821 {
1822 	xfs_trans_t	*tp;
1823 	int		error;
1824 
1825 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1826 	if ((error = xfs_trans_reserve(tp, 0,
1827 				      mp->m_sb.sb_sectsize + 128, 0,
1828 				      0,
1829 				      XFS_DEFAULT_LOG_COUNT))) {
1830 		xfs_trans_cancel(tp, 0);
1831 		return error;
1832 	}
1833 
1834 	xfs_mod_sb(tp, flags);
1835 	error = xfs_trans_commit(tp, 0);
1836 
1837 	return error;
1838 }
1839 
1840 
1841 /* --------------- utility functions for vnodeops ---------------- */
1842 
1843 
1844 /*
1845  * Given an inode, a uid, gid and prid make sure that we have
1846  * allocated relevant dquot(s) on disk, and that we won't exceed inode
1847  * quotas by creating this file.
1848  * This also attaches dquot(s) to the given inode after locking it,
1849  * and returns the dquots corresponding to the uid and/or gid.
1850  *
1851  * in	: inode (unlocked)
1852  * out	: udquot, gdquot with references taken and unlocked
1853  */
1854 int
1855 xfs_qm_vop_dqalloc(
1856 	struct xfs_inode	*ip,
1857 	uid_t			uid,
1858 	gid_t			gid,
1859 	prid_t			prid,
1860 	uint			flags,
1861 	struct xfs_dquot	**O_udqpp,
1862 	struct xfs_dquot	**O_gdqpp)
1863 {
1864 	struct xfs_mount	*mp = ip->i_mount;
1865 	struct xfs_dquot	*uq, *gq;
1866 	int			error;
1867 	uint			lockflags;
1868 
1869 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
1870 		return 0;
1871 
1872 	lockflags = XFS_ILOCK_EXCL;
1873 	xfs_ilock(ip, lockflags);
1874 
1875 	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
1876 		gid = ip->i_d.di_gid;
1877 
1878 	/*
1879 	 * Attach the dquot(s) to this inode, doing a dquot allocation
1880 	 * if necessary. The dquot(s) will not be locked.
1881 	 */
1882 	if (XFS_NOT_DQATTACHED(mp, ip)) {
1883 		error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
1884 		if (error) {
1885 			xfs_iunlock(ip, lockflags);
1886 			return error;
1887 		}
1888 	}
1889 
1890 	uq = gq = NULL;
1891 	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
1892 		if (ip->i_d.di_uid != uid) {
1893 			/*
1894 			 * What we need is the dquot that has this uid, and
1895 			 * if we send the inode to dqget, the uid of the inode
1896 			 * takes priority over what's sent in the uid argument.
1897 			 * We must unlock inode here before calling dqget if
1898 			 * we're not sending the inode, because otherwise
1899 			 * we'll deadlock by doing trans_reserve while
1900 			 * holding ilock.
1901 			 */
1902 			xfs_iunlock(ip, lockflags);
1903 			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
1904 						 XFS_DQ_USER,
1905 						 XFS_QMOPT_DQALLOC |
1906 						 XFS_QMOPT_DOWARN,
1907 						 &uq))) {
1908 				ASSERT(error != ENOENT);
1909 				return error;
1910 			}
1911 			/*
1912 			 * Get the ilock in the right order.
1913 			 */
1914 			xfs_dqunlock(uq);
1915 			lockflags = XFS_ILOCK_SHARED;
1916 			xfs_ilock(ip, lockflags);
1917 		} else {
1918 			/*
1919 			 * Take an extra reference, because we'll return
1920 			 * this to caller
1921 			 */
1922 			ASSERT(ip->i_udquot);
1923 			uq = xfs_qm_dqhold(ip->i_udquot);
1924 		}
1925 	}
1926 	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
1927 		if (ip->i_d.di_gid != gid) {
1928 			xfs_iunlock(ip, lockflags);
1929 			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
1930 						 XFS_DQ_GROUP,
1931 						 XFS_QMOPT_DQALLOC |
1932 						 XFS_QMOPT_DOWARN,
1933 						 &gq))) {
1934 				if (uq)
1935 					xfs_qm_dqrele(uq);
1936 				ASSERT(error != ENOENT);
1937 				return error;
1938 			}
1939 			xfs_dqunlock(gq);
1940 			lockflags = XFS_ILOCK_SHARED;
1941 			xfs_ilock(ip, lockflags);
1942 		} else {
1943 			ASSERT(ip->i_gdquot);
1944 			gq = xfs_qm_dqhold(ip->i_gdquot);
1945 		}
1946 	} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
1947 		if (xfs_get_projid(ip) != prid) {
1948 			xfs_iunlock(ip, lockflags);
1949 			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
1950 						 XFS_DQ_PROJ,
1951 						 XFS_QMOPT_DQALLOC |
1952 						 XFS_QMOPT_DOWARN,
1953 						 &gq))) {
1954 				if (uq)
1955 					xfs_qm_dqrele(uq);
1956 				ASSERT(error != ENOENT);
1957 				return (error);
1958 			}
1959 			xfs_dqunlock(gq);
1960 			lockflags = XFS_ILOCK_SHARED;
1961 			xfs_ilock(ip, lockflags);
1962 		} else {
1963 			ASSERT(ip->i_gdquot);
1964 			gq = xfs_qm_dqhold(ip->i_gdquot);
1965 		}
1966 	}
1967 	if (uq)
1968 		trace_xfs_dquot_dqalloc(ip);
1969 
1970 	xfs_iunlock(ip, lockflags);
1971 	if (O_udqpp)
1972 		*O_udqpp = uq;
1973 	else if (uq)
1974 		xfs_qm_dqrele(uq);
1975 	if (O_gdqpp)
1976 		*O_gdqpp = gq;
1977 	else if (gq)
1978 		xfs_qm_dqrele(gq);
1979 	return 0;
1980 }
1981 
1982 /*
1983  * Actually transfer ownership, and do dquot modifications.
1984  * These were already reserved.
1985  */
1986 xfs_dquot_t *
1987 xfs_qm_vop_chown(
1988 	xfs_trans_t	*tp,
1989 	xfs_inode_t	*ip,
1990 	xfs_dquot_t	**IO_olddq,
1991 	xfs_dquot_t	*newdq)
1992 {
1993 	xfs_dquot_t	*prevdq;
1994 	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
1995 				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
1996 
1997 
1998 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1999 	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2000 
2001 	/* old dquot */
2002 	prevdq = *IO_olddq;
2003 	ASSERT(prevdq);
2004 	ASSERT(prevdq != newdq);
2005 
2006 	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2007 	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2008 
2009 	/* the sparkling new dquot */
2010 	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2011 	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2012 
2013 	/*
2014 	 * Take an extra reference, because the inode is going to keep
2015 	 * this dquot pointer even after the trans_commit.
2016 	 */
2017 	*IO_olddq = xfs_qm_dqhold(newdq);
2018 
2019 	return prevdq;
2020 }
2021 
2022 /*
2023  * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2024  */
2025 int
2026 xfs_qm_vop_chown_reserve(
2027 	xfs_trans_t	*tp,
2028 	xfs_inode_t	*ip,
2029 	xfs_dquot_t	*udqp,
2030 	xfs_dquot_t	*gdqp,
2031 	uint		flags)
2032 {
2033 	xfs_mount_t	*mp = ip->i_mount;
2034 	uint		delblks, blkflags, prjflags = 0;
2035 	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
2036 	int		error;
2037 
2038 
2039 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2040 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2041 
2042 	delblks = ip->i_delayed_blks;
2043 	delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2044 	blkflags = XFS_IS_REALTIME_INODE(ip) ?
2045 			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2046 
2047 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2048 	    ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
2049 		delblksudq = udqp;
2050 		/*
2051 		 * If there are delayed allocation blocks, then we have to
2052 		 * unreserve those from the old dquot, and add them to the
2053 		 * new dquot.
2054 		 */
2055 		if (delblks) {
2056 			ASSERT(ip->i_udquot);
2057 			unresudq = ip->i_udquot;
2058 		}
2059 	}
2060 	if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2061 		if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2062 		     xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
2063 			prjflags = XFS_QMOPT_ENOSPC;
2064 
2065 		if (prjflags ||
2066 		    (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2067 		     ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
2068 			delblksgdq = gdqp;
2069 			if (delblks) {
2070 				ASSERT(ip->i_gdquot);
2071 				unresgdq = ip->i_gdquot;
2072 			}
2073 		}
2074 	}
2075 
2076 	if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2077 				delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2078 				flags | blkflags | prjflags)))
2079 		return (error);
2080 
2081 	/*
2082 	 * Do the delayed blks reservations/unreservations now. Since, these
2083 	 * are done without the help of a transaction, if a reservation fails
2084 	 * its previous reservations won't be automatically undone by trans
2085 	 * code. So, we have to do it manually here.
2086 	 */
2087 	if (delblks) {
2088 		/*
2089 		 * Do the reservations first. Unreservation can't fail.
2090 		 */
2091 		ASSERT(delblksudq || delblksgdq);
2092 		ASSERT(unresudq || unresgdq);
2093 		if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2094 				delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2095 				flags | blkflags | prjflags)))
2096 			return (error);
2097 		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2098 				unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2099 				blkflags);
2100 	}
2101 
2102 	return (0);
2103 }
2104 
2105 int
2106 xfs_qm_vop_rename_dqattach(
2107 	struct xfs_inode	**i_tab)
2108 {
2109 	struct xfs_mount	*mp = i_tab[0]->i_mount;
2110 	int			i;
2111 
2112 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2113 		return 0;
2114 
2115 	for (i = 0; (i < 4 && i_tab[i]); i++) {
2116 		struct xfs_inode	*ip = i_tab[i];
2117 		int			error;
2118 
2119 		/*
2120 		 * Watch out for duplicate entries in the table.
2121 		 */
2122 		if (i == 0 || ip != i_tab[i-1]) {
2123 			if (XFS_NOT_DQATTACHED(mp, ip)) {
2124 				error = xfs_qm_dqattach(ip, 0);
2125 				if (error)
2126 					return error;
2127 			}
2128 		}
2129 	}
2130 	return 0;
2131 }
2132 
2133 void
2134 xfs_qm_vop_create_dqattach(
2135 	struct xfs_trans	*tp,
2136 	struct xfs_inode	*ip,
2137 	struct xfs_dquot	*udqp,
2138 	struct xfs_dquot	*gdqp)
2139 {
2140 	struct xfs_mount	*mp = tp->t_mountp;
2141 
2142 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2143 		return;
2144 
2145 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2146 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2147 
2148 	if (udqp) {
2149 		ASSERT(ip->i_udquot == NULL);
2150 		ASSERT(XFS_IS_UQUOTA_ON(mp));
2151 		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2152 
2153 		ip->i_udquot = xfs_qm_dqhold(udqp);
2154 		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2155 	}
2156 	if (gdqp) {
2157 		ASSERT(ip->i_gdquot == NULL);
2158 		ASSERT(XFS_IS_OQUOTA_ON(mp));
2159 		ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2160 			ip->i_d.di_gid : xfs_get_projid(ip)) ==
2161 				be32_to_cpu(gdqp->q_core.d_id));
2162 
2163 		ip->i_gdquot = xfs_qm_dqhold(gdqp);
2164 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2165 	}
2166 }
2167 
2168