xref: /linux/fs/xfs/xfs_qm.c (revision cdd30ebb1b9f36159d66f088b61aee264e649d7a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_inode.h"
16 #include "xfs_iwalk.h"
17 #include "xfs_quota.h"
18 #include "xfs_bmap.h"
19 #include "xfs_bmap_util.h"
20 #include "xfs_trans.h"
21 #include "xfs_trans_space.h"
22 #include "xfs_qm.h"
23 #include "xfs_trace.h"
24 #include "xfs_icache.h"
25 #include "xfs_error.h"
26 #include "xfs_ag.h"
27 #include "xfs_ialloc.h"
28 #include "xfs_log_priv.h"
29 #include "xfs_health.h"
30 #include "xfs_da_format.h"
31 #include "xfs_metafile.h"
32 #include "xfs_rtgroup.h"
33 
34 /*
35  * The global quota manager. There is only one of these for the entire
36  * system, _not_ one per file system. XQM keeps track of the overall
37  * quota functionality, including maintaining the freelist and hash
38  * tables of dquots.
39  */
/*
 * Forward declarations of STATIC helpers that are referenced before their
 * definitions.
 */
STATIC int	xfs_qm_init_quotainos(struct xfs_mount *mp);
STATIC int	xfs_qm_init_quotainfo(struct xfs_mount *mp);

STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);
/*
 * We use the batch lookup interface to iterate over the dquots as it
 * currently is the only interface into the radix tree code that allows
 * fuzzy lookups instead of exact matches.  Holding the lock over multiple
 * operations is fine as all callers run either during mount/umount
 * or quotaoff.
 *
 * XFS_DQ_LOOKUP_BATCH is the maximum number of dquots fetched by a single
 * radix tree gang lookup in xfs_qm_dquot_walk().
 */
#define XFS_DQ_LOOKUP_BATCH	32
52 
53 STATIC int
54 xfs_qm_dquot_walk(
55 	struct xfs_mount	*mp,
56 	xfs_dqtype_t		type,
57 	int			(*execute)(struct xfs_dquot *dqp, void *data),
58 	void			*data)
59 {
60 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
61 	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
62 	uint32_t		next_index;
63 	int			last_error = 0;
64 	int			skipped;
65 	int			nr_found;
66 
67 restart:
68 	skipped = 0;
69 	next_index = 0;
70 	nr_found = 0;
71 
72 	while (1) {
73 		struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
74 		int		error;
75 		int		i;
76 
77 		mutex_lock(&qi->qi_tree_lock);
78 		nr_found = radix_tree_gang_lookup(tree, (void **)batch,
79 					next_index, XFS_DQ_LOOKUP_BATCH);
80 		if (!nr_found) {
81 			mutex_unlock(&qi->qi_tree_lock);
82 			break;
83 		}
84 
85 		for (i = 0; i < nr_found; i++) {
86 			struct xfs_dquot *dqp = batch[i];
87 
88 			next_index = dqp->q_id + 1;
89 
90 			error = execute(batch[i], data);
91 			if (error == -EAGAIN) {
92 				skipped++;
93 				continue;
94 			}
95 			if (error && last_error != -EFSCORRUPTED)
96 				last_error = error;
97 		}
98 
99 		mutex_unlock(&qi->qi_tree_lock);
100 
101 		/* bail out if the filesystem is corrupted.  */
102 		if (last_error == -EFSCORRUPTED) {
103 			skipped = 0;
104 			break;
105 		}
106 		/* we're done if id overflows back to zero */
107 		if (!next_index)
108 			break;
109 	}
110 
111 	if (skipped) {
112 		delay(1);
113 		goto restart;
114 	}
115 
116 	return last_error;
117 }
118 
119 
/*
 * Purge a dquot from all tracking data structures and free it.
 *
 * Has the callback signature expected by xfs_qm_dquot_walk(); @data is
 * unused.
 *
 * Returns 0 once the dquot has been removed from the radix tree and the LRU
 * and destroyed.  Returns -EAGAIN, leaving the dquot in place, if it is
 * still referenced, is already being freed by someone else, or if flushing
 * it returned -EAGAIN.  Other flush or write errors are deliberately
 * ignored and the dquot is purged anyway.
 */
STATIC int
xfs_qm_dqpurge(
	struct xfs_dquot	*dqp,
	void			*data)
{
	struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;
	int			error = -EAGAIN;

	xfs_dqlock(dqp);
	/* Busy or already on its way out: report -EAGAIN. */
	if ((dqp->q_flags & XFS_DQFLAG_FREEING) || dqp->q_nrefs != 0)
		goto out_unlock;

	/* Claim the dquot for freeing; the reclaim path skips FREEING dquots. */
	dqp->q_flags |= XFS_DQFLAG_FREEING;

	xfs_dqflock(dqp);

	/*
	 * If we are turning this type of quotas off, we don't care
	 * about the dirty metadata sitting in this dquot. OTOH, if
	 * we're unmounting, we do care, so we flush it and wait.
	 */
	if (XFS_DQ_IS_DIRTY(dqp)) {
		struct xfs_buf	*bp = NULL;

		/*
		 * We don't care about getting disk errors here. We need
		 * to purge this dquot anyway, so we go ahead regardless.
		 */
		error = xfs_qm_dqflush(dqp, &bp);
		if (!error) {
			error = xfs_bwrite(bp);
			xfs_buf_relse(bp);
		} else if (error == -EAGAIN) {
			/* Could not flush right now: undo the claim and bail. */
			dqp->q_flags &= ~XFS_DQFLAG_FREEING;
			goto out_unlock;
		}
		/*
		 * NOTE(review): the flush lock is taken a second time here;
		 * presumably the flush above hands it off so that this call
		 * waits for the flush to complete - confirm against
		 * xfs_qm_dqflush().
		 */
		xfs_dqflock(dqp);
	}

	ASSERT(atomic_read(&dqp->q_pincount) == 0);
	ASSERT(xlog_is_shutdown(dqp->q_logitem.qli_item.li_log) ||
		!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));

	xfs_dqfunlock(dqp);
	xfs_dqunlock(dqp);

	/* Drop the dquot from the per-type radix tree and the dquot count. */
	radix_tree_delete(xfs_dquot_tree(qi, xfs_dquot_type(dqp)), dqp->q_id);
	qi->qi_dquots--;

	/*
	 * We move dquots to the freelist as soon as their reference count
	 * hits zero, so it really should be on the freelist here.
	 */
	ASSERT(!list_empty(&dqp->q_lru));
	list_lru_del_obj(&qi->qi_lru, &dqp->q_lru);
	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);

	xfs_qm_dqdestroy(dqp);
	return 0;

out_unlock:
	xfs_dqunlock(dqp);
	return error;
}
187 
188 /*
189  * Purge the dquot cache.
190  */
191 static void
192 xfs_qm_dqpurge_all(
193 	struct xfs_mount	*mp)
194 {
195 	xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_dqpurge, NULL);
196 	xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_dqpurge, NULL);
197 	xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_dqpurge, NULL);
198 }
199 
200 /*
201  * Just destroy the quotainfo structure.
202  */
203 void
204 xfs_qm_unmount(
205 	struct xfs_mount	*mp)
206 {
207 	if (mp->m_quotainfo) {
208 		xfs_qm_dqpurge_all(mp);
209 		xfs_qm_destroy_quotainfo(mp);
210 	}
211 }
212 
213 static void
214 xfs_qm_unmount_rt(
215 	struct xfs_mount	*mp)
216 {
217 	struct xfs_rtgroup	*rtg = xfs_rtgroup_grab(mp, 0);
218 
219 	if (!rtg)
220 		return;
221 	if (rtg->rtg_inodes[XFS_RTGI_BITMAP])
222 		xfs_qm_dqdetach(rtg->rtg_inodes[XFS_RTGI_BITMAP]);
223 	if (rtg->rtg_inodes[XFS_RTGI_SUMMARY])
224 		xfs_qm_dqdetach(rtg->rtg_inodes[XFS_RTGI_SUMMARY]);
225 	xfs_rtgroup_rele(rtg);
226 }
227 
228 STATIC void
229 xfs_qm_destroy_quotainos(
230 	struct xfs_quotainfo	*qi)
231 {
232 	if (qi->qi_uquotaip) {
233 		xfs_irele(qi->qi_uquotaip);
234 		qi->qi_uquotaip = NULL; /* paranoia */
235 	}
236 	if (qi->qi_gquotaip) {
237 		xfs_irele(qi->qi_gquotaip);
238 		qi->qi_gquotaip = NULL;
239 	}
240 	if (qi->qi_pquotaip) {
241 		xfs_irele(qi->qi_pquotaip);
242 		qi->qi_pquotaip = NULL;
243 	}
244 }
245 
246 /*
247  * Called from the vfsops layer.
248  */
249 void
250 xfs_qm_unmount_quotas(
251 	xfs_mount_t	*mp)
252 {
253 	/*
254 	 * Release the dquots that root inode, et al might be holding,
255 	 * before we flush quotas and blow away the quotainfo structure.
256 	 */
257 	ASSERT(mp->m_rootip);
258 	xfs_qm_dqdetach(mp->m_rootip);
259 
260 	/*
261 	 * For pre-RTG file systems, the RT inodes have quotas attached,
262 	 * detach them now.
263 	 */
264 	if (!xfs_has_rtgroups(mp))
265 		xfs_qm_unmount_rt(mp);
266 
267 	/*
268 	 * Release the quota inodes.
269 	 */
270 	if (mp->m_quotainfo)
271 		xfs_qm_destroy_quotainos(mp->m_quotainfo);
272 }
273 
274 STATIC int
275 xfs_qm_dqattach_one(
276 	struct xfs_inode	*ip,
277 	xfs_dqtype_t		type,
278 	bool			doalloc,
279 	struct xfs_dquot	**IO_idqpp)
280 {
281 	struct xfs_dquot	*dqp;
282 	int			error;
283 
284 	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
285 	error = 0;
286 
287 	/*
288 	 * See if we already have it in the inode itself. IO_idqpp is &i_udquot
289 	 * or &i_gdquot. This made the code look weird, but made the logic a lot
290 	 * simpler.
291 	 */
292 	dqp = *IO_idqpp;
293 	if (dqp) {
294 		trace_xfs_dqattach_found(dqp);
295 		return 0;
296 	}
297 
298 	/*
299 	 * Find the dquot from somewhere. This bumps the reference count of
300 	 * dquot and returns it locked.  This can return ENOENT if dquot didn't
301 	 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got
302 	 * turned off suddenly.
303 	 */
304 	error = xfs_qm_dqget_inode(ip, type, doalloc, &dqp);
305 	if (error)
306 		return error;
307 
308 	trace_xfs_dqattach_get(dqp);
309 
310 	/*
311 	 * dqget may have dropped and re-acquired the ilock, but it guarantees
312 	 * that the dquot returned is the one that should go in the inode.
313 	 */
314 	*IO_idqpp = dqp;
315 	xfs_dqunlock(dqp);
316 	return 0;
317 }
318 
319 static bool
320 xfs_qm_need_dqattach(
321 	struct xfs_inode	*ip)
322 {
323 	struct xfs_mount	*mp = ip->i_mount;
324 
325 	if (!XFS_IS_QUOTA_ON(mp))
326 		return false;
327 	if (!XFS_NOT_DQATTACHED(mp, ip))
328 		return false;
329 	if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
330 		return false;
331 	if (xfs_is_metadir_inode(ip))
332 		return false;
333 	return true;
334 }
335 
336 /*
337  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
338  * into account.
339  * If @doalloc is true, the dquot(s) will be allocated if needed.
340  * Inode may get unlocked and relocked in here, and the caller must deal with
341  * the consequences.
342  */
343 int
344 xfs_qm_dqattach_locked(
345 	xfs_inode_t	*ip,
346 	bool		doalloc)
347 {
348 	xfs_mount_t	*mp = ip->i_mount;
349 	int		error = 0;
350 
351 	if (!xfs_qm_need_dqattach(ip))
352 		return 0;
353 
354 	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
355 	ASSERT(!xfs_is_metadir_inode(ip));
356 
357 	if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
358 		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
359 				doalloc, &ip->i_udquot);
360 		if (error)
361 			goto done;
362 		ASSERT(ip->i_udquot);
363 	}
364 
365 	if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
366 		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_GROUP,
367 				doalloc, &ip->i_gdquot);
368 		if (error)
369 			goto done;
370 		ASSERT(ip->i_gdquot);
371 	}
372 
373 	if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
374 		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_PROJ,
375 				doalloc, &ip->i_pdquot);
376 		if (error)
377 			goto done;
378 		ASSERT(ip->i_pdquot);
379 	}
380 
381 done:
382 	/*
383 	 * Don't worry about the dquots that we may have attached before any
384 	 * error - they'll get detached later if it has not already been done.
385 	 */
386 	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
387 	return error;
388 }
389 
390 int
391 xfs_qm_dqattach(
392 	struct xfs_inode	*ip)
393 {
394 	int			error;
395 
396 	if (!xfs_qm_need_dqattach(ip))
397 		return 0;
398 
399 	xfs_ilock(ip, XFS_ILOCK_EXCL);
400 	error = xfs_qm_dqattach_locked(ip, false);
401 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
402 
403 	return error;
404 }
405 
406 /*
407  * Release dquots (and their references) if any.
408  * The inode should be locked EXCL except when this's called by
409  * xfs_ireclaim.
410  */
411 void
412 xfs_qm_dqdetach(
413 	xfs_inode_t	*ip)
414 {
415 	if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot))
416 		return;
417 
418 	trace_xfs_dquot_dqdetach(ip);
419 
420 	ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino));
421 	if (ip->i_udquot) {
422 		xfs_qm_dqrele(ip->i_udquot);
423 		ip->i_udquot = NULL;
424 	}
425 	if (ip->i_gdquot) {
426 		xfs_qm_dqrele(ip->i_gdquot);
427 		ip->i_gdquot = NULL;
428 	}
429 	if (ip->i_pdquot) {
430 		xfs_qm_dqrele(ip->i_pdquot);
431 		ip->i_pdquot = NULL;
432 	}
433 }
434 
/*
 * Per-scan state shared between xfs_qm_shrink_scan() and the LRU isolate
 * callback xfs_qm_dquot_isolate().
 */
struct xfs_qm_isolate {
	/* delayed-write list of buffers for dirty dquots that were flushed */
	struct list_head	buffers;
	/* dquots taken off the LRU, to be freed once the walk is done */
	struct list_head	dispose;
};
439 
/*
 * LRU walk callback used by the dquot shrinker to decide what to do with one
 * dquot on the LRU.  @arg is the struct xfs_qm_isolate of the current scan.
 *
 * Returns:
 *   LRU_REMOVED - the dquot was taken off the LRU, either because it gained
 *		   a reference in the meantime, or because it is clean and
 *		   unreferenced and has been moved to the dispose list.
 *   LRU_SKIP	 - the dquot is busy (dquot lock or flush lock contended, or
 *		   it is already being freed); it stays on the LRU.
 *   LRU_RETRY	 - the dquot was dirty; the LRU lock was dropped to flush it
 *		   and it stays on the LRU for a later pass.
 */
static enum lru_status
xfs_qm_dquot_isolate(
	struct list_head	*item,
	struct list_lru_one	*lru,
	void			*arg)
		__releases(&lru->lock) __acquires(&lru->lock)
{
	struct xfs_dquot	*dqp = container_of(item,
						struct xfs_dquot, q_lru);
	struct xfs_qm_isolate	*isol = arg;

	/* Only trylocks are used here; a contended dquot is simply skipped. */
	if (!xfs_dqlock_nowait(dqp))
		goto out_miss_busy;

	/*
	 * If something else is freeing this dquot and hasn't yet removed it
	 * from the LRU, leave it for the freeing task to complete the freeing
	 * process rather than risk it being free from under us here.
	 */
	if (dqp->q_flags & XFS_DQFLAG_FREEING)
		goto out_miss_unlock;

	/*
	 * This dquot has acquired a reference in the meantime remove it from
	 * the freelist and try again.
	 */
	if (dqp->q_nrefs) {
		xfs_dqunlock(dqp);
		XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants);

		trace_xfs_dqreclaim_want(dqp);
		list_lru_isolate(lru, &dqp->q_lru);
		XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
		return LRU_REMOVED;
	}

	/*
	 * If the dquot is dirty, flush it. If it's already being flushed, just
	 * skip it so there is time for the IO to complete before we try to
	 * reclaim it again on the next LRU pass.
	 */
	if (!xfs_dqflock_nowait(dqp))
		goto out_miss_unlock;

	if (XFS_DQ_IS_DIRTY(dqp)) {
		struct xfs_buf	*bp = NULL;
		int		error;

		trace_xfs_dqreclaim_dirty(dqp);

		/* we have to drop the LRU lock to flush the dquot */
		spin_unlock(&lru->lock);

		/*
		 * NOTE(review): the flush lock taken above is not released
		 * on this path; presumably xfs_qm_dqflush() or buffer I/O
		 * completion drops it - confirm.
		 */
		error = xfs_qm_dqflush(dqp, &bp);
		if (error)
			goto out_unlock_dirty;

		/* Queue the buffer; xfs_qm_shrink_scan() submits the list. */
		xfs_buf_delwri_queue(bp, &isol->buffers);
		xfs_buf_relse(bp);
		goto out_unlock_dirty;
	}
	xfs_dqfunlock(dqp);

	/*
	 * Prevent lookups now that we are past the point of no return.
	 */
	dqp->q_flags |= XFS_DQFLAG_FREEING;
	xfs_dqunlock(dqp);

	ASSERT(dqp->q_nrefs == 0);
	list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose);
	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
	trace_xfs_dqreclaim_done(dqp);
	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
	return LRU_REMOVED;

out_miss_unlock:
	xfs_dqunlock(dqp);
out_miss_busy:
	trace_xfs_dqreclaim_busy(dqp);
	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
	return LRU_SKIP;

out_unlock_dirty:
	/*
	 * Reached only after the LRU lock was dropped above.
	 * NOTE(review): the lock is not retaken in this function; presumably
	 * the list_lru walker relocks after LRU_RETRY - confirm against the
	 * list_lru API.
	 */
	trace_xfs_dqreclaim_busy(dqp);
	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
	xfs_dqunlock(dqp);
	return LRU_RETRY;
}
529 
530 static unsigned long
531 xfs_qm_shrink_scan(
532 	struct shrinker		*shrink,
533 	struct shrink_control	*sc)
534 {
535 	struct xfs_quotainfo	*qi = shrink->private_data;
536 	struct xfs_qm_isolate	isol;
537 	unsigned long		freed;
538 	int			error;
539 
540 	if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
541 		return 0;
542 
543 	INIT_LIST_HEAD(&isol.buffers);
544 	INIT_LIST_HEAD(&isol.dispose);
545 
546 	freed = list_lru_shrink_walk(&qi->qi_lru, sc,
547 				     xfs_qm_dquot_isolate, &isol);
548 
549 	error = xfs_buf_delwri_submit(&isol.buffers);
550 	if (error)
551 		xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
552 
553 	while (!list_empty(&isol.dispose)) {
554 		struct xfs_dquot	*dqp;
555 
556 		dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru);
557 		list_del_init(&dqp->q_lru);
558 		xfs_qm_dqfree_one(dqp);
559 	}
560 
561 	return freed;
562 }
563 
564 static unsigned long
565 xfs_qm_shrink_count(
566 	struct shrinker		*shrink,
567 	struct shrink_control	*sc)
568 {
569 	struct xfs_quotainfo	*qi = shrink->private_data;
570 
571 	return list_lru_shrink_count(&qi->qi_lru, sc);
572 }
573 
574 STATIC void
575 xfs_qm_set_defquota(
576 	struct xfs_mount	*mp,
577 	xfs_dqtype_t		type,
578 	struct xfs_quotainfo	*qinf)
579 {
580 	struct xfs_dquot	*dqp;
581 	struct xfs_def_quota	*defq;
582 	int			error;
583 
584 	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
585 	if (error)
586 		return;
587 
588 	defq = xfs_get_defquota(qinf, xfs_dquot_type(dqp));
589 
590 	/*
591 	 * Timers and warnings have been already set, let's just set the
592 	 * default limits for this quota type
593 	 */
594 	defq->blk.hard = dqp->q_blk.hardlimit;
595 	defq->blk.soft = dqp->q_blk.softlimit;
596 	defq->ino.hard = dqp->q_ino.hardlimit;
597 	defq->ino.soft = dqp->q_ino.softlimit;
598 	defq->rtb.hard = dqp->q_rtb.hardlimit;
599 	defq->rtb.soft = dqp->q_rtb.softlimit;
600 	xfs_qm_dqdestroy(dqp);
601 }
602 
603 /* Initialize quota time limits from the root dquot. */
604 static void
605 xfs_qm_init_timelimits(
606 	struct xfs_mount	*mp,
607 	xfs_dqtype_t		type)
608 {
609 	struct xfs_quotainfo	*qinf = mp->m_quotainfo;
610 	struct xfs_def_quota	*defq;
611 	struct xfs_dquot	*dqp;
612 	int			error;
613 
614 	defq = xfs_get_defquota(qinf, type);
615 
616 	defq->blk.time = XFS_QM_BTIMELIMIT;
617 	defq->ino.time = XFS_QM_ITIMELIMIT;
618 	defq->rtb.time = XFS_QM_RTBTIMELIMIT;
619 
620 	/*
621 	 * We try to get the limits from the superuser's limits fields.
622 	 * This is quite hacky, but it is standard quota practice.
623 	 *
624 	 * Since we may not have done a quotacheck by this point, just read
625 	 * the dquot without attaching it to any hashtables or lists.
626 	 */
627 	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
628 	if (error)
629 		return;
630 
631 	/*
632 	 * The warnings and timers set the grace period given to
633 	 * a user or group before he or she can not perform any
634 	 * more writing. If it is zero, a default is used.
635 	 */
636 	if (dqp->q_blk.timer)
637 		defq->blk.time = dqp->q_blk.timer;
638 	if (dqp->q_ino.timer)
639 		defq->ino.time = dqp->q_ino.timer;
640 	if (dqp->q_rtb.timer)
641 		defq->rtb.time = dqp->q_rtb.timer;
642 
643 	xfs_qm_dqdestroy(dqp);
644 }
645 
646 static int
647 xfs_qm_load_metadir_qinos(
648 	struct xfs_mount	*mp,
649 	struct xfs_quotainfo	*qi,
650 	struct xfs_inode	**dpp)
651 {
652 	struct xfs_trans	*tp;
653 	int			error;
654 
655 	error = xfs_trans_alloc_empty(mp, &tp);
656 	if (error)
657 		return error;
658 
659 	error = xfs_dqinode_load_parent(tp, dpp);
660 	if (error == -ENOENT) {
661 		/* no quota dir directory, but we'll create one later */
662 		error = 0;
663 		goto out_trans;
664 	}
665 	if (error)
666 		goto out_trans;
667 
668 	if (XFS_IS_UQUOTA_ON(mp)) {
669 		error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_USER,
670 				&qi->qi_uquotaip);
671 		if (error && error != -ENOENT)
672 			goto out_trans;
673 	}
674 
675 	if (XFS_IS_GQUOTA_ON(mp)) {
676 		error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_GROUP,
677 				&qi->qi_gquotaip);
678 		if (error && error != -ENOENT)
679 			goto out_trans;
680 	}
681 
682 	if (XFS_IS_PQUOTA_ON(mp)) {
683 		error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_PROJ,
684 				&qi->qi_pquotaip);
685 		if (error && error != -ENOENT)
686 			goto out_trans;
687 	}
688 
689 	error = 0;
690 out_trans:
691 	xfs_trans_cancel(tp);
692 	return error;
693 }
694 
695 /* Create quota inodes in the metadata directory tree. */
696 STATIC int
697 xfs_qm_create_metadir_qinos(
698 	struct xfs_mount	*mp,
699 	struct xfs_quotainfo	*qi,
700 	struct xfs_inode	**dpp)
701 {
702 	int			error;
703 
704 	if (!*dpp) {
705 		error = xfs_dqinode_mkdir_parent(mp, dpp);
706 		if (error && error != -EEXIST)
707 			return error;
708 	}
709 
710 	if (XFS_IS_UQUOTA_ON(mp) && !qi->qi_uquotaip) {
711 		error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_USER,
712 				&qi->qi_uquotaip);
713 		if (error)
714 			return error;
715 	}
716 
717 	if (XFS_IS_GQUOTA_ON(mp) && !qi->qi_gquotaip) {
718 		error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_GROUP,
719 				&qi->qi_gquotaip);
720 		if (error)
721 			return error;
722 	}
723 
724 	if (XFS_IS_PQUOTA_ON(mp) && !qi->qi_pquotaip) {
725 		error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_PROJ,
726 				&qi->qi_pquotaip);
727 		if (error)
728 			return error;
729 	}
730 
731 	return 0;
732 }
733 
734 /*
735  * Add QUOTABIT to sb_versionnum and initialize qflags in preparation for
736  * creating quota files on a metadir filesystem.
737  */
738 STATIC int
739 xfs_qm_prep_metadir_sb(
740 	struct xfs_mount	*mp)
741 {
742 	struct xfs_trans	*tp;
743 	int			error;
744 
745 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp);
746 	if (error)
747 		return error;
748 
749 	spin_lock(&mp->m_sb_lock);
750 
751 	xfs_add_quota(mp);
752 
753 	/* qflags will get updated fully _after_ quotacheck */
754 	mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
755 
756 	spin_unlock(&mp->m_sb_lock);
757 	xfs_log_sb(tp);
758 
759 	return xfs_trans_commit(tp);
760 }
761 
762 /*
763  * Load existing quota inodes or create them.  Since this is a V5 filesystem,
764  * we don't have to deal with the grp/prjquota switcheroo thing from V4.
765  */
766 STATIC int
767 xfs_qm_init_metadir_qinos(
768 	struct xfs_mount	*mp)
769 {
770 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
771 	struct xfs_inode	*dp = NULL;
772 	int			error;
773 
774 	if (!xfs_has_quota(mp)) {
775 		error = xfs_qm_prep_metadir_sb(mp);
776 		if (error)
777 			return error;
778 	}
779 
780 	error = xfs_qm_load_metadir_qinos(mp, qi, &dp);
781 	if (error)
782 		goto out_err;
783 
784 	error = xfs_qm_create_metadir_qinos(mp, qi, &dp);
785 	if (error)
786 		goto out_err;
787 
788 	xfs_irele(dp);
789 	return 0;
790 out_err:
791 	xfs_qm_destroy_quotainos(mp->m_quotainfo);
792 	if (dp)
793 		xfs_irele(dp);
794 	return error;
795 }
796 
/*
 * This initializes all the quota information that's kept in the
 * mount structure
 *
 * Allocates mp->m_quotainfo, sets up the dquot LRU, the quota inodes, the
 * per-type radix trees and locks, the precalculated constants, the default
 * limits and timers, and finally the dquot shrinker and the quota hooks.
 *
 * Returns 0 on success.  On failure everything set up so far is torn down
 * again, mp->m_quotainfo is reset to NULL and a negative errno is returned.
 */
STATIC int
xfs_qm_init_quotainfo(
	struct xfs_mount	*mp)
{
	struct xfs_quotainfo	*qinf;
	int			error;

	ASSERT(XFS_IS_QUOTA_ON(mp));

	/* __GFP_NOFAIL is passed, so the result is used without a NULL check */
	qinf = mp->m_quotainfo = kzalloc(sizeof(struct xfs_quotainfo),
					GFP_KERNEL | __GFP_NOFAIL);

	error = list_lru_init(&qinf->qi_lru);
	if (error)
		goto out_free_qinf;

	/*
	 * See if quotainodes are setup, and if not, allocate them,
	 * and change the superblock accordingly.
	 */
	if (xfs_has_metadir(mp))
		error = xfs_qm_init_metadir_qinos(mp);
	else
		error = xfs_qm_init_quotainos(mp);
	if (error)
		goto out_free_lru;

	/* one radix tree of cached dquots per quota type */
	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_KERNEL);
	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_KERNEL);
	INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_KERNEL);
	mutex_init(&qinf->qi_tree_lock);

	/* mutex used to serialize quotaoffs */
	mutex_init(&qinf->qi_quotaofflock);

	/* Precalc some constants */
	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
	/* the valid expiry range depends on the on-disk timestamp format */
	if (xfs_has_bigtime(mp)) {
		qinf->qi_expiry_min =
			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MIN);
		qinf->qi_expiry_max =
			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MAX);
	} else {
		qinf->qi_expiry_min = XFS_DQ_LEGACY_EXPIRY_MIN;
		qinf->qi_expiry_max = XFS_DQ_LEGACY_EXPIRY_MAX;
	}
	trace_xfs_quota_expiry_range(mp, qinf->qi_expiry_min,
			qinf->qi_expiry_max);

	/* carry the on-disk "quota checked" flags over into the mount flags */
	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);

	/* time limits are initialized for all three types unconditionally */
	xfs_qm_init_timelimits(mp, XFS_DQTYPE_USER);
	xfs_qm_init_timelimits(mp, XFS_DQTYPE_GROUP);
	xfs_qm_init_timelimits(mp, XFS_DQTYPE_PROJ);

	/* default limits are only loaded for the quota types that are on */
	if (XFS_IS_UQUOTA_ON(mp))
		xfs_qm_set_defquota(mp, XFS_DQTYPE_USER, qinf);
	if (XFS_IS_GQUOTA_ON(mp))
		xfs_qm_set_defquota(mp, XFS_DQTYPE_GROUP, qinf);
	if (XFS_IS_PQUOTA_ON(mp))
		xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf);

	qinf->qi_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-qm:%s",
					   mp->m_super->s_id);
	if (!qinf->qi_shrinker) {
		error = -ENOMEM;
		goto out_free_inos;
	}

	qinf->qi_shrinker->count_objects = xfs_qm_shrink_count;
	qinf->qi_shrinker->scan_objects = xfs_qm_shrink_scan;
	qinf->qi_shrinker->private_data = qinf;

	shrinker_register(qinf->qi_shrinker);

	xfs_hooks_init(&qinf->qi_mod_ino_dqtrx_hooks);
	xfs_hooks_init(&qinf->qi_apply_dqtrx_hooks);

	return 0;

	/*
	 * Error unwinding.  The mutexes are only destroyed on the path that
	 * is reached after they were initialized.
	 */
out_free_inos:
	mutex_destroy(&qinf->qi_quotaofflock);
	mutex_destroy(&qinf->qi_tree_lock);
	xfs_qm_destroy_quotainos(qinf);
out_free_lru:
	list_lru_destroy(&qinf->qi_lru);
out_free_qinf:
	kfree(qinf);
	mp->m_quotainfo = NULL;
	return error;
}
893 
894 /*
895  * Gets called when unmounting a filesystem or when all quotas get
896  * turned off.
897  * This purges the quota inodes, destroys locks and frees itself.
898  */
899 void
900 xfs_qm_destroy_quotainfo(
901 	struct xfs_mount	*mp)
902 {
903 	struct xfs_quotainfo	*qi;
904 
905 	qi = mp->m_quotainfo;
906 	ASSERT(qi != NULL);
907 
908 	shrinker_free(qi->qi_shrinker);
909 	list_lru_destroy(&qi->qi_lru);
910 	xfs_qm_destroy_quotainos(qi);
911 	mutex_destroy(&qi->qi_tree_lock);
912 	mutex_destroy(&qi->qi_quotaofflock);
913 	kfree(qi);
914 	mp->m_quotainfo = NULL;
915 }
916 
917 static inline enum xfs_metafile_type
918 xfs_qm_metafile_type(
919 	unsigned int		flags)
920 {
921 	if (flags & XFS_QMOPT_UQUOTA)
922 		return XFS_METAFILE_USRQUOTA;
923 	else if (flags & XFS_QMOPT_GQUOTA)
924 		return XFS_METAFILE_GRPQUOTA;
925 	return XFS_METAFILE_PRJQUOTA;
926 }
927 
/*
 * Create an inode and return with a reference already taken, but unlocked
 * This is how we create quota inodes
 *
 * @flags selects the quota type (XFS_QMOPT_UQUOTA / GQUOTA / PQUOTA) and may
 * additionally carry XFS_QMOPT_SBVERSION to add the quota feature to the
 * superblock.  On filesystems without a separate pquotino, an existing
 * group (or project) quota inode is reused for the other type instead of
 * allocating a new inode.  The superblock quota inode field for the
 * requested type is updated and logged in either case.
 *
 * Returns 0 on success or a negative errno.  *ipp is reset to NULL on entry
 * and may already point at an inode when an error is returned from the
 * transaction allocation or commit.
 */
STATIC int
xfs_qm_qino_alloc(
	struct xfs_mount	*mp,
	struct xfs_inode	**ipp,
	unsigned int		flags)
{
	struct xfs_trans	*tp;
	enum xfs_metafile_type	metafile_type = xfs_qm_metafile_type(flags);
	int			error;
	bool			need_alloc = true;

	*ipp = NULL;
	/*
	 * With superblock that doesn't have separate pquotino, we
	 * share an inode between gquota and pquota. If the on-disk
	 * superblock has GQUOTA and the filesystem is now mounted
	 * with PQUOTA, just use sb_gquotino for sb_pquotino and
	 * vice-versa.
	 */
	if (!xfs_has_pquotino(mp) &&
			(flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) {
		xfs_ino_t ino = NULLFSINO;

		if ((flags & XFS_QMOPT_PQUOTA) &&
			     (mp->m_sb.sb_gquotino != NULLFSINO)) {
			ino = mp->m_sb.sb_gquotino;
			/* both inode fields set at once is inconsistent */
			if (XFS_IS_CORRUPT(mp,
					   mp->m_sb.sb_pquotino != NULLFSINO)) {
				xfs_fs_mark_sick(mp, XFS_SICK_FS_PQUOTA);
				return -EFSCORRUPTED;
			}
		} else if ((flags & XFS_QMOPT_GQUOTA) &&
			     (mp->m_sb.sb_pquotino != NULLFSINO)) {
			ino = mp->m_sb.sb_pquotino;
			if (XFS_IS_CORRUPT(mp,
					   mp->m_sb.sb_gquotino != NULLFSINO)) {
				xfs_fs_mark_sick(mp, XFS_SICK_FS_GQUOTA);
				return -EFSCORRUPTED;
			}
		}
		if (ino != NULLFSINO) {
			error = xfs_metafile_iget(mp, ino, metafile_type, ipp);
			if (error)
				return error;

			/*
			 * Clear both fields; the one matching the requested
			 * type is set again further down.
			 */
			mp->m_sb.sb_gquotino = NULLFSINO;
			mp->m_sb.sb_pquotino = NULLFSINO;
			need_alloc = false;
		}
	}

	/* block reservation is only needed when a new inode gets created */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
			need_alloc ? XFS_QM_QINOCREATE_SPACE_RES(mp) : 0,
			0, 0, &tp);
	if (error)
		return error;

	if (need_alloc) {
		struct xfs_icreate_args	args = {
			.mode		= S_IFREG,
			.flags		= XFS_ICREATE_UNLINKABLE,
		};
		xfs_ino_t	ino;

		error = xfs_dialloc(&tp, &args, &ino);
		if (!error)
			error = xfs_icreate(tp, ino, &args, ipp);
		if (error) {
			xfs_trans_cancel(tp);
			return error;
		}
		if (xfs_has_metadir(mp))
			xfs_metafile_set_iflag(tp, *ipp, metafile_type);
	}

	/*
	 * Make the changes in the superblock, and log those too.
	 * sbfields arg may contain fields other than *QUOTINO;
	 * VERSIONNUM for example.
	 */
	spin_lock(&mp->m_sb_lock);
	if (flags & XFS_QMOPT_SBVERSION) {
		ASSERT(!xfs_has_quota(mp));

		xfs_add_quota(mp);
		mp->m_sb.sb_uquotino = NULLFSINO;
		mp->m_sb.sb_gquotino = NULLFSINO;
		mp->m_sb.sb_pquotino = NULLFSINO;

		/* qflags will get updated fully _after_ quotacheck */
		mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
	}
	/* record the inode number in the field matching the quota type */
	if (flags & XFS_QMOPT_UQUOTA)
		mp->m_sb.sb_uquotino = (*ipp)->i_ino;
	else if (flags & XFS_QMOPT_GQUOTA)
		mp->m_sb.sb_gquotino = (*ipp)->i_ino;
	else
		mp->m_sb.sb_pquotino = (*ipp)->i_ino;
	spin_unlock(&mp->m_sb_lock);
	xfs_log_sb(tp);

	error = xfs_trans_commit(tp);
	if (error) {
		ASSERT(xfs_is_shutdown(mp));
		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
	}
	/*
	 * NOTE(review): a newly created inode is unlocked here, so presumably
	 * xfs_icreate() returns it with the ILOCK held exclusively - confirm.
	 * This runs even when the commit failed.
	 */
	if (need_alloc) {
		xfs_iunlock(*ipp, XFS_ILOCK_EXCL);
		xfs_finish_inode_setup(*ipp);
	}
	return error;
}
1044 
1045 
1046 STATIC void
1047 xfs_qm_reset_dqcounts(
1048 	struct xfs_mount	*mp,
1049 	struct xfs_buf		*bp,
1050 	xfs_dqid_t		id,
1051 	xfs_dqtype_t		type)
1052 {
1053 	struct xfs_dqblk	*dqb;
1054 	int			j;
1055 
1056 	trace_xfs_reset_dqcounts(bp, _RET_IP_);
1057 
1058 	/*
1059 	 * Reset all counters and timers. They'll be
1060 	 * started afresh by xfs_qm_quotacheck.
1061 	 */
1062 #ifdef DEBUG
1063 	j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) /
1064 		sizeof(struct xfs_dqblk);
1065 	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
1066 #endif
1067 	dqb = bp->b_addr;
1068 	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
1069 		struct xfs_disk_dquot	*ddq;
1070 
1071 		ddq = (struct xfs_disk_dquot *)&dqb[j];
1072 
1073 		/*
1074 		 * Do a sanity check, and if needed, repair the dqblk. Don't
1075 		 * output any warnings because it's perfectly possible to
1076 		 * find uninitialised dquot blks. See comment in
1077 		 * xfs_dquot_verify.
1078 		 */
1079 		if (xfs_dqblk_verify(mp, &dqb[j], id + j) ||
1080 		    (dqb[j].dd_diskdq.d_type & XFS_DQTYPE_REC_MASK) != type)
1081 			xfs_dqblk_repair(mp, &dqb[j], id + j, type);
1082 
1083 		/*
1084 		 * Reset type in case we are reusing group quota file for
1085 		 * project quotas or vice versa
1086 		 */
1087 		ddq->d_type = type;
1088 		ddq->d_bcount = 0;
1089 		ddq->d_icount = 0;
1090 		ddq->d_rtbcount = 0;
1091 
1092 		/*
1093 		 * dquot id 0 stores the default grace period and the maximum
1094 		 * warning limit that were set by the administrator, so we
1095 		 * should not reset them.
1096 		 */
1097 		if (ddq->d_id != 0) {
1098 			ddq->d_btimer = 0;
1099 			ddq->d_itimer = 0;
1100 			ddq->d_rtbtimer = 0;
1101 			ddq->d_bwarns = 0;
1102 			ddq->d_iwarns = 0;
1103 			ddq->d_rtbwarns = 0;
1104 			if (xfs_has_bigtime(mp))
1105 				ddq->d_type |= XFS_DQTYPE_BIGTIME;
1106 		}
1107 
1108 		if (xfs_has_crc(mp)) {
1109 			xfs_update_cksum((char *)&dqb[j],
1110 					 sizeof(struct xfs_dqblk),
1111 					 XFS_DQUOT_CRC_OFF);
1112 		}
1113 	}
1114 }
1115 
1116 STATIC int
1117 xfs_qm_reset_dqcounts_all(
1118 	struct xfs_mount	*mp,
1119 	xfs_dqid_t		firstid,
1120 	xfs_fsblock_t		bno,
1121 	xfs_filblks_t		blkcnt,
1122 	xfs_dqtype_t		type,
1123 	struct list_head	*buffer_list)
1124 {
1125 	struct xfs_buf		*bp;
1126 	int			error = 0;
1127 
1128 	ASSERT(blkcnt > 0);
1129 
1130 	/*
1131 	 * Blkcnt arg can be a very big number, and might even be
1132 	 * larger than the log itself. So, we have to break it up into
1133 	 * manageable-sized transactions.
1134 	 * Note that we don't start a permanent transaction here; we might
1135 	 * not be able to get a log reservation for the whole thing up front,
1136 	 * and we don't really care to either, because we just discard
1137 	 * everything if we were to crash in the middle of this loop.
1138 	 */
1139 	while (blkcnt--) {
1140 		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1141 			      XFS_FSB_TO_DADDR(mp, bno),
1142 			      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
1143 			      &xfs_dquot_buf_ops);
1144 
1145 		/*
1146 		 * CRC and validation errors will return a EFSCORRUPTED here. If
1147 		 * this occurs, re-read without CRC validation so that we can
1148 		 * repair the damage via xfs_qm_reset_dqcounts(). This process
1149 		 * will leave a trace in the log indicating corruption has
1150 		 * been detected.
1151 		 */
1152 		if (error == -EFSCORRUPTED) {
1153 			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1154 				      XFS_FSB_TO_DADDR(mp, bno),
1155 				      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
1156 				      NULL);
1157 		}
1158 
1159 		if (error)
1160 			break;
1161 
1162 		/*
1163 		 * A corrupt buffer might not have a verifier attached, so
1164 		 * make sure we have the correct one attached before writeback
1165 		 * occurs.
1166 		 */
1167 		bp->b_ops = &xfs_dquot_buf_ops;
1168 		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1169 		xfs_buf_delwri_queue(bp, buffer_list);
1170 		xfs_buf_relse(bp);
1171 
1172 		/* goto the next block. */
1173 		bno++;
1174 		firstid += mp->m_quotainfo->qi_dqperchunk;
1175 	}
1176 
1177 	return error;
1178 }
1179 
1180 /*
1181  * Iterate over all allocated dquot blocks in this quota inode, zeroing all
1182  * counters for every chunk of dquots that we find.
1183  */
1184 STATIC int
1185 xfs_qm_reset_dqcounts_buf(
1186 	struct xfs_mount	*mp,
1187 	struct xfs_inode	*qip,
1188 	xfs_dqtype_t		type,
1189 	struct list_head	*buffer_list)
1190 {
1191 	struct xfs_bmbt_irec	*map;
1192 	int			i, nmaps;	/* number of map entries */
1193 	int			error;		/* return value */
1194 	xfs_fileoff_t		lblkno;
1195 	xfs_filblks_t		maxlblkcnt;
1196 	xfs_dqid_t		firstid;
1197 	xfs_fsblock_t		rablkno;
1198 	xfs_filblks_t		rablkcnt;
1199 
1200 	error = 0;
1201 	/*
1202 	 * This looks racy, but we can't keep an inode lock across a
1203 	 * trans_reserve. But, this gets called during quotacheck, and that
1204 	 * happens only at mount time which is single threaded.
1205 	 */
1206 	if (qip->i_nblocks == 0)
1207 		return 0;
1208 
1209 	map = kmalloc(XFS_DQITER_MAP_SIZE * sizeof(*map),
1210 			GFP_KERNEL | __GFP_NOFAIL);
1211 
1212 	lblkno = 0;
1213 	maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
1214 	do {
1215 		uint		lock_mode;
1216 
1217 		nmaps = XFS_DQITER_MAP_SIZE;
1218 		/*
1219 		 * We aren't changing the inode itself. Just changing
1220 		 * some of its data. No new blocks are added here, and
1221 		 * the inode is never added to the transaction.
1222 		 */
1223 		lock_mode = xfs_ilock_data_map_shared(qip);
1224 		error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
1225 				       map, &nmaps, 0);
1226 		xfs_iunlock(qip, lock_mode);
1227 		if (error)
1228 			break;
1229 
1230 		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1231 		for (i = 0; i < nmaps; i++) {
1232 			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1233 			ASSERT(map[i].br_blockcount);
1234 
1235 
1236 			lblkno += map[i].br_blockcount;
1237 
1238 			if (map[i].br_startblock == HOLESTARTBLOCK)
1239 				continue;
1240 
1241 			firstid = (xfs_dqid_t) map[i].br_startoff *
1242 				mp->m_quotainfo->qi_dqperchunk;
1243 			/*
1244 			 * Do a read-ahead on the next extent.
1245 			 */
1246 			if ((i+1 < nmaps) &&
1247 			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1248 				rablkcnt =  map[i+1].br_blockcount;
1249 				rablkno = map[i+1].br_startblock;
1250 				while (rablkcnt--) {
1251 					xfs_buf_readahead(mp->m_ddev_targp,
1252 					       XFS_FSB_TO_DADDR(mp, rablkno),
1253 					       mp->m_quotainfo->qi_dqchunklen,
1254 					       &xfs_dquot_buf_ops);
1255 					rablkno++;
1256 				}
1257 			}
1258 			/*
1259 			 * Iterate thru all the blks in the extent and
1260 			 * reset the counters of all the dquots inside them.
1261 			 */
1262 			error = xfs_qm_reset_dqcounts_all(mp, firstid,
1263 						   map[i].br_startblock,
1264 						   map[i].br_blockcount,
1265 						   type, buffer_list);
1266 			if (error)
1267 				goto out;
1268 		}
1269 	} while (nmaps > 0);
1270 
1271 out:
1272 	kfree(map);
1273 	return error;
1274 }
1275 
1276 /*
1277  * Called by dqusage_adjust in doing a quotacheck.
1278  *
1279  * Given the inode, and a dquot id this updates both the incore dqout as well
1280  * as the buffer copy. This is so that once the quotacheck is done, we can
1281  * just log all the buffers, as opposed to logging numerous updates to
1282  * individual dquots.
1283  */
1284 STATIC int
1285 xfs_qm_quotacheck_dqadjust(
1286 	struct xfs_inode	*ip,
1287 	xfs_dqtype_t		type,
1288 	xfs_qcnt_t		nblks,
1289 	xfs_qcnt_t		rtblks)
1290 {
1291 	struct xfs_mount	*mp = ip->i_mount;
1292 	struct xfs_dquot	*dqp;
1293 	xfs_dqid_t		id;
1294 	int			error;
1295 
1296 	id = xfs_qm_id_for_quotatype(ip, type);
1297 	error = xfs_qm_dqget(mp, id, type, true, &dqp);
1298 	if (error) {
1299 		/*
1300 		 * Shouldn't be able to turn off quotas here.
1301 		 */
1302 		ASSERT(error != -ESRCH);
1303 		ASSERT(error != -ENOENT);
1304 		return error;
1305 	}
1306 
1307 	trace_xfs_dqadjust(dqp);
1308 
1309 	/*
1310 	 * Adjust the inode count and the block count to reflect this inode's
1311 	 * resource usage.
1312 	 */
1313 	dqp->q_ino.count++;
1314 	dqp->q_ino.reserved++;
1315 	if (nblks) {
1316 		dqp->q_blk.count += nblks;
1317 		dqp->q_blk.reserved += nblks;
1318 	}
1319 	if (rtblks) {
1320 		dqp->q_rtb.count += rtblks;
1321 		dqp->q_rtb.reserved += rtblks;
1322 	}
1323 
1324 	/*
1325 	 * Set default limits, adjust timers (since we changed usages)
1326 	 *
1327 	 * There are no timers for the default values set in the root dquot.
1328 	 */
1329 	if (dqp->q_id) {
1330 		xfs_qm_adjust_dqlimits(dqp);
1331 		xfs_qm_adjust_dqtimers(dqp);
1332 	}
1333 
1334 	dqp->q_flags |= XFS_DQFLAG_DIRTY;
1335 	xfs_qm_dqput(dqp);
1336 	return 0;
1337 }
1338 
1339 /*
1340  * callback routine supplied to bulkstat(). Given an inumber, find its
1341  * dquots and update them to account for resources taken by that inode.
1342  */
1343 /* ARGSUSED */
1344 STATIC int
1345 xfs_qm_dqusage_adjust(
1346 	struct xfs_mount	*mp,
1347 	struct xfs_trans	*tp,
1348 	xfs_ino_t		ino,
1349 	void			*data)
1350 {
1351 	struct xfs_inode	*ip;
1352 	xfs_filblks_t		nblks, rtblks;
1353 	unsigned int		lock_mode;
1354 	int			error;
1355 
1356 	ASSERT(XFS_IS_QUOTA_ON(mp));
1357 
1358 	/*
1359 	 * rootino must have its resources accounted for, not so with the quota
1360 	 * inodes.
1361 	 */
1362 	if (xfs_is_quota_inode(&mp->m_sb, ino))
1363 		return 0;
1364 
1365 	/*
1366 	 * We don't _need_ to take the ilock EXCL here because quotacheck runs
1367 	 * at mount time and therefore nobody will be racing chown/chproj.
1368 	 */
1369 	error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
1370 	if (error == -EINVAL || error == -ENOENT)
1371 		return 0;
1372 	if (error)
1373 		return error;
1374 
1375 	/*
1376 	 * Reload the incore unlinked list to avoid failure in inodegc.
1377 	 * Use an unlocked check here because unrecovered unlinked inodes
1378 	 * should be somewhat rare.
1379 	 */
1380 	if (xfs_inode_unlinked_incomplete(ip)) {
1381 		error = xfs_inode_reload_unlinked(ip);
1382 		if (error) {
1383 			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1384 			goto error0;
1385 		}
1386 	}
1387 
1388 	/* Metadata directory files are not accounted to user-visible quotas. */
1389 	if (xfs_is_metadir_inode(ip))
1390 		goto error0;
1391 
1392 	ASSERT(ip->i_delayed_blks == 0);
1393 
1394 	lock_mode = xfs_ilock_data_map_shared(ip);
1395 	if (XFS_IS_REALTIME_INODE(ip)) {
1396 		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
1397 		if (error) {
1398 			xfs_iunlock(ip, lock_mode);
1399 			goto error0;
1400 		}
1401 	}
1402 	xfs_inode_count_blocks(tp, ip, &nblks, &rtblks);
1403 	xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED);
1404 	xfs_iunlock(ip, lock_mode);
1405 
1406 	/*
1407 	 * Add the (disk blocks and inode) resources occupied by this
1408 	 * inode to its dquots. We do this adjustment in the incore dquot,
1409 	 * and also copy the changes to its buffer.
1410 	 * We don't care about putting these changes in a transaction
1411 	 * envelope because if we crash in the middle of a 'quotacheck'
1412 	 * we have to start from the beginning anyway.
1413 	 * Once we're done, we'll log all the dquot bufs.
1414 	 *
1415 	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1416 	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1417 	 */
1418 	if (XFS_IS_UQUOTA_ON(mp)) {
1419 		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_USER, nblks,
1420 				rtblks);
1421 		if (error)
1422 			goto error0;
1423 	}
1424 
1425 	if (XFS_IS_GQUOTA_ON(mp)) {
1426 		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_GROUP, nblks,
1427 				rtblks);
1428 		if (error)
1429 			goto error0;
1430 	}
1431 
1432 	if (XFS_IS_PQUOTA_ON(mp)) {
1433 		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_PROJ, nblks,
1434 				rtblks);
1435 		if (error)
1436 			goto error0;
1437 	}
1438 
1439 error0:
1440 	xfs_irele(ip);
1441 	return error;
1442 }
1443 
1444 STATIC int
1445 xfs_qm_flush_one(
1446 	struct xfs_dquot	*dqp,
1447 	void			*data)
1448 {
1449 	struct xfs_mount	*mp = dqp->q_mount;
1450 	struct list_head	*buffer_list = data;
1451 	struct xfs_buf		*bp = NULL;
1452 	int			error = 0;
1453 
1454 	xfs_dqlock(dqp);
1455 	if (dqp->q_flags & XFS_DQFLAG_FREEING)
1456 		goto out_unlock;
1457 	if (!XFS_DQ_IS_DIRTY(dqp))
1458 		goto out_unlock;
1459 
1460 	/*
1461 	 * The only way the dquot is already flush locked by the time quotacheck
1462 	 * gets here is if reclaim flushed it before the dqadjust walk dirtied
1463 	 * it for the final time. Quotacheck collects all dquot bufs in the
1464 	 * local delwri queue before dquots are dirtied, so reclaim can't have
1465 	 * possibly queued it for I/O. The only way out is to push the buffer to
1466 	 * cycle the flush lock.
1467 	 */
1468 	if (!xfs_dqflock_nowait(dqp)) {
1469 		/* buf is pinned in-core by delwri list */
1470 		error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
1471 				mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1472 		if (error)
1473 			goto out_unlock;
1474 
1475 		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
1476 			error = -EAGAIN;
1477 			xfs_buf_relse(bp);
1478 			goto out_unlock;
1479 		}
1480 		xfs_buf_unlock(bp);
1481 
1482 		xfs_buf_delwri_pushbuf(bp, buffer_list);
1483 		xfs_buf_rele(bp);
1484 
1485 		error = -EAGAIN;
1486 		goto out_unlock;
1487 	}
1488 
1489 	error = xfs_qm_dqflush(dqp, &bp);
1490 	if (error)
1491 		goto out_unlock;
1492 
1493 	xfs_buf_delwri_queue(bp, buffer_list);
1494 	xfs_buf_relse(bp);
1495 out_unlock:
1496 	xfs_dqunlock(dqp);
1497 	return error;
1498 }
1499 
1500 /*
1501  * Walk thru all the filesystem inodes and construct a consistent view
1502  * of the disk quota world. If the quotacheck fails, disable quotas.
1503  */
1504 STATIC int
1505 xfs_qm_quotacheck(
1506 	xfs_mount_t	*mp)
1507 {
1508 	int			error, error2;
1509 	uint			flags;
1510 	LIST_HEAD		(buffer_list);
1511 	struct xfs_inode	*uip = mp->m_quotainfo->qi_uquotaip;
1512 	struct xfs_inode	*gip = mp->m_quotainfo->qi_gquotaip;
1513 	struct xfs_inode	*pip = mp->m_quotainfo->qi_pquotaip;
1514 
1515 	flags = 0;
1516 
1517 	ASSERT(uip || gip || pip);
1518 	ASSERT(XFS_IS_QUOTA_ON(mp));
1519 
1520 	xfs_notice(mp, "Quotacheck needed: Please wait.");
1521 
1522 	/*
1523 	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1524 	 * their counters to zero. We need a clean slate.
1525 	 * We don't log our changes till later.
1526 	 */
1527 	if (uip) {
1528 		error = xfs_qm_reset_dqcounts_buf(mp, uip, XFS_DQTYPE_USER,
1529 					 &buffer_list);
1530 		if (error)
1531 			goto error_return;
1532 		flags |= XFS_UQUOTA_CHKD;
1533 	}
1534 
1535 	if (gip) {
1536 		error = xfs_qm_reset_dqcounts_buf(mp, gip, XFS_DQTYPE_GROUP,
1537 					 &buffer_list);
1538 		if (error)
1539 			goto error_return;
1540 		flags |= XFS_GQUOTA_CHKD;
1541 	}
1542 
1543 	if (pip) {
1544 		error = xfs_qm_reset_dqcounts_buf(mp, pip, XFS_DQTYPE_PROJ,
1545 					 &buffer_list);
1546 		if (error)
1547 			goto error_return;
1548 		flags |= XFS_PQUOTA_CHKD;
1549 	}
1550 
1551 	xfs_set_quotacheck_running(mp);
1552 	error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
1553 			NULL);
1554 	xfs_clear_quotacheck_running(mp);
1555 
1556 	/*
1557 	 * On error, the inode walk may have partially populated the dquot
1558 	 * caches.  We must purge them before disabling quota and tearing down
1559 	 * the quotainfo, or else the dquots will leak.
1560 	 */
1561 	if (error)
1562 		goto error_purge;
1563 
1564 	/*
1565 	 * We've made all the changes that we need to make incore.  Flush them
1566 	 * down to disk buffers if everything was updated successfully.
1567 	 */
1568 	if (XFS_IS_UQUOTA_ON(mp)) {
1569 		error = xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_flush_one,
1570 					  &buffer_list);
1571 	}
1572 	if (XFS_IS_GQUOTA_ON(mp)) {
1573 		error2 = xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_flush_one,
1574 					   &buffer_list);
1575 		if (!error)
1576 			error = error2;
1577 	}
1578 	if (XFS_IS_PQUOTA_ON(mp)) {
1579 		error2 = xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_flush_one,
1580 					   &buffer_list);
1581 		if (!error)
1582 			error = error2;
1583 	}
1584 
1585 	error2 = xfs_buf_delwri_submit(&buffer_list);
1586 	if (!error)
1587 		error = error2;
1588 
1589 	/*
1590 	 * We can get this error if we couldn't do a dquot allocation inside
1591 	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1592 	 * dirty dquots that might be cached, we just want to get rid of them
1593 	 * and turn quotaoff. The dquots won't be attached to any of the inodes
1594 	 * at this point (because we intentionally didn't in dqget_noattach).
1595 	 */
1596 	if (error)
1597 		goto error_purge;
1598 
1599 	/*
1600 	 * If one type of quotas is off, then it will lose its
1601 	 * quotachecked status, since we won't be doing accounting for
1602 	 * that type anymore.
1603 	 */
1604 	mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
1605 	mp->m_qflags |= flags;
1606 
1607 error_return:
1608 	xfs_buf_delwri_cancel(&buffer_list);
1609 
1610 	if (error) {
1611 		xfs_warn(mp,
1612 	"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1613 			error);
1614 		/*
1615 		 * We must turn off quotas.
1616 		 */
1617 		ASSERT(mp->m_quotainfo != NULL);
1618 		xfs_qm_destroy_quotainfo(mp);
1619 		if (xfs_mount_reset_sbqflags(mp)) {
1620 			xfs_warn(mp,
1621 				"Quotacheck: Failed to reset quota flags.");
1622 		}
1623 		xfs_fs_mark_sick(mp, XFS_SICK_FS_QUOTACHECK);
1624 	} else {
1625 		xfs_notice(mp, "Quotacheck: Done.");
1626 		xfs_fs_mark_healthy(mp, XFS_SICK_FS_QUOTACHECK);
1627 	}
1628 
1629 	return error;
1630 
1631 error_purge:
1632 	/*
1633 	 * On error, we may have inodes queued for inactivation. This may try
1634 	 * to attach dquots to the inode before running cleanup operations on
1635 	 * the inode and this can race with the xfs_qm_destroy_quotainfo() call
1636 	 * below that frees mp->m_quotainfo. To avoid this race, flush all the
1637 	 * pending inodegc operations before we purge the dquots from memory,
1638 	 * ensuring that background inactivation is idle whilst we turn off
1639 	 * quotas.
1640 	 */
1641 	xfs_inodegc_flush(mp);
1642 	xfs_qm_dqpurge_all(mp);
1643 	goto error_return;
1644 
1645 }
1646 
1647 /*
1648  * This is called from xfs_mountfs to start quotas and initialize all
1649  * necessary data structures like quotainfo.  This is also responsible for
1650  * running a quotacheck as necessary.  We are guaranteed that the superblock
1651  * is consistently read in at this point.
1652  *
1653  * If we fail here, the mount will continue with quota turned off. We don't
1654  * need to inidicate success or failure at all.
1655  */
1656 void
1657 xfs_qm_mount_quotas(
1658 	struct xfs_mount	*mp)
1659 {
1660 	int			error = 0;
1661 	uint			sbf;
1662 
1663 	/*
1664 	 * If quotas on realtime volumes is not supported, disable quotas
1665 	 * immediately.  We only support rtquota if rtgroups are enabled to
1666 	 * avoid problems with older kernels.
1667 	 */
1668 	if (mp->m_sb.sb_rextents && !xfs_has_rtgroups(mp)) {
1669 		xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
1670 		mp->m_qflags = 0;
1671 		goto write_changes;
1672 	}
1673 
1674 	ASSERT(XFS_IS_QUOTA_ON(mp));
1675 
1676 	/*
1677 	 * Allocate the quotainfo structure inside the mount struct, and
1678 	 * create quotainode(s), and change/rev superblock if necessary.
1679 	 */
1680 	error = xfs_qm_init_quotainfo(mp);
1681 	if (error) {
1682 		/*
1683 		 * We must turn off quotas.
1684 		 */
1685 		ASSERT(mp->m_quotainfo == NULL);
1686 		mp->m_qflags = 0;
1687 		goto write_changes;
1688 	}
1689 	/*
1690 	 * If any of the quotas are not consistent, do a quotacheck.
1691 	 */
1692 	if (XFS_QM_NEED_QUOTACHECK(mp)) {
1693 		error = xfs_qm_quotacheck(mp);
1694 		if (error) {
1695 			/* Quotacheck failed and disabled quotas. */
1696 			return;
1697 		}
1698 	}
1699 	/*
1700 	 * If one type of quotas is off, then it will lose its
1701 	 * quotachecked status, since we won't be doing accounting for
1702 	 * that type anymore.
1703 	 */
1704 	if (!XFS_IS_UQUOTA_ON(mp))
1705 		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
1706 	if (!XFS_IS_GQUOTA_ON(mp))
1707 		mp->m_qflags &= ~XFS_GQUOTA_CHKD;
1708 	if (!XFS_IS_PQUOTA_ON(mp))
1709 		mp->m_qflags &= ~XFS_PQUOTA_CHKD;
1710 
1711  write_changes:
1712 	/*
1713 	 * We actually don't have to acquire the m_sb_lock at all.
1714 	 * This can only be called from mount, and that's single threaded. XXX
1715 	 */
1716 	spin_lock(&mp->m_sb_lock);
1717 	sbf = mp->m_sb.sb_qflags;
1718 	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
1719 	spin_unlock(&mp->m_sb_lock);
1720 
1721 	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
1722 		if (xfs_sync_sb(mp, false)) {
1723 			/*
1724 			 * We could only have been turning quotas off.
1725 			 * We aren't in very good shape actually because
1726 			 * the incore structures are convinced that quotas are
1727 			 * off, but the on disk superblock doesn't know that !
1728 			 */
1729 			ASSERT(!(XFS_IS_QUOTA_ON(mp)));
1730 			xfs_alert(mp, "%s: Superblock update failed!",
1731 				__func__);
1732 		}
1733 	}
1734 
1735 	if (error) {
1736 		xfs_warn(mp, "Failed to initialize disk quotas, err %d.", error);
1737 		return;
1738 	}
1739 }
1740 
1741 /*
1742  * Load the inode for a given type of quota, assuming that the sb fields have
1743  * been sorted out.  This is not true when switching quota types on a V4
1744  * filesystem, so do not use this function for that.
1745  *
1746  * Returns -ENOENT if the quota inode field is NULLFSINO; 0 and an inode on
1747  * success; or a negative errno.
1748  */
1749 int
1750 xfs_qm_qino_load(
1751 	struct xfs_mount	*mp,
1752 	xfs_dqtype_t		type,
1753 	struct xfs_inode	**ipp)
1754 {
1755 	struct xfs_trans	*tp;
1756 	struct xfs_inode	*dp = NULL;
1757 	int			error;
1758 
1759 	error = xfs_trans_alloc_empty(mp, &tp);
1760 	if (error)
1761 		return error;
1762 
1763 	if (xfs_has_metadir(mp)) {
1764 		error = xfs_dqinode_load_parent(tp, &dp);
1765 		if (error)
1766 			goto out_cancel;
1767 	}
1768 
1769 	error = xfs_dqinode_load(tp, dp, type, ipp);
1770 	if (dp)
1771 		xfs_irele(dp);
1772 out_cancel:
1773 	xfs_trans_cancel(tp);
1774 	return error;
1775 }
1776 
1777 /*
1778  * This is called after the superblock has been read in and we're ready to
1779  * iget the quota inodes.
1780  */
1781 STATIC int
1782 xfs_qm_init_quotainos(
1783 	xfs_mount_t	*mp)
1784 {
1785 	struct xfs_inode	*uip = NULL;
1786 	struct xfs_inode	*gip = NULL;
1787 	struct xfs_inode	*pip = NULL;
1788 	int			error;
1789 	uint			flags = 0;
1790 
1791 	ASSERT(mp->m_quotainfo);
1792 
1793 	/*
1794 	 * Get the uquota and gquota inodes
1795 	 */
1796 	if (xfs_has_quota(mp)) {
1797 		if (XFS_IS_UQUOTA_ON(mp) &&
1798 		    mp->m_sb.sb_uquotino != NULLFSINO) {
1799 			ASSERT(mp->m_sb.sb_uquotino > 0);
1800 			error = xfs_qm_qino_load(mp, XFS_DQTYPE_USER, &uip);
1801 			if (error)
1802 				return error;
1803 		}
1804 		if (XFS_IS_GQUOTA_ON(mp) &&
1805 		    mp->m_sb.sb_gquotino != NULLFSINO) {
1806 			ASSERT(mp->m_sb.sb_gquotino > 0);
1807 			error = xfs_qm_qino_load(mp, XFS_DQTYPE_GROUP, &gip);
1808 			if (error)
1809 				goto error_rele;
1810 		}
1811 		if (XFS_IS_PQUOTA_ON(mp) &&
1812 		    mp->m_sb.sb_pquotino != NULLFSINO) {
1813 			ASSERT(mp->m_sb.sb_pquotino > 0);
1814 			error = xfs_qm_qino_load(mp, XFS_DQTYPE_PROJ, &pip);
1815 			if (error)
1816 				goto error_rele;
1817 		}
1818 	} else {
1819 		flags |= XFS_QMOPT_SBVERSION;
1820 	}
1821 
1822 	/*
1823 	 * Create the three inodes, if they don't exist already. The changes
1824 	 * made above will get added to a transaction and logged in one of
1825 	 * the qino_alloc calls below.  If the device is readonly,
1826 	 * temporarily switch to read-write to do this.
1827 	 */
1828 	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1829 		error = xfs_qm_qino_alloc(mp, &uip,
1830 					      flags | XFS_QMOPT_UQUOTA);
1831 		if (error)
1832 			goto error_rele;
1833 
1834 		flags &= ~XFS_QMOPT_SBVERSION;
1835 	}
1836 	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
1837 		error = xfs_qm_qino_alloc(mp, &gip,
1838 					  flags | XFS_QMOPT_GQUOTA);
1839 		if (error)
1840 			goto error_rele;
1841 
1842 		flags &= ~XFS_QMOPT_SBVERSION;
1843 	}
1844 	if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) {
1845 		error = xfs_qm_qino_alloc(mp, &pip,
1846 					  flags | XFS_QMOPT_PQUOTA);
1847 		if (error)
1848 			goto error_rele;
1849 	}
1850 
1851 	mp->m_quotainfo->qi_uquotaip = uip;
1852 	mp->m_quotainfo->qi_gquotaip = gip;
1853 	mp->m_quotainfo->qi_pquotaip = pip;
1854 
1855 	return 0;
1856 
1857 error_rele:
1858 	if (uip)
1859 		xfs_irele(uip);
1860 	if (gip)
1861 		xfs_irele(gip);
1862 	if (pip)
1863 		xfs_irele(pip);
1864 	return error;
1865 }
1866 
1867 STATIC void
1868 xfs_qm_dqfree_one(
1869 	struct xfs_dquot	*dqp)
1870 {
1871 	struct xfs_mount	*mp = dqp->q_mount;
1872 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
1873 
1874 	mutex_lock(&qi->qi_tree_lock);
1875 	radix_tree_delete(xfs_dquot_tree(qi, xfs_dquot_type(dqp)), dqp->q_id);
1876 
1877 	qi->qi_dquots--;
1878 	mutex_unlock(&qi->qi_tree_lock);
1879 
1880 	xfs_qm_dqdestroy(dqp);
1881 }
1882 
1883 /* --------------- utility functions for vnodeops ---------------- */
1884 
1885 
1886 /*
1887  * Given an inode, a uid, gid and prid make sure that we have
1888  * allocated relevant dquot(s) on disk, and that we won't exceed inode
1889  * quotas by creating this file.
1890  * This also attaches dquot(s) to the given inode after locking it,
1891  * and returns the dquots corresponding to the uid and/or gid.
1892  *
1893  * in	: inode (unlocked)
1894  * out	: udquot, gdquot with references taken and unlocked
1895  */
1896 int
1897 xfs_qm_vop_dqalloc(
1898 	struct xfs_inode	*ip,
1899 	kuid_t			uid,
1900 	kgid_t			gid,
1901 	prid_t			prid,
1902 	uint			flags,
1903 	struct xfs_dquot	**O_udqpp,
1904 	struct xfs_dquot	**O_gdqpp,
1905 	struct xfs_dquot	**O_pdqpp)
1906 {
1907 	struct xfs_mount	*mp = ip->i_mount;
1908 	struct inode		*inode = VFS_I(ip);
1909 	struct user_namespace	*user_ns = inode->i_sb->s_user_ns;
1910 	struct xfs_dquot	*uq = NULL;
1911 	struct xfs_dquot	*gq = NULL;
1912 	struct xfs_dquot	*pq = NULL;
1913 	int			error;
1914 	uint			lockflags;
1915 
1916 	if (!XFS_IS_QUOTA_ON(mp))
1917 		return 0;
1918 
1919 	ASSERT(!xfs_is_metadir_inode(ip));
1920 
1921 	lockflags = XFS_ILOCK_EXCL;
1922 	xfs_ilock(ip, lockflags);
1923 
1924 	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
1925 		gid = inode->i_gid;
1926 
1927 	/*
1928 	 * Attach the dquot(s) to this inode, doing a dquot allocation
1929 	 * if necessary. The dquot(s) will not be locked.
1930 	 */
1931 	if (XFS_NOT_DQATTACHED(mp, ip)) {
1932 		error = xfs_qm_dqattach_locked(ip, true);
1933 		if (error) {
1934 			xfs_iunlock(ip, lockflags);
1935 			return error;
1936 		}
1937 	}
1938 
1939 	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
1940 		ASSERT(O_udqpp);
1941 		if (!uid_eq(inode->i_uid, uid)) {
1942 			/*
1943 			 * What we need is the dquot that has this uid, and
1944 			 * if we send the inode to dqget, the uid of the inode
1945 			 * takes priority over what's sent in the uid argument.
1946 			 * We must unlock inode here before calling dqget if
1947 			 * we're not sending the inode, because otherwise
1948 			 * we'll deadlock by doing trans_reserve while
1949 			 * holding ilock.
1950 			 */
1951 			xfs_iunlock(ip, lockflags);
1952 			error = xfs_qm_dqget(mp, from_kuid(user_ns, uid),
1953 					XFS_DQTYPE_USER, true, &uq);
1954 			if (error) {
1955 				ASSERT(error != -ENOENT);
1956 				return error;
1957 			}
1958 			/*
1959 			 * Get the ilock in the right order.
1960 			 */
1961 			xfs_dqunlock(uq);
1962 			lockflags = XFS_ILOCK_SHARED;
1963 			xfs_ilock(ip, lockflags);
1964 		} else {
1965 			/*
1966 			 * Take an extra reference, because we'll return
1967 			 * this to caller
1968 			 */
1969 			ASSERT(ip->i_udquot);
1970 			uq = xfs_qm_dqhold(ip->i_udquot);
1971 		}
1972 	}
1973 	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
1974 		ASSERT(O_gdqpp);
1975 		if (!gid_eq(inode->i_gid, gid)) {
1976 			xfs_iunlock(ip, lockflags);
1977 			error = xfs_qm_dqget(mp, from_kgid(user_ns, gid),
1978 					XFS_DQTYPE_GROUP, true, &gq);
1979 			if (error) {
1980 				ASSERT(error != -ENOENT);
1981 				goto error_rele;
1982 			}
1983 			xfs_dqunlock(gq);
1984 			lockflags = XFS_ILOCK_SHARED;
1985 			xfs_ilock(ip, lockflags);
1986 		} else {
1987 			ASSERT(ip->i_gdquot);
1988 			gq = xfs_qm_dqhold(ip->i_gdquot);
1989 		}
1990 	}
1991 	if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
1992 		ASSERT(O_pdqpp);
1993 		if (ip->i_projid != prid) {
1994 			xfs_iunlock(ip, lockflags);
1995 			error = xfs_qm_dqget(mp, prid,
1996 					XFS_DQTYPE_PROJ, true, &pq);
1997 			if (error) {
1998 				ASSERT(error != -ENOENT);
1999 				goto error_rele;
2000 			}
2001 			xfs_dqunlock(pq);
2002 			lockflags = XFS_ILOCK_SHARED;
2003 			xfs_ilock(ip, lockflags);
2004 		} else {
2005 			ASSERT(ip->i_pdquot);
2006 			pq = xfs_qm_dqhold(ip->i_pdquot);
2007 		}
2008 	}
2009 	trace_xfs_dquot_dqalloc(ip);
2010 
2011 	xfs_iunlock(ip, lockflags);
2012 	if (O_udqpp)
2013 		*O_udqpp = uq;
2014 	else
2015 		xfs_qm_dqrele(uq);
2016 	if (O_gdqpp)
2017 		*O_gdqpp = gq;
2018 	else
2019 		xfs_qm_dqrele(gq);
2020 	if (O_pdqpp)
2021 		*O_pdqpp = pq;
2022 	else
2023 		xfs_qm_dqrele(pq);
2024 	return 0;
2025 
2026 error_rele:
2027 	xfs_qm_dqrele(gq);
2028 	xfs_qm_dqrele(uq);
2029 	return error;
2030 }
2031 
2032 /*
2033  * Actually transfer ownership, and do dquot modifications.
2034  * These were already reserved.
2035  */
2036 struct xfs_dquot *
2037 xfs_qm_vop_chown(
2038 	struct xfs_trans	*tp,
2039 	struct xfs_inode	*ip,
2040 	struct xfs_dquot	**IO_olddq,
2041 	struct xfs_dquot	*newdq)
2042 {
2043 	struct xfs_dquot	*prevdq;
2044 	xfs_filblks_t		dblocks, rblocks;
2045 	bool			isrt = XFS_IS_REALTIME_INODE(ip);
2046 
2047 	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
2048 	ASSERT(XFS_IS_QUOTA_ON(ip->i_mount));
2049 	ASSERT(!xfs_is_metadir_inode(ip));
2050 
2051 	/* old dquot */
2052 	prevdq = *IO_olddq;
2053 	ASSERT(prevdq);
2054 	ASSERT(prevdq != newdq);
2055 
2056 	xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);
2057 
2058 	xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_BCOUNT,
2059 			-(xfs_qcnt_t)dblocks);
2060 	xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_RTBCOUNT,
2061 			-(xfs_qcnt_t)rblocks);
2062 	xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2063 
2064 	/* the sparkling new dquot */
2065 	xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_BCOUNT, dblocks);
2066 	xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_RTBCOUNT, rblocks);
2067 	xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2068 
2069 	/*
2070 	 * Back when we made quota reservations for the chown, we reserved the
2071 	 * ondisk blocks + delalloc blocks with the new dquot.  Now that we've
2072 	 * switched the dquots, decrease the new dquot's block reservation
2073 	 * (having already bumped up the real counter) so that we don't have
2074 	 * any reservation to give back when we commit.
2075 	 */
2076 	xfs_trans_mod_dquot(tp, newdq,
2077 			isrt ? XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
2078 			-ip->i_delayed_blks);
2079 
2080 	/*
2081 	 * Give the incore reservation for delalloc blocks back to the old
2082 	 * dquot.  We don't normally handle delalloc quota reservations
2083 	 * transactionally, so just lock the dquot and subtract from the
2084 	 * reservation.  Dirty the transaction because it's too late to turn
2085 	 * back now.
2086 	 */
2087 	tp->t_flags |= XFS_TRANS_DIRTY;
2088 	xfs_dqlock(prevdq);
2089 	if (isrt) {
2090 		ASSERT(prevdq->q_rtb.reserved >= ip->i_delayed_blks);
2091 		prevdq->q_rtb.reserved -= ip->i_delayed_blks;
2092 	} else {
2093 		ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
2094 		prevdq->q_blk.reserved -= ip->i_delayed_blks;
2095 	}
2096 	xfs_dqunlock(prevdq);
2097 
2098 	/*
2099 	 * Take an extra reference, because the inode is going to keep
2100 	 * this dquot pointer even after the trans_commit.
2101 	 */
2102 	*IO_olddq = xfs_qm_dqhold(newdq);
2103 
2104 	return prevdq;
2105 }
2106 
2107 int
2108 xfs_qm_vop_rename_dqattach(
2109 	struct xfs_inode	**i_tab)
2110 {
2111 	struct xfs_mount	*mp = i_tab[0]->i_mount;
2112 	int			i;
2113 
2114 	if (!XFS_IS_QUOTA_ON(mp))
2115 		return 0;
2116 
2117 	for (i = 0; (i < 4 && i_tab[i]); i++) {
2118 		struct xfs_inode	*ip = i_tab[i];
2119 		int			error;
2120 
2121 		/*
2122 		 * Watch out for duplicate entries in the table.
2123 		 */
2124 		if (i == 0 || ip != i_tab[i-1]) {
2125 			if (XFS_NOT_DQATTACHED(mp, ip)) {
2126 				error = xfs_qm_dqattach(ip);
2127 				if (error)
2128 					return error;
2129 			}
2130 		}
2131 	}
2132 	return 0;
2133 }
2134 
2135 void
2136 xfs_qm_vop_create_dqattach(
2137 	struct xfs_trans	*tp,
2138 	struct xfs_inode	*ip,
2139 	struct xfs_dquot	*udqp,
2140 	struct xfs_dquot	*gdqp,
2141 	struct xfs_dquot	*pdqp)
2142 {
2143 	struct xfs_mount	*mp = tp->t_mountp;
2144 
2145 	if (!XFS_IS_QUOTA_ON(mp))
2146 		return;
2147 
2148 	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
2149 	ASSERT(!xfs_is_metadir_inode(ip));
2150 
2151 	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
2152 		ASSERT(ip->i_udquot == NULL);
2153 		ASSERT(i_uid_read(VFS_I(ip)) == udqp->q_id);
2154 
2155 		ip->i_udquot = xfs_qm_dqhold(udqp);
2156 	}
2157 	if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
2158 		ASSERT(ip->i_gdquot == NULL);
2159 		ASSERT(i_gid_read(VFS_I(ip)) == gdqp->q_id);
2160 
2161 		ip->i_gdquot = xfs_qm_dqhold(gdqp);
2162 	}
2163 	if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
2164 		ASSERT(ip->i_pdquot == NULL);
2165 		ASSERT(ip->i_projid == pdqp->q_id);
2166 
2167 		ip->i_pdquot = xfs_qm_dqhold(pdqp);
2168 	}
2169 
2170 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, 1);
2171 }
2172 
2173 /* Decide if this inode's dquot is near an enforcement boundary. */
2174 bool
2175 xfs_inode_near_dquot_enforcement(
2176 	struct xfs_inode	*ip,
2177 	xfs_dqtype_t		type)
2178 {
2179 	struct xfs_dquot	*dqp;
2180 	struct xfs_dquot_res	*res;
2181 	struct xfs_dquot_pre	*pre;
2182 	int64_t			freesp;
2183 
2184 	/* We only care for quotas that are enabled and enforced. */
2185 	dqp = xfs_inode_dquot(ip, type);
2186 	if (!dqp || !xfs_dquot_is_enforced(dqp))
2187 		return false;
2188 
2189 	if (xfs_dquot_res_over_limits(&dqp->q_ino) ||
2190 	    xfs_dquot_res_over_limits(&dqp->q_blk) ||
2191 	    xfs_dquot_res_over_limits(&dqp->q_rtb))
2192 		return true;
2193 
2194 	if (XFS_IS_REALTIME_INODE(ip)) {
2195 		res = &dqp->q_rtb;
2196 		pre = &dqp->q_rtb_prealloc;
2197 	} else {
2198 		res = &dqp->q_blk;
2199 		pre = &dqp->q_blk_prealloc;
2200 	}
2201 
2202 	/* For space on the data device, check the various thresholds. */
2203 	if (!pre->q_prealloc_hi_wmark)
2204 		return false;
2205 
2206 	if (res->reserved < pre->q_prealloc_lo_wmark)
2207 		return false;
2208 
2209 	if (res->reserved >= pre->q_prealloc_hi_wmark)
2210 		return true;
2211 
2212 	freesp = pre->q_prealloc_hi_wmark - res->reserved;
2213 	if (freesp < pre->q_low_space[XFS_QLOWSP_5_PCNT])
2214 		return true;
2215 
2216 	return false;
2217 }
2218