xref: /illumos-gate/usr/src/uts/common/fs/ufs/quota.c (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 /*
44  * Code pertaining to management of the in-core data structures.
45  */
46 #include <sys/types.h>
47 #include <sys/t_lock.h>
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/signal.h>
51 #include <sys/errno.h>
52 #include <sys/user.h>
53 #include <sys/proc.h>
54 #include <sys/vfs.h>
55 #include <sys/vnode.h>
56 #include <sys/uio.h>
57 #include <sys/buf.h>
58 #include <sys/fs/ufs_fs.h>
59 #include <sys/fs/ufs_inode.h>
60 #include <sys/fs/ufs_quota.h>
61 #include <sys/cmn_err.h>
62 #include <sys/kmem.h>
63 #include <sys/debug.h>
64 #include <sys/file.h>
65 #include <sys/fs/ufs_panic.h>
66 #include <sys/var.h>
67 
68 
69 /*
70  * Dquot in core hash chain headers
71  */
72 struct	dqhead	dqhead[NDQHASH];
73 
74 static kmutex_t dq_cachelock;
75 static kmutex_t dq_freelock;
76 
77 krwlock_t dq_rwlock;
78 
79 /*
80  * Dquot free list.
81  */
82 struct dquot dqfreelist;
83 
/*
 * Link DQP in at the head of the dquot free list, i.e. directly after
 * the dqfreelist sentinel.  dq_freelock is acquired and released by the
 * macro itself.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the body of an unbraced
 * if/else.
 */
#define	dqinsheadfree(DQP) do { \
	mutex_enter(&dq_freelock); \
	(DQP)->dq_freef = dqfreelist.dq_freef; \
	(DQP)->dq_freeb = &dqfreelist; \
	dqfreelist.dq_freef->dq_freeb = (DQP); \
	dqfreelist.dq_freef = (DQP); \
	mutex_exit(&dq_freelock); \
} while (0)
92 
/*
 * Link DQP in at the tail of the dquot free list, i.e. directly before
 * the dqfreelist sentinel.  dq_freelock is acquired and released by the
 * macro itself.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the body of an unbraced
 * if/else.
 */
#define	dqinstailfree(DQP) do { \
	mutex_enter(&dq_freelock); \
	(DQP)->dq_freeb = dqfreelist.dq_freeb; \
	(DQP)->dq_freef = &dqfreelist; \
	dqfreelist.dq_freeb->dq_freef = (DQP); \
	dqfreelist.dq_freeb = (DQP); \
	mutex_exit(&dq_freelock); \
} while (0)
101 
/*
 * Unlink DQP from the dquot free list.  Unlike the insert macros this
 * one does NOT take dq_freelock; every caller in this file wraps it in
 * mutex_enter(&dq_freelock) / mutex_exit(&dq_freelock).
 *
 * The link pointers are cleared afterwards to make sure we don't use
 * them (catch problems early); a dquot with NULL dq_freef/dq_freeb is
 * therefore known not to be on the free list.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the body of an unbraced
 * if/else.
 */
#define	dqremfree(DQP) do { \
	(DQP)->dq_freeb->dq_freef = (DQP)->dq_freef; \
	(DQP)->dq_freef->dq_freeb = (DQP)->dq_freeb; \
	(DQP)->dq_freef = (DQP)->dq_freeb = NULL; \
} while (0)
108 
/* Shorthand used when casting list heads and sentinels to a dquot pointer. */
typedef struct dquot *DQptr;
110 
111 /*
112  * Initialize quota sub-system init lock.
113  */
114 void
115 qtinit()
116 {
117 	rw_init(&dq_rwlock, NULL, RW_DEFAULT, NULL);
118 }
119 
120 /*
121  * qtinit2 allocated space for the quota structures.  Only do this if
122  * if quotas are going to be used so that we can save the space if quotas
123  * aren't used.
124  */
125 void
126 qtinit2(void)
127 {
128 	register struct dqhead *dhp;
129 	register struct dquot *dqp;
130 
131 	ASSERT(RW_WRITE_HELD(&dq_rwlock));
132 
133 	if (ndquot == 0)
134 		ndquot = ((maxusers * NMOUNT) / 4) + v.v_proc;
135 
136 	dquot = kmem_zalloc(ndquot * sizeof (struct dquot), KM_SLEEP);
137 	dquotNDQUOT = dquot + ndquot;
138 
139 	/*
140 	 * Initialize the cache between the in-core structures
141 	 * and the per-file system quota files on disk.
142 	 */
143 	for (dhp = &dqhead[0]; dhp < &dqhead[NDQHASH]; dhp++) {
144 		dhp->dqh_forw = dhp->dqh_back = (DQptr)dhp;
145 	}
146 	dqfreelist.dq_freef = dqfreelist.dq_freeb = (DQptr)&dqfreelist;
147 	for (dqp = dquot; dqp < dquotNDQUOT; dqp++) {
148 		mutex_init(&dqp->dq_lock, NULL, MUTEX_DEFAULT, NULL);
149 		dqp->dq_forw = dqp->dq_back = dqp;
150 		dqinsheadfree(dqp);
151 	}
152 }
153 
154 /*
155  * Obtain the user's on-disk quota limit for file system specified.
156  * dqpp is returned locked.
157  */
158 int
159 getdiskquota(
160 	uid_t uid,
161 	struct ufsvfs *ufsvfsp,
162 	int force,			/* don't do enable checks */
163 	struct dquot **dqpp)		/* resulting dquot ptr */
164 {
165 	struct dquot *dqp;
166 	struct dqhead *dhp;
167 	struct inode *qip;
168 	int error;
169 	extern struct cred *kcred;
170 	daddr_t	bn;
171 	int contig;
172 	int err;
173 
174 	ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
175 
176 	dhp = &dqhead[DQHASH(uid, ufsvfsp)];
177 loop:
178 	/*
179 	 * Check for quotas enabled.
180 	 */
181 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0 && !force)
182 		return (ESRCH);
183 	qip = ufsvfsp->vfs_qinod;
184 	if (!qip)
185 		return (ufs_fault(ufsvfsp->vfs_root, "getdiskquota: NULL qip"));
186 	/*
187 	 * Check the cache first.
188 	 */
189 	mutex_enter(&dq_cachelock);
190 	for (dqp = dhp->dqh_forw; dqp != (DQptr)dhp; dqp = dqp->dq_forw) {
191 		if (dqp->dq_uid != uid || dqp->dq_ufsvfsp != ufsvfsp)
192 			continue;
193 		mutex_exit(&dq_cachelock);
194 		mutex_enter(&dqp->dq_lock);
195 		/*
196 		 * I may have slept in the mutex_enter.  Make sure this is
197 		 * still the one I want.
198 		 */
199 		if (dqp->dq_uid != uid || dqp->dq_ufsvfsp != ufsvfsp) {
200 			mutex_exit(&dqp->dq_lock);
201 			goto loop;
202 		}
203 		if (dqp->dq_flags & DQ_ERROR) {
204 			mutex_exit(&dqp->dq_lock);
205 			return (EINVAL);
206 		}
207 		/*
208 		 * Cache hit with no references.
209 		 * Take the structure off the free list.
210 		 */
211 		if (dqp->dq_cnt == 0) {
212 			mutex_enter(&dq_freelock);
213 			dqremfree(dqp);
214 			mutex_exit(&dq_freelock);
215 		}
216 		dqp->dq_cnt++;
217 		mutex_exit(&dqp->dq_lock);
218 		*dqpp = dqp;
219 		return (0);
220 	}
221 	/*
222 	 * Not in cache.
223 	 * Get dquot at head of free list.
224 	 */
225 	mutex_enter(&dq_freelock);
226 	if ((dqp = dqfreelist.dq_freef) == &dqfreelist) {
227 		mutex_exit(&dq_freelock);
228 		mutex_exit(&dq_cachelock);
229 		cmn_err(CE_WARN, "dquot table full");
230 		return (EUSERS);
231 	}
232 
233 	if (dqp->dq_cnt != 0 || dqp->dq_flags != 0) {
234 		panic("getdiskquota: dqp->dq_cnt: "
235 		    "%ld != 0 || dqp->dq_flags: 0x%x != 0 (%s)",
236 		    dqp->dq_cnt, dqp->dq_flags, qip->i_fs->fs_fsmnt);
237 		/*NOTREACHED*/
238 	}
239 	/*
240 	 * Take it off the free list, and off the hash chain it was on.
241 	 * Then put it on the new hash chain.
242 	 */
243 	dqremfree(dqp);
244 	mutex_exit(&dq_freelock);
245 	remque(dqp);
246 	dqp->dq_cnt = 1;
247 	dqp->dq_uid = uid;
248 	dqp->dq_ufsvfsp = ufsvfsp;
249 	dqp->dq_mof = UFS_HOLE;
250 	mutex_enter(&dqp->dq_lock);
251 	insque(dqp, dhp);
252 	mutex_exit(&dq_cachelock);
253 	/*
254 	 * Check the uid in case it's too large to fit into the 2Gbyte
255 	 * 'quotas' file (higher than 67 million or so).
256 	 */
257 
258 	/*
259 	 * Large Files: i_size need to be accessed atomically now.
260 	 */
261 	rw_enter(&qip->i_contents, RW_READER);
262 	if (uid >= 0 && dqoff(uid) >= 0 && dqoff(uid) < qip->i_size) {
263 		/*
264 		 * This could almost be a static comparison with UID_MAX,
265 		 * but we keep the ASSERT here to document the restriction
266 		 * inherent in this simplistic database.
267 		 */
268 		ASSERT((u_offset_t)uid <
269 		    UFS_MAXOFFSET_T / sizeof (struct dqblk));
270 
271 		/*
272 		 * Read quota info off disk.
273 		 */
274 		error = ufs_rdwri(UIO_READ, FREAD, qip, (caddr_t)&dqp->dq_dqb,
275 		    sizeof (struct dqblk), dqoff(uid), UIO_SYSSPACE,
276 		    (int *)NULL, kcred);
277 		/*
278 		 * We must set the dq_mof even if not we are not logging in case
279 		 * we are later remount to logging.
280 		 */
281 		err = bmap_read(qip, dqoff(uid), &bn, &contig);
282 		rw_exit(&qip->i_contents);
283 		if ((bn != UFS_HOLE) && !err) {
284 			dqp->dq_mof = ldbtob(bn) +
285 			(offset_t)(dqoff(uid) & (DEV_BSIZE - 1));
286 		} else {
287 			dqp->dq_mof = UFS_HOLE;
288 		}
289 		if (error) {
290 			/*
291 			 * I/O error in reading quota file.
292 			 * Put dquot on a private, unfindable hash list,
293 			 * put dquot at the head of the free list and
294 			 * reflect the problem to caller.
295 			 */
296 			dqp->dq_flags = DQ_ERROR;
297 			/*
298 			 * I must exit the dq_lock so that I can acquire the
299 			 * dq_cachelock.  If another thread finds dqp before
300 			 * I remove it from the cache it will see the
301 			 * DQ_ERROR and just return EIO.
302 			 */
303 			mutex_exit(&dqp->dq_lock);
304 			mutex_enter(&dq_cachelock);
305 			mutex_enter(&dqp->dq_lock);
306 			remque(dqp);
307 			mutex_exit(&dqp->dq_lock);
308 			mutex_exit(&dq_cachelock);
309 			/*
310 			 * Don't bother reacquiring dq_lock because the dq is
311 			 * not on the freelist or in the cache so only I have
312 			 * access to it.
313 			 */
314 			dqp->dq_cnt = 0;
315 			dqp->dq_ufsvfsp = NULL;
316 			dqp->dq_forw = dqp;
317 			dqp->dq_back = dqp;
318 			dqp->dq_mof = UFS_HOLE;
319 			dqp->dq_flags = 0;
320 			dqinsheadfree(dqp);
321 			return (EIO);
322 		}
323 	} else {
324 		rw_exit(&qip->i_contents);	/* done with i_size */
325 		bzero(&dqp->dq_dqb, sizeof (struct dqblk));
326 		dqp->dq_mof = UFS_HOLE;
327 	}
328 	mutex_exit(&dqp->dq_lock);
329 	*dqpp = dqp;
330 	return (0);
331 }
332 
333 /*
334  * Release dquot.
335  */
336 void
337 dqput(dqp)
338 	register struct dquot *dqp;
339 {
340 
341 	ASSERT(dqp->dq_ufsvfsp == NULL ||
342 		RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
343 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
344 	if (dqp->dq_cnt == 0) {
345 		(void) ufs_fault(
346 			dqp->dq_ufsvfsp && dqp->dq_ufsvfsp->vfs_root?
347 			dqp->dq_ufsvfsp->vfs_root: NULL,
348 						    "dqput: dqp->dq_cnt == 0");
349 		return;
350 	}
351 	if (--dqp->dq_cnt == 0) {
352 		if (dqp->dq_flags & DQ_MOD)
353 			dqupdate(dqp);
354 		/*
355 		 * DQ_MOD was cleared by dqupdate().
356 		 * DQ_ERROR shouldn't be set if this dquot was being used.
357 		 * DQ_FILES/DQ_BLKS don't matter at this point.
358 		 */
359 		dqp->dq_flags = 0;
360 		if (dqp->dq_ufsvfsp == NULL ||
361 		    dqp->dq_ufsvfsp->vfs_qflags == 0) {
362 			/* quotas are disabled, discard this dquot struct */
363 			dqinval(dqp);
364 		} else
365 			dqinstailfree(dqp);
366 	}
367 }
368 
369 /*
370  * Update on disk quota info.
371  */
372 void
373 dqupdate(dqp)
374 	register struct dquot *dqp;
375 {
376 	register struct inode *qip;
377 	extern struct cred *kcred;
378 	struct ufsvfs	*ufsvfsp;
379 	int		newtrans	= 0;
380 	struct vnode	*vfs_root;
381 
382 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
383 
384 	if (!dqp->dq_ufsvfsp) {
385 		(void) ufs_fault(NULL, "dqupdate: NULL dq_ufsvfsp");
386 		return;
387 	}
388 	vfs_root = dqp->dq_ufsvfsp->vfs_root;
389 	if (!vfs_root) {
390 		(void) ufs_fault(NULL, "dqupdate: NULL vfs_root");
391 		return;
392 	}
393 	/*
394 	 * I don't need to hold dq_rwlock when looking at vfs_qinod here
395 	 * because vfs_qinod is only cleared by closedq after it has called
396 	 * dqput on all dq's.  Since I am holding dq_lock on this dq, closedq
397 	 * will have to wait until I am done before it can call dqput on
398 	 * this dq so vfs_qinod will not change value until after I return.
399 	 */
400 	qip = dqp->dq_ufsvfsp->vfs_qinod;
401 	if (!qip) {
402 		(void) ufs_fault(vfs_root, "dqupdate: NULL vfs_qinod");
403 		return;
404 	}
405 	ufsvfsp = qip->i_ufsvfs;
406 	if (!ufsvfsp) {
407 		(void) ufs_fault(vfs_root,
408 				    "dqupdate: NULL vfs_qinod->i_ufsvfs");
409 		return;
410 	}
411 	if (ufsvfsp != dqp->dq_ufsvfsp) {
412 		(void) ufs_fault(vfs_root,
413 			    "dqupdate: vfs_qinod->i_ufsvfs != dqp->dq_ufsvfsp");
414 		return;
415 	}
416 	if (!(dqp->dq_flags & DQ_MOD)) {
417 		(void) ufs_fault(vfs_root,
418 				    "dqupdate: !(dqp->dq_flags & DQ_MOD)");
419 		return;
420 	}
421 
422 	if (!(curthread->t_flag & T_DONTBLOCK)) {
423 		newtrans++;
424 		curthread->t_flag |= T_DONTBLOCK;
425 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
426 	}
427 	if (TRANS_ISTRANS(ufsvfsp)) {
428 		TRANS_DELTA(ufsvfsp, dqp->dq_mof, sizeof (struct dqblk),
429 		    DT_QR, 0, 0);
430 		TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb, dqp->dq_mof,
431 		    (int)(sizeof (struct dqblk)), NULL, 0);
432 	} else {
433 		/*
434 		 * Locknest gets very confused when I lock the quota inode.
435 		 * It thinks that qip and ip (the inode that caused the
436 		 * quota routines to get called) are the same inode.
437 		 */
438 		rw_enter(&qip->i_contents, RW_WRITER);
439 		/*
440 		 * refuse to push if offset would be illegal
441 		 */
442 		if (dqoff(dqp->dq_uid) >= 0) {
443 			(void) ufs_rdwri(UIO_WRITE, FWRITE, qip,
444 					(caddr_t)&dqp->dq_dqb,
445 					sizeof (struct dqblk),
446 					dqoff(dqp->dq_uid), UIO_SYSSPACE,
447 					(int *)NULL, kcred);
448 		}
449 		rw_exit(&qip->i_contents);
450 	}
451 
452 	dqp->dq_flags &= ~DQ_MOD;
453 	if (newtrans) {
454 		TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
455 		curthread->t_flag &= ~T_DONTBLOCK;
456 	}
457 }
458 
459 /*
460  * Invalidate a dquot.  This function is called when quotas are disabled
461  * for a specific file system via closedq() or when we unmount the file
462  * system and invalidate the quota cache via invalidatedq().
463  *
464  * Take the dquot off its hash list and put it on a private, unfindable
465  * hash list (refers to itself). Also, put it at the head of the free list.
466  * Note that even though dq_cnt is zero, this dquot is NOT yet on the
467  * freelist.
468  */
469 void
470 dqinval(dqp)
471 	register struct dquot *dqp;
472 {
473 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
474 	ASSERT(dqp->dq_cnt == 0);
475 	ASSERT(dqp->dq_flags == 0);
476 	ASSERT(dqp->dq_freef == NULL && dqp->dq_freeb == NULL);
477 	ASSERT(dqp->dq_ufsvfsp &&
478 		(dqp->dq_ufsvfsp->vfs_qflags & MQ_ENABLED) == 0);
479 
480 	/*
481 	 * To preserve lock order, we have to drop dq_lock in order to
482 	 * grab dq_cachelock.  To prevent someone from grabbing this
483 	 * dquot from the quota cache via getdiskquota() while we are
484 	 * "unsafe", we clear dq_ufsvfsp so it won't match anything.
485 	 */
486 	dqp->dq_ufsvfsp = NULL;
487 	mutex_exit(&dqp->dq_lock);
488 	mutex_enter(&dq_cachelock);
489 	mutex_enter(&dqp->dq_lock);
490 
491 	/*
492 	 * The following paranoia is to make sure that getdiskquota()
493 	 * has not been broken:
494 	 */
495 	ASSERT(dqp->dq_cnt == 0);
496 	ASSERT(dqp->dq_flags == 0);
497 	ASSERT(dqp->dq_freef == NULL && dqp->dq_freeb == NULL);
498 	ASSERT(dqp->dq_ufsvfsp == NULL);
499 
500 	/*
501 	 * Now we have the locks in the right order so we can do the
502 	 * rest of the work.
503 	 */
504 	remque(dqp);
505 	mutex_exit(&dq_cachelock);
506 	dqp->dq_forw = dqp;
507 	dqp->dq_back = dqp;
508 	dqinsheadfree(dqp);
509 }
510 
511 /*
512  * Invalidate all quota information records for the specified file system.
513  */
514 void
515 invalidatedq(ufsvfsp)
516 	register struct ufsvfs *ufsvfsp;
517 {
518 	register struct dquot *dqp;
519 
520 
521 	/*
522 	 * If quotas are not initialized, then there is nothing to do.
523 	 */
524 	rw_enter(&dq_rwlock, RW_READER);
525 	if (!quotas_initialized) {
526 		rw_exit(&dq_rwlock);
527 		return;
528 	}
529 	rw_exit(&dq_rwlock);
530 
531 
532 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);
533 
534 	ASSERT((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0);
535 
536 	/*
537 	 * Invalidate all the quota info records for this file system
538 	 * that are in the quota cache:
539 	 */
540 	for (dqp = dquot; dqp < dquotNDQUOT; dqp++) {
541 		/*
542 		 * If someone else has it, then ignore it. For the target
543 		 * file system, this is okay for three reasons:
544 		 *
545 		 * 1) This routine is called after closedq() so the quota
546 		 *    sub-system is disabled for this file system.
547 		 * 2) We have made the quota sub-system quiescent for
548 		 *    this file system.
549 		 * 3) We are in the process of unmounting this file
550 		 *    system so the quota sub-system can't be enabled
551 		 *    for it.
552 		 */
553 		if (!mutex_tryenter(&dqp->dq_lock)) {
554 			continue;
555 		}
556 
557 
558 		/*
559 		 * At this point, any quota info records that are
560 		 * associated with the target file system, should have a
561 		 * reference count of zero and be on the free list.
562 		 * Why? Because these quota info records went to a zero
563 		 * dq_cnt (via dqput()) before the file system was
564 		 * unmounted and are waiting to be found in the quota
565 		 * cache and reused (via getdiskquota()). The exception
566 		 * is when a quota transaction is sitting in the deltamap,
567 		 * indicated by DQ_TRANS being set in dq_flags.
568 		 * This causes a reference to be held on the quota
569 		 * information record and it will only be cleared once
570 		 * the transaction has reached the log. If we find
571 		 * any of these - we ignore them and let logging do
572 		 * the right thing.
573 		 */
574 		if (dqp->dq_ufsvfsp == ufsvfsp) {
575 			ASSERT(dqp->dq_cnt == 0 || (dqp->dq_cnt == 1 &&
576 			    (dqp->dq_flags & DQ_TRANS)));
577 
578 			/* Cope with those orphaned dquots. */
579 			if (dqp->dq_cnt == 1 && (dqp->dq_flags & DQ_TRANS)) {
580 				mutex_exit(&dqp->dq_lock);
581 				continue;
582 			}
583 
584 			ASSERT(dqp->dq_cnt == 0);
585 			ASSERT(dqp->dq_freef && dqp->dq_freeb);
586 
587 			/*
588 			 * Take the quota info record off the free list
589 			 * so dqinval() can do its job (and put it on the
590 			 * front of the free list).
591 			 */
592 			mutex_enter(&dq_freelock);
593 			dqremfree(dqp);
594 			mutex_exit(&dq_freelock);
595 			dqinval(dqp);
596 		}
597 
598 		mutex_exit(&dqp->dq_lock);
599 	}
600 	rw_exit(&ufsvfsp->vfs_dqrwlock);
601 }
602