xref: /illumos-gate/usr/src/uts/common/fs/ufs/quota.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 
40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
41 
42 /*
43  * Code pertaining to management of the in-core data structures.
44  */
45 #include <sys/types.h>
46 #include <sys/t_lock.h>
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signal.h>
50 #include <sys/errno.h>
51 #include <sys/user.h>
52 #include <sys/proc.h>
53 #include <sys/vfs.h>
54 #include <sys/vnode.h>
55 #include <sys/uio.h>
56 #include <sys/buf.h>
57 #include <sys/fs/ufs_fs.h>
58 #include <sys/fs/ufs_inode.h>
59 #include <sys/fs/ufs_quota.h>
60 #include <sys/cmn_err.h>
61 #include <sys/kmem.h>
62 #include <sys/debug.h>
63 #include <sys/file.h>
64 #include <sys/fs/ufs_panic.h>
65 #include <sys/var.h>
66 
67 
/*
 * Dquot in core hash chain headers
 *
 * Each entry is the sentinel of a circular, doubly linked chain of dquots
 * (linked through dq_forw/dq_back).  qtinit2() points every header at
 * itself, and getdiskquota() selects a chain with DQHASH(uid, ufsvfsp).
 */
struct	dqhead	dqhead[NDQHASH];

/*
 * dq_cachelock is held while the hash chains are searched or relinked
 * (see getdiskquota() and dqinval()).  dq_freelock is held around every
 * insertion into and removal from dqfreelist.  Within this file
 * dq_cachelock is taken before a dquot's dq_lock, and dq_freelock is
 * taken last.
 */
static kmutex_t dq_cachelock;
static kmutex_t dq_freelock;

/*
 * Initialized by qtinit().  qtinit2() asserts that it is held as writer,
 * and invalidatedq() takes it as reader while testing quotas_initialized.
 */
krwlock_t dq_rwlock;

/*
 * Dquot free list.
 *
 * Sentinel of a circular list linked through dq_freef/dq_freeb; the list
 * is empty when the sentinel points at itself.
 */
struct dquot dqfreelist;
82 
/*
 * Free list manipulation.
 *
 * Each macro expands to a single statement (do { } while (0)), so it can
 * be used safely as the body of an unbraced if/else.  DQP is evaluated
 * more than once; pass an expression without side effects.
 */

/* Link DQP in at the head of the free list; takes dq_freelock itself. */
#define	dqinsheadfree(DQP) do { \
	mutex_enter(&dq_freelock); \
	(DQP)->dq_freef = dqfreelist.dq_freef; \
	(DQP)->dq_freeb = &dqfreelist; \
	dqfreelist.dq_freef->dq_freeb = (DQP); \
	dqfreelist.dq_freef = (DQP); \
	mutex_exit(&dq_freelock); \
} while (0)

/* Link DQP in at the tail of the free list; takes dq_freelock itself. */
#define	dqinstailfree(DQP) do { \
	mutex_enter(&dq_freelock); \
	(DQP)->dq_freeb = dqfreelist.dq_freeb; \
	(DQP)->dq_freef = &dqfreelist; \
	dqfreelist.dq_freeb->dq_freef = (DQP); \
	dqfreelist.dq_freeb = (DQP); \
	mutex_exit(&dq_freelock); \
} while (0)

/*
 * Unlink DQP from the free list.  Unlike the insert macros this one does
 * NOT take dq_freelock; every caller in this file acquires it first.
 * (clear pointers to make sure we don't use them; catch problems early)
 */
#define	dqremfree(DQP) do { \
	(DQP)->dq_freeb->dq_freef = (DQP)->dq_freef; \
	(DQP)->dq_freef->dq_freeb = (DQP)->dq_freeb; \
	(DQP)->dq_freef = (DQP)->dq_freeb = NULL; \
} while (0)

/* Used to cast a hash chain header so it can be compared with a dquot. */
typedef	struct dquot *DQptr;
109 
110 /*
111  * Initialize quota sub-system init lock.
112  */
113 void
114 qtinit()
115 {
116 	rw_init(&dq_rwlock, NULL, RW_DEFAULT, NULL);
117 }
118 
119 /*
120  * qtinit2 allocated space for the quota structures.  Only do this if
121  * if quotas are going to be used so that we can save the space if quotas
122  * aren't used.
123  */
124 void
125 qtinit2(void)
126 {
127 	register struct dqhead *dhp;
128 	register struct dquot *dqp;
129 
130 	ASSERT(RW_WRITE_HELD(&dq_rwlock));
131 
132 	if (ndquot == 0)
133 		ndquot = ((maxusers * NMOUNT) / 4) + v.v_proc;
134 
135 	dquot = kmem_zalloc(ndquot * sizeof (struct dquot), KM_SLEEP);
136 	dquotNDQUOT = dquot + ndquot;
137 
138 	/*
139 	 * Initialize the cache between the in-core structures
140 	 * and the per-file system quota files on disk.
141 	 */
142 	for (dhp = &dqhead[0]; dhp < &dqhead[NDQHASH]; dhp++) {
143 		dhp->dqh_forw = dhp->dqh_back = (DQptr)dhp;
144 	}
145 	dqfreelist.dq_freef = dqfreelist.dq_freeb = (DQptr)&dqfreelist;
146 	for (dqp = dquot; dqp < dquotNDQUOT; dqp++) {
147 		mutex_init(&dqp->dq_lock, NULL, MUTEX_DEFAULT, NULL);
148 		dqp->dq_forw = dqp->dq_back = dqp;
149 		dqinsheadfree(dqp);
150 	}
151 }
152 
153 /*
154  * Obtain the user's on-disk quota limit for file system specified.
155  * dqpp is returned locked.
156  */
157 int
158 getdiskquota(
159 	uid_t uid,
160 	struct ufsvfs *ufsvfsp,
161 	int force,			/* don't do enable checks */
162 	struct dquot **dqpp)		/* resulting dquot ptr */
163 {
164 	struct dquot *dqp;
165 	struct dqhead *dhp;
166 	struct inode *qip;
167 	int error;
168 	extern struct cred *kcred;
169 	daddr_t	bn;
170 	int contig;
171 	int err;
172 
173 	ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
174 
175 	dhp = &dqhead[DQHASH(uid, ufsvfsp)];
176 loop:
177 	/*
178 	 * Check for quotas enabled.
179 	 */
180 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0 && !force)
181 		return (ESRCH);
182 	qip = ufsvfsp->vfs_qinod;
183 	if (!qip)
184 		return (ufs_fault(ufsvfsp->vfs_root, "getdiskquota: NULL qip"));
185 	/*
186 	 * Check the cache first.
187 	 */
188 	mutex_enter(&dq_cachelock);
189 	for (dqp = dhp->dqh_forw; dqp != (DQptr)dhp; dqp = dqp->dq_forw) {
190 		if (dqp->dq_uid != uid || dqp->dq_ufsvfsp != ufsvfsp)
191 			continue;
192 		mutex_exit(&dq_cachelock);
193 		mutex_enter(&dqp->dq_lock);
194 		/*
195 		 * I may have slept in the mutex_enter.  Make sure this is
196 		 * still the one I want.
197 		 */
198 		if (dqp->dq_uid != uid || dqp->dq_ufsvfsp != ufsvfsp) {
199 			mutex_exit(&dqp->dq_lock);
200 			goto loop;
201 		}
202 		if (dqp->dq_flags & DQ_ERROR) {
203 			mutex_exit(&dqp->dq_lock);
204 			return (EINVAL);
205 		}
206 		/*
207 		 * Cache hit with no references.
208 		 * Take the structure off the free list.
209 		 */
210 		if (dqp->dq_cnt == 0) {
211 			mutex_enter(&dq_freelock);
212 			dqremfree(dqp);
213 			mutex_exit(&dq_freelock);
214 		}
215 		dqp->dq_cnt++;
216 		mutex_exit(&dqp->dq_lock);
217 		*dqpp = dqp;
218 		return (0);
219 	}
220 	/*
221 	 * Not in cache.
222 	 * Get dquot at head of free list.
223 	 */
224 	mutex_enter(&dq_freelock);
225 	if ((dqp = dqfreelist.dq_freef) == &dqfreelist) {
226 		mutex_exit(&dq_freelock);
227 		mutex_exit(&dq_cachelock);
228 		cmn_err(CE_WARN, "dquot table full");
229 		return (EUSERS);
230 	}
231 
232 	if (dqp->dq_cnt != 0 || dqp->dq_flags != 0) {
233 		panic("getdiskquota: dqp->dq_cnt: "
234 		    "%ld != 0 || dqp->dq_flags: 0x%x != 0 (%s)",
235 		    dqp->dq_cnt, dqp->dq_flags, qip->i_fs->fs_fsmnt);
236 		/*NOTREACHED*/
237 	}
238 	/*
239 	 * Take it off the free list, and off the hash chain it was on.
240 	 * Then put it on the new hash chain.
241 	 */
242 	dqremfree(dqp);
243 	mutex_exit(&dq_freelock);
244 	remque(dqp);
245 	dqp->dq_cnt = 1;
246 	dqp->dq_uid = uid;
247 	dqp->dq_ufsvfsp = ufsvfsp;
248 	dqp->dq_mof = UFS_HOLE;
249 	mutex_enter(&dqp->dq_lock);
250 	insque(dqp, dhp);
251 	mutex_exit(&dq_cachelock);
252 	/*
253 	 * Check the uid in case it's too large to fit into the 2Gbyte
254 	 * 'quotas' file (higher than 67 million or so).
255 	 */
256 
257 	/*
258 	 * Large Files: i_size need to be accessed atomically now.
259 	 */
260 	rw_enter(&qip->i_contents, RW_READER);
261 	if (uid <= MAXUID && dqoff(uid) >= 0 && dqoff(uid) < qip->i_size) {
262 		/*
263 		 * Read quota info off disk.
264 		 */
265 		error = ufs_rdwri(UIO_READ, FREAD, qip, (caddr_t)&dqp->dq_dqb,
266 		    sizeof (struct dqblk), dqoff(uid), UIO_SYSSPACE,
267 		    (int *)NULL, kcred);
268 		/*
269 		 * We must set the dq_mof even if not we are not logging in case
270 		 * we are later remount to logging.
271 		 */
272 		err = bmap_read(qip, dqoff(uid), &bn, &contig);
273 		rw_exit(&qip->i_contents);
274 		if ((bn != UFS_HOLE) && !err) {
275 			dqp->dq_mof = ldbtob(bn) +
276 			    (offset_t)(dqoff(uid) & (DEV_BSIZE - 1));
277 		} else {
278 			dqp->dq_mof = UFS_HOLE;
279 		}
280 		if (error) {
281 			/*
282 			 * I/O error in reading quota file.
283 			 * Put dquot on a private, unfindable hash list,
284 			 * put dquot at the head of the free list and
285 			 * reflect the problem to caller.
286 			 */
287 			dqp->dq_flags = DQ_ERROR;
288 			/*
289 			 * I must exit the dq_lock so that I can acquire the
290 			 * dq_cachelock.  If another thread finds dqp before
291 			 * I remove it from the cache it will see the
292 			 * DQ_ERROR and just return EIO.
293 			 */
294 			mutex_exit(&dqp->dq_lock);
295 			mutex_enter(&dq_cachelock);
296 			mutex_enter(&dqp->dq_lock);
297 			remque(dqp);
298 			mutex_exit(&dqp->dq_lock);
299 			mutex_exit(&dq_cachelock);
300 			/*
301 			 * Don't bother reacquiring dq_lock because the dq is
302 			 * not on the freelist or in the cache so only I have
303 			 * access to it.
304 			 */
305 			dqp->dq_cnt = 0;
306 			dqp->dq_ufsvfsp = NULL;
307 			dqp->dq_forw = dqp;
308 			dqp->dq_back = dqp;
309 			dqp->dq_mof = UFS_HOLE;
310 			dqp->dq_flags = 0;
311 			dqinsheadfree(dqp);
312 			return (EIO);
313 		}
314 	} else {
315 		rw_exit(&qip->i_contents);	/* done with i_size */
316 		bzero(&dqp->dq_dqb, sizeof (struct dqblk));
317 		dqp->dq_mof = UFS_HOLE;
318 	}
319 	mutex_exit(&dqp->dq_lock);
320 	*dqpp = dqp;
321 	return (0);
322 }
323 
324 /*
325  * Release dquot.
326  */
327 void
328 dqput(dqp)
329 	register struct dquot *dqp;
330 {
331 
332 	ASSERT(dqp->dq_ufsvfsp == NULL ||
333 		RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
334 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
335 	if (dqp->dq_cnt == 0) {
336 		(void) ufs_fault(
337 			dqp->dq_ufsvfsp && dqp->dq_ufsvfsp->vfs_root?
338 			dqp->dq_ufsvfsp->vfs_root: NULL,
339 						    "dqput: dqp->dq_cnt == 0");
340 		return;
341 	}
342 	if (--dqp->dq_cnt == 0) {
343 		if (dqp->dq_flags & DQ_MOD)
344 			dqupdate(dqp);
345 		/*
346 		 * DQ_MOD was cleared by dqupdate().
347 		 * DQ_ERROR shouldn't be set if this dquot was being used.
348 		 * DQ_FILES/DQ_BLKS don't matter at this point.
349 		 */
350 		dqp->dq_flags = 0;
351 		if (dqp->dq_ufsvfsp == NULL ||
352 		    dqp->dq_ufsvfsp->vfs_qflags == 0) {
353 			/* quotas are disabled, discard this dquot struct */
354 			dqinval(dqp);
355 		} else
356 			dqinstailfree(dqp);
357 	}
358 }
359 
360 /*
361  * Update on disk quota info.
362  */
363 void
364 dqupdate(dqp)
365 	register struct dquot *dqp;
366 {
367 	register struct inode *qip;
368 	extern struct cred *kcred;
369 	struct ufsvfs	*ufsvfsp;
370 	int		newtrans	= 0;
371 	struct vnode	*vfs_root;
372 
373 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
374 
375 	if (!dqp->dq_ufsvfsp) {
376 		(void) ufs_fault(NULL, "dqupdate: NULL dq_ufsvfsp");
377 		return;
378 	}
379 	vfs_root = dqp->dq_ufsvfsp->vfs_root;
380 	if (!vfs_root) {
381 		(void) ufs_fault(NULL, "dqupdate: NULL vfs_root");
382 		return;
383 	}
384 	/*
385 	 * I don't need to hold dq_rwlock when looking at vfs_qinod here
386 	 * because vfs_qinod is only cleared by closedq after it has called
387 	 * dqput on all dq's.  Since I am holding dq_lock on this dq, closedq
388 	 * will have to wait until I am done before it can call dqput on
389 	 * this dq so vfs_qinod will not change value until after I return.
390 	 */
391 	qip = dqp->dq_ufsvfsp->vfs_qinod;
392 	if (!qip) {
393 		(void) ufs_fault(vfs_root, "dqupdate: NULL vfs_qinod");
394 		return;
395 	}
396 	ufsvfsp = qip->i_ufsvfs;
397 	if (!ufsvfsp) {
398 		(void) ufs_fault(vfs_root,
399 				    "dqupdate: NULL vfs_qinod->i_ufsvfs");
400 		return;
401 	}
402 	if (ufsvfsp != dqp->dq_ufsvfsp) {
403 		(void) ufs_fault(vfs_root,
404 			    "dqupdate: vfs_qinod->i_ufsvfs != dqp->dq_ufsvfsp");
405 		return;
406 	}
407 	if (!(dqp->dq_flags & DQ_MOD)) {
408 		(void) ufs_fault(vfs_root,
409 				    "dqupdate: !(dqp->dq_flags & DQ_MOD)");
410 		return;
411 	}
412 
413 	if (!(curthread->t_flag & T_DONTBLOCK)) {
414 		newtrans++;
415 		curthread->t_flag |= T_DONTBLOCK;
416 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
417 	}
418 	if (TRANS_ISTRANS(ufsvfsp)) {
419 		TRANS_DELTA(ufsvfsp, dqp->dq_mof, sizeof (struct dqblk),
420 		    DT_QR, 0, 0);
421 		TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb, dqp->dq_mof,
422 		    (int)(sizeof (struct dqblk)), NULL, 0);
423 	} else {
424 		/*
425 		 * Locknest gets very confused when I lock the quota inode.
426 		 * It thinks that qip and ip (the inode that caused the
427 		 * quota routines to get called) are the same inode.
428 		 */
429 		rw_enter(&qip->i_contents, RW_WRITER);
430 		/*
431 		 * refuse to push if offset would be illegal
432 		 */
433 		if (dqoff(dqp->dq_uid) >= 0) {
434 			(void) ufs_rdwri(UIO_WRITE, FWRITE, qip,
435 					(caddr_t)&dqp->dq_dqb,
436 					sizeof (struct dqblk),
437 					dqoff(dqp->dq_uid), UIO_SYSSPACE,
438 					(int *)NULL, kcred);
439 		}
440 		rw_exit(&qip->i_contents);
441 	}
442 
443 	dqp->dq_flags &= ~DQ_MOD;
444 	if (newtrans) {
445 		TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
446 		curthread->t_flag &= ~T_DONTBLOCK;
447 	}
448 }
449 
450 /*
451  * Invalidate a dquot.  This function is called when quotas are disabled
452  * for a specific file system via closedq() or when we unmount the file
453  * system and invalidate the quota cache via invalidatedq().
454  *
455  * Take the dquot off its hash list and put it on a private, unfindable
456  * hash list (refers to itself). Also, put it at the head of the free list.
457  * Note that even though dq_cnt is zero, this dquot is NOT yet on the
458  * freelist.
459  */
460 void
461 dqinval(dqp)
462 	register struct dquot *dqp;
463 {
464 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
465 	ASSERT(dqp->dq_cnt == 0);
466 	ASSERT(dqp->dq_flags == 0);
467 	ASSERT(dqp->dq_freef == NULL && dqp->dq_freeb == NULL);
468 	ASSERT(dqp->dq_ufsvfsp &&
469 		(dqp->dq_ufsvfsp->vfs_qflags & MQ_ENABLED) == 0);
470 
471 	/*
472 	 * To preserve lock order, we have to drop dq_lock in order to
473 	 * grab dq_cachelock.  To prevent someone from grabbing this
474 	 * dquot from the quota cache via getdiskquota() while we are
475 	 * "unsafe", we clear dq_ufsvfsp so it won't match anything.
476 	 */
477 	dqp->dq_ufsvfsp = NULL;
478 	mutex_exit(&dqp->dq_lock);
479 	mutex_enter(&dq_cachelock);
480 	mutex_enter(&dqp->dq_lock);
481 
482 	/*
483 	 * The following paranoia is to make sure that getdiskquota()
484 	 * has not been broken:
485 	 */
486 	ASSERT(dqp->dq_cnt == 0);
487 	ASSERT(dqp->dq_flags == 0);
488 	ASSERT(dqp->dq_freef == NULL && dqp->dq_freeb == NULL);
489 	ASSERT(dqp->dq_ufsvfsp == NULL);
490 
491 	/*
492 	 * Now we have the locks in the right order so we can do the
493 	 * rest of the work.
494 	 */
495 	remque(dqp);
496 	mutex_exit(&dq_cachelock);
497 	dqp->dq_forw = dqp;
498 	dqp->dq_back = dqp;
499 	dqinsheadfree(dqp);
500 }
501 
502 /*
503  * Invalidate all quota information records for the specified file system.
504  */
505 void
506 invalidatedq(ufsvfsp)
507 	register struct ufsvfs *ufsvfsp;
508 {
509 	register struct dquot *dqp;
510 
511 
512 	/*
513 	 * If quotas are not initialized, then there is nothing to do.
514 	 */
515 	rw_enter(&dq_rwlock, RW_READER);
516 	if (!quotas_initialized) {
517 		rw_exit(&dq_rwlock);
518 		return;
519 	}
520 	rw_exit(&dq_rwlock);
521 
522 
523 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);
524 
525 	ASSERT((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0);
526 
527 	/*
528 	 * Invalidate all the quota info records for this file system
529 	 * that are in the quota cache:
530 	 */
531 	for (dqp = dquot; dqp < dquotNDQUOT; dqp++) {
532 		/*
533 		 * If someone else has it, then ignore it. For the target
534 		 * file system, this is okay for three reasons:
535 		 *
536 		 * 1) This routine is called after closedq() so the quota
537 		 *    sub-system is disabled for this file system.
538 		 * 2) We have made the quota sub-system quiescent for
539 		 *    this file system.
540 		 * 3) We are in the process of unmounting this file
541 		 *    system so the quota sub-system can't be enabled
542 		 *    for it.
543 		 */
544 		if (!mutex_tryenter(&dqp->dq_lock)) {
545 			continue;
546 		}
547 
548 
549 		/*
550 		 * At this point, any quota info records that are
551 		 * associated with the target file system, should have a
552 		 * reference count of zero and be on the free list.
553 		 * Why? Because these quota info records went to a zero
554 		 * dq_cnt (via dqput()) before the file system was
555 		 * unmounted and are waiting to be found in the quota
556 		 * cache and reused (via getdiskquota()). The exception
557 		 * is when a quota transaction is sitting in the deltamap,
558 		 * indicated by DQ_TRANS being set in dq_flags.
559 		 * This causes a reference to be held on the quota
560 		 * information record and it will only be cleared once
561 		 * the transaction has reached the log. If we find
562 		 * any of these - we ignore them and let logging do
563 		 * the right thing.
564 		 */
565 		if (dqp->dq_ufsvfsp == ufsvfsp) {
566 			ASSERT(dqp->dq_cnt == 0 || (dqp->dq_cnt == 1 &&
567 			    (dqp->dq_flags & DQ_TRANS)));
568 
569 			/* Cope with those orphaned dquots. */
570 			if (dqp->dq_cnt == 1 && (dqp->dq_flags & DQ_TRANS)) {
571 				mutex_exit(&dqp->dq_lock);
572 				continue;
573 			}
574 
575 			ASSERT(dqp->dq_cnt == 0);
576 			ASSERT(dqp->dq_freef && dqp->dq_freeb);
577 
578 			/*
579 			 * Take the quota info record off the free list
580 			 * so dqinval() can do its job (and put it on the
581 			 * front of the free list).
582 			 */
583 			mutex_enter(&dq_freelock);
584 			dqremfree(dqp);
585 			mutex_exit(&dq_freelock);
586 			dqinval(dqp);
587 		}
588 
589 		mutex_exit(&dqp->dq_lock);
590 	}
591 	rw_exit(&ufsvfsp->vfs_dqrwlock);
592 }
593