xref: /illumos-gate/usr/src/uts/common/fs/ufs/quota.c (revision b92be93cdb5c3e9e673cdcb4daffe01fe1419f9e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 /*
40  * Code pertaining to management of the in-core data structures.
41  */
42 #include <sys/types.h>
43 #include <sys/t_lock.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/signal.h>
47 #include <sys/errno.h>
48 #include <sys/user.h>
49 #include <sys/proc.h>
50 #include <sys/vfs.h>
51 #include <sys/vnode.h>
52 #include <sys/uio.h>
53 #include <sys/buf.h>
54 #include <sys/fs/ufs_fs.h>
55 #include <sys/fs/ufs_inode.h>
56 #include <sys/fs/ufs_quota.h>
57 #include <sys/cmn_err.h>
58 #include <sys/kmem.h>
59 #include <sys/debug.h>
60 #include <sys/file.h>
61 #include <sys/fs/ufs_panic.h>
62 #include <sys/var.h>
63 
64 
65 /*
66  * Dquot in core hash chain headers
67  */
68 struct	dqhead	dqhead[NDQHASH];
69 
70 static kmutex_t dq_cachelock;
71 static kmutex_t dq_freelock;
72 
73 krwlock_t dq_rwlock;
74 
75 /*
76  * Dquot free list.
77  */
78 struct dquot dqfreelist;
79 
/*
 * Insert DQP at the head of the dquot free list.
 *
 * dq_freelock is taken and released by the macro itself, so the caller
 * must not already hold it.  The body is wrapped in do { } while (0) so
 * that "dqinsheadfree(p);" is a single statement and can be used safely
 * as the unbraced body of an if/else.
 */
#define	dqinsheadfree(DQP) do { \
	mutex_enter(&dq_freelock); \
	(DQP)->dq_freef = dqfreelist.dq_freef; \
	(DQP)->dq_freeb = &dqfreelist; \
	dqfreelist.dq_freef->dq_freeb = (DQP); \
	dqfreelist.dq_freef = (DQP); \
	mutex_exit(&dq_freelock); \
} while (0)
88 
/*
 * Append DQP at the tail of the dquot free list.
 *
 * dq_freelock is taken and released by the macro itself, so the caller
 * must not already hold it.  The body is wrapped in do { } while (0) so
 * that "dqinstailfree(p);" is a single statement; dqput() uses it as the
 * unbraced body of an else.
 */
#define	dqinstailfree(DQP) do { \
	mutex_enter(&dq_freelock); \
	(DQP)->dq_freeb = dqfreelist.dq_freeb; \
	(DQP)->dq_freef = &dqfreelist; \
	dqfreelist.dq_freeb->dq_freef = (DQP); \
	dqfreelist.dq_freeb = (DQP); \
	mutex_exit(&dq_freelock); \
} while (0)
97 
/*
 * Unlink DQP from the dquot free list.
 *
 * Unlike the insert macros, this one takes no lock: every caller in this
 * file holds dq_freelock around it.  The link pointers of DQP are cleared
 * afterwards to make sure we don't use them and to catch problems early.
 * The body is wrapped in do { } while (0) so that "dqremfree(p);" is a
 * single statement and can be used safely as the unbraced body of an
 * if/else.
 */
#define	dqremfree(DQP) do { \
	(DQP)->dq_freeb->dq_freef = (DQP)->dq_freef; \
	(DQP)->dq_freef->dq_freeb = (DQP)->dq_freeb; \
	(DQP)->dq_freef = (DQP)->dq_freeb = NULL; \
} while (0)
104 
/*
 * Shorthand for "pointer to dquot"; used in this file to cast the hash
 * chain headers and the free list head when they act as list sentinels.
 */
typedef struct dquot *DQptr;
106 
107 /*
108  * Initialize quota sub-system init lock.
109  */
110 void
111 qtinit()
112 {
113 	rw_init(&dq_rwlock, NULL, RW_DEFAULT, NULL);
114 }
115 
116 /*
117  * qtinit2 allocated space for the quota structures.  Only do this if
118  * if quotas are going to be used so that we can save the space if quotas
119  * aren't used.
120  */
121 void
122 qtinit2(void)
123 {
124 	register struct dqhead *dhp;
125 	register struct dquot *dqp;
126 
127 	ASSERT(RW_WRITE_HELD(&dq_rwlock));
128 
129 	if (ndquot == 0)
130 		ndquot = ((maxusers * NMOUNT) / 4) + v.v_proc;
131 
132 	dquot = kmem_zalloc(ndquot * sizeof (struct dquot), KM_SLEEP);
133 	dquotNDQUOT = dquot + ndquot;
134 
135 	/*
136 	 * Initialize the cache between the in-core structures
137 	 * and the per-file system quota files on disk.
138 	 */
139 	for (dhp = &dqhead[0]; dhp < &dqhead[NDQHASH]; dhp++) {
140 		dhp->dqh_forw = dhp->dqh_back = (DQptr)dhp;
141 	}
142 	dqfreelist.dq_freef = dqfreelist.dq_freeb = (DQptr)&dqfreelist;
143 	for (dqp = dquot; dqp < dquotNDQUOT; dqp++) {
144 		mutex_init(&dqp->dq_lock, NULL, MUTEX_DEFAULT, NULL);
145 		dqp->dq_forw = dqp->dq_back = dqp;
146 		dqinsheadfree(dqp);
147 	}
148 }
149 
150 /*
151  * Obtain the user's on-disk quota limit for file system specified.
152  * dqpp is returned locked.
153  */
154 int
155 getdiskquota(
156 	uid_t uid,
157 	struct ufsvfs *ufsvfsp,
158 	int force,			/* don't do enable checks */
159 	struct dquot **dqpp)		/* resulting dquot ptr */
160 {
161 	struct dquot *dqp;
162 	struct dqhead *dhp;
163 	struct inode *qip;
164 	int error;
165 	extern struct cred *kcred;
166 	daddr_t	bn;
167 	int contig;
168 	int err;
169 
170 	ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
171 
172 	dhp = &dqhead[DQHASH(uid, ufsvfsp)];
173 loop:
174 	/*
175 	 * Check for quotas enabled.
176 	 */
177 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0 && !force)
178 		return (ESRCH);
179 	qip = ufsvfsp->vfs_qinod;
180 	if (!qip)
181 		return (ufs_fault(ufsvfsp->vfs_root, "getdiskquota: NULL qip"));
182 	/*
183 	 * Check the cache first.
184 	 */
185 	mutex_enter(&dq_cachelock);
186 	for (dqp = dhp->dqh_forw; dqp != (DQptr)dhp; dqp = dqp->dq_forw) {
187 		if (dqp->dq_uid != uid || dqp->dq_ufsvfsp != ufsvfsp)
188 			continue;
189 		mutex_exit(&dq_cachelock);
190 		mutex_enter(&dqp->dq_lock);
191 		/*
192 		 * I may have slept in the mutex_enter.  Make sure this is
193 		 * still the one I want.
194 		 */
195 		if (dqp->dq_uid != uid || dqp->dq_ufsvfsp != ufsvfsp) {
196 			mutex_exit(&dqp->dq_lock);
197 			goto loop;
198 		}
199 		if (dqp->dq_flags & DQ_ERROR) {
200 			mutex_exit(&dqp->dq_lock);
201 			return (EINVAL);
202 		}
203 		/*
204 		 * Cache hit with no references.
205 		 * Take the structure off the free list.
206 		 */
207 		if (dqp->dq_cnt == 0) {
208 			mutex_enter(&dq_freelock);
209 			dqremfree(dqp);
210 			mutex_exit(&dq_freelock);
211 		}
212 		dqp->dq_cnt++;
213 		mutex_exit(&dqp->dq_lock);
214 		*dqpp = dqp;
215 		return (0);
216 	}
217 	/*
218 	 * Not in cache.
219 	 * Get dquot at head of free list.
220 	 */
221 	mutex_enter(&dq_freelock);
222 	if ((dqp = dqfreelist.dq_freef) == &dqfreelist) {
223 		mutex_exit(&dq_freelock);
224 		mutex_exit(&dq_cachelock);
225 		cmn_err(CE_WARN, "dquot table full");
226 		return (EUSERS);
227 	}
228 
229 	if (dqp->dq_cnt != 0 || dqp->dq_flags != 0) {
230 		panic("getdiskquota: dqp->dq_cnt: "
231 		    "%ld != 0 || dqp->dq_flags: 0x%x != 0 (%s)",
232 		    dqp->dq_cnt, dqp->dq_flags, qip->i_fs->fs_fsmnt);
233 		/*NOTREACHED*/
234 	}
235 	/*
236 	 * Take it off the free list, and off the hash chain it was on.
237 	 * Then put it on the new hash chain.
238 	 */
239 	dqremfree(dqp);
240 	mutex_exit(&dq_freelock);
241 	remque(dqp);
242 	dqp->dq_cnt = 1;
243 	dqp->dq_uid = uid;
244 	dqp->dq_ufsvfsp = ufsvfsp;
245 	dqp->dq_mof = UFS_HOLE;
246 	mutex_enter(&dqp->dq_lock);
247 	insque(dqp, dhp);
248 	mutex_exit(&dq_cachelock);
249 	/*
250 	 * Check the uid in case it's too large to fit into the 2Gbyte
251 	 * 'quotas' file (higher than 67 million or so).
252 	 */
253 
254 	/*
255 	 * Large Files: i_size need to be accessed atomically now.
256 	 */
257 	rw_enter(&qip->i_contents, RW_READER);
258 	if (uid <= MAXUID && dqoff(uid) >= 0 && dqoff(uid) < qip->i_size) {
259 		/*
260 		 * Read quota info off disk.
261 		 */
262 		error = ufs_rdwri(UIO_READ, FREAD, qip, (caddr_t)&dqp->dq_dqb,
263 		    sizeof (struct dqblk), dqoff(uid), UIO_SYSSPACE,
264 		    (int *)NULL, kcred);
265 		/*
266 		 * We must set the dq_mof even if not we are not logging in case
267 		 * we are later remount to logging.
268 		 */
269 		err = bmap_read(qip, dqoff(uid), &bn, &contig);
270 		rw_exit(&qip->i_contents);
271 		if ((bn != UFS_HOLE) && !err) {
272 			dqp->dq_mof = ldbtob(bn) +
273 			    (offset_t)(dqoff(uid) & (DEV_BSIZE - 1));
274 		} else {
275 			dqp->dq_mof = UFS_HOLE;
276 		}
277 		if (error) {
278 			/*
279 			 * I/O error in reading quota file.
280 			 * Put dquot on a private, unfindable hash list,
281 			 * put dquot at the head of the free list and
282 			 * reflect the problem to caller.
283 			 */
284 			dqp->dq_flags = DQ_ERROR;
285 			/*
286 			 * I must exit the dq_lock so that I can acquire the
287 			 * dq_cachelock.  If another thread finds dqp before
288 			 * I remove it from the cache it will see the
289 			 * DQ_ERROR and just return EIO.
290 			 */
291 			mutex_exit(&dqp->dq_lock);
292 			mutex_enter(&dq_cachelock);
293 			mutex_enter(&dqp->dq_lock);
294 			remque(dqp);
295 			mutex_exit(&dqp->dq_lock);
296 			mutex_exit(&dq_cachelock);
297 			/*
298 			 * Don't bother reacquiring dq_lock because the dq is
299 			 * not on the freelist or in the cache so only I have
300 			 * access to it.
301 			 */
302 			dqp->dq_cnt = 0;
303 			dqp->dq_ufsvfsp = NULL;
304 			dqp->dq_forw = dqp;
305 			dqp->dq_back = dqp;
306 			dqp->dq_mof = UFS_HOLE;
307 			dqp->dq_flags = 0;
308 			dqinsheadfree(dqp);
309 			return (EIO);
310 		}
311 	} else {
312 		rw_exit(&qip->i_contents);	/* done with i_size */
313 		bzero(&dqp->dq_dqb, sizeof (struct dqblk));
314 		dqp->dq_mof = UFS_HOLE;
315 	}
316 	mutex_exit(&dqp->dq_lock);
317 	*dqpp = dqp;
318 	return (0);
319 }
320 
321 /*
322  * Release dquot.
323  */
324 void
325 dqput(dqp)
326 	register struct dquot *dqp;
327 {
328 
329 	ASSERT(dqp->dq_ufsvfsp == NULL ||
330 		RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
331 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
332 	if (dqp->dq_cnt == 0) {
333 		(void) ufs_fault(
334 			dqp->dq_ufsvfsp && dqp->dq_ufsvfsp->vfs_root?
335 			dqp->dq_ufsvfsp->vfs_root: NULL,
336 						    "dqput: dqp->dq_cnt == 0");
337 		return;
338 	}
339 	if (--dqp->dq_cnt == 0) {
340 		if (dqp->dq_flags & DQ_MOD)
341 			dqupdate(dqp);
342 		/*
343 		 * DQ_MOD was cleared by dqupdate().
344 		 * DQ_ERROR shouldn't be set if this dquot was being used.
345 		 * DQ_FILES/DQ_BLKS don't matter at this point.
346 		 */
347 		dqp->dq_flags = 0;
348 		if (dqp->dq_ufsvfsp == NULL ||
349 		    dqp->dq_ufsvfsp->vfs_qflags == 0) {
350 			/* quotas are disabled, discard this dquot struct */
351 			dqinval(dqp);
352 		} else
353 			dqinstailfree(dqp);
354 	}
355 }
356 
357 /*
358  * Update on disk quota info.
359  */
360 void
361 dqupdate(dqp)
362 	register struct dquot *dqp;
363 {
364 	register struct inode *qip;
365 	extern struct cred *kcred;
366 	struct ufsvfs	*ufsvfsp;
367 	int		newtrans	= 0;
368 	struct vnode	*vfs_root;
369 
370 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
371 
372 	if (!dqp->dq_ufsvfsp) {
373 		(void) ufs_fault(NULL, "dqupdate: NULL dq_ufsvfsp");
374 		return;
375 	}
376 	vfs_root = dqp->dq_ufsvfsp->vfs_root;
377 	if (!vfs_root) {
378 		(void) ufs_fault(NULL, "dqupdate: NULL vfs_root");
379 		return;
380 	}
381 	/*
382 	 * I don't need to hold dq_rwlock when looking at vfs_qinod here
383 	 * because vfs_qinod is only cleared by closedq after it has called
384 	 * dqput on all dq's.  Since I am holding dq_lock on this dq, closedq
385 	 * will have to wait until I am done before it can call dqput on
386 	 * this dq so vfs_qinod will not change value until after I return.
387 	 */
388 	qip = dqp->dq_ufsvfsp->vfs_qinod;
389 	if (!qip) {
390 		(void) ufs_fault(vfs_root, "dqupdate: NULL vfs_qinod");
391 		return;
392 	}
393 	ufsvfsp = qip->i_ufsvfs;
394 	if (!ufsvfsp) {
395 		(void) ufs_fault(vfs_root,
396 				    "dqupdate: NULL vfs_qinod->i_ufsvfs");
397 		return;
398 	}
399 	if (ufsvfsp != dqp->dq_ufsvfsp) {
400 		(void) ufs_fault(vfs_root,
401 			    "dqupdate: vfs_qinod->i_ufsvfs != dqp->dq_ufsvfsp");
402 		return;
403 	}
404 	if (!(dqp->dq_flags & DQ_MOD)) {
405 		(void) ufs_fault(vfs_root,
406 				    "dqupdate: !(dqp->dq_flags & DQ_MOD)");
407 		return;
408 	}
409 
410 	if (!(curthread->t_flag & T_DONTBLOCK)) {
411 		newtrans++;
412 		curthread->t_flag |= T_DONTBLOCK;
413 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
414 	}
415 	if (TRANS_ISTRANS(ufsvfsp)) {
416 		TRANS_DELTA(ufsvfsp, dqp->dq_mof, sizeof (struct dqblk),
417 		    DT_QR, 0, 0);
418 		TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb, dqp->dq_mof,
419 		    (int)(sizeof (struct dqblk)), NULL, 0);
420 	} else {
421 		/*
422 		 * Locknest gets very confused when I lock the quota inode.
423 		 * It thinks that qip and ip (the inode that caused the
424 		 * quota routines to get called) are the same inode.
425 		 */
426 		rw_enter(&qip->i_contents, RW_WRITER);
427 		/*
428 		 * refuse to push if offset would be illegal
429 		 */
430 		if (dqoff(dqp->dq_uid) >= 0) {
431 			(void) ufs_rdwri(UIO_WRITE, FWRITE, qip,
432 					(caddr_t)&dqp->dq_dqb,
433 					sizeof (struct dqblk),
434 					dqoff(dqp->dq_uid), UIO_SYSSPACE,
435 					(int *)NULL, kcred);
436 		}
437 		rw_exit(&qip->i_contents);
438 	}
439 
440 	dqp->dq_flags &= ~DQ_MOD;
441 	if (newtrans) {
442 		TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
443 		curthread->t_flag &= ~T_DONTBLOCK;
444 	}
445 }
446 
447 /*
448  * Invalidate a dquot.  This function is called when quotas are disabled
449  * for a specific file system via closedq() or when we unmount the file
450  * system and invalidate the quota cache via invalidatedq().
451  *
452  * Take the dquot off its hash list and put it on a private, unfindable
453  * hash list (refers to itself). Also, put it at the head of the free list.
454  * Note that even though dq_cnt is zero, this dquot is NOT yet on the
455  * freelist.
456  */
457 void
458 dqinval(dqp)
459 	register struct dquot *dqp;
460 {
461 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
462 	ASSERT(dqp->dq_cnt == 0);
463 	ASSERT(dqp->dq_flags == 0);
464 	ASSERT(dqp->dq_freef == NULL && dqp->dq_freeb == NULL);
465 	ASSERT(dqp->dq_ufsvfsp &&
466 		(dqp->dq_ufsvfsp->vfs_qflags & MQ_ENABLED) == 0);
467 
468 	/*
469 	 * To preserve lock order, we have to drop dq_lock in order to
470 	 * grab dq_cachelock.  To prevent someone from grabbing this
471 	 * dquot from the quota cache via getdiskquota() while we are
472 	 * "unsafe", we clear dq_ufsvfsp so it won't match anything.
473 	 */
474 	dqp->dq_ufsvfsp = NULL;
475 	mutex_exit(&dqp->dq_lock);
476 	mutex_enter(&dq_cachelock);
477 	mutex_enter(&dqp->dq_lock);
478 
479 	/*
480 	 * The following paranoia is to make sure that getdiskquota()
481 	 * has not been broken:
482 	 */
483 	ASSERT(dqp->dq_cnt == 0);
484 	ASSERT(dqp->dq_flags == 0);
485 	ASSERT(dqp->dq_freef == NULL && dqp->dq_freeb == NULL);
486 	ASSERT(dqp->dq_ufsvfsp == NULL);
487 
488 	/*
489 	 * Now we have the locks in the right order so we can do the
490 	 * rest of the work.
491 	 */
492 	remque(dqp);
493 	mutex_exit(&dq_cachelock);
494 	dqp->dq_forw = dqp;
495 	dqp->dq_back = dqp;
496 	dqinsheadfree(dqp);
497 }
498 
499 /*
500  * Invalidate all quota information records for the specified file system.
501  */
502 void
503 invalidatedq(ufsvfsp)
504 	register struct ufsvfs *ufsvfsp;
505 {
506 	register struct dquot *dqp;
507 
508 
509 	/*
510 	 * If quotas are not initialized, then there is nothing to do.
511 	 */
512 	rw_enter(&dq_rwlock, RW_READER);
513 	if (!quotas_initialized) {
514 		rw_exit(&dq_rwlock);
515 		return;
516 	}
517 	rw_exit(&dq_rwlock);
518 
519 
520 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);
521 
522 	ASSERT((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0);
523 
524 	/*
525 	 * Invalidate all the quota info records for this file system
526 	 * that are in the quota cache:
527 	 */
528 	for (dqp = dquot; dqp < dquotNDQUOT; dqp++) {
529 		/*
530 		 * If someone else has it, then ignore it. For the target
531 		 * file system, this is okay for three reasons:
532 		 *
533 		 * 1) This routine is called after closedq() so the quota
534 		 *    sub-system is disabled for this file system.
535 		 * 2) We have made the quota sub-system quiescent for
536 		 *    this file system.
537 		 * 3) We are in the process of unmounting this file
538 		 *    system so the quota sub-system can't be enabled
539 		 *    for it.
540 		 */
541 		if (!mutex_tryenter(&dqp->dq_lock)) {
542 			continue;
543 		}
544 
545 
546 		/*
547 		 * At this point, any quota info records that are
548 		 * associated with the target file system, should have a
549 		 * reference count of zero and be on the free list.
550 		 * Why? Because these quota info records went to a zero
551 		 * dq_cnt (via dqput()) before the file system was
552 		 * unmounted and are waiting to be found in the quota
553 		 * cache and reused (via getdiskquota()). The exception
554 		 * is when a quota transaction is sitting in the deltamap,
555 		 * indicated by DQ_TRANS being set in dq_flags.
556 		 * This causes a reference to be held on the quota
557 		 * information record and it will only be cleared once
558 		 * the transaction has reached the log. If we find
559 		 * any of these - we ignore them and let logging do
560 		 * the right thing.
561 		 */
562 		if (dqp->dq_ufsvfsp == ufsvfsp) {
563 			ASSERT(dqp->dq_cnt == 0 || (dqp->dq_cnt == 1 &&
564 			    (dqp->dq_flags & DQ_TRANS)));
565 
566 			/* Cope with those orphaned dquots. */
567 			if (dqp->dq_cnt == 1 && (dqp->dq_flags & DQ_TRANS)) {
568 				mutex_exit(&dqp->dq_lock);
569 				continue;
570 			}
571 
572 			ASSERT(dqp->dq_cnt == 0);
573 			ASSERT(dqp->dq_freef && dqp->dq_freeb);
574 
575 			/*
576 			 * Take the quota info record off the free list
577 			 * so dqinval() can do its job (and put it on the
578 			 * front of the free list).
579 			 */
580 			mutex_enter(&dq_freelock);
581 			dqremfree(dqp);
582 			mutex_exit(&dq_freelock);
583 			dqinval(dqp);
584 		}
585 
586 		mutex_exit(&dqp->dq_lock);
587 	}
588 	rw_exit(&ufsvfsp->vfs_dqrwlock);
589 }
590