xref: /titanic_52/usr/src/uts/common/fs/ufs/quota_ufs.c (revision bde3d612a7c090234c60e6e4578821237a5db135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 
40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
41 
42 /*
43  * Routines used in checking limits on file system usage.
44  */
45 
46 #include <sys/types.h>
47 #include <sys/t_lock.h>
48 #include <sys/param.h>
49 #include <sys/time.h>
50 #include <sys/systm.h>
51 #include <sys/kmem.h>
52 #include <sys/signal.h>
53 #include <sys/cred.h>
54 #include <sys/proc.h>
55 #include <sys/user.h>
56 #include <sys/proc.h>
57 #include <sys/vfs.h>
58 #include <sys/vnode.h>
59 #include <sys/buf.h>
60 #include <sys/uio.h>
61 #include <sys/fs/ufs_inode.h>
62 #include <sys/fs/ufs_fs.h>
63 #include <sys/fs/ufs_quota.h>
64 #include <sys/errno.h>
65 #include <sys/cmn_err.h>
66 #include <sys/session.h>
67 #include <sys/debug.h>
68 
69 /*
70  * Find the dquot structure that should
71  * be used in checking i/o on inode ip.
72  */
73 struct dquot *
74 getinoquota(struct inode *ip)
75 {
76 	struct dquot *dqp, *xdqp;
77 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
78 
79 	ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
80 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
81 	/*
82 	 * Check for quotas enabled.
83 	 */
84 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
85 		return (NULL);
86 	}
87 
88 	/*
89 	 * Check for someone doing I/O to quota file.
90 	 */
91 	if (ip == ufsvfsp->vfs_qinod) {
92 		return (NULL);
93 	}
94 
95 	/*
96 	 * Check for a legal inode, e.g. not a shadow inode,
97 	 * not a extended attribute directory inode and a valid mode.
98 	 */
99 	ASSERT((ip->i_mode & IFMT) != IFSHAD);
100 	ASSERT((ip->i_mode & IFMT) != IFATTRDIR);
101 	ASSERT(ip->i_mode);
102 
103 	if (getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, &xdqp)) {
104 		return (NULL);
105 	}
106 	dqp = xdqp;
107 	mutex_enter(&dqp->dq_lock);
108 	ASSERT(ip->i_uid == dqp->dq_uid);
109 
110 	if (dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 &&
111 	    dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) {
112 		dqput(dqp);
113 		mutex_exit(&dqp->dq_lock);
114 		dqp = NULL;
115 	} else {
116 		mutex_exit(&dqp->dq_lock);
117 	}
118 	return (dqp);
119 }
120 
121 /*
122  * Update disk usage, and take corrective action.
123  */
124 int
125 chkdq(struct inode *ip, long change, int force, struct cred *cr,
126 	char **uerrp, size_t *lenp)
127 {
128 	struct dquot *dqp;
129 	uint64_t ncurblocks;
130 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
131 	int error = 0;
132 	long abs_change;
133 	char *msg1 =
134 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n";
135 	char *msg2 =
136 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n";
137 	char *msg3 =
138 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n";
139 	char *msg4 =
140 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n";
141 	char *errmsg = NULL;
142 	time_t now;
143 
144 	/*
145 	 * Shadow inodes do not need to hold the vfs_dqrwlock lock.
146 	 */
147 	ASSERT((ip->i_mode & IFMT) == IFSHAD ||
148 	    RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
149 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
150 
151 	if (change == 0)
152 		return (0);
153 	dqp = ip->i_dquot;
154 
155 	/*
156 	 * Make sure the quota info record matches the owner.
157 	 */
158 	ASSERT(dqp == NULL || ip->i_uid == dqp->dq_uid);
159 
160 #ifdef DEBUG
161 	/*
162 	 * Shadow inodes and extended attribute directories
163 	 * should not have quota info records.
164 	 */
165 	if ((ip->i_mode & IFMT) == IFSHAD || (ip->i_mode & IFMT) == IFATTRDIR) {
166 		ASSERT(dqp == NULL);
167 	}
168 	/*
169 	 * Paranoia for verifying that quotas are okay.
170 	 */
171 	else {
172 		struct dquot *expect_dq;
173 		int mismatch_ok = 0;
174 
175 		/* Get current quota information */
176 		expect_dq = getinoquota(ip);
177 		/*
178 		 * We got NULL back from getinoquota(), but there is
179 		 * no error code return from that interface and some
180 		 * errors are "ok" because we may be testing via error
181 		 * injection.  If this is not the quota inode then we
182 		 * use getdiskquota() to see if there is an error and
183 		 * if the error is ok.
184 		 */
185 		if (expect_dq == NULL && ip != ufsvfsp->vfs_qinod) {
186 			int error;
187 			struct dquot *xdqp;
188 
189 			error = getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0,
190 			    &xdqp);
191 			switch (error) {
192 			/*
193 			 * Either the error was transient or the quota
194 			 * info record has no limits which gets optimized
195 			 * out by getinoquota().
196 			 */
197 			case 0:
198 				if (xdqp->dq_fhardlimit == 0 &&
199 				    xdqp->dq_fsoftlimit == 0 &&
200 				    xdqp->dq_bhardlimit == 0 &&
201 				    xdqp->dq_bsoftlimit == 0) {
202 					mutex_enter(&xdqp->dq_lock);
203 					dqput(xdqp);
204 					mutex_exit(&xdqp->dq_lock);
205 				} else {
206 					expect_dq = xdqp;
207 				}
208 				break;
209 
210 			case ESRCH:	/* quotas are not enabled */
211 			case EINVAL:	/* error flag set on cached record */
212 			case EUSERS:	/* quota table is full */
213 			case EIO:	/* I/O error */
214 				mismatch_ok = 1;
215 				break;
216 			}
217 		}
218 
219 		/*
220 		 * Make sure dqp and the current quota info agree.
221 		 * The first part of the #ifndef is the quick way to
222 		 * do the check and should be part of the standard
223 		 * DEBUG code. The #else part is useful if you are
224 		 * actually chasing an inconsistency and don't want
225 		 * to have to look at stack frames to figure which
226 		 * variable has what value.
227 		 */
228 #ifndef CHASE_QUOTA
229 		ASSERT(mismatch_ok || dqp == expect_dq);
230 #else /* CHASE_QUOTA */
231 		if (expect_dq == NULL) {
232 			/*
233 			 * If you hit this ASSERT() you know that quota
234 			 * subsystem does not expect quota info for this
235 			 * inode, but the inode has it.
236 			 */
237 			ASSERT(mismatch_ok || dqp == NULL);
238 		} else {
239 			/*
240 			 * If you hit this ASSERT() you know that quota
241 			 * subsystem expects quota info for this inode,
242 			 * but the inode does not have it.
243 			 */
244 			ASSERT(dqp);
245 			/*
246 			 * If you hit this ASSERT() you know that quota
247 			 * subsystem expects quota info for this inode
248 			 * and the inode has quota info, but the two
249 			 * quota info pointers are not the same.
250 			 */
251 			ASSERT(dqp == expect_dq);
252 		}
253 #endif /* !CHASE_QUOTA */
254 		/*
255 		 * Release for getinoquota() above or getdiskquota()
256 		 * call when error is transient.
257 		 */
258 		if (expect_dq) {
259 			mutex_enter(&expect_dq->dq_lock);
260 			dqput(expect_dq);
261 			mutex_exit(&expect_dq->dq_lock);
262 		}
263 	}
264 #endif /* DEBUG */
265 
266 	/*
267 	 * Shadow inodes and extended attribute directories
268 	 * do not have quota info records.
269 	 */
270 	if (dqp == NULL)
271 		return (0);
272 	/*
273 	 * Quotas are not enabled on this file system so there is nothing
274 	 * more to do.
275 	 */
276 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
277 		return (0);
278 	}
279 	mutex_enter(&dqp->dq_lock);
280 	if (change < 0) {
281 		dqp->dq_flags |= DQ_MOD;
282 		abs_change = -change;	/* abs_change must be positive */
283 		if (dqp->dq_curblocks < abs_change)
284 			dqp->dq_curblocks = 0;
285 		else
286 			dqp->dq_curblocks += change;
287 		if (dqp->dq_curblocks < dqp->dq_bsoftlimit)
288 			dqp->dq_btimelimit = 0;
289 		dqp->dq_flags &= ~DQ_BLKS;
290 		TRANS_QUOTA(dqp);
291 		mutex_exit(&dqp->dq_lock);
292 		return (0);
293 	}
294 
295 	/*
296 	 * Adding 'change' to dq_curblocks could cause an overflow.
297 	 * So store the result in a 64-bit variable and check for
298 	 * overflow below.
299 	 */
300 	ncurblocks = (uint64_t)dqp->dq_curblocks + change;
301 
302 	/*
303 	 * Allocation. Check hard and soft limits.
304 	 * Skip checks for uid 0 owned files.
305 	 * This check used to require both euid and ip->i_uid
306 	 * to be 0; but there are no quotas for uid 0 so
307 	 * it really doesn't matter who is writing to the
308 	 * root owned file.  And even root cannot write
309 	 * past a user's quota limit.
310 	 */
311 	if (ip->i_uid == 0)
312 		goto out;
313 
314 	/*
315 	 * Disallow allocation if it would bring the current usage over
316 	 * the hard limit or if the user is over his soft limit and his time
317 	 * has run out.
318 	 */
319 	if (dqp->dq_bhardlimit && ncurblocks >= (uint64_t)dqp->dq_bhardlimit &&
320 	    !force) {
321 		/* If the user was not informed yet and the caller	*/
322 		/* is the owner of the file				*/
323 		if ((dqp->dq_flags & DQ_BLKS) == 0 &&
324 		    ip->i_uid == crgetruid(cr)) {
325 			errmsg = msg1;
326 			dqp->dq_flags |= DQ_BLKS;
327 		}
328 		error = EDQUOT;
329 		goto out;
330 	}
331 	if (dqp->dq_bsoftlimit && ncurblocks >= (uint64_t)dqp->dq_bsoftlimit) {
332 		now = gethrestime_sec();
333 		if (dqp->dq_curblocks < dqp->dq_bsoftlimit ||
334 		    dqp->dq_btimelimit == 0) {
335 			dqp->dq_flags |= DQ_MOD;
336 			dqp->dq_btimelimit = now +
337 			    ((struct ufsvfs *)ITOV(ip)->v_vfsp->vfs_data)
338 			    ->vfs_btimelimit;
339 			if (ip->i_uid == crgetruid(cr)) {
340 				errmsg = msg2;
341 			}
342 		} else if (now > dqp->dq_btimelimit && !force) {
343 			/* If the user was not informed yet and the	*/
344 			/* caller is the owner of the file		*/
345 			if ((dqp->dq_flags & DQ_BLKS) == 0 &&
346 			    ip->i_uid == crgetruid(cr)) {
347 				errmsg = msg3;
348 				dqp->dq_flags |= DQ_BLKS;
349 			}
350 			error = EDQUOT;
351 		}
352 	}
353 out:
354 	if (error == 0) {
355 		dqp->dq_flags |= DQ_MOD;
356 		/*
357 		 * ncurblocks can be bigger than the maximum
358 		 * number that can be represented in 32-bits.
359 		 * When copying ncurblocks to dq_curblocks
360 		 * (an unsigned 32-bit quantity), make sure there
361 		 * is no overflow.  The only way this can happen
362 		 * is if "force" is set.  Otherwise, this allocation
363 		 * would have exceeded the hard limit check above
364 		 * (since the hard limit is a 32-bit quantity).
365 		 */
366 		if (ncurblocks > 0xffffffffLL) {
367 			dqp->dq_curblocks = 0xffffffff;
368 			errmsg = msg4;
369 		} else {
370 			dqp->dq_curblocks = ncurblocks;
371 		}
372 	}
373 
374 	if (dqp->dq_flags & DQ_MOD)
375 		TRANS_QUOTA(dqp);
376 
377 	mutex_exit(&dqp->dq_lock);
378 	/*
379 	 * Check for any error messages to be sent
380 	 */
381 	if (errmsg != NULL) {
382 		/*
383 		 * Send message to the error log.
384 		 */
385 		if (uerrp != NULL) {
386 			/*
387 			 * Set up message caller should send to user;
388 			 * gets copied to the message buffer as a side-
389 			 * effect of the caller's uprintf().
390 			 */
391 			*lenp = strlen(errmsg) + 20 + 20 +
392 			    strlen(ip->i_fs->fs_fsmnt) + 1;
393 			*uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
394 			if (*uerrp != NULL) {
395 				/* errmsg+1 => skip leading ! */
396 				(void) sprintf(*uerrp, errmsg+1,
397 				    (int)ttoproc(curthread)->p_pid,
398 				    (int)ip->i_uid, (int)ip->i_number,
399 				    ip->i_fs->fs_fsmnt);
400 			}
401 		} else {
402 			/*
403 			 * Caller doesn't care, so just copy to the
404 			 * message buffer.
405 			 */
406 			cmn_err(CE_NOTE, errmsg,
407 			    (int)ttoproc(curthread)->p_pid,
408 			    (int)ip->i_uid, (int)ip->i_number,
409 			    ip->i_fs->fs_fsmnt);
410 		}
411 	}
412 	return (error);
413 }
414 
415 /*
416  * Check the inode limit, applying corrective action.
417  */
418 int
419 chkiq(struct ufsvfs *ufsvfsp, int change, struct inode *ip, uid_t uid,
420 	int force, struct cred *cr, char **uerrp, size_t *lenp)
421 {
422 	struct dquot *dqp, *xdqp;
423 	unsigned int ncurfiles;
424 	char *errmsg = NULL;
425 	char *err1 =
426 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n";
427 	char *err2 =
428 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n";
429 	char *err3 =
430 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n";
431 	int error = 0;
432 	time_t now;
433 
434 	ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock));
435 	/*
436 	 * Change must be either a single increment or decrement.
437 	 * If change is an increment, then ip must be NULL.
438 	 */
439 	ASSERT(change == 1 || change == -1);
440 	ASSERT(change != 1 || ip == NULL);
441 
442 	/*
443 	 * Quotas are not enabled so bail out now.
444 	 */
445 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
446 		return (0);
447 	}
448 
449 	/*
450 	 * Free a specific inode.
451 	 */
452 	if (change == -1 && ip) {
453 		dqp = ip->i_dquot;
454 		/*
455 		 * Shadow inodes and extended attribute directories
456 		 * do not have quota info records.
457 		 */
458 		if (dqp == NULL)
459 			return (0);
460 		mutex_enter(&dqp->dq_lock);
461 		if (dqp->dq_curfiles) {
462 			dqp->dq_curfiles--;
463 			dqp->dq_flags |= DQ_MOD;
464 		}
465 		if (dqp->dq_curfiles < dqp->dq_fsoftlimit) {
466 			dqp->dq_ftimelimit = 0;
467 			dqp->dq_flags |= DQ_MOD;
468 		}
469 		dqp->dq_flags &= ~DQ_FILES;
470 		if (dqp->dq_flags & DQ_MOD)
471 			TRANS_QUOTA(dqp);
472 		mutex_exit(&dqp->dq_lock);
473 		return (0);
474 	}
475 
476 	/*
477 	 * Allocation or deallocation without a specific inode.
478 	 * Get dquot for for uid, fs.
479 	 */
480 	if (getdiskquota(uid, ufsvfsp, 0, &xdqp)) {
481 		return (0);
482 	}
483 	dqp = xdqp;
484 	mutex_enter(&dqp->dq_lock);
485 	if (dqp->dq_fsoftlimit == 0 && dqp->dq_fhardlimit == 0) {
486 		dqput(dqp);
487 		mutex_exit(&dqp->dq_lock);
488 		return (0);
489 	}
490 
491 	/*
492 	 * Skip checks for uid 0 owned files.
493 	 * This check used to require both euid and uid
494 	 * to be 0; but there are no quotas for uid 0 so
495 	 * it really doesn't matter who is writing to the
496 	 * root owned file.  And even root can not write
497 	 * past the user's quota limit.
498 	 */
499 	if (uid == 0)
500 		goto out;
501 
502 	/*
503 	 * Theoretically, this could overflow, but in practice, it
504 	 * won't.  Multi-terabyte file systems are required to have an
505 	 * nbpi value of at least 1MB.  In order to overflow this
506 	 * field, there would have to be 2^32 inodes in the file.
507 	 * That would imply a file system of 2^32 * 1MB, which is
508 	 * 2^(32 + 20), which is 4096 terabytes, which is not
509 	 * contemplated for ufs any time soon.
510 	 */
511 	ncurfiles = dqp->dq_curfiles + change;
512 
513 	/*
514 	 * Dissallow allocation if it would bring the current usage over
515 	 * the hard limit or if the user is over his soft limit and his time
516 	 * has run out.
517 	 */
518 	if (change == 1 && ncurfiles >= dqp->dq_fhardlimit &&
519 	    dqp->dq_fhardlimit && !force) {
520 		/* If the user was not informed yet and the caller	*/
521 		/* is the owner of the file 				*/
522 		if ((dqp->dq_flags & DQ_FILES) == 0 && uid == crgetruid(cr)) {
523 			errmsg = err1;
524 			dqp->dq_flags |= DQ_FILES;
525 		}
526 		error = EDQUOT;
527 	} else if (change == 1 && ncurfiles >= dqp->dq_fsoftlimit &&
528 	    dqp->dq_fsoftlimit) {
529 		now = gethrestime_sec();
530 		if (ncurfiles == dqp->dq_fsoftlimit ||
531 		    dqp->dq_ftimelimit == 0) {
532 			dqp->dq_flags |= DQ_MOD;
533 			dqp->dq_ftimelimit = now + ufsvfsp->vfs_ftimelimit;
534 			/* If the caller owns the file */
535 			if (uid == crgetruid(cr))
536 				errmsg = err2;
537 		} else if (now > dqp->dq_ftimelimit && !force) {
538 			/* If the user was not informed yet and the	*/
539 			/* caller is the owner of the file 		*/
540 			if ((dqp->dq_flags & DQ_FILES) == 0 &&
541 			    uid == crgetruid(cr)) {
542 				errmsg = err3;
543 				dqp->dq_flags |= DQ_FILES;
544 			}
545 			error = EDQUOT;
546 		}
547 	}
548 out:
549 	if (error == 0) {
550 		dqp->dq_flags |= DQ_MOD;
551 		dqp->dq_curfiles += change;
552 	}
553 	if (dqp->dq_flags & DQ_MOD)
554 		TRANS_QUOTA(dqp);
555 	dqput(dqp);
556 	mutex_exit(&dqp->dq_lock);
557 	/*
558 	 * Check for any error messages to be sent
559 	 */
560 	if (errmsg != NULL) {
561 		/*
562 		 * Send message to the error log.
563 		 */
564 		if (uerrp != NULL) {
565 			/*
566 			 * Set up message caller should send to user;
567 			 * gets copied to the message buffer as a side-
568 			 * effect of the caller's uprintf().
569 			 */
570 			*lenp = strlen(errmsg) + 20 + 20 +
571 			    strlen(ufsvfsp->vfs_fs->fs_fsmnt) + 1;
572 			*uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
573 			if (*uerrp != NULL) {
574 				/* errmsg+1 => skip leading ! */
575 				(void) sprintf(*uerrp, errmsg+1,
576 				    (int)ttoproc(curthread)->p_pid,
577 				    (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
578 			}
579 		} else {
580 			/*
581 			 * Caller doesn't care, so just copy to the
582 			 * message buffer.
583 			 */
584 			cmn_err(CE_NOTE, errmsg,
585 			    (int)ttoproc(curthread)->p_pid,
586 			    (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
587 		}
588 	}
589 	return (error);
590 }
591 
592 /*
593  * Release a dquot.
594  */
595 void
596 dqrele(struct dquot *dqp)
597 {
598 	/*
599 	 * Shadow inodes and extended attribute directories
600 	 * do not have quota info records.
601 	 */
602 	if (dqp != NULL) {
603 		mutex_enter(&dqp->dq_lock);
604 		if (dqp->dq_cnt == 1 && dqp->dq_flags & DQ_MOD)
605 			dqupdate(dqp);
606 		dqput(dqp);
607 		mutex_exit(&dqp->dq_lock);
608 	}
609 }
610