xref: /illumos-gate/usr/src/uts/common/fs/ufs/quota_ufs.c (revision 2d6eb4a5e0a47d30189497241345dc5466bb68ab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2016 by Delphix. All rights reserved.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 /*
41  * Routines used in checking limits on file system usage.
42  */
43 
44 #include <sys/types.h>
45 #include <sys/t_lock.h>
46 #include <sys/param.h>
47 #include <sys/time.h>
48 #include <sys/systm.h>
49 #include <sys/kmem.h>
50 #include <sys/signal.h>
51 #include <sys/cred.h>
52 #include <sys/proc.h>
53 #include <sys/user.h>
54 #include <sys/proc.h>
55 #include <sys/vfs.h>
56 #include <sys/vnode.h>
57 #include <sys/buf.h>
58 #include <sys/uio.h>
59 #include <sys/fs/ufs_inode.h>
60 #include <sys/fs/ufs_fs.h>
61 #include <sys/fs/ufs_quota.h>
62 #include <sys/errno.h>
63 #include <sys/cmn_err.h>
64 #include <sys/session.h>
65 #include <sys/debug.h>
66 
67 /*
68  * Find the dquot structure that should
69  * be used in checking i/o on inode ip.
70  */
71 struct dquot *
getinoquota(struct inode * ip)72 getinoquota(struct inode *ip)
73 {
74 	struct dquot *dqp, *xdqp;
75 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
76 
77 	ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
78 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
79 	/*
80 	 * Check for quotas enabled.
81 	 */
82 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
83 		return (NULL);
84 	}
85 
86 	/*
87 	 * Check for someone doing I/O to quota file.
88 	 */
89 	if (ip == ufsvfsp->vfs_qinod) {
90 		return (NULL);
91 	}
92 
93 	/*
94 	 * Check for a legal inode, e.g. not a shadow inode,
95 	 * not a extended attribute directory inode and a valid mode.
96 	 */
97 	ASSERT((ip->i_mode & IFMT) != IFSHAD);
98 	ASSERT((ip->i_mode & IFMT) != IFATTRDIR);
99 	ASSERT(ip->i_mode);
100 
101 	if (getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, &xdqp)) {
102 		return (NULL);
103 	}
104 	dqp = xdqp;
105 	mutex_enter(&dqp->dq_lock);
106 	ASSERT(ip->i_uid == dqp->dq_uid);
107 
108 	if (dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 &&
109 	    dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) {
110 		dqput(dqp);
111 		mutex_exit(&dqp->dq_lock);
112 		dqp = NULL;
113 	} else {
114 		mutex_exit(&dqp->dq_lock);
115 	}
116 	return (dqp);
117 }
118 
119 /*
120  * Update disk usage, and take corrective action.
121  */
122 int
chkdq(struct inode * ip,long change,int force,struct cred * cr,char ** uerrp,size_t * lenp)123 chkdq(struct inode *ip, long change, int force, struct cred *cr,
124 	char **uerrp, size_t *lenp)
125 {
126 	struct dquot *dqp;
127 	uint64_t ncurblocks;
128 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
129 	int error = 0;
130 	long abs_change;
131 	char *msg1 =
132 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n";
133 	char *msg2 =
134 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n";
135 	char *msg3 =
136 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n";
137 	char *msg4 =
138 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n";
139 	char *errmsg = NULL;
140 	time_t now;
141 
142 	/*
143 	 * Shadow inodes do not need to hold the vfs_dqrwlock lock.
144 	 */
145 	ASSERT((ip->i_mode & IFMT) == IFSHAD ||
146 	    RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
147 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
148 
149 	if (change == 0)
150 		return (0);
151 	dqp = ip->i_dquot;
152 
153 	/*
154 	 * Make sure the quota info record matches the owner.
155 	 */
156 	ASSERT(dqp == NULL || ip->i_uid == dqp->dq_uid);
157 
158 #ifdef DEBUG
159 	/*
160 	 * Shadow inodes and extended attribute directories
161 	 * should not have quota info records.
162 	 */
163 	if ((ip->i_mode & IFMT) == IFSHAD || (ip->i_mode & IFMT) == IFATTRDIR) {
164 		ASSERT(dqp == NULL);
165 	}
166 	/*
167 	 * Paranoia for verifying that quotas are okay.
168 	 */
169 	else {
170 		struct dquot *expect_dq;
171 		int mismatch_ok = 0;
172 
173 		/* Get current quota information */
174 		expect_dq = getinoquota(ip);
175 		/*
176 		 * We got NULL back from getinoquota(), but there is
177 		 * no error code return from that interface and some
178 		 * errors are "ok" because we may be testing via error
179 		 * injection.  If this is not the quota inode then we
180 		 * use getdiskquota() to see if there is an error and
181 		 * if the error is ok.
182 		 */
183 		if (expect_dq == NULL && ip != ufsvfsp->vfs_qinod) {
184 			int error;
185 			struct dquot *xdqp;
186 
187 			error = getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0,
188 			    &xdqp);
189 			switch (error) {
190 			/*
191 			 * Either the error was transient or the quota
192 			 * info record has no limits which gets optimized
193 			 * out by getinoquota().
194 			 */
195 			case 0:
196 				if (xdqp->dq_fhardlimit == 0 &&
197 				    xdqp->dq_fsoftlimit == 0 &&
198 				    xdqp->dq_bhardlimit == 0 &&
199 				    xdqp->dq_bsoftlimit == 0) {
200 					mutex_enter(&xdqp->dq_lock);
201 					dqput(xdqp);
202 					mutex_exit(&xdqp->dq_lock);
203 				} else {
204 					expect_dq = xdqp;
205 				}
206 				break;
207 
208 			case ESRCH:	/* quotas are not enabled */
209 			case EINVAL:	/* error flag set on cached record */
210 			case EUSERS:	/* quota table is full */
211 			case EIO:	/* I/O error */
212 				mismatch_ok = 1;
213 				break;
214 			}
215 		}
216 
217 		/*
218 		 * Make sure dqp and the current quota info agree.
219 		 * The first part of the #ifndef is the quick way to
220 		 * do the check and should be part of the standard
221 		 * DEBUG code. The #else part is useful if you are
222 		 * actually chasing an inconsistency and don't want
223 		 * to have to look at stack frames to figure which
224 		 * variable has what value.
225 		 */
226 #ifndef CHASE_QUOTA
227 		ASSERT(mismatch_ok || dqp == expect_dq);
228 #else /* CHASE_QUOTA */
229 		if (expect_dq == NULL) {
230 			/*
231 			 * If you hit this ASSERT() you know that quota
232 			 * subsystem does not expect quota info for this
233 			 * inode, but the inode has it.
234 			 */
235 			ASSERT(mismatch_ok || dqp == NULL);
236 		} else {
237 			/*
238 			 * If you hit this ASSERT() you know that quota
239 			 * subsystem expects quota info for this inode,
240 			 * but the inode does not have it.
241 			 */
242 			ASSERT(dqp);
243 			/*
244 			 * If you hit this ASSERT() you know that quota
245 			 * subsystem expects quota info for this inode
246 			 * and the inode has quota info, but the two
247 			 * quota info pointers are not the same.
248 			 */
249 			ASSERT(dqp == expect_dq);
250 		}
251 #endif /* !CHASE_QUOTA */
252 		/*
253 		 * Release for getinoquota() above or getdiskquota()
254 		 * call when error is transient.
255 		 */
256 		if (expect_dq) {
257 			mutex_enter(&expect_dq->dq_lock);
258 			dqput(expect_dq);
259 			mutex_exit(&expect_dq->dq_lock);
260 		}
261 	}
262 #endif /* DEBUG */
263 
264 	/*
265 	 * Shadow inodes and extended attribute directories
266 	 * do not have quota info records.
267 	 */
268 	if (dqp == NULL)
269 		return (0);
270 	/*
271 	 * Quotas are not enabled on this file system so there is nothing
272 	 * more to do.
273 	 */
274 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
275 		return (0);
276 	}
277 	mutex_enter(&dqp->dq_lock);
278 	if (change < 0) {
279 		dqp->dq_flags |= DQ_MOD;
280 		abs_change = -change;	/* abs_change must be positive */
281 		if (dqp->dq_curblocks < abs_change)
282 			dqp->dq_curblocks = 0;
283 		else
284 			dqp->dq_curblocks += change;
285 		if (dqp->dq_curblocks < dqp->dq_bsoftlimit)
286 			dqp->dq_btimelimit = 0;
287 		dqp->dq_flags &= ~DQ_BLKS;
288 		TRANS_QUOTA(dqp);
289 		mutex_exit(&dqp->dq_lock);
290 		return (0);
291 	}
292 
293 	/*
294 	 * Adding 'change' to dq_curblocks could cause an overflow.
295 	 * So store the result in a 64-bit variable and check for
296 	 * overflow below.
297 	 */
298 	ncurblocks = (uint64_t)dqp->dq_curblocks + change;
299 
300 	/*
301 	 * Allocation. Check hard and soft limits.
302 	 * Skip checks for uid 0 owned files.
303 	 * This check used to require both euid and ip->i_uid
304 	 * to be 0; but there are no quotas for uid 0 so
305 	 * it really doesn't matter who is writing to the
306 	 * root owned file.  And even root cannot write
307 	 * past a user's quota limit.
308 	 */
309 	if (ip->i_uid == 0)
310 		goto out;
311 
312 	/*
313 	 * Disallow allocation if it would bring the current usage over
314 	 * the hard limit or if the user is over their soft limit and their
315 	 * time has run out.
316 	 */
317 	if (dqp->dq_bhardlimit && ncurblocks >= (uint64_t)dqp->dq_bhardlimit &&
318 	    !force) {
319 		/* If the user was not informed yet and the caller	*/
320 		/* is the owner of the file				*/
321 		if ((dqp->dq_flags & DQ_BLKS) == 0 &&
322 		    ip->i_uid == crgetruid(cr)) {
323 			errmsg = msg1;
324 			dqp->dq_flags |= DQ_BLKS;
325 		}
326 		error = EDQUOT;
327 		goto out;
328 	}
329 	if (dqp->dq_bsoftlimit && ncurblocks >= (uint64_t)dqp->dq_bsoftlimit) {
330 		now = gethrestime_sec();
331 		if (dqp->dq_curblocks < dqp->dq_bsoftlimit ||
332 		    dqp->dq_btimelimit == 0) {
333 			dqp->dq_flags |= DQ_MOD;
334 			dqp->dq_btimelimit = now +
335 			    ((struct ufsvfs *)ITOV(ip)->v_vfsp->vfs_data)
336 			    ->vfs_btimelimit;
337 			if (ip->i_uid == crgetruid(cr)) {
338 				errmsg = msg2;
339 			}
340 		} else if (now > dqp->dq_btimelimit && !force) {
341 			/* If the user was not informed yet and the	*/
342 			/* caller is the owner of the file		*/
343 			if ((dqp->dq_flags & DQ_BLKS) == 0 &&
344 			    ip->i_uid == crgetruid(cr)) {
345 				errmsg = msg3;
346 				dqp->dq_flags |= DQ_BLKS;
347 			}
348 			error = EDQUOT;
349 		}
350 	}
351 out:
352 	if (error == 0) {
353 		dqp->dq_flags |= DQ_MOD;
354 		/*
355 		 * ncurblocks can be bigger than the maximum
356 		 * number that can be represented in 32-bits.
357 		 * When copying ncurblocks to dq_curblocks
358 		 * (an unsigned 32-bit quantity), make sure there
359 		 * is no overflow.  The only way this can happen
360 		 * is if "force" is set.  Otherwise, this allocation
361 		 * would have exceeded the hard limit check above
362 		 * (since the hard limit is a 32-bit quantity).
363 		 */
364 		if (ncurblocks > 0xffffffffLL) {
365 			dqp->dq_curblocks = 0xffffffff;
366 			errmsg = msg4;
367 		} else {
368 			dqp->dq_curblocks = ncurblocks;
369 		}
370 	}
371 
372 	if (dqp->dq_flags & DQ_MOD)
373 		TRANS_QUOTA(dqp);
374 
375 	mutex_exit(&dqp->dq_lock);
376 	/*
377 	 * Check for any error messages to be sent
378 	 */
379 	if (errmsg != NULL) {
380 		/*
381 		 * Send message to the error log.
382 		 */
383 		if (uerrp != NULL) {
384 			/*
385 			 * Set up message caller should send to user;
386 			 * gets copied to the message buffer as a side-
387 			 * effect of the caller's uprintf().
388 			 */
389 			*lenp = strlen(errmsg) + 20 + 20 +
390 			    strlen(ip->i_fs->fs_fsmnt) + 1;
391 			*uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
392 			if (*uerrp != NULL) {
393 				/* errmsg+1 => skip leading ! */
394 				(void) sprintf(*uerrp, errmsg+1,
395 				    (int)ttoproc(curthread)->p_pid,
396 				    (int)ip->i_uid, (int)ip->i_number,
397 				    ip->i_fs->fs_fsmnt);
398 			}
399 		} else {
400 			/*
401 			 * Caller doesn't care, so just copy to the
402 			 * message buffer.
403 			 */
404 			cmn_err(CE_NOTE, errmsg,
405 			    (int)ttoproc(curthread)->p_pid,
406 			    (int)ip->i_uid, (int)ip->i_number,
407 			    ip->i_fs->fs_fsmnt);
408 		}
409 	}
410 	return (error);
411 }
412 
413 /*
414  * Check the inode limit, applying corrective action.
415  */
416 int
chkiq(struct ufsvfs * ufsvfsp,int change,struct inode * ip,uid_t uid,int force,struct cred * cr,char ** uerrp,size_t * lenp)417 chkiq(struct ufsvfs *ufsvfsp, int change, struct inode *ip, uid_t uid,
418 	int force, struct cred *cr, char **uerrp, size_t *lenp)
419 {
420 	struct dquot *dqp, *xdqp;
421 	unsigned int ncurfiles;
422 	char *errmsg = NULL;
423 	char *err1 =
424 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n";
425 	char *err2 =
426 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n";
427 	char *err3 =
428 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n";
429 	int error = 0;
430 	time_t now;
431 
432 	ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock));
433 	/*
434 	 * Change must be either a single increment or decrement.
435 	 * If change is an increment, then ip must be NULL.
436 	 */
437 	ASSERT(change == 1 || change == -1);
438 	ASSERT(change != 1 || ip == NULL);
439 
440 	/*
441 	 * Quotas are not enabled so bail out now.
442 	 */
443 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
444 		return (0);
445 	}
446 
447 	/*
448 	 * Free a specific inode.
449 	 */
450 	if (change == -1 && ip) {
451 		dqp = ip->i_dquot;
452 		/*
453 		 * Shadow inodes and extended attribute directories
454 		 * do not have quota info records.
455 		 */
456 		if (dqp == NULL)
457 			return (0);
458 		mutex_enter(&dqp->dq_lock);
459 		if (dqp->dq_curfiles) {
460 			dqp->dq_curfiles--;
461 			dqp->dq_flags |= DQ_MOD;
462 		}
463 		if (dqp->dq_curfiles < dqp->dq_fsoftlimit) {
464 			dqp->dq_ftimelimit = 0;
465 			dqp->dq_flags |= DQ_MOD;
466 		}
467 		dqp->dq_flags &= ~DQ_FILES;
468 		if (dqp->dq_flags & DQ_MOD)
469 			TRANS_QUOTA(dqp);
470 		mutex_exit(&dqp->dq_lock);
471 		return (0);
472 	}
473 
474 	/*
475 	 * Allocation or deallocation without a specific inode.
476 	 * Get dquot for for uid, fs.
477 	 */
478 	if (getdiskquota(uid, ufsvfsp, 0, &xdqp)) {
479 		return (0);
480 	}
481 	dqp = xdqp;
482 	mutex_enter(&dqp->dq_lock);
483 	if (dqp->dq_fsoftlimit == 0 && dqp->dq_fhardlimit == 0) {
484 		dqput(dqp);
485 		mutex_exit(&dqp->dq_lock);
486 		return (0);
487 	}
488 
489 	/*
490 	 * Skip checks for uid 0 owned files.
491 	 * This check used to require both euid and uid
492 	 * to be 0; but there are no quotas for uid 0 so
493 	 * it really doesn't matter who is writing to the
494 	 * root owned file.  And even root can not write
495 	 * past the user's quota limit.
496 	 */
497 	if (uid == 0)
498 		goto out;
499 
500 	/*
501 	 * Theoretically, this could overflow, but in practice, it
502 	 * won't.  Multi-terabyte file systems are required to have an
503 	 * nbpi value of at least 1MB.  In order to overflow this
504 	 * field, there would have to be 2^32 inodes in the file.
505 	 * That would imply a file system of 2^32 * 1MB, which is
506 	 * 2^(32 + 20), which is 4096 terabytes, which is not
507 	 * contemplated for ufs any time soon.
508 	 */
509 	ncurfiles = dqp->dq_curfiles + change;
510 
511 	/*
512 	 * Dissallow allocation if it would bring the current usage over
513 	 * the hard limit or if the user is over their soft limit and their
514 	 * time has run out.
515 	 */
516 	if (change == 1 && ncurfiles >= dqp->dq_fhardlimit &&
517 	    dqp->dq_fhardlimit && !force) {
518 		/* If the user was not informed yet and the caller	*/
519 		/* is the owner of the file 				*/
520 		if ((dqp->dq_flags & DQ_FILES) == 0 && uid == crgetruid(cr)) {
521 			errmsg = err1;
522 			dqp->dq_flags |= DQ_FILES;
523 		}
524 		error = EDQUOT;
525 	} else if (change == 1 && ncurfiles >= dqp->dq_fsoftlimit &&
526 	    dqp->dq_fsoftlimit) {
527 		now = gethrestime_sec();
528 		if (ncurfiles == dqp->dq_fsoftlimit ||
529 		    dqp->dq_ftimelimit == 0) {
530 			dqp->dq_flags |= DQ_MOD;
531 			dqp->dq_ftimelimit = now + ufsvfsp->vfs_ftimelimit;
532 			/* If the caller owns the file */
533 			if (uid == crgetruid(cr))
534 				errmsg = err2;
535 		} else if (now > dqp->dq_ftimelimit && !force) {
536 			/* If the user was not informed yet and the	*/
537 			/* caller is the owner of the file 		*/
538 			if ((dqp->dq_flags & DQ_FILES) == 0 &&
539 			    uid == crgetruid(cr)) {
540 				errmsg = err3;
541 				dqp->dq_flags |= DQ_FILES;
542 			}
543 			error = EDQUOT;
544 		}
545 	}
546 out:
547 	if (error == 0) {
548 		dqp->dq_flags |= DQ_MOD;
549 		dqp->dq_curfiles += change;
550 	}
551 	if (dqp->dq_flags & DQ_MOD)
552 		TRANS_QUOTA(dqp);
553 	dqput(dqp);
554 	mutex_exit(&dqp->dq_lock);
555 	/*
556 	 * Check for any error messages to be sent
557 	 */
558 	if (errmsg != NULL) {
559 		/*
560 		 * Send message to the error log.
561 		 */
562 		if (uerrp != NULL) {
563 			/*
564 			 * Set up message caller should send to user;
565 			 * gets copied to the message buffer as a side-
566 			 * effect of the caller's uprintf().
567 			 */
568 			*lenp = strlen(errmsg) + 20 + 20 +
569 			    strlen(ufsvfsp->vfs_fs->fs_fsmnt) + 1;
570 			*uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
571 			if (*uerrp != NULL) {
572 				/* errmsg+1 => skip leading ! */
573 				(void) sprintf(*uerrp, errmsg+1,
574 				    (int)ttoproc(curthread)->p_pid,
575 				    (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
576 			}
577 		} else {
578 			/*
579 			 * Caller doesn't care, so just copy to the
580 			 * message buffer.
581 			 */
582 			cmn_err(CE_NOTE, errmsg,
583 			    (int)ttoproc(curthread)->p_pid,
584 			    (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
585 		}
586 	}
587 	return (error);
588 }
589 
590 /*
591  * Release a dquot.
592  */
593 void
dqrele(struct dquot * dqp)594 dqrele(struct dquot *dqp)
595 {
596 	/*
597 	 * Shadow inodes and extended attribute directories
598 	 * do not have quota info records.
599 	 */
600 	if (dqp != NULL) {
601 		mutex_enter(&dqp->dq_lock);
602 		if (dqp->dq_cnt == 1 && dqp->dq_flags & DQ_MOD)
603 			dqupdate(dqp);
604 		dqput(dqp);
605 		mutex_exit(&dqp->dq_lock);
606 	}
607 }
608