/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */
26
/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/
29
/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
39
/*
 * Routines used in checking limits on file system usage.
 */
43
44 #include <sys/types.h>
45 #include <sys/t_lock.h>
46 #include <sys/param.h>
47 #include <sys/time.h>
48 #include <sys/systm.h>
49 #include <sys/kmem.h>
50 #include <sys/signal.h>
51 #include <sys/cred.h>
52 #include <sys/proc.h>
53 #include <sys/user.h>
54 #include <sys/proc.h>
55 #include <sys/vfs.h>
56 #include <sys/vnode.h>
57 #include <sys/buf.h>
58 #include <sys/uio.h>
59 #include <sys/fs/ufs_inode.h>
60 #include <sys/fs/ufs_fs.h>
61 #include <sys/fs/ufs_quota.h>
62 #include <sys/errno.h>
63 #include <sys/cmn_err.h>
64 #include <sys/session.h>
65 #include <sys/debug.h>
66
67 /*
68 * Find the dquot structure that should
69 * be used in checking i/o on inode ip.
70 */
71 struct dquot *
getinoquota(struct inode * ip)72 getinoquota(struct inode *ip)
73 {
74 struct dquot *dqp, *xdqp;
75 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
76
77 ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
78 ASSERT(RW_WRITE_HELD(&ip->i_contents));
79 /*
80 * Check for quotas enabled.
81 */
82 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
83 return (NULL);
84 }
85
86 /*
87 * Check for someone doing I/O to quota file.
88 */
89 if (ip == ufsvfsp->vfs_qinod) {
90 return (NULL);
91 }
92
93 /*
94 * Check for a legal inode, e.g. not a shadow inode,
95 * not a extended attribute directory inode and a valid mode.
96 */
97 ASSERT((ip->i_mode & IFMT) != IFSHAD);
98 ASSERT((ip->i_mode & IFMT) != IFATTRDIR);
99 ASSERT(ip->i_mode);
100
101 if (getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, &xdqp)) {
102 return (NULL);
103 }
104 dqp = xdqp;
105 mutex_enter(&dqp->dq_lock);
106 ASSERT(ip->i_uid == dqp->dq_uid);
107
108 if (dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 &&
109 dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) {
110 dqput(dqp);
111 mutex_exit(&dqp->dq_lock);
112 dqp = NULL;
113 } else {
114 mutex_exit(&dqp->dq_lock);
115 }
116 return (dqp);
117 }
118
119 /*
120 * Update disk usage, and take corrective action.
121 */
122 int
chkdq(struct inode * ip,long change,int force,struct cred * cr,char ** uerrp,size_t * lenp)123 chkdq(struct inode *ip, long change, int force, struct cred *cr,
124 char **uerrp, size_t *lenp)
125 {
126 struct dquot *dqp;
127 uint64_t ncurblocks;
128 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
129 int error = 0;
130 long abs_change;
131 char *msg1 =
132 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n";
133 char *msg2 =
134 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n";
135 char *msg3 =
136 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n";
137 char *msg4 =
138 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n";
139 char *errmsg = NULL;
140 time_t now;
141
142 /*
143 * Shadow inodes do not need to hold the vfs_dqrwlock lock.
144 */
145 ASSERT((ip->i_mode & IFMT) == IFSHAD ||
146 RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
147 ASSERT(RW_WRITE_HELD(&ip->i_contents));
148
149 if (change == 0)
150 return (0);
151 dqp = ip->i_dquot;
152
153 /*
154 * Make sure the quota info record matches the owner.
155 */
156 ASSERT(dqp == NULL || ip->i_uid == dqp->dq_uid);
157
158 #ifdef DEBUG
159 /*
160 * Shadow inodes and extended attribute directories
161 * should not have quota info records.
162 */
163 if ((ip->i_mode & IFMT) == IFSHAD || (ip->i_mode & IFMT) == IFATTRDIR) {
164 ASSERT(dqp == NULL);
165 }
166 /*
167 * Paranoia for verifying that quotas are okay.
168 */
169 else {
170 struct dquot *expect_dq;
171 int mismatch_ok = 0;
172
173 /* Get current quota information */
174 expect_dq = getinoquota(ip);
175 /*
176 * We got NULL back from getinoquota(), but there is
177 * no error code return from that interface and some
178 * errors are "ok" because we may be testing via error
179 * injection. If this is not the quota inode then we
180 * use getdiskquota() to see if there is an error and
181 * if the error is ok.
182 */
183 if (expect_dq == NULL && ip != ufsvfsp->vfs_qinod) {
184 int error;
185 struct dquot *xdqp;
186
187 error = getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0,
188 &xdqp);
189 switch (error) {
190 /*
191 * Either the error was transient or the quota
192 * info record has no limits which gets optimized
193 * out by getinoquota().
194 */
195 case 0:
196 if (xdqp->dq_fhardlimit == 0 &&
197 xdqp->dq_fsoftlimit == 0 &&
198 xdqp->dq_bhardlimit == 0 &&
199 xdqp->dq_bsoftlimit == 0) {
200 mutex_enter(&xdqp->dq_lock);
201 dqput(xdqp);
202 mutex_exit(&xdqp->dq_lock);
203 } else {
204 expect_dq = xdqp;
205 }
206 break;
207
208 case ESRCH: /* quotas are not enabled */
209 case EINVAL: /* error flag set on cached record */
210 case EUSERS: /* quota table is full */
211 case EIO: /* I/O error */
212 mismatch_ok = 1;
213 break;
214 }
215 }
216
217 /*
218 * Make sure dqp and the current quota info agree.
219 * The first part of the #ifndef is the quick way to
220 * do the check and should be part of the standard
221 * DEBUG code. The #else part is useful if you are
222 * actually chasing an inconsistency and don't want
223 * to have to look at stack frames to figure which
224 * variable has what value.
225 */
226 #ifndef CHASE_QUOTA
227 ASSERT(mismatch_ok || dqp == expect_dq);
228 #else /* CHASE_QUOTA */
229 if (expect_dq == NULL) {
230 /*
231 * If you hit this ASSERT() you know that quota
232 * subsystem does not expect quota info for this
233 * inode, but the inode has it.
234 */
235 ASSERT(mismatch_ok || dqp == NULL);
236 } else {
237 /*
238 * If you hit this ASSERT() you know that quota
239 * subsystem expects quota info for this inode,
240 * but the inode does not have it.
241 */
242 ASSERT(dqp);
243 /*
244 * If you hit this ASSERT() you know that quota
245 * subsystem expects quota info for this inode
246 * and the inode has quota info, but the two
247 * quota info pointers are not the same.
248 */
249 ASSERT(dqp == expect_dq);
250 }
251 #endif /* !CHASE_QUOTA */
252 /*
253 * Release for getinoquota() above or getdiskquota()
254 * call when error is transient.
255 */
256 if (expect_dq) {
257 mutex_enter(&expect_dq->dq_lock);
258 dqput(expect_dq);
259 mutex_exit(&expect_dq->dq_lock);
260 }
261 }
262 #endif /* DEBUG */
263
264 /*
265 * Shadow inodes and extended attribute directories
266 * do not have quota info records.
267 */
268 if (dqp == NULL)
269 return (0);
270 /*
271 * Quotas are not enabled on this file system so there is nothing
272 * more to do.
273 */
274 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
275 return (0);
276 }
277 mutex_enter(&dqp->dq_lock);
278 if (change < 0) {
279 dqp->dq_flags |= DQ_MOD;
280 abs_change = -change; /* abs_change must be positive */
281 if (dqp->dq_curblocks < abs_change)
282 dqp->dq_curblocks = 0;
283 else
284 dqp->dq_curblocks += change;
285 if (dqp->dq_curblocks < dqp->dq_bsoftlimit)
286 dqp->dq_btimelimit = 0;
287 dqp->dq_flags &= ~DQ_BLKS;
288 TRANS_QUOTA(dqp);
289 mutex_exit(&dqp->dq_lock);
290 return (0);
291 }
292
293 /*
294 * Adding 'change' to dq_curblocks could cause an overflow.
295 * So store the result in a 64-bit variable and check for
296 * overflow below.
297 */
298 ncurblocks = (uint64_t)dqp->dq_curblocks + change;
299
300 /*
301 * Allocation. Check hard and soft limits.
302 * Skip checks for uid 0 owned files.
303 * This check used to require both euid and ip->i_uid
304 * to be 0; but there are no quotas for uid 0 so
305 * it really doesn't matter who is writing to the
306 * root owned file. And even root cannot write
307 * past a user's quota limit.
308 */
309 if (ip->i_uid == 0)
310 goto out;
311
312 /*
313 * Disallow allocation if it would bring the current usage over
314 * the hard limit or if the user is over their soft limit and their
315 * time has run out.
316 */
317 if (dqp->dq_bhardlimit && ncurblocks >= (uint64_t)dqp->dq_bhardlimit &&
318 !force) {
319 /* If the user was not informed yet and the caller */
320 /* is the owner of the file */
321 if ((dqp->dq_flags & DQ_BLKS) == 0 &&
322 ip->i_uid == crgetruid(cr)) {
323 errmsg = msg1;
324 dqp->dq_flags |= DQ_BLKS;
325 }
326 error = EDQUOT;
327 goto out;
328 }
329 if (dqp->dq_bsoftlimit && ncurblocks >= (uint64_t)dqp->dq_bsoftlimit) {
330 now = gethrestime_sec();
331 if (dqp->dq_curblocks < dqp->dq_bsoftlimit ||
332 dqp->dq_btimelimit == 0) {
333 dqp->dq_flags |= DQ_MOD;
334 dqp->dq_btimelimit = now +
335 ((struct ufsvfs *)ITOV(ip)->v_vfsp->vfs_data)
336 ->vfs_btimelimit;
337 if (ip->i_uid == crgetruid(cr)) {
338 errmsg = msg2;
339 }
340 } else if (now > dqp->dq_btimelimit && !force) {
341 /* If the user was not informed yet and the */
342 /* caller is the owner of the file */
343 if ((dqp->dq_flags & DQ_BLKS) == 0 &&
344 ip->i_uid == crgetruid(cr)) {
345 errmsg = msg3;
346 dqp->dq_flags |= DQ_BLKS;
347 }
348 error = EDQUOT;
349 }
350 }
351 out:
352 if (error == 0) {
353 dqp->dq_flags |= DQ_MOD;
354 /*
355 * ncurblocks can be bigger than the maximum
356 * number that can be represented in 32-bits.
357 * When copying ncurblocks to dq_curblocks
358 * (an unsigned 32-bit quantity), make sure there
359 * is no overflow. The only way this can happen
360 * is if "force" is set. Otherwise, this allocation
361 * would have exceeded the hard limit check above
362 * (since the hard limit is a 32-bit quantity).
363 */
364 if (ncurblocks > 0xffffffffLL) {
365 dqp->dq_curblocks = 0xffffffff;
366 errmsg = msg4;
367 } else {
368 dqp->dq_curblocks = ncurblocks;
369 }
370 }
371
372 if (dqp->dq_flags & DQ_MOD)
373 TRANS_QUOTA(dqp);
374
375 mutex_exit(&dqp->dq_lock);
376 /*
377 * Check for any error messages to be sent
378 */
379 if (errmsg != NULL) {
380 /*
381 * Send message to the error log.
382 */
383 if (uerrp != NULL) {
384 /*
385 * Set up message caller should send to user;
386 * gets copied to the message buffer as a side-
387 * effect of the caller's uprintf().
388 */
389 *lenp = strlen(errmsg) + 20 + 20 +
390 strlen(ip->i_fs->fs_fsmnt) + 1;
391 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
392 if (*uerrp != NULL) {
393 /* errmsg+1 => skip leading ! */
394 (void) sprintf(*uerrp, errmsg+1,
395 (int)ttoproc(curthread)->p_pid,
396 (int)ip->i_uid, (int)ip->i_number,
397 ip->i_fs->fs_fsmnt);
398 }
399 } else {
400 /*
401 * Caller doesn't care, so just copy to the
402 * message buffer.
403 */
404 cmn_err(CE_NOTE, errmsg,
405 (int)ttoproc(curthread)->p_pid,
406 (int)ip->i_uid, (int)ip->i_number,
407 ip->i_fs->fs_fsmnt);
408 }
409 }
410 return (error);
411 }
412
413 /*
414 * Check the inode limit, applying corrective action.
415 */
416 int
chkiq(struct ufsvfs * ufsvfsp,int change,struct inode * ip,uid_t uid,int force,struct cred * cr,char ** uerrp,size_t * lenp)417 chkiq(struct ufsvfs *ufsvfsp, int change, struct inode *ip, uid_t uid,
418 int force, struct cred *cr, char **uerrp, size_t *lenp)
419 {
420 struct dquot *dqp, *xdqp;
421 unsigned int ncurfiles;
422 char *errmsg = NULL;
423 char *err1 =
424 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n";
425 char *err2 =
426 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n";
427 char *err3 =
428 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n";
429 int error = 0;
430 time_t now;
431
432 ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock));
433 /*
434 * Change must be either a single increment or decrement.
435 * If change is an increment, then ip must be NULL.
436 */
437 ASSERT(change == 1 || change == -1);
438 ASSERT(change != 1 || ip == NULL);
439
440 /*
441 * Quotas are not enabled so bail out now.
442 */
443 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
444 return (0);
445 }
446
447 /*
448 * Free a specific inode.
449 */
450 if (change == -1 && ip) {
451 dqp = ip->i_dquot;
452 /*
453 * Shadow inodes and extended attribute directories
454 * do not have quota info records.
455 */
456 if (dqp == NULL)
457 return (0);
458 mutex_enter(&dqp->dq_lock);
459 if (dqp->dq_curfiles) {
460 dqp->dq_curfiles--;
461 dqp->dq_flags |= DQ_MOD;
462 }
463 if (dqp->dq_curfiles < dqp->dq_fsoftlimit) {
464 dqp->dq_ftimelimit = 0;
465 dqp->dq_flags |= DQ_MOD;
466 }
467 dqp->dq_flags &= ~DQ_FILES;
468 if (dqp->dq_flags & DQ_MOD)
469 TRANS_QUOTA(dqp);
470 mutex_exit(&dqp->dq_lock);
471 return (0);
472 }
473
474 /*
475 * Allocation or deallocation without a specific inode.
476 * Get dquot for for uid, fs.
477 */
478 if (getdiskquota(uid, ufsvfsp, 0, &xdqp)) {
479 return (0);
480 }
481 dqp = xdqp;
482 mutex_enter(&dqp->dq_lock);
483 if (dqp->dq_fsoftlimit == 0 && dqp->dq_fhardlimit == 0) {
484 dqput(dqp);
485 mutex_exit(&dqp->dq_lock);
486 return (0);
487 }
488
489 /*
490 * Skip checks for uid 0 owned files.
491 * This check used to require both euid and uid
492 * to be 0; but there are no quotas for uid 0 so
493 * it really doesn't matter who is writing to the
494 * root owned file. And even root can not write
495 * past the user's quota limit.
496 */
497 if (uid == 0)
498 goto out;
499
500 /*
501 * Theoretically, this could overflow, but in practice, it
502 * won't. Multi-terabyte file systems are required to have an
503 * nbpi value of at least 1MB. In order to overflow this
504 * field, there would have to be 2^32 inodes in the file.
505 * That would imply a file system of 2^32 * 1MB, which is
506 * 2^(32 + 20), which is 4096 terabytes, which is not
507 * contemplated for ufs any time soon.
508 */
509 ncurfiles = dqp->dq_curfiles + change;
510
511 /*
512 * Dissallow allocation if it would bring the current usage over
513 * the hard limit or if the user is over their soft limit and their
514 * time has run out.
515 */
516 if (change == 1 && ncurfiles >= dqp->dq_fhardlimit &&
517 dqp->dq_fhardlimit && !force) {
518 /* If the user was not informed yet and the caller */
519 /* is the owner of the file */
520 if ((dqp->dq_flags & DQ_FILES) == 0 && uid == crgetruid(cr)) {
521 errmsg = err1;
522 dqp->dq_flags |= DQ_FILES;
523 }
524 error = EDQUOT;
525 } else if (change == 1 && ncurfiles >= dqp->dq_fsoftlimit &&
526 dqp->dq_fsoftlimit) {
527 now = gethrestime_sec();
528 if (ncurfiles == dqp->dq_fsoftlimit ||
529 dqp->dq_ftimelimit == 0) {
530 dqp->dq_flags |= DQ_MOD;
531 dqp->dq_ftimelimit = now + ufsvfsp->vfs_ftimelimit;
532 /* If the caller owns the file */
533 if (uid == crgetruid(cr))
534 errmsg = err2;
535 } else if (now > dqp->dq_ftimelimit && !force) {
536 /* If the user was not informed yet and the */
537 /* caller is the owner of the file */
538 if ((dqp->dq_flags & DQ_FILES) == 0 &&
539 uid == crgetruid(cr)) {
540 errmsg = err3;
541 dqp->dq_flags |= DQ_FILES;
542 }
543 error = EDQUOT;
544 }
545 }
546 out:
547 if (error == 0) {
548 dqp->dq_flags |= DQ_MOD;
549 dqp->dq_curfiles += change;
550 }
551 if (dqp->dq_flags & DQ_MOD)
552 TRANS_QUOTA(dqp);
553 dqput(dqp);
554 mutex_exit(&dqp->dq_lock);
555 /*
556 * Check for any error messages to be sent
557 */
558 if (errmsg != NULL) {
559 /*
560 * Send message to the error log.
561 */
562 if (uerrp != NULL) {
563 /*
564 * Set up message caller should send to user;
565 * gets copied to the message buffer as a side-
566 * effect of the caller's uprintf().
567 */
568 *lenp = strlen(errmsg) + 20 + 20 +
569 strlen(ufsvfsp->vfs_fs->fs_fsmnt) + 1;
570 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
571 if (*uerrp != NULL) {
572 /* errmsg+1 => skip leading ! */
573 (void) sprintf(*uerrp, errmsg+1,
574 (int)ttoproc(curthread)->p_pid,
575 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
576 }
577 } else {
578 /*
579 * Caller doesn't care, so just copy to the
580 * message buffer.
581 */
582 cmn_err(CE_NOTE, errmsg,
583 (int)ttoproc(curthread)->p_pid,
584 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
585 }
586 }
587 return (error);
588 }
589
590 /*
591 * Release a dquot.
592 */
593 void
dqrele(struct dquot * dqp)594 dqrele(struct dquot *dqp)
595 {
596 /*
597 * Shadow inodes and extended attribute directories
598 * do not have quota info records.
599 */
600 if (dqp != NULL) {
601 mutex_enter(&dqp->dq_lock);
602 if (dqp->dq_cnt == 1 && dqp->dq_flags & DQ_MOD)
603 dqupdate(dqp);
604 dqput(dqp);
605 mutex_exit(&dqp->dq_lock);
606 }
607 }
608