1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39
40 #pragma ident "%Z%%M% %I% %E% SMI"
41
42 /*
43 * Routines used in checking limits on file system usage.
44 */
45
46 #include <sys/types.h>
47 #include <sys/t_lock.h>
48 #include <sys/param.h>
49 #include <sys/time.h>
50 #include <sys/systm.h>
51 #include <sys/kmem.h>
52 #include <sys/signal.h>
53 #include <sys/cred.h>
54 #include <sys/proc.h>
55 #include <sys/user.h>
56 #include <sys/proc.h>
57 #include <sys/vfs.h>
58 #include <sys/vnode.h>
59 #include <sys/buf.h>
60 #include <sys/uio.h>
61 #include <sys/fs/ufs_inode.h>
62 #include <sys/fs/ufs_fs.h>
63 #include <sys/fs/ufs_quota.h>
64 #include <sys/errno.h>
65 #include <sys/cmn_err.h>
66 #include <sys/session.h>
67 #include <sys/debug.h>
68
69 /*
70 * Find the dquot structure that should
71 * be used in checking i/o on inode ip.
72 */
73 struct dquot *
getinoquota(struct inode * ip)74 getinoquota(struct inode *ip)
75 {
76 struct dquot *dqp, *xdqp;
77 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
78
79 ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
80 ASSERT(RW_WRITE_HELD(&ip->i_contents));
81 /*
82 * Check for quotas enabled.
83 */
84 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
85 return (NULL);
86 }
87
88 /*
89 * Check for someone doing I/O to quota file.
90 */
91 if (ip == ufsvfsp->vfs_qinod) {
92 return (NULL);
93 }
94
95 /*
96 * Check for a legal inode, e.g. not a shadow inode,
97 * not a extended attribute directory inode and a valid mode.
98 */
99 ASSERT((ip->i_mode & IFMT) != IFSHAD);
100 ASSERT((ip->i_mode & IFMT) != IFATTRDIR);
101 ASSERT(ip->i_mode);
102
103 if (getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, &xdqp)) {
104 return (NULL);
105 }
106 dqp = xdqp;
107 mutex_enter(&dqp->dq_lock);
108 ASSERT(ip->i_uid == dqp->dq_uid);
109
110 if (dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 &&
111 dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) {
112 dqput(dqp);
113 mutex_exit(&dqp->dq_lock);
114 dqp = NULL;
115 } else {
116 mutex_exit(&dqp->dq_lock);
117 }
118 return (dqp);
119 }
120
121 /*
122 * Update disk usage, and take corrective action.
123 */
124 int
chkdq(struct inode * ip,long change,int force,struct cred * cr,char ** uerrp,size_t * lenp)125 chkdq(struct inode *ip, long change, int force, struct cred *cr,
126 char **uerrp, size_t *lenp)
127 {
128 struct dquot *dqp;
129 uint64_t ncurblocks;
130 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
131 int error = 0;
132 long abs_change;
133 char *msg1 =
134 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n";
135 char *msg2 =
136 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n";
137 char *msg3 =
138 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n";
139 char *msg4 =
140 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n";
141 char *errmsg = NULL;
142 time_t now;
143
144 /*
145 * Shadow inodes do not need to hold the vfs_dqrwlock lock.
146 */
147 ASSERT((ip->i_mode & IFMT) == IFSHAD ||
148 RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
149 ASSERT(RW_WRITE_HELD(&ip->i_contents));
150
151 if (change == 0)
152 return (0);
153 dqp = ip->i_dquot;
154
155 /*
156 * Make sure the quota info record matches the owner.
157 */
158 ASSERT(dqp == NULL || ip->i_uid == dqp->dq_uid);
159
160 #ifdef DEBUG
161 /*
162 * Shadow inodes and extended attribute directories
163 * should not have quota info records.
164 */
165 if ((ip->i_mode & IFMT) == IFSHAD || (ip->i_mode & IFMT) == IFATTRDIR) {
166 ASSERT(dqp == NULL);
167 }
168 /*
169 * Paranoia for verifying that quotas are okay.
170 */
171 else {
172 struct dquot *expect_dq;
173 int mismatch_ok = 0;
174
175 /* Get current quota information */
176 expect_dq = getinoquota(ip);
177 /*
178 * We got NULL back from getinoquota(), but there is
179 * no error code return from that interface and some
180 * errors are "ok" because we may be testing via error
181 * injection. If this is not the quota inode then we
182 * use getdiskquota() to see if there is an error and
183 * if the error is ok.
184 */
185 if (expect_dq == NULL && ip != ufsvfsp->vfs_qinod) {
186 int error;
187 struct dquot *xdqp;
188
189 error = getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0,
190 &xdqp);
191 switch (error) {
192 /*
193 * Either the error was transient or the quota
194 * info record has no limits which gets optimized
195 * out by getinoquota().
196 */
197 case 0:
198 if (xdqp->dq_fhardlimit == 0 &&
199 xdqp->dq_fsoftlimit == 0 &&
200 xdqp->dq_bhardlimit == 0 &&
201 xdqp->dq_bsoftlimit == 0) {
202 mutex_enter(&xdqp->dq_lock);
203 dqput(xdqp);
204 mutex_exit(&xdqp->dq_lock);
205 } else {
206 expect_dq = xdqp;
207 }
208 break;
209
210 case ESRCH: /* quotas are not enabled */
211 case EINVAL: /* error flag set on cached record */
212 case EUSERS: /* quota table is full */
213 case EIO: /* I/O error */
214 mismatch_ok = 1;
215 break;
216 }
217 }
218
219 /*
220 * Make sure dqp and the current quota info agree.
221 * The first part of the #ifndef is the quick way to
222 * do the check and should be part of the standard
223 * DEBUG code. The #else part is useful if you are
224 * actually chasing an inconsistency and don't want
225 * to have to look at stack frames to figure which
226 * variable has what value.
227 */
228 #ifndef CHASE_QUOTA
229 ASSERT(mismatch_ok || dqp == expect_dq);
230 #else /* CHASE_QUOTA */
231 if (expect_dq == NULL) {
232 /*
233 * If you hit this ASSERT() you know that quota
234 * subsystem does not expect quota info for this
235 * inode, but the inode has it.
236 */
237 ASSERT(mismatch_ok || dqp == NULL);
238 } else {
239 /*
240 * If you hit this ASSERT() you know that quota
241 * subsystem expects quota info for this inode,
242 * but the inode does not have it.
243 */
244 ASSERT(dqp);
245 /*
246 * If you hit this ASSERT() you know that quota
247 * subsystem expects quota info for this inode
248 * and the inode has quota info, but the two
249 * quota info pointers are not the same.
250 */
251 ASSERT(dqp == expect_dq);
252 }
253 #endif /* !CHASE_QUOTA */
254 /*
255 * Release for getinoquota() above or getdiskquota()
256 * call when error is transient.
257 */
258 if (expect_dq) {
259 mutex_enter(&expect_dq->dq_lock);
260 dqput(expect_dq);
261 mutex_exit(&expect_dq->dq_lock);
262 }
263 }
264 #endif /* DEBUG */
265
266 /*
267 * Shadow inodes and extended attribute directories
268 * do not have quota info records.
269 */
270 if (dqp == NULL)
271 return (0);
272 /*
273 * Quotas are not enabled on this file system so there is nothing
274 * more to do.
275 */
276 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
277 return (0);
278 }
279 mutex_enter(&dqp->dq_lock);
280 if (change < 0) {
281 dqp->dq_flags |= DQ_MOD;
282 abs_change = -change; /* abs_change must be positive */
283 if (dqp->dq_curblocks < abs_change)
284 dqp->dq_curblocks = 0;
285 else
286 dqp->dq_curblocks += change;
287 if (dqp->dq_curblocks < dqp->dq_bsoftlimit)
288 dqp->dq_btimelimit = 0;
289 dqp->dq_flags &= ~DQ_BLKS;
290 TRANS_QUOTA(dqp);
291 mutex_exit(&dqp->dq_lock);
292 return (0);
293 }
294
295 /*
296 * Adding 'change' to dq_curblocks could cause an overflow.
297 * So store the result in a 64-bit variable and check for
298 * overflow below.
299 */
300 ncurblocks = (uint64_t)dqp->dq_curblocks + change;
301
302 /*
303 * Allocation. Check hard and soft limits.
304 * Skip checks for uid 0 owned files.
305 * This check used to require both euid and ip->i_uid
306 * to be 0; but there are no quotas for uid 0 so
307 * it really doesn't matter who is writing to the
308 * root owned file. And even root cannot write
309 * past a user's quota limit.
310 */
311 if (ip->i_uid == 0)
312 goto out;
313
314 /*
315 * Disallow allocation if it would bring the current usage over
316 * the hard limit or if the user is over his soft limit and his time
317 * has run out.
318 */
319 if (dqp->dq_bhardlimit && ncurblocks >= (uint64_t)dqp->dq_bhardlimit &&
320 !force) {
321 /* If the user was not informed yet and the caller */
322 /* is the owner of the file */
323 if ((dqp->dq_flags & DQ_BLKS) == 0 &&
324 ip->i_uid == crgetruid(cr)) {
325 errmsg = msg1;
326 dqp->dq_flags |= DQ_BLKS;
327 }
328 error = EDQUOT;
329 goto out;
330 }
331 if (dqp->dq_bsoftlimit && ncurblocks >= (uint64_t)dqp->dq_bsoftlimit) {
332 now = gethrestime_sec();
333 if (dqp->dq_curblocks < dqp->dq_bsoftlimit ||
334 dqp->dq_btimelimit == 0) {
335 dqp->dq_flags |= DQ_MOD;
336 dqp->dq_btimelimit = now +
337 ((struct ufsvfs *)ITOV(ip)->v_vfsp->vfs_data)
338 ->vfs_btimelimit;
339 if (ip->i_uid == crgetruid(cr)) {
340 errmsg = msg2;
341 }
342 } else if (now > dqp->dq_btimelimit && !force) {
343 /* If the user was not informed yet and the */
344 /* caller is the owner of the file */
345 if ((dqp->dq_flags & DQ_BLKS) == 0 &&
346 ip->i_uid == crgetruid(cr)) {
347 errmsg = msg3;
348 dqp->dq_flags |= DQ_BLKS;
349 }
350 error = EDQUOT;
351 }
352 }
353 out:
354 if (error == 0) {
355 dqp->dq_flags |= DQ_MOD;
356 /*
357 * ncurblocks can be bigger than the maximum
358 * number that can be represented in 32-bits.
359 * When copying ncurblocks to dq_curblocks
360 * (an unsigned 32-bit quantity), make sure there
361 * is no overflow. The only way this can happen
362 * is if "force" is set. Otherwise, this allocation
363 * would have exceeded the hard limit check above
364 * (since the hard limit is a 32-bit quantity).
365 */
366 if (ncurblocks > 0xffffffffLL) {
367 dqp->dq_curblocks = 0xffffffff;
368 errmsg = msg4;
369 } else {
370 dqp->dq_curblocks = ncurblocks;
371 }
372 }
373
374 if (dqp->dq_flags & DQ_MOD)
375 TRANS_QUOTA(dqp);
376
377 mutex_exit(&dqp->dq_lock);
378 /*
379 * Check for any error messages to be sent
380 */
381 if (errmsg != NULL) {
382 /*
383 * Send message to the error log.
384 */
385 if (uerrp != NULL) {
386 /*
387 * Set up message caller should send to user;
388 * gets copied to the message buffer as a side-
389 * effect of the caller's uprintf().
390 */
391 *lenp = strlen(errmsg) + 20 + 20 +
392 strlen(ip->i_fs->fs_fsmnt) + 1;
393 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
394 if (*uerrp != NULL) {
395 /* errmsg+1 => skip leading ! */
396 (void) sprintf(*uerrp, errmsg+1,
397 (int)ttoproc(curthread)->p_pid,
398 (int)ip->i_uid, (int)ip->i_number,
399 ip->i_fs->fs_fsmnt);
400 }
401 } else {
402 /*
403 * Caller doesn't care, so just copy to the
404 * message buffer.
405 */
406 cmn_err(CE_NOTE, errmsg,
407 (int)ttoproc(curthread)->p_pid,
408 (int)ip->i_uid, (int)ip->i_number,
409 ip->i_fs->fs_fsmnt);
410 }
411 }
412 return (error);
413 }
414
415 /*
416 * Check the inode limit, applying corrective action.
417 */
418 int
chkiq(struct ufsvfs * ufsvfsp,int change,struct inode * ip,uid_t uid,int force,struct cred * cr,char ** uerrp,size_t * lenp)419 chkiq(struct ufsvfs *ufsvfsp, int change, struct inode *ip, uid_t uid,
420 int force, struct cred *cr, char **uerrp, size_t *lenp)
421 {
422 struct dquot *dqp, *xdqp;
423 unsigned int ncurfiles;
424 char *errmsg = NULL;
425 char *err1 =
426 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n";
427 char *err2 =
428 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n";
429 char *err3 =
430 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n";
431 int error = 0;
432 time_t now;
433
434 ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock));
435 /*
436 * Change must be either a single increment or decrement.
437 * If change is an increment, then ip must be NULL.
438 */
439 ASSERT(change == 1 || change == -1);
440 ASSERT(change != 1 || ip == NULL);
441
442 /*
443 * Quotas are not enabled so bail out now.
444 */
445 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
446 return (0);
447 }
448
449 /*
450 * Free a specific inode.
451 */
452 if (change == -1 && ip) {
453 dqp = ip->i_dquot;
454 /*
455 * Shadow inodes and extended attribute directories
456 * do not have quota info records.
457 */
458 if (dqp == NULL)
459 return (0);
460 mutex_enter(&dqp->dq_lock);
461 if (dqp->dq_curfiles) {
462 dqp->dq_curfiles--;
463 dqp->dq_flags |= DQ_MOD;
464 }
465 if (dqp->dq_curfiles < dqp->dq_fsoftlimit) {
466 dqp->dq_ftimelimit = 0;
467 dqp->dq_flags |= DQ_MOD;
468 }
469 dqp->dq_flags &= ~DQ_FILES;
470 if (dqp->dq_flags & DQ_MOD)
471 TRANS_QUOTA(dqp);
472 mutex_exit(&dqp->dq_lock);
473 return (0);
474 }
475
476 /*
477 * Allocation or deallocation without a specific inode.
478 * Get dquot for for uid, fs.
479 */
480 if (getdiskquota(uid, ufsvfsp, 0, &xdqp)) {
481 return (0);
482 }
483 dqp = xdqp;
484 mutex_enter(&dqp->dq_lock);
485 if (dqp->dq_fsoftlimit == 0 && dqp->dq_fhardlimit == 0) {
486 dqput(dqp);
487 mutex_exit(&dqp->dq_lock);
488 return (0);
489 }
490
491 /*
492 * Skip checks for uid 0 owned files.
493 * This check used to require both euid and uid
494 * to be 0; but there are no quotas for uid 0 so
495 * it really doesn't matter who is writing to the
496 * root owned file. And even root can not write
497 * past the user's quota limit.
498 */
499 if (uid == 0)
500 goto out;
501
502 /*
503 * Theoretically, this could overflow, but in practice, it
504 * won't. Multi-terabyte file systems are required to have an
505 * nbpi value of at least 1MB. In order to overflow this
506 * field, there would have to be 2^32 inodes in the file.
507 * That would imply a file system of 2^32 * 1MB, which is
508 * 2^(32 + 20), which is 4096 terabytes, which is not
509 * contemplated for ufs any time soon.
510 */
511 ncurfiles = dqp->dq_curfiles + change;
512
513 /*
514 * Dissallow allocation if it would bring the current usage over
515 * the hard limit or if the user is over his soft limit and his time
516 * has run out.
517 */
518 if (change == 1 && ncurfiles >= dqp->dq_fhardlimit &&
519 dqp->dq_fhardlimit && !force) {
520 /* If the user was not informed yet and the caller */
521 /* is the owner of the file */
522 if ((dqp->dq_flags & DQ_FILES) == 0 && uid == crgetruid(cr)) {
523 errmsg = err1;
524 dqp->dq_flags |= DQ_FILES;
525 }
526 error = EDQUOT;
527 } else if (change == 1 && ncurfiles >= dqp->dq_fsoftlimit &&
528 dqp->dq_fsoftlimit) {
529 now = gethrestime_sec();
530 if (ncurfiles == dqp->dq_fsoftlimit ||
531 dqp->dq_ftimelimit == 0) {
532 dqp->dq_flags |= DQ_MOD;
533 dqp->dq_ftimelimit = now + ufsvfsp->vfs_ftimelimit;
534 /* If the caller owns the file */
535 if (uid == crgetruid(cr))
536 errmsg = err2;
537 } else if (now > dqp->dq_ftimelimit && !force) {
538 /* If the user was not informed yet and the */
539 /* caller is the owner of the file */
540 if ((dqp->dq_flags & DQ_FILES) == 0 &&
541 uid == crgetruid(cr)) {
542 errmsg = err3;
543 dqp->dq_flags |= DQ_FILES;
544 }
545 error = EDQUOT;
546 }
547 }
548 out:
549 if (error == 0) {
550 dqp->dq_flags |= DQ_MOD;
551 dqp->dq_curfiles += change;
552 }
553 if (dqp->dq_flags & DQ_MOD)
554 TRANS_QUOTA(dqp);
555 dqput(dqp);
556 mutex_exit(&dqp->dq_lock);
557 /*
558 * Check for any error messages to be sent
559 */
560 if (errmsg != NULL) {
561 /*
562 * Send message to the error log.
563 */
564 if (uerrp != NULL) {
565 /*
566 * Set up message caller should send to user;
567 * gets copied to the message buffer as a side-
568 * effect of the caller's uprintf().
569 */
570 *lenp = strlen(errmsg) + 20 + 20 +
571 strlen(ufsvfsp->vfs_fs->fs_fsmnt) + 1;
572 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
573 if (*uerrp != NULL) {
574 /* errmsg+1 => skip leading ! */
575 (void) sprintf(*uerrp, errmsg+1,
576 (int)ttoproc(curthread)->p_pid,
577 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
578 }
579 } else {
580 /*
581 * Caller doesn't care, so just copy to the
582 * message buffer.
583 */
584 cmn_err(CE_NOTE, errmsg,
585 (int)ttoproc(curthread)->p_pid,
586 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
587 }
588 }
589 return (error);
590 }
591
592 /*
593 * Release a dquot.
594 */
595 void
dqrele(struct dquot * dqp)596 dqrele(struct dquot *dqp)
597 {
598 /*
599 * Shadow inodes and extended attribute directories
600 * do not have quota info records.
601 */
602 if (dqp != NULL) {
603 mutex_enter(&dqp->dq_lock);
604 if (dqp->dq_cnt == 1 && dqp->dq_flags & DQ_MOD)
605 dqupdate(dqp);
606 dqput(dqp);
607 mutex_exit(&dqp->dq_lock);
608 }
609 }
610