1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 37 */ 38 39 40 #pragma ident "%Z%%M% %I% %E% SMI" 41 42 /* 43 * Routines used in checking limits on file system usage. 44 */ 45 46 #include <sys/types.h> 47 #include <sys/t_lock.h> 48 #include <sys/param.h> 49 #include <sys/time.h> 50 #include <sys/systm.h> 51 #include <sys/kmem.h> 52 #include <sys/signal.h> 53 #include <sys/cred.h> 54 #include <sys/proc.h> 55 #include <sys/user.h> 56 #include <sys/proc.h> 57 #include <sys/vfs.h> 58 #include <sys/vnode.h> 59 #include <sys/buf.h> 60 #include <sys/uio.h> 61 #include <sys/fs/ufs_inode.h> 62 #include <sys/fs/ufs_fs.h> 63 #include <sys/fs/ufs_quota.h> 64 #include <sys/errno.h> 65 #include <sys/cmn_err.h> 66 #include <sys/session.h> 67 #include <sys/debug.h> 68 69 /* 70 * Find the dquot structure that should 71 * be used in checking i/o on inode ip. 72 */ 73 struct dquot * 74 getinoquota(struct inode *ip) 75 { 76 struct dquot *dqp, *xdqp; 77 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 78 79 ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 80 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 81 /* 82 * Check for quotas enabled. 83 */ 84 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 85 return (NULL); 86 } 87 88 /* 89 * Check for someone doing I/O to quota file. 90 */ 91 if (ip == ufsvfsp->vfs_qinod) { 92 return (NULL); 93 } 94 95 /* 96 * Check for a legal inode, e.g. not a shadow inode, 97 * not a extended attribute directory inode and a valid mode. 98 */ 99 ASSERT((ip->i_mode & IFMT) != IFSHAD); 100 ASSERT((ip->i_mode & IFMT) != IFATTRDIR); 101 ASSERT(ip->i_mode); 102 103 if (getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, &xdqp)) { 104 return (NULL); 105 } 106 dqp = xdqp; 107 mutex_enter(&dqp->dq_lock); 108 ASSERT(ip->i_uid == dqp->dq_uid); 109 110 if (dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 && 111 dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) { 112 dqput(dqp); 113 mutex_exit(&dqp->dq_lock); 114 dqp = NULL; 115 } else { 116 mutex_exit(&dqp->dq_lock); 117 } 118 return (dqp); 119 } 120 121 /* 122 * Update disk usage, and take corrective action. 123 */ 124 int 125 chkdq(struct inode *ip, long change, int force, struct cred *cr, 126 char **uerrp, size_t *lenp) 127 { 128 struct dquot *dqp; 129 uint64_t ncurblocks; 130 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 131 int error = 0; 132 long abs_change; 133 char *msg1 = 134 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n"; 135 char *msg2 = 136 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n"; 137 char *msg3 = 138 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n"; 139 char *msg4 = 140 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n"; 141 char *errmsg = NULL; 142 time_t now; 143 144 /* 145 * Shadow inodes do not need to hold the vfs_dqrwlock lock. 146 */ 147 ASSERT((ip->i_mode & IFMT) == IFSHAD || 148 RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 149 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 150 151 if (change == 0) 152 return (0); 153 dqp = ip->i_dquot; 154 155 /* 156 * Make sure the quota info record matches the owner. 157 */ 158 ASSERT(dqp == NULL || ip->i_uid == dqp->dq_uid); 159 160 #ifdef DEBUG 161 /* 162 * Shadow inodes and extended attribute directories 163 * should not have quota info records. 164 */ 165 if ((ip->i_mode & IFMT) == IFSHAD || (ip->i_mode & IFMT) == IFATTRDIR) { 166 ASSERT(dqp == NULL); 167 } 168 /* 169 * Paranoia for verifying that quotas are okay. 170 */ 171 else { 172 struct dquot *expect_dq; 173 int mismatch_ok = 0; 174 175 /* Get current quota information */ 176 expect_dq = getinoquota(ip); 177 /* 178 * We got NULL back from getinoquota(), but there is 179 * no error code return from that interface and some 180 * errors are "ok" because we may be testing via error 181 * injection. If this is not the quota inode then we 182 * use getdiskquota() to see if there is an error and 183 * if the error is ok. 184 */ 185 if (expect_dq == NULL && ip != ufsvfsp->vfs_qinod) { 186 int error; 187 struct dquot *xdqp; 188 189 error = getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, 190 &xdqp); 191 switch (error) { 192 /* 193 * Either the error was transient or the quota 194 * info record has no limits which gets optimized 195 * out by getinoquota(). 196 */ 197 case 0: 198 if (xdqp->dq_fhardlimit == 0 && 199 xdqp->dq_fsoftlimit == 0 && 200 xdqp->dq_bhardlimit == 0 && 201 xdqp->dq_bsoftlimit == 0) { 202 mutex_enter(&xdqp->dq_lock); 203 dqput(xdqp); 204 mutex_exit(&xdqp->dq_lock); 205 } else { 206 expect_dq = xdqp; 207 } 208 break; 209 210 case ESRCH: /* quotas are not enabled */ 211 case EINVAL: /* error flag set on cached record */ 212 case EUSERS: /* quota table is full */ 213 case EIO: /* I/O error */ 214 mismatch_ok = 1; 215 break; 216 } 217 } 218 219 /* 220 * Make sure dqp and the current quota info agree. 221 * The first part of the #ifndef is the quick way to 222 * do the check and should be part of the standard 223 * DEBUG code. The #else part is useful if you are 224 * actually chasing an inconsistency and don't want 225 * to have to look at stack frames to figure which 226 * variable has what value. 227 */ 228 #ifndef CHASE_QUOTA 229 ASSERT(mismatch_ok || dqp == expect_dq); 230 #else /* CHASE_QUOTA */ 231 if (expect_dq == NULL) { 232 /* 233 * If you hit this ASSERT() you know that quota 234 * subsystem does not expect quota info for this 235 * inode, but the inode has it. 236 */ 237 ASSERT(mismatch_ok || dqp == NULL); 238 } else { 239 /* 240 * If you hit this ASSERT() you know that quota 241 * subsystem expects quota info for this inode, 242 * but the inode does not have it. 243 */ 244 ASSERT(dqp); 245 /* 246 * If you hit this ASSERT() you know that quota 247 * subsystem expects quota info for this inode 248 * and the inode has quota info, but the two 249 * quota info pointers are not the same. 250 */ 251 ASSERT(dqp == expect_dq); 252 } 253 #endif /* !CHASE_QUOTA */ 254 /* 255 * Release for getinoquota() above or getdiskquota() 256 * call when error is transient. 257 */ 258 if (expect_dq) { 259 mutex_enter(&expect_dq->dq_lock); 260 dqput(expect_dq); 261 mutex_exit(&expect_dq->dq_lock); 262 } 263 } 264 #endif /* DEBUG */ 265 266 /* 267 * Shadow inodes and extended attribute directories 268 * do not have quota info records. 269 */ 270 if (dqp == NULL) 271 return (0); 272 /* 273 * Quotas are not enabled on this file system so there is nothing 274 * more to do. 275 */ 276 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 277 return (0); 278 } 279 mutex_enter(&dqp->dq_lock); 280 if (change < 0) { 281 dqp->dq_flags |= DQ_MOD; 282 abs_change = -change; /* abs_change must be positive */ 283 if (dqp->dq_curblocks < abs_change) 284 dqp->dq_curblocks = 0; 285 else 286 dqp->dq_curblocks += change; 287 if (dqp->dq_curblocks < dqp->dq_bsoftlimit) 288 dqp->dq_btimelimit = 0; 289 dqp->dq_flags &= ~DQ_BLKS; 290 TRANS_QUOTA(dqp); 291 mutex_exit(&dqp->dq_lock); 292 return (0); 293 } 294 295 /* 296 * Adding 'change' to dq_curblocks could cause an overflow. 297 * So store the result in a 64-bit variable and check for 298 * overflow below. 299 */ 300 ncurblocks = (uint64_t)dqp->dq_curblocks + change; 301 302 /* 303 * Allocation. Check hard and soft limits. 304 * Skip checks for uid 0 owned files. 305 * This check used to require both euid and ip->i_uid 306 * to be 0; but there are no quotas for uid 0 so 307 * it really doesn't matter who is writing to the 308 * root owned file. And even root cannot write 309 * past a user's quota limit. 310 */ 311 if (ip->i_uid == 0) 312 goto out; 313 314 /* 315 * Disallow allocation if it would bring the current usage over 316 * the hard limit or if the user is over his soft limit and his time 317 * has run out. 318 */ 319 if (dqp->dq_bhardlimit && ncurblocks >= (uint64_t)dqp->dq_bhardlimit && 320 !force) { 321 /* If the user was not informed yet and the caller */ 322 /* is the owner of the file */ 323 if ((dqp->dq_flags & DQ_BLKS) == 0 && 324 ip->i_uid == crgetruid(cr)) { 325 errmsg = msg1; 326 dqp->dq_flags |= DQ_BLKS; 327 } 328 error = EDQUOT; 329 goto out; 330 } 331 if (dqp->dq_bsoftlimit && ncurblocks >= (uint64_t)dqp->dq_bsoftlimit) { 332 now = gethrestime_sec(); 333 if (dqp->dq_curblocks < dqp->dq_bsoftlimit || 334 dqp->dq_btimelimit == 0) { 335 dqp->dq_flags |= DQ_MOD; 336 dqp->dq_btimelimit = now + 337 ((struct ufsvfs *)ITOV(ip)->v_vfsp->vfs_data) 338 ->vfs_btimelimit; 339 if (ip->i_uid == crgetruid(cr)) { 340 errmsg = msg2; 341 } 342 } else if (now > dqp->dq_btimelimit && !force) { 343 /* If the user was not informed yet and the */ 344 /* caller is the owner of the file */ 345 if ((dqp->dq_flags & DQ_BLKS) == 0 && 346 ip->i_uid == crgetruid(cr)) { 347 errmsg = msg3; 348 dqp->dq_flags |= DQ_BLKS; 349 } 350 error = EDQUOT; 351 } 352 } 353 out: 354 if (error == 0) { 355 dqp->dq_flags |= DQ_MOD; 356 /* 357 * ncurblocks can be bigger than the maximum 358 * number that can be represented in 32-bits. 359 * When copying ncurblocks to dq_curblocks 360 * (an unsigned 32-bit quantity), make sure there 361 * is no overflow. The only way this can happen 362 * is if "force" is set. Otherwise, this allocation 363 * would have exceeded the hard limit check above 364 * (since the hard limit is a 32-bit quantity). 365 */ 366 if (ncurblocks > 0xffffffffLL) { 367 dqp->dq_curblocks = 0xffffffff; 368 errmsg = msg4; 369 } else { 370 dqp->dq_curblocks = ncurblocks; 371 } 372 } 373 374 if (dqp->dq_flags & DQ_MOD) 375 TRANS_QUOTA(dqp); 376 377 mutex_exit(&dqp->dq_lock); 378 /* 379 * Check for any error messages to be sent 380 */ 381 if (errmsg != NULL) { 382 /* 383 * Send message to the error log. 384 */ 385 if (uerrp != NULL) { 386 /* 387 * Set up message caller should send to user; 388 * gets copied to the message buffer as a side- 389 * effect of the caller's uprintf(). 390 */ 391 *lenp = strlen(errmsg) + 20 + 20 + 392 strlen(ip->i_fs->fs_fsmnt) + 1; 393 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP); 394 if (*uerrp != NULL) { 395 /* errmsg+1 => skip leading ! */ 396 (void) sprintf(*uerrp, errmsg+1, 397 (int)ttoproc(curthread)->p_pid, 398 (int)ip->i_uid, (int)ip->i_number, 399 ip->i_fs->fs_fsmnt); 400 } 401 } else { 402 /* 403 * Caller doesn't care, so just copy to the 404 * message buffer. 405 */ 406 cmn_err(CE_NOTE, errmsg, 407 (int)ttoproc(curthread)->p_pid, 408 (int)ip->i_uid, (int)ip->i_number, 409 ip->i_fs->fs_fsmnt); 410 } 411 } 412 return (error); 413 } 414 415 /* 416 * Check the inode limit, applying corrective action. 417 */ 418 int 419 chkiq(struct ufsvfs *ufsvfsp, int change, struct inode *ip, uid_t uid, 420 int force, struct cred *cr, char **uerrp, size_t *lenp) 421 { 422 struct dquot *dqp, *xdqp; 423 unsigned int ncurfiles; 424 char *errmsg = NULL; 425 char *err1 = 426 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n"; 427 char *err2 = 428 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n"; 429 char *err3 = 430 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n"; 431 int error = 0; 432 time_t now; 433 434 ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock)); 435 /* 436 * Change must be either a single increment or decrement. 437 * If change is an increment, then ip must be NULL. 438 */ 439 ASSERT(change == 1 || change == -1); 440 ASSERT(change != 1 || ip == NULL); 441 442 /* 443 * Quotas are not enabled so bail out now. 444 */ 445 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 446 return (0); 447 } 448 449 /* 450 * Free a specific inode. 451 */ 452 if (change == -1 && ip) { 453 dqp = ip->i_dquot; 454 /* 455 * Shadow inodes and extended attribute directories 456 * do not have quota info records. 457 */ 458 if (dqp == NULL) 459 return (0); 460 mutex_enter(&dqp->dq_lock); 461 if (dqp->dq_curfiles) { 462 dqp->dq_curfiles--; 463 dqp->dq_flags |= DQ_MOD; 464 } 465 if (dqp->dq_curfiles < dqp->dq_fsoftlimit) { 466 dqp->dq_ftimelimit = 0; 467 dqp->dq_flags |= DQ_MOD; 468 } 469 dqp->dq_flags &= ~DQ_FILES; 470 if (dqp->dq_flags & DQ_MOD) 471 TRANS_QUOTA(dqp); 472 mutex_exit(&dqp->dq_lock); 473 return (0); 474 } 475 476 /* 477 * Allocation or deallocation without a specific inode. 478 * Get dquot for for uid, fs. 479 */ 480 if (getdiskquota(uid, ufsvfsp, 0, &xdqp)) { 481 return (0); 482 } 483 dqp = xdqp; 484 mutex_enter(&dqp->dq_lock); 485 if (dqp->dq_fsoftlimit == 0 && dqp->dq_fhardlimit == 0) { 486 dqput(dqp); 487 mutex_exit(&dqp->dq_lock); 488 return (0); 489 } 490 491 /* 492 * Skip checks for uid 0 owned files. 493 * This check used to require both euid and uid 494 * to be 0; but there are no quotas for uid 0 so 495 * it really doesn't matter who is writing to the 496 * root owned file. And even root can not write 497 * past the user's quota limit. 498 */ 499 if (uid == 0) 500 goto out; 501 502 /* 503 * Theoretically, this could overflow, but in practice, it 504 * won't. Multi-terabyte file systems are required to have an 505 * nbpi value of at least 1MB. In order to overflow this 506 * field, there would have to be 2^32 inodes in the file. 507 * That would imply a file system of 2^32 * 1MB, which is 508 * 2^(32 + 20), which is 4096 terabytes, which is not 509 * contemplated for ufs any time soon. 510 */ 511 ncurfiles = dqp->dq_curfiles + change; 512 513 /* 514 * Dissallow allocation if it would bring the current usage over 515 * the hard limit or if the user is over his soft limit and his time 516 * has run out. 517 */ 518 if (change == 1 && ncurfiles >= dqp->dq_fhardlimit && 519 dqp->dq_fhardlimit && !force) { 520 /* If the user was not informed yet and the caller */ 521 /* is the owner of the file */ 522 if ((dqp->dq_flags & DQ_FILES) == 0 && uid == crgetruid(cr)) { 523 errmsg = err1; 524 dqp->dq_flags |= DQ_FILES; 525 } 526 error = EDQUOT; 527 } else if (change == 1 && ncurfiles >= dqp->dq_fsoftlimit && 528 dqp->dq_fsoftlimit) { 529 now = gethrestime_sec(); 530 if (ncurfiles == dqp->dq_fsoftlimit || 531 dqp->dq_ftimelimit == 0) { 532 dqp->dq_flags |= DQ_MOD; 533 dqp->dq_ftimelimit = now + ufsvfsp->vfs_ftimelimit; 534 /* If the caller owns the file */ 535 if (uid == crgetruid(cr)) 536 errmsg = err2; 537 } else if (now > dqp->dq_ftimelimit && !force) { 538 /* If the user was not informed yet and the */ 539 /* caller is the owner of the file */ 540 if ((dqp->dq_flags & DQ_FILES) == 0 && 541 uid == crgetruid(cr)) { 542 errmsg = err3; 543 dqp->dq_flags |= DQ_FILES; 544 } 545 error = EDQUOT; 546 } 547 } 548 out: 549 if (error == 0) { 550 dqp->dq_flags |= DQ_MOD; 551 dqp->dq_curfiles += change; 552 } 553 if (dqp->dq_flags & DQ_MOD) 554 TRANS_QUOTA(dqp); 555 dqput(dqp); 556 mutex_exit(&dqp->dq_lock); 557 /* 558 * Check for any error messages to be sent 559 */ 560 if (errmsg != NULL) { 561 /* 562 * Send message to the error log. 563 */ 564 if (uerrp != NULL) { 565 /* 566 * Set up message caller should send to user; 567 * gets copied to the message buffer as a side- 568 * effect of the caller's uprintf(). 569 */ 570 *lenp = strlen(errmsg) + 20 + 20 + 571 strlen(ufsvfsp->vfs_fs->fs_fsmnt) + 1; 572 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP); 573 if (*uerrp != NULL) { 574 /* errmsg+1 => skip leading ! */ 575 (void) sprintf(*uerrp, errmsg+1, 576 (int)ttoproc(curthread)->p_pid, 577 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt); 578 } 579 } else { 580 /* 581 * Caller doesn't care, so just copy to the 582 * message buffer. 583 */ 584 cmn_err(CE_NOTE, errmsg, 585 (int)ttoproc(curthread)->p_pid, 586 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt); 587 } 588 } 589 return (error); 590 } 591 592 /* 593 * Release a dquot. 594 */ 595 void 596 dqrele(struct dquot *dqp) 597 { 598 /* 599 * Shadow inodes and extended attribute directories 600 * do not have quota info records. 601 */ 602 if (dqp != NULL) { 603 mutex_enter(&dqp->dq_lock); 604 if (dqp->dq_cnt == 1 && dqp->dq_flags & DQ_MOD) 605 dqupdate(dqp); 606 dqput(dqp); 607 mutex_exit(&dqp->dq_lock); 608 } 609 } 610