1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright (c) 2016 by Delphix. All rights reserved. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * University Copyright- Copyright (c) 1982, 1986, 1988 32 * The Regents of the University of California 33 * All Rights Reserved 34 * 35 * University Acknowledgment- Portions of this document are derived from 36 * software developed by the University of California, Berkeley, and its 37 * contributors. 38 */ 39 40 /* 41 * Routines used in checking limits on file system usage. 42 */ 43 44 #include <sys/types.h> 45 #include <sys/t_lock.h> 46 #include <sys/param.h> 47 #include <sys/time.h> 48 #include <sys/systm.h> 49 #include <sys/kmem.h> 50 #include <sys/signal.h> 51 #include <sys/cred.h> 52 #include <sys/proc.h> 53 #include <sys/user.h> 54 #include <sys/proc.h> 55 #include <sys/vfs.h> 56 #include <sys/vnode.h> 57 #include <sys/buf.h> 58 #include <sys/uio.h> 59 #include <sys/fs/ufs_inode.h> 60 #include <sys/fs/ufs_fs.h> 61 #include <sys/fs/ufs_quota.h> 62 #include <sys/errno.h> 63 #include <sys/cmn_err.h> 64 #include <sys/session.h> 65 #include <sys/debug.h> 66 67 /* 68 * Find the dquot structure that should 69 * be used in checking i/o on inode ip. 70 */ 71 struct dquot * 72 getinoquota(struct inode *ip) 73 { 74 struct dquot *dqp, *xdqp; 75 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 76 77 ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 78 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 79 /* 80 * Check for quotas enabled. 81 */ 82 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 83 return (NULL); 84 } 85 86 /* 87 * Check for someone doing I/O to quota file. 88 */ 89 if (ip == ufsvfsp->vfs_qinod) { 90 return (NULL); 91 } 92 93 /* 94 * Check for a legal inode, e.g. not a shadow inode, 95 * not a extended attribute directory inode and a valid mode. 96 */ 97 ASSERT((ip->i_mode & IFMT) != IFSHAD); 98 ASSERT((ip->i_mode & IFMT) != IFATTRDIR); 99 ASSERT(ip->i_mode); 100 101 if (getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, &xdqp)) { 102 return (NULL); 103 } 104 dqp = xdqp; 105 mutex_enter(&dqp->dq_lock); 106 ASSERT(ip->i_uid == dqp->dq_uid); 107 108 if (dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 && 109 dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) { 110 dqput(dqp); 111 mutex_exit(&dqp->dq_lock); 112 dqp = NULL; 113 } else { 114 mutex_exit(&dqp->dq_lock); 115 } 116 return (dqp); 117 } 118 119 /* 120 * Update disk usage, and take corrective action. 121 */ 122 int 123 chkdq(struct inode *ip, long change, int force, struct cred *cr, 124 char **uerrp, size_t *lenp) 125 { 126 struct dquot *dqp; 127 uint64_t ncurblocks; 128 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 129 int error = 0; 130 long abs_change; 131 char *msg1 = 132 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n"; 133 char *msg2 = 134 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n"; 135 char *msg3 = 136 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n"; 137 char *msg4 = 138 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n"; 139 char *errmsg = NULL; 140 time_t now; 141 142 /* 143 * Shadow inodes do not need to hold the vfs_dqrwlock lock. 144 */ 145 ASSERT((ip->i_mode & IFMT) == IFSHAD || 146 RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 147 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 148 149 if (change == 0) 150 return (0); 151 dqp = ip->i_dquot; 152 153 /* 154 * Make sure the quota info record matches the owner. 155 */ 156 ASSERT(dqp == NULL || ip->i_uid == dqp->dq_uid); 157 158 #ifdef DEBUG 159 /* 160 * Shadow inodes and extended attribute directories 161 * should not have quota info records. 162 */ 163 if ((ip->i_mode & IFMT) == IFSHAD || (ip->i_mode & IFMT) == IFATTRDIR) { 164 ASSERT(dqp == NULL); 165 } 166 /* 167 * Paranoia for verifying that quotas are okay. 168 */ 169 else { 170 struct dquot *expect_dq; 171 int mismatch_ok = 0; 172 173 /* Get current quota information */ 174 expect_dq = getinoquota(ip); 175 /* 176 * We got NULL back from getinoquota(), but there is 177 * no error code return from that interface and some 178 * errors are "ok" because we may be testing via error 179 * injection. If this is not the quota inode then we 180 * use getdiskquota() to see if there is an error and 181 * if the error is ok. 182 */ 183 if (expect_dq == NULL && ip != ufsvfsp->vfs_qinod) { 184 int error; 185 struct dquot *xdqp; 186 187 error = getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, 188 &xdqp); 189 switch (error) { 190 /* 191 * Either the error was transient or the quota 192 * info record has no limits which gets optimized 193 * out by getinoquota(). 194 */ 195 case 0: 196 if (xdqp->dq_fhardlimit == 0 && 197 xdqp->dq_fsoftlimit == 0 && 198 xdqp->dq_bhardlimit == 0 && 199 xdqp->dq_bsoftlimit == 0) { 200 mutex_enter(&xdqp->dq_lock); 201 dqput(xdqp); 202 mutex_exit(&xdqp->dq_lock); 203 } else { 204 expect_dq = xdqp; 205 } 206 break; 207 208 case ESRCH: /* quotas are not enabled */ 209 case EINVAL: /* error flag set on cached record */ 210 case EUSERS: /* quota table is full */ 211 case EIO: /* I/O error */ 212 mismatch_ok = 1; 213 break; 214 } 215 } 216 217 /* 218 * Make sure dqp and the current quota info agree. 219 * The first part of the #ifndef is the quick way to 220 * do the check and should be part of the standard 221 * DEBUG code. The #else part is useful if you are 222 * actually chasing an inconsistency and don't want 223 * to have to look at stack frames to figure which 224 * variable has what value. 225 */ 226 #ifndef CHASE_QUOTA 227 ASSERT(mismatch_ok || dqp == expect_dq); 228 #else /* CHASE_QUOTA */ 229 if (expect_dq == NULL) { 230 /* 231 * If you hit this ASSERT() you know that quota 232 * subsystem does not expect quota info for this 233 * inode, but the inode has it. 234 */ 235 ASSERT(mismatch_ok || dqp == NULL); 236 } else { 237 /* 238 * If you hit this ASSERT() you know that quota 239 * subsystem expects quota info for this inode, 240 * but the inode does not have it. 241 */ 242 ASSERT(dqp); 243 /* 244 * If you hit this ASSERT() you know that quota 245 * subsystem expects quota info for this inode 246 * and the inode has quota info, but the two 247 * quota info pointers are not the same. 248 */ 249 ASSERT(dqp == expect_dq); 250 } 251 #endif /* !CHASE_QUOTA */ 252 /* 253 * Release for getinoquota() above or getdiskquota() 254 * call when error is transient. 255 */ 256 if (expect_dq) { 257 mutex_enter(&expect_dq->dq_lock); 258 dqput(expect_dq); 259 mutex_exit(&expect_dq->dq_lock); 260 } 261 } 262 #endif /* DEBUG */ 263 264 /* 265 * Shadow inodes and extended attribute directories 266 * do not have quota info records. 267 */ 268 if (dqp == NULL) 269 return (0); 270 /* 271 * Quotas are not enabled on this file system so there is nothing 272 * more to do. 273 */ 274 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 275 return (0); 276 } 277 mutex_enter(&dqp->dq_lock); 278 if (change < 0) { 279 dqp->dq_flags |= DQ_MOD; 280 abs_change = -change; /* abs_change must be positive */ 281 if (dqp->dq_curblocks < abs_change) 282 dqp->dq_curblocks = 0; 283 else 284 dqp->dq_curblocks += change; 285 if (dqp->dq_curblocks < dqp->dq_bsoftlimit) 286 dqp->dq_btimelimit = 0; 287 dqp->dq_flags &= ~DQ_BLKS; 288 TRANS_QUOTA(dqp); 289 mutex_exit(&dqp->dq_lock); 290 return (0); 291 } 292 293 /* 294 * Adding 'change' to dq_curblocks could cause an overflow. 295 * So store the result in a 64-bit variable and check for 296 * overflow below. 297 */ 298 ncurblocks = (uint64_t)dqp->dq_curblocks + change; 299 300 /* 301 * Allocation. Check hard and soft limits. 302 * Skip checks for uid 0 owned files. 303 * This check used to require both euid and ip->i_uid 304 * to be 0; but there are no quotas for uid 0 so 305 * it really doesn't matter who is writing to the 306 * root owned file. And even root cannot write 307 * past a user's quota limit. 308 */ 309 if (ip->i_uid == 0) 310 goto out; 311 312 /* 313 * Disallow allocation if it would bring the current usage over 314 * the hard limit or if the user is over their soft limit and their 315 * time has run out. 316 */ 317 if (dqp->dq_bhardlimit && ncurblocks >= (uint64_t)dqp->dq_bhardlimit && 318 !force) { 319 /* If the user was not informed yet and the caller */ 320 /* is the owner of the file */ 321 if ((dqp->dq_flags & DQ_BLKS) == 0 && 322 ip->i_uid == crgetruid(cr)) { 323 errmsg = msg1; 324 dqp->dq_flags |= DQ_BLKS; 325 } 326 error = EDQUOT; 327 goto out; 328 } 329 if (dqp->dq_bsoftlimit && ncurblocks >= (uint64_t)dqp->dq_bsoftlimit) { 330 now = gethrestime_sec(); 331 if (dqp->dq_curblocks < dqp->dq_bsoftlimit || 332 dqp->dq_btimelimit == 0) { 333 dqp->dq_flags |= DQ_MOD; 334 dqp->dq_btimelimit = now + 335 ((struct ufsvfs *)ITOV(ip)->v_vfsp->vfs_data) 336 ->vfs_btimelimit; 337 if (ip->i_uid == crgetruid(cr)) { 338 errmsg = msg2; 339 } 340 } else if (now > dqp->dq_btimelimit && !force) { 341 /* If the user was not informed yet and the */ 342 /* caller is the owner of the file */ 343 if ((dqp->dq_flags & DQ_BLKS) == 0 && 344 ip->i_uid == crgetruid(cr)) { 345 errmsg = msg3; 346 dqp->dq_flags |= DQ_BLKS; 347 } 348 error = EDQUOT; 349 } 350 } 351 out: 352 if (error == 0) { 353 dqp->dq_flags |= DQ_MOD; 354 /* 355 * ncurblocks can be bigger than the maximum 356 * number that can be represented in 32-bits. 357 * When copying ncurblocks to dq_curblocks 358 * (an unsigned 32-bit quantity), make sure there 359 * is no overflow. The only way this can happen 360 * is if "force" is set. Otherwise, this allocation 361 * would have exceeded the hard limit check above 362 * (since the hard limit is a 32-bit quantity). 363 */ 364 if (ncurblocks > 0xffffffffLL) { 365 dqp->dq_curblocks = 0xffffffff; 366 errmsg = msg4; 367 } else { 368 dqp->dq_curblocks = ncurblocks; 369 } 370 } 371 372 if (dqp->dq_flags & DQ_MOD) 373 TRANS_QUOTA(dqp); 374 375 mutex_exit(&dqp->dq_lock); 376 /* 377 * Check for any error messages to be sent 378 */ 379 if (errmsg != NULL) { 380 /* 381 * Send message to the error log. 382 */ 383 if (uerrp != NULL) { 384 /* 385 * Set up message caller should send to user; 386 * gets copied to the message buffer as a side- 387 * effect of the caller's uprintf(). 388 */ 389 *lenp = strlen(errmsg) + 20 + 20 + 390 strlen(ip->i_fs->fs_fsmnt) + 1; 391 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP); 392 if (*uerrp != NULL) { 393 /* errmsg+1 => skip leading ! */ 394 (void) sprintf(*uerrp, errmsg+1, 395 (int)ttoproc(curthread)->p_pid, 396 (int)ip->i_uid, (int)ip->i_number, 397 ip->i_fs->fs_fsmnt); 398 } 399 } else { 400 /* 401 * Caller doesn't care, so just copy to the 402 * message buffer. 403 */ 404 cmn_err(CE_NOTE, errmsg, 405 (int)ttoproc(curthread)->p_pid, 406 (int)ip->i_uid, (int)ip->i_number, 407 ip->i_fs->fs_fsmnt); 408 } 409 } 410 return (error); 411 } 412 413 /* 414 * Check the inode limit, applying corrective action. 415 */ 416 int 417 chkiq(struct ufsvfs *ufsvfsp, int change, struct inode *ip, uid_t uid, 418 int force, struct cred *cr, char **uerrp, size_t *lenp) 419 { 420 struct dquot *dqp, *xdqp; 421 unsigned int ncurfiles; 422 char *errmsg = NULL; 423 char *err1 = 424 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n"; 425 char *err2 = 426 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n"; 427 char *err3 = 428 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n"; 429 int error = 0; 430 time_t now; 431 432 ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock)); 433 /* 434 * Change must be either a single increment or decrement. 435 * If change is an increment, then ip must be NULL. 436 */ 437 ASSERT(change == 1 || change == -1); 438 ASSERT(change != 1 || ip == NULL); 439 440 /* 441 * Quotas are not enabled so bail out now. 442 */ 443 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 444 return (0); 445 } 446 447 /* 448 * Free a specific inode. 449 */ 450 if (change == -1 && ip) { 451 dqp = ip->i_dquot; 452 /* 453 * Shadow inodes and extended attribute directories 454 * do not have quota info records. 455 */ 456 if (dqp == NULL) 457 return (0); 458 mutex_enter(&dqp->dq_lock); 459 if (dqp->dq_curfiles) { 460 dqp->dq_curfiles--; 461 dqp->dq_flags |= DQ_MOD; 462 } 463 if (dqp->dq_curfiles < dqp->dq_fsoftlimit) { 464 dqp->dq_ftimelimit = 0; 465 dqp->dq_flags |= DQ_MOD; 466 } 467 dqp->dq_flags &= ~DQ_FILES; 468 if (dqp->dq_flags & DQ_MOD) 469 TRANS_QUOTA(dqp); 470 mutex_exit(&dqp->dq_lock); 471 return (0); 472 } 473 474 /* 475 * Allocation or deallocation without a specific inode. 476 * Get dquot for for uid, fs. 477 */ 478 if (getdiskquota(uid, ufsvfsp, 0, &xdqp)) { 479 return (0); 480 } 481 dqp = xdqp; 482 mutex_enter(&dqp->dq_lock); 483 if (dqp->dq_fsoftlimit == 0 && dqp->dq_fhardlimit == 0) { 484 dqput(dqp); 485 mutex_exit(&dqp->dq_lock); 486 return (0); 487 } 488 489 /* 490 * Skip checks for uid 0 owned files. 491 * This check used to require both euid and uid 492 * to be 0; but there are no quotas for uid 0 so 493 * it really doesn't matter who is writing to the 494 * root owned file. And even root can not write 495 * past the user's quota limit. 496 */ 497 if (uid == 0) 498 goto out; 499 500 /* 501 * Theoretically, this could overflow, but in practice, it 502 * won't. Multi-terabyte file systems are required to have an 503 * nbpi value of at least 1MB. In order to overflow this 504 * field, there would have to be 2^32 inodes in the file. 505 * That would imply a file system of 2^32 * 1MB, which is 506 * 2^(32 + 20), which is 4096 terabytes, which is not 507 * contemplated for ufs any time soon. 508 */ 509 ncurfiles = dqp->dq_curfiles + change; 510 511 /* 512 * Dissallow allocation if it would bring the current usage over 513 * the hard limit or if the user is over their soft limit and their 514 * time has run out. 515 */ 516 if (change == 1 && ncurfiles >= dqp->dq_fhardlimit && 517 dqp->dq_fhardlimit && !force) { 518 /* If the user was not informed yet and the caller */ 519 /* is the owner of the file */ 520 if ((dqp->dq_flags & DQ_FILES) == 0 && uid == crgetruid(cr)) { 521 errmsg = err1; 522 dqp->dq_flags |= DQ_FILES; 523 } 524 error = EDQUOT; 525 } else if (change == 1 && ncurfiles >= dqp->dq_fsoftlimit && 526 dqp->dq_fsoftlimit) { 527 now = gethrestime_sec(); 528 if (ncurfiles == dqp->dq_fsoftlimit || 529 dqp->dq_ftimelimit == 0) { 530 dqp->dq_flags |= DQ_MOD; 531 dqp->dq_ftimelimit = now + ufsvfsp->vfs_ftimelimit; 532 /* If the caller owns the file */ 533 if (uid == crgetruid(cr)) 534 errmsg = err2; 535 } else if (now > dqp->dq_ftimelimit && !force) { 536 /* If the user was not informed yet and the */ 537 /* caller is the owner of the file */ 538 if ((dqp->dq_flags & DQ_FILES) == 0 && 539 uid == crgetruid(cr)) { 540 errmsg = err3; 541 dqp->dq_flags |= DQ_FILES; 542 } 543 error = EDQUOT; 544 } 545 } 546 out: 547 if (error == 0) { 548 dqp->dq_flags |= DQ_MOD; 549 dqp->dq_curfiles += change; 550 } 551 if (dqp->dq_flags & DQ_MOD) 552 TRANS_QUOTA(dqp); 553 dqput(dqp); 554 mutex_exit(&dqp->dq_lock); 555 /* 556 * Check for any error messages to be sent 557 */ 558 if (errmsg != NULL) { 559 /* 560 * Send message to the error log. 561 */ 562 if (uerrp != NULL) { 563 /* 564 * Set up message caller should send to user; 565 * gets copied to the message buffer as a side- 566 * effect of the caller's uprintf(). 567 */ 568 *lenp = strlen(errmsg) + 20 + 20 + 569 strlen(ufsvfsp->vfs_fs->fs_fsmnt) + 1; 570 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP); 571 if (*uerrp != NULL) { 572 /* errmsg+1 => skip leading ! */ 573 (void) sprintf(*uerrp, errmsg+1, 574 (int)ttoproc(curthread)->p_pid, 575 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt); 576 } 577 } else { 578 /* 579 * Caller doesn't care, so just copy to the 580 * message buffer. 581 */ 582 cmn_err(CE_NOTE, errmsg, 583 (int)ttoproc(curthread)->p_pid, 584 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt); 585 } 586 } 587 return (error); 588 } 589 590 /* 591 * Release a dquot. 592 */ 593 void 594 dqrele(struct dquot *dqp) 595 { 596 /* 597 * Shadow inodes and extended attribute directories 598 * do not have quota info records. 599 */ 600 if (dqp != NULL) { 601 mutex_enter(&dqp->dq_lock); 602 if (dqp->dq_cnt == 1 && dqp->dq_flags & DQ_MOD) 603 dqupdate(dqp); 604 dqput(dqp); 605 mutex_exit(&dqp->dq_lock); 606 } 607 } 608