// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"

/*
 * Lock order:
 *
 * ip->i_lock
 *   qi->qi_tree_lock
 *     dquot->q_qlock (xfs_dqlock() and friends)
 *       dquot->q_flush (xfs_dqflock() and friends)
 *         qi->qi_lru_lock
 *
 * If two dquots need to be locked the order is user before group/project,
 * otherwise by the lowest id first, see xfs_dqlock2.
 */

struct kmem_zone		*xfs_qm_dqtrxzone;
static struct kmem_zone		*xfs_qm_dqzone;

static struct lock_class_key xfs_dquot_group_class;
static struct lock_class_key xfs_dquot_project_class;

/*
 * This is called to free all the memory associated with a dquot
 */
void
xfs_qm_dqdestroy(
	xfs_dquot_t	*dqp)
{
	ASSERT(list_empty(&dqp->q_lru));

	kmem_free(dqp->q_logitem.qli_item.li_lv_shadow);
	mutex_destroy(&dqp->q_qlock);

	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
	kmem_zone_free(xfs_qm_dqzone, dqp);
}

/*
 * If default limits are in force, push them into the dquot now.
 * We overwrite the dquot limits only if they are zero and this
 * is not the root dquot.
 */
void
xfs_qm_adjust_dqlimits(
	struct xfs_mount	*mp,
	struct xfs_dquot	*dq)
{
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	struct xfs_disk_dquot	*d = &dq->q_core;
	struct xfs_def_quota	*defq;
	int			prealloc = 0;

	ASSERT(d->d_id);
	defq = xfs_get_defquota(dq, q);

	if (defq->bsoftlimit && !d->d_blk_softlimit) {
		d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
		prealloc = 1;
	}
	if (defq->bhardlimit && !d->d_blk_hardlimit) {
		d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
		prealloc = 1;
	}
	if (defq->isoftlimit && !d->d_ino_softlimit)
		d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
	if (defq->ihardlimit && !d->d_ino_hardlimit)
		d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
	if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
		d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
	if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
		d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);

	if (prealloc)
		xfs_dquot_set_prealloc_limits(dq);
}

/*
 * Check the limits and timers of a dquot and start or reset timers
 * if necessary.
 * This gets called even when quota enforcement is OFF, which makes our
 * life a little less complicated. (We just don't reject any quota
 * reservations in that case, when enforcement is off).
 * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
 * enforcement's off.
 * In contrast, warnings are a little different in that they don't
 * 'automatically' get started when limits get exceeded.  They do
 * get reset to zero, however, when we find the count to be under
 * the soft limit (they are only ever set non-zero via userspace).
 */
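/*
 * Worked example of the timer logic below (purely illustrative): when
 * d_bcount first exceeds a nonzero d_blk_softlimit (or d_blk_hardlimit)
 * while d_btimer is unset, the timer is armed to "now + qi_btimelimit";
 * once d_bcount drops back under both block limits the timer is cleared
 * again.  The inode and realtime block timers follow the same pattern
 * with their own counters and limits.
 */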
void
xfs_qm_adjust_dqtimers(
	xfs_mount_t		*mp,
	xfs_disk_dquot_t	*d)
{
	ASSERT(d->d_id);

#ifdef DEBUG
	if (d->d_blk_hardlimit)
		ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
		       be64_to_cpu(d->d_blk_hardlimit));
	if (d->d_ino_hardlimit)
		ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
		       be64_to_cpu(d->d_ino_hardlimit));
	if (d->d_rtb_hardlimit)
		ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
		       be64_to_cpu(d->d_rtb_hardlimit));
#endif

	if (!d->d_btimer) {
		if ((d->d_blk_softlimit &&
		     (be64_to_cpu(d->d_bcount) >
		      be64_to_cpu(d->d_blk_softlimit))) ||
		    (d->d_blk_hardlimit &&
		     (be64_to_cpu(d->d_bcount) >
		      be64_to_cpu(d->d_blk_hardlimit)))) {
			d->d_btimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_btimelimit);
		} else {
			d->d_bwarns = 0;
		}
	} else {
		if ((!d->d_blk_softlimit ||
		     (be64_to_cpu(d->d_bcount) <=
		      be64_to_cpu(d->d_blk_softlimit))) &&
		    (!d->d_blk_hardlimit ||
		     (be64_to_cpu(d->d_bcount) <=
		      be64_to_cpu(d->d_blk_hardlimit)))) {
			d->d_btimer = 0;
		}
	}

	if (!d->d_itimer) {
		if ((d->d_ino_softlimit &&
		     (be64_to_cpu(d->d_icount) >
		      be64_to_cpu(d->d_ino_softlimit))) ||
		    (d->d_ino_hardlimit &&
		     (be64_to_cpu(d->d_icount) >
		      be64_to_cpu(d->d_ino_hardlimit)))) {
			d->d_itimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_itimelimit);
		} else {
			d->d_iwarns = 0;
		}
	} else {
		if ((!d->d_ino_softlimit ||
		     (be64_to_cpu(d->d_icount) <=
		      be64_to_cpu(d->d_ino_softlimit))) &&
		    (!d->d_ino_hardlimit ||
		     (be64_to_cpu(d->d_icount) <=
		      be64_to_cpu(d->d_ino_hardlimit)))) {
			d->d_itimer = 0;
		}
	}

	if (!d->d_rtbtimer) {
		if ((d->d_rtb_softlimit &&
		     (be64_to_cpu(d->d_rtbcount) >
		      be64_to_cpu(d->d_rtb_softlimit))) ||
		    (d->d_rtb_hardlimit &&
		     (be64_to_cpu(d->d_rtbcount) >
		      be64_to_cpu(d->d_rtb_hardlimit)))) {
			d->d_rtbtimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_rtbtimelimit);
		} else {
			d->d_rtbwarns = 0;
		}
	} else {
		if ((!d->d_rtb_softlimit ||
		     (be64_to_cpu(d->d_rtbcount) <=
		      be64_to_cpu(d->d_rtb_softlimit))) &&
		    (!d->d_rtb_hardlimit ||
		     (be64_to_cpu(d->d_rtbcount) <=
		      be64_to_cpu(d->d_rtb_hardlimit)))) {
			d->d_rtbtimer = 0;
		}
	}
}

/*
 * initialize a buffer full of dquots and log the whole thing
 */
STATIC void
xfs_qm_init_dquot_blk(
	xfs_trans_t	*tp,
	xfs_mount_t	*mp,
	xfs_dqid_t	id,
	uint		type,
	xfs_buf_t	*bp)
{
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	xfs_dqblk_t	*d;
	xfs_dqid_t	curid;
	int		i;

	ASSERT(tp);
	ASSERT(xfs_buf_islocked(bp));

	d = bp->b_addr;

	/*
	 * ID of the first dquot in the block - ids are zero based.
	 */
	curid = id - (id % q->qi_dqperchunk);
	memset(d, 0, BBTOB(q->qi_dqchunklen));
	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
		d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
		d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
		d->dd_diskdq.d_id = cpu_to_be32(curid);
		d->dd_diskdq.d_flags = type;
		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
			xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
					 XFS_DQUOT_CRC_OFF);
		}
	}

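	/*
	 * Tag the buffer with the type of dquots it now holds; log recovery
	 * uses these buffer flags to tell user, project and group dquot
	 * buffers apart when replaying the log.
	 */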
	xfs_trans_dquot_buf(tp, bp,
			    (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
			    ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
			     XFS_BLF_GDQUOT_BUF)));
	xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}

/*
 * Initialize the dynamic speculative preallocation thresholds. The lo/hi
 * watermarks correspond to the soft and hard limits by default. If a soft limit
 * is not specified, we use 95% of the hard limit.
 */
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
	uint64_t space;

	dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
	dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
	if (!dqp->q_prealloc_lo_wmark) {
		dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
		do_div(dqp->q_prealloc_lo_wmark, 100);
		dqp->q_prealloc_lo_wmark *= 95;
	}

	space = dqp->q_prealloc_hi_wmark;

	do_div(space, 100);
	dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
	dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
	dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}

/*
 * Ensure that the given in-core dquot has a buffer on disk backing it, and
 * return the buffer. This is called when the bmapi finds a hole.
 */
STATIC int
xfs_dquot_disk_alloc(
	struct xfs_trans	**tpp,
	struct xfs_dquot	*dqp,
	struct xfs_buf		**bpp)
{
	struct xfs_bmbt_irec	map;
	struct xfs_trans	*tp = *tpp;
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*bp;
	struct xfs_inode	*quotip = xfs_quota_inode(mp, dqp->dq_flags);
	int			nmaps = 1;
	int			error;

	trace_xfs_dqalloc(dqp);

	xfs_ilock(quotip, XFS_ILOCK_EXCL);
	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
		/*
		 * Return if this type of quota was turned off while we didn't
		 * have the inode lock.
		 */
		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
		return -ESRCH;
	}

	/* Create the block mapping. */
	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
	error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
			XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps);
	if (error)
		return error;
	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
	ASSERT(nmaps == 1);
	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
	       (map.br_startblock != HOLESTARTBLOCK));

	/*
	 * Keep track of the blkno to save a lookup later
	 */
	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

	/* now we can just get the buffer (there's nothing to read yet) */
	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
			       mp->m_quotainfo->qi_dqchunklen, 0);
	if (!bp)
		return -ENOMEM;
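	/*
	 * Attach the dquot buffer ops up front so that the chunk we
	 * initialise below is verified whenever this buffer is subsequently
	 * read or written.
	 */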
	bp->b_ops = &xfs_dquot_buf_ops;

	/*
	 * Make a chunk of dquots out of this buffer and log
	 * the entire thing.
	 */
	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);

	/*
	 * Hold the buffer and join it to the dfops so that we'll still own
	 * the buffer when we return to the caller.  The buffer disposal on
	 * error must be paid attention to very carefully, as it has been
	 * broken since commit efa092f3d4c6 "[XFS] Fixes a bug in the quota
	 * code when allocating a new dquot record" in 2005, and the later
	 * conversion to xfs_defer_ops in commit 310a75a3c6c747 failed to keep
	 * the buffer locked across the _defer_finish call.  We can now do
	 * this correctly with xfs_defer_bjoin.
	 *
	 * Above, we allocated a disk block for the dquot information and used
	 * get_buf to initialize the dquot. If the _defer_finish fails, the old
	 * transaction is gone but the new buffer is not joined or held to any
	 * transaction, so we must _buf_relse it.
	 *
	 * If everything succeeds, the caller of this function is returned a
	 * buffer that is locked and held to the transaction.  The caller
	 * is responsible for unlocking any buffer passed back, either
	 * manually or by committing the transaction.
	 */
	xfs_trans_bhold(tp, bp);
	error = xfs_defer_finish(tpp);
	tp = *tpp;
	if (error) {
		xfs_buf_relse(bp);
		return error;
	}
	*bpp = bp;
	return 0;
}

/*
 * Read in the in-core dquot's on-disk metadata and return the buffer.
 * Returns ENOENT to signal a hole.
 */
STATIC int
xfs_dquot_disk_read(
	struct xfs_mount	*mp,
	struct xfs_dquot	*dqp,
	struct xfs_buf		**bpp)
{
	struct xfs_bmbt_irec	map;
	struct xfs_buf		*bp;
	struct xfs_inode	*quotip = xfs_quota_inode(mp, dqp->dq_flags);
	uint			lock_mode;
	int			nmaps = 1;
	int			error;

	lock_mode = xfs_ilock_data_map_shared(quotip);
	if (!xfs_this_quota_on(mp, dqp->dq_flags)) {
		/*
		 * Return if this type of quota was turned off while we
		 * didn't have the quota inode lock.
		 */
		xfs_iunlock(quotip, lock_mode);
		return -ESRCH;
	}

	/*
	 * Find the block map; no allocations yet
	 */
	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
			XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
	xfs_iunlock(quotip, lock_mode);
	if (error)
		return error;

	ASSERT(nmaps == 1);
	ASSERT(map.br_blockcount >= 1);
	ASSERT(map.br_startblock != DELAYSTARTBLOCK);
	if (map.br_startblock == HOLESTARTBLOCK)
		return -ENOENT;

	trace_xfs_dqtobp_read(dqp);

	/*
	 * store the blkno etc so that we don't have to do the
	 * mapping all the time
	 */
	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
				   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
				   &xfs_dquot_buf_ops);
	if (error) {
		ASSERT(bp == NULL);
		return error;
	}

	ASSERT(xfs_buf_islocked(bp));
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);
	*bpp = bp;

	return 0;
}

/* Allocate and initialize everything we need for an incore dquot. */
STATIC struct xfs_dquot *
xfs_dquot_alloc(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type)
{
	struct xfs_dquot	*dqp;

	dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);

	dqp->dq_flags = type;
	dqp->q_core.d_id = cpu_to_be32(id);
	dqp->q_mount = mp;
	INIT_LIST_HEAD(&dqp->q_lru);
	mutex_init(&dqp->q_qlock);
	init_waitqueue_head(&dqp->q_pinwait);
	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
	/*
	 * Offset of dquot in the (fixed sized) dquot chunk.
	 */
	dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
			sizeof(xfs_dqblk_t);

	/*
	 * Because we want to use a counting completion, complete
	 * the flush completion once to allow a single access to
	 * the flush completion without blocking.
	 */
	init_completion(&dqp->q_flush);
	complete(&dqp->q_flush);

	/*
	 * Make sure group quotas have a different lock class than user
	 * quotas.
	 */
	switch (type) {
	case XFS_DQ_USER:
		/* uses the default lock class */
		break;
	case XFS_DQ_GROUP:
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
		break;
	case XFS_DQ_PROJ:
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
		break;
	default:
		ASSERT(0);
		break;
	}

	xfs_qm_dquot_logitem_init(dqp);

	XFS_STATS_INC(mp, xs_qm_dquot);
	return dqp;
}

/* Copy the in-core quota fields in from the on-disk buffer. */
STATIC void
xfs_dquot_from_disk(
	struct xfs_dquot	*dqp,
	struct xfs_buf		*bp)
{
	struct xfs_disk_dquot	*ddqp = bp->b_addr + dqp->q_bufoffset;

	/* copy everything from disk dquot to the incore dquot */
	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));

	/*
	 * Reservation counters are defined as reservation plus current usage
	 * to avoid having to add every time.
	 */
	dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
	dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);

	/* initialize the dquot speculative prealloc thresholds */
	xfs_dquot_set_prealloc_limits(dqp);
}

/* Allocate and initialize the dquot buffer for this in-core dquot. */
static int
xfs_qm_dqread_alloc(
	struct xfs_mount	*mp,
	struct xfs_dquot	*dqp,
	struct xfs_buf		**bpp)
{
	struct xfs_trans	*tp;
	struct xfs_buf		*bp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
			XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
	if (error)
		goto err;

	error = xfs_dquot_disk_alloc(&tp, dqp, &bp);
	if (error)
		goto err_cancel;

	error = xfs_trans_commit(tp);
	if (error) {
		/*
		 * Buffer was held to the transaction, so we have to unlock it
		 * manually here because we're not passing it back.
		 */
		xfs_buf_relse(bp);
		goto err;
	}
	*bpp = bp;
	return 0;

err_cancel:
	xfs_trans_cancel(tp);
err:
	return error;
}

/*
 * Read in the on-disk dquot, copy it into an incore dquot, and release the
 * buffer immediately.  If @can_alloc is true, fill any holes in the on-disk
 * metadata.
 */
static int
xfs_qm_dqread(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type,
	bool			can_alloc,
	struct xfs_dquot	**dqpp)
{
	struct xfs_dquot	*dqp;
	struct xfs_buf		*bp;
	int			error;

	dqp = xfs_dquot_alloc(mp, id, type);
	trace_xfs_dqread(dqp);

	/* Try to read the buffer, allocating if necessary. */
	error = xfs_dquot_disk_read(mp, dqp, &bp);
	if (error == -ENOENT && can_alloc)
		error = xfs_qm_dqread_alloc(mp, dqp, &bp);
	if (error)
		goto err;

	/*
	 * At this point we should have a clean locked buffer.  Copy the data
	 * to the incore dquot and release the buffer since the incore dquot
	 * has its own locking protocol so we needn't tie up the buffer any
	 * further.
	 */
	ASSERT(xfs_buf_islocked(bp));
	xfs_dquot_from_disk(dqp, bp);

	xfs_buf_relse(bp);
	*dqpp = dqp;
	return error;

err:
	trace_xfs_dqread_fail(dqp);
	xfs_qm_dqdestroy(dqp);
	*dqpp = NULL;
	return error;
}

/*
 * Advance to the next id in the current chunk, or if at the
 * end of the chunk, skip ahead to the first id in the next allocated chunk
 * by looking up the quota inode's data extents.
 */
static int
xfs_dq_get_next_id(
	struct xfs_mount	*mp,
	uint			type,
	xfs_dqid_t		*id)
{
	struct xfs_inode	*quotip = xfs_quota_inode(mp, type);
	xfs_dqid_t		next_id = *id + 1; /* simple advance */
	uint			lock_flags;
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	cur;
	xfs_fsblock_t		start;
	int			error = 0;

	/* If we'd wrap past the max ID, stop */
	if (next_id < *id)
		return -ENOENT;

	/* If new ID is within the current chunk, advancing it sufficed */
	if (next_id % mp->m_quotainfo->qi_dqperchunk) {
		*id = next_id;
		return 0;
	}

	/* Nope, next_id is now past the current chunk, so find the next one */
	start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;

	lock_flags = xfs_ilock_data_map_shared(quotip);
	if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
		if (error)
			return error;
	}

	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) {
		/* contiguous chunk, bump startoff for the id calculation */
		if (got.br_startoff < start)
			got.br_startoff = start;
		*id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
	} else {
		error = -ENOENT;
	}

	xfs_iunlock(quotip, lock_flags);

	return error;
}

/*
 * Look up the dquot in the in-core cache.  If found, the dquot is returned
 * locked and ready to go.
 */
static struct xfs_dquot *
xfs_qm_dqget_cache_lookup(
	struct xfs_mount	*mp,
	struct xfs_quotainfo	*qi,
	struct radix_tree_root	*tree,
	xfs_dqid_t		id)
{
	struct xfs_dquot	*dqp;

restart:
	mutex_lock(&qi->qi_tree_lock);
	dqp = radix_tree_lookup(tree, id);
	if (!dqp) {
		mutex_unlock(&qi->qi_tree_lock);
		XFS_STATS_INC(mp, xs_qm_dqcachemisses);
		return NULL;
	}

	xfs_dqlock(dqp);
	if (dqp->dq_flags & XFS_DQ_FREEING) {
		xfs_dqunlock(dqp);
		mutex_unlock(&qi->qi_tree_lock);
		trace_xfs_dqget_freeing(dqp);
		delay(1);
		goto restart;
	}

	dqp->q_nrefs++;
	mutex_unlock(&qi->qi_tree_lock);

	trace_xfs_dqget_hit(dqp);
	XFS_STATS_INC(mp, xs_qm_dqcachehits);
	return dqp;
}

/*
 * Try to insert a new dquot into the in-core cache.  If an error occurs the
 * caller should throw away the dquot and start over.  Otherwise, the dquot
 * is returned locked (and held by the cache) as if there had been a cache
 * hit.
 */
static int
xfs_qm_dqget_cache_insert(
	struct xfs_mount	*mp,
	struct xfs_quotainfo	*qi,
	struct radix_tree_root	*tree,
	xfs_dqid_t		id,
	struct xfs_dquot	*dqp)
{
	int			error;

	mutex_lock(&qi->qi_tree_lock);
	error = radix_tree_insert(tree, id, dqp);
	if (unlikely(error)) {
		/* Duplicate found!  Caller must try again. */
		WARN_ON(error != -EEXIST);
		mutex_unlock(&qi->qi_tree_lock);
		trace_xfs_dqget_dup(dqp);
		return error;
	}

	/* Return a locked dquot to the caller, with a reference taken. */
	xfs_dqlock(dqp);
	dqp->q_nrefs = 1;

	qi->qi_dquots++;
	mutex_unlock(&qi->qi_tree_lock);

	return 0;
}

/* Check our input parameters. */
static int
xfs_qm_dqget_checks(
	struct xfs_mount	*mp,
	uint			type)
{
	if (WARN_ON_ONCE(!XFS_IS_QUOTA_RUNNING(mp)))
		return -ESRCH;

	switch (type) {
	case XFS_DQ_USER:
		if (!XFS_IS_UQUOTA_ON(mp))
			return -ESRCH;
		return 0;
	case XFS_DQ_GROUP:
		if (!XFS_IS_GQUOTA_ON(mp))
			return -ESRCH;
		return 0;
	case XFS_DQ_PROJ:
		if (!XFS_IS_PQUOTA_ON(mp))
			return -ESRCH;
		return 0;
	default:
		WARN_ON_ONCE(0);
		return -EINVAL;
	}
}

/*
 * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked
 * dquot, doing an allocation (if requested) as needed.
 */
int
xfs_qm_dqget(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type,
	bool			can_alloc,
	struct xfs_dquot	**O_dqpp)
{
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
	struct xfs_dquot	*dqp;
	int			error;

	error = xfs_qm_dqget_checks(mp, type);
	if (error)
		return error;

restart:
	dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
	if (dqp) {
		*O_dqpp = dqp;
		return 0;
	}

	error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
	if (error)
		return error;

	error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
	if (error) {
		/*
		 * Duplicate found. Just throw away the new dquot and start
		 * over.
		 */
		xfs_qm_dqdestroy(dqp);
		XFS_STATS_INC(mp, xs_qm_dquot_dups);
		goto restart;
	}

	trace_xfs_dqget_miss(dqp);
	*O_dqpp = dqp;
	return 0;
}

/*
 * Given a dquot id and type, read and initialize a dquot from the on-disk
 * metadata.  This function is only for use during quota initialization so
 * it ignores the dquot cache assuming that the dquot shrinker isn't set up.
 * The caller is responsible for _qm_dqdestroy'ing the returned dquot.
 */
int
xfs_qm_dqget_uncached(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type,
	struct xfs_dquot	**dqpp)
{
	int			error;

	error = xfs_qm_dqget_checks(mp, type);
	if (error)
		return error;

	return xfs_qm_dqread(mp, id, type, 0, dqpp);
}

/* Return the quota id for a given inode and type. */
xfs_dqid_t
xfs_qm_id_for_quotatype(
	struct xfs_inode	*ip,
	uint			type)
{
	switch (type) {
	case XFS_DQ_USER:
		return ip->i_d.di_uid;
	case XFS_DQ_GROUP:
		return ip->i_d.di_gid;
	case XFS_DQ_PROJ:
		return xfs_get_projid(ip);
	}
	ASSERT(0);
	return 0;
}

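/*
 * Note on xfs_qm_dqget_inode() below: because the ILOCK is dropped around the
 * disk read, two races have to be handled on the cache-miss path.  Another
 * thread may have attached a dquot of this type to the inode in the meantime
 * (in which case we use theirs and destroy ours), or may have inserted the
 * same id into the radix tree (in which case we destroy ours and restart).
 */
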
/*
 * Return the dquot for a given inode and type.  If @can_alloc is true, then
 * allocate blocks if needed.  The inode's ILOCK must be held and it must not
 * already have a dquot of this type attached.
 */
int
xfs_qm_dqget_inode(
	struct xfs_inode	*ip,
	uint			type,
	bool			can_alloc,
	struct xfs_dquot	**O_dqpp)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
	struct xfs_dquot	*dqp;
	xfs_dqid_t		id;
	int			error;

	error = xfs_qm_dqget_checks(mp, type);
	if (error)
		return error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(xfs_inode_dquot(ip, type) == NULL);

	id = xfs_qm_id_for_quotatype(ip, type);

restart:
	dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
	if (dqp) {
		*O_dqpp = dqp;
		return 0;
	}

	/*
	 * Dquot cache miss.  We don't want to keep the inode lock across
	 * a (potential) disk read.  Also we don't want to deal with the lock
	 * ordering between quotainode and this inode.  OTOH, dropping the
	 * inode lock here means dealing with a chown that can happen before
	 * we re-acquire the lock.
	 */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/*
	 * A dquot could be attached to this inode by now, since we had
	 * dropped the ilock.
	 */
	if (xfs_this_quota_on(mp, type)) {
		struct xfs_dquot	*dqp1;

		dqp1 = xfs_inode_dquot(ip, type);
		if (dqp1) {
			xfs_qm_dqdestroy(dqp);
			dqp = dqp1;
			xfs_dqlock(dqp);
			goto dqret;
		}
	} else {
		/* inode stays locked on return */
		xfs_qm_dqdestroy(dqp);
		return -ESRCH;
	}

	error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
	if (error) {
		/*
		 * Duplicate found. Just throw away the new dquot and start
		 * over.
		 */
		xfs_qm_dqdestroy(dqp);
		XFS_STATS_INC(mp, xs_qm_dquot_dups);
		goto restart;
	}

dqret:
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	trace_xfs_dqget_miss(dqp);
	*O_dqpp = dqp;
	return 0;
}

/*
 * Starting at @id and progressing upwards, look for an initialized incore
 * dquot, lock it, and return it.
 */
int
xfs_qm_dqget_next(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type,
	struct xfs_dquot	**dqpp)
{
	struct xfs_dquot	*dqp;
	int			error = 0;

	*dqpp = NULL;
	for (; !error; error = xfs_dq_get_next_id(mp, type, &id)) {
		error = xfs_qm_dqget(mp, id, type, false, &dqp);
		if (error == -ENOENT)
			continue;
		else if (error != 0)
			break;

		if (!XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
			*dqpp = dqp;
			return 0;
		}

		xfs_qm_dqput(dqp);
	}

	return error;
}

/*
 * Release a reference to the dquot (decrement ref-count) and unlock it.
 *
 * If there is a group quota attached to this dquot, carefully release that
 * too without tripping over deadlocks'n'stuff.
 */
void
xfs_qm_dqput(
	struct xfs_dquot	*dqp)
{
	ASSERT(dqp->q_nrefs > 0);
	ASSERT(XFS_DQ_IS_LOCKED(dqp));

	trace_xfs_dqput(dqp);

	if (--dqp->q_nrefs == 0) {
		struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;
		trace_xfs_dqput_free(dqp);

		if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
			XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
	}
	xfs_dqunlock(dqp);
}

/*
 * Release a dquot reference: just dqput() it.  Note that we deliberately do
 * not flush a dirty dquot here; see the comment in the body.  The dquot must
 * not be locked by the caller.
 */
void
xfs_qm_dqrele(
	xfs_dquot_t	*dqp)
{
	if (!dqp)
		return;

	trace_xfs_dqrele(dqp);

	xfs_dqlock(dqp);
	/*
	 * We don't care to flush it if the dquot is dirty here.
	 * That will create stutters that we want to avoid.
	 * Instead we do a delayed write when we try to reclaim
	 * a dirty dquot. Also xfs_sync will take part of the burden...
	 */
	xfs_qm_dqput(dqp);
}

/*
 * This is the dquot flushing I/O completion routine.  It is called
 * from interrupt level when the buffer containing the dquot is
 * flushed to disk.  It is responsible for removing the dquot logitem
 * from the AIL if it has not been re-logged, and unlocking the dquot's
 * flush lock.  This behavior is very similar to that of inodes.
 */
STATIC void
xfs_qm_dqflush_done(
	struct xfs_buf		*bp,
	struct xfs_log_item	*lip)
{
	xfs_dq_logitem_t	*qip = (struct xfs_dq_logitem *)lip;
	xfs_dquot_t		*dqp = qip->qli_dquot;
	struct xfs_ail		*ailp = lip->li_ailp;

	/*
	 * We only want to pull the item from the AIL if its
	 * location in the log has not changed since we started the flush.
	 * Thus, we only bother if the dquot's lsn has
	 * not changed. First we check the lsn outside the lock
	 * since it's cheaper, and then we recheck while
	 * holding the lock before removing the dquot from the AIL.
	 */
	if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
	    ((lip->li_lsn == qip->qli_flush_lsn) ||
	     test_bit(XFS_LI_FAILED, &lip->li_flags))) {

		/* xfs_trans_ail_delete() drops the AIL lock. */
		spin_lock(&ailp->ail_lock);
		if (lip->li_lsn == qip->qli_flush_lsn) {
			xfs_trans_ail_delete(ailp, lip,
					     SHUTDOWN_CORRUPT_INCORE);
		} else {
			/*
			 * Clear the failed state since we are about to drop
			 * the flush lock
			 */
			xfs_clear_li_failed(lip);
			spin_unlock(&ailp->ail_lock);
		}
	}

	/*
	 * Release the dq's flush lock since we're done with it.
	 */
	xfs_dqfunlock(dqp);
}

/*
 * Write a modified dquot to disk.
 * The caller must hold both the dquot lock and the flush lock.
 * The flush lock will not be unlocked until the dquot reaches the disk,
 * but the dquot is free to be unlocked and modified by the caller
 * in the interim. Dquot is still locked on return. This behavior is
 * identical to that of inodes.
 */
int
xfs_qm_dqflush(
	struct xfs_dquot	*dqp,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_buf		*bp;
	struct xfs_dqblk	*dqb;
	struct xfs_disk_dquot	*ddqp;
	xfs_failaddr_t		fa;
	int			error;

	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	ASSERT(!completion_done(&dqp->q_flush));

	trace_xfs_dqflush(dqp);

	*bpp = NULL;

	xfs_qm_dqunpin_wait(dqp);

	/*
	 * This may have been unpinned because the filesystem is shutting
	 * down forcibly. If that's the case we must not write this dquot
	 * to disk, because the log record didn't make it to disk.
	 *
	 * We also have to remove the log item from the AIL in this case,
	 * as we wait for an empty AIL as part of the unmount process.
	 */
	if (XFS_FORCED_SHUTDOWN(mp)) {
		struct xfs_log_item	*lip = &dqp->q_logitem.qli_item;
		dqp->dq_flags &= ~XFS_DQ_DIRTY;

		xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE);

		error = -EIO;
		goto out_unlock;
	}

	/*
	 * Get the buffer containing the on-disk dquot
	 */
	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
				   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
				   &xfs_dquot_buf_ops);
	if (error)
		goto out_unlock;

	/*
	 * Calculate the location of the dquot inside the buffer.
	 */
	dqb = bp->b_addr + dqp->q_bufoffset;
	ddqp = &dqb->dd_diskdq;

	/*
	 * A simple sanity check in case we got a corrupted dquot.
	 */
	fa = xfs_dqblk_verify(mp, dqb, be32_to_cpu(ddqp->d_id), 0);
	if (fa) {
		xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
				be32_to_cpu(ddqp->d_id), fa);
		xfs_buf_relse(bp);
		xfs_dqfunlock(dqp);
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return -EIO;
	}

	/* This is the only portion of data that needs to persist */
	memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));

	/*
	 * Clear the dirty field and remember the flush lsn for later use.
	 */
	dqp->dq_flags &= ~XFS_DQ_DIRTY;

	xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
					&dqp->q_logitem.qli_item.li_lsn);

	/*
	 * copy the lsn into the on-disk dquot now while we have the in memory
	 * dquot here. This can't be done later in the write verifier as we
	 * can't get access to the log item at that point in time.
	 *
	 * We also calculate the CRC here so that the on-disk dquot in the
	 * buffer always has a valid CRC. This ensures there is no possibility
	 * of a dquot without an up-to-date CRC getting to disk.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
		xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
				 XFS_DQUOT_CRC_OFF);
	}

	/*
	 * Attach an iodone routine so that we can remove this dquot from the
	 * AIL and release the flush lock once the dquot is synced to disk.
	 */
	xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
				  &dqp->q_logitem.qli_item);

	/*
	 * If the buffer is pinned then push on the log so we won't
	 * get stuck waiting in the write for too long.
	 */
	if (xfs_buf_ispinned(bp)) {
		trace_xfs_dqflush_force(dqp);
		xfs_log_force(mp, 0);
	}

	trace_xfs_dqflush_done(dqp);
	*bpp = bp;
	return 0;

out_unlock:
	xfs_dqfunlock(dqp);
	return -EIO;
}

/*
 * Lock two xfs_dquot structures.
 *
 * To avoid deadlocks we always lock the quota structure with
 * the lower id first.
 */
void
xfs_dqlock2(
	xfs_dquot_t	*d1,
	xfs_dquot_t	*d2)
{
	if (d1 && d2) {
		ASSERT(d1 != d2);
		if (be32_to_cpu(d1->q_core.d_id) >
		    be32_to_cpu(d2->q_core.d_id)) {
			mutex_lock(&d2->q_qlock);
			mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
		} else {
			mutex_lock(&d1->q_qlock);
			mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
		}
	} else if (d1) {
		mutex_lock(&d1->q_qlock);
	} else if (d2) {
		mutex_lock(&d2->q_qlock);
	}
}

int __init
xfs_qm_init(void)
{
	xfs_qm_dqzone =
		kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
	if (!xfs_qm_dqzone)
		goto out;

	xfs_qm_dqtrxzone =
		kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
	if (!xfs_qm_dqtrxzone)
		goto out_free_dqzone;

	return 0;

out_free_dqzone:
	kmem_zone_destroy(xfs_qm_dqzone);
out:
	return -ENOMEM;
}

void
xfs_qm_exit(void)
{
	kmem_zone_destroy(xfs_qm_dqtrxzone);
	kmem_zone_destroy(xfs_qm_dqzone);
}

/*
 * Iterate every dquot of a particular type.  The caller must ensure that the
 * particular quota type is active.  iter_fn can return negative error codes,
 * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating.
 */
int
xfs_qm_dqiterate(
	struct xfs_mount	*mp,
	uint			dqtype,
	xfs_qm_dqiterate_fn	iter_fn,
	void			*priv)
{
	struct xfs_dquot	*dq;
	xfs_dqid_t		id = 0;
	int			error;

	do {
		error = xfs_qm_dqget_next(mp, id, dqtype, &dq);
		if (error == -ENOENT)
			return 0;
		if (error)
			return error;

		error = iter_fn(dq, dqtype, priv);
		id = be32_to_cpu(dq->q_core.d_id);
		xfs_qm_dqput(dq);
		id++;
	} while (error == 0 && id != 0);

	return error;
}