/*
 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_trace.h"

/*
 * Lock order:
 *
 * ip->i_lock
 *   qh->qh_lock
 *     qi->qi_dqlist_lock
 *       dquot->q_qlock (xfs_dqlock() and friends)
 *         dquot->q_flush (xfs_dqflock() and friends)
 *           xfs_Gqm->qm_dqfrlist_lock
 *
 * If two dquots need to be locked the order is user before group/project,
 * otherwise by the lowest id first, see xfs_dqlock2.
 */

#ifdef DEBUG
xfs_buftarg_t	*xfs_dqerror_target;
int		xfs_do_dqerror;
int		xfs_dqreq_num;
int		xfs_dqerror_mod = 33;
#endif

static struct lock_class_key xfs_dquot_other_class;

/*
 * Allocate and initialize a dquot.  We don't always allocate fresh memory;
 * we try to reclaim a free dquot if the number of incore dquots is above
 * a threshold.
 * The only field inside the core that gets initialized at this point
 * is the d_id field.  The idea is to fill in the entire q_core
 * when we read in the on disk dquot.
 */
STATIC xfs_dquot_t *
xfs_qm_dqinit(
	xfs_mount_t	*mp,
	xfs_dqid_t	id,
	uint		type)
{
	xfs_dquot_t	*dqp;
	boolean_t	brandnewdquot;

	brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
	dqp->dq_flags = type;
	dqp->q_core.d_id = cpu_to_be32(id);
	dqp->q_mount = mp;

	/*
	 * No need to re-initialize these if this is a reclaimed dquot.
	 */
	if (brandnewdquot) {
		INIT_LIST_HEAD(&dqp->q_freelist);
		mutex_init(&dqp->q_qlock);
		init_waitqueue_head(&dqp->q_pinwait);

		/*
		 * Because we want to use a counting completion, complete
		 * the flush completion once to allow a single access to
		 * the flush completion without blocking.
		 */
		init_completion(&dqp->q_flush);
		complete(&dqp->q_flush);

		trace_xfs_dqinit(dqp);
	} else {
		/*
		 * Only the q_core portion was zeroed in dqreclaim_one(),
		 * so we need to reset the others.
		 */
		dqp->q_nrefs = 0;
		dqp->q_blkno = 0;
		INIT_LIST_HEAD(&dqp->q_mplist);
		INIT_LIST_HEAD(&dqp->q_hashlist);
		dqp->q_bufoffset = 0;
		dqp->q_fileoffset = 0;
		dqp->q_transp = NULL;
		dqp->q_gdquot = NULL;
		dqp->q_res_bcount = 0;
		dqp->q_res_icount = 0;
		dqp->q_res_rtbcount = 0;
		atomic_set(&dqp->q_pincount, 0);
		dqp->q_hash = NULL;
		ASSERT(list_empty(&dqp->q_freelist));

		trace_xfs_dqreuse(dqp);
	}

	/*
	 * In either case we need to make sure group quotas have a different
	 * lock class than user quotas, so that lockdep knows we can take
	 * locks of one of each at the same time.
	 */
	if (!(type & XFS_DQ_USER))
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);

	/*
	 * The log item gets initialized later.
	 */
	return (dqp);
}
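
/*
 * Illustrative sketch (not part of the original source): the counting
 * completion set up in xfs_qm_dqinit() acts as the dquot's flush lock.
 * A typical acquire/release cycle looks like:
 *
 *	xfs_dqflock(dqp);	- waits on &dqp->q_flush
 *	... write dqp->q_core back to its buffer ...
 *	xfs_dqfunlock(dqp);	- completes &dqp->q_flush
 *
 * Completing the completion once at init time leaves it in the
 * "unlocked" state, so the first xfs_dqflock() can proceed without
 * blocking.
 */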

/*
 * This is called to free all the memory associated with a dquot.
 */
void
xfs_qm_dqdestroy(
	xfs_dquot_t	*dqp)
{
	ASSERT(list_empty(&dqp->q_freelist));

	mutex_destroy(&dqp->q_qlock);
	kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);

	atomic_dec(&xfs_Gqm->qm_totaldquots);
}

/*
 * If default limits are in force, push them into the dquot now.
 * We overwrite the dquot limits only if they are zero and this
 * is not the root dquot.
 */
void
xfs_qm_adjust_dqlimits(
	xfs_mount_t		*mp,
	xfs_disk_dquot_t	*d)
{
	xfs_quotainfo_t		*q = mp->m_quotainfo;

	ASSERT(d->d_id);

	if (q->qi_bsoftlimit && !d->d_blk_softlimit)
		d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
	if (q->qi_bhardlimit && !d->d_blk_hardlimit)
		d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
	if (q->qi_isoftlimit && !d->d_ino_softlimit)
		d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
	if (q->qi_ihardlimit && !d->d_ino_hardlimit)
		d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
	if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
		d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
	if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
		d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
}
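
/*
 * Worked example (illustrative values, not from the original source):
 * if the administrator configured a default block soft limit of 1000
 * blocks (qi_bsoftlimit == 1000) and this dquot carries no explicit
 * limit (d_blk_softlimit == 0), the function above leaves
 *
 *	d->d_blk_softlimit == cpu_to_be64(1000)
 *
 * while a dquot that already has a non-zero limit is left untouched.
 */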

/*
 * Check the limits and timers of a dquot and start or reset timers
 * if necessary.
 * This gets called even when quota enforcement is OFF, which makes our
 * life a little less complicated.  (We just don't reject any quota
 * reservations in that case, when enforcement is off.)
 * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
 * enforcement's off.
 * In contrast, warnings are a little different in that they don't
 * 'automatically' get started when limits get exceeded.  They do
 * get reset to zero, however, when we find the count to be under
 * the soft limit (they are only ever set non-zero via userspace).
 */
void
xfs_qm_adjust_dqtimers(
	xfs_mount_t		*mp,
	xfs_disk_dquot_t	*d)
{
	ASSERT(d->d_id);

#ifdef DEBUG
	if (d->d_blk_hardlimit)
		ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
		       be64_to_cpu(d->d_blk_hardlimit));
	if (d->d_ino_hardlimit)
		ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
		       be64_to_cpu(d->d_ino_hardlimit));
	if (d->d_rtb_hardlimit)
		ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
		       be64_to_cpu(d->d_rtb_hardlimit));
#endif

	if (!d->d_btimer) {
		if ((d->d_blk_softlimit &&
		     (be64_to_cpu(d->d_bcount) >=
		      be64_to_cpu(d->d_blk_softlimit))) ||
		    (d->d_blk_hardlimit &&
		     (be64_to_cpu(d->d_bcount) >=
		      be64_to_cpu(d->d_blk_hardlimit)))) {
			d->d_btimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_btimelimit);
		} else {
			d->d_bwarns = 0;
		}
	} else {
		if ((!d->d_blk_softlimit ||
		     (be64_to_cpu(d->d_bcount) <
		      be64_to_cpu(d->d_blk_softlimit))) &&
		    (!d->d_blk_hardlimit ||
		     (be64_to_cpu(d->d_bcount) <
		      be64_to_cpu(d->d_blk_hardlimit)))) {
			d->d_btimer = 0;
		}
	}

	if (!d->d_itimer) {
		if ((d->d_ino_softlimit &&
		     (be64_to_cpu(d->d_icount) >=
		      be64_to_cpu(d->d_ino_softlimit))) ||
		    (d->d_ino_hardlimit &&
		     (be64_to_cpu(d->d_icount) >=
		      be64_to_cpu(d->d_ino_hardlimit)))) {
			d->d_itimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_itimelimit);
		} else {
			d->d_iwarns = 0;
		}
	} else {
		if ((!d->d_ino_softlimit ||
		     (be64_to_cpu(d->d_icount) <
		      be64_to_cpu(d->d_ino_softlimit))) &&
		    (!d->d_ino_hardlimit ||
		     (be64_to_cpu(d->d_icount) <
		      be64_to_cpu(d->d_ino_hardlimit)))) {
			d->d_itimer = 0;
		}
	}

	if (!d->d_rtbtimer) {
		if ((d->d_rtb_softlimit &&
		     (be64_to_cpu(d->d_rtbcount) >=
		      be64_to_cpu(d->d_rtb_softlimit))) ||
		    (d->d_rtb_hardlimit &&
		     (be64_to_cpu(d->d_rtbcount) >=
		      be64_to_cpu(d->d_rtb_hardlimit)))) {
			d->d_rtbtimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_rtbtimelimit);
		} else {
			d->d_rtbwarns = 0;
		}
	} else {
		if ((!d->d_rtb_softlimit ||
		     (be64_to_cpu(d->d_rtbcount) <
		      be64_to_cpu(d->d_rtb_softlimit))) &&
		    (!d->d_rtb_hardlimit ||
		     (be64_to_cpu(d->d_rtbcount) <
		      be64_to_cpu(d->d_rtb_hardlimit)))) {
			d->d_rtbtimer = 0;
		}
	}
}

/*
 * Initialize a buffer full of dquots and log the whole thing.
 */
STATIC void
xfs_qm_init_dquot_blk(
	xfs_trans_t	*tp,
	xfs_mount_t	*mp,
	xfs_dqid_t	id,
	uint		type,
	xfs_buf_t	*bp)
{
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	xfs_dqblk_t	*d;
	int		curid, i;

	ASSERT(tp);
	ASSERT(xfs_buf_islocked(bp));

	d = bp->b_addr;

	/*
	 * ID of the first dquot in the block - IDs are zero based.
	 */
	curid = id - (id % q->qi_dqperchunk);
	ASSERT(curid >= 0);
	memset(d, 0, BBTOB(q->qi_dqchunklen));
	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
		d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
		d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
		d->dd_diskdq.d_id = cpu_to_be32(curid);
		d->dd_diskdq.d_flags = type;
	}

	xfs_trans_dquot_buf(tp, bp,
			    (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
			    ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
			     XFS_BLF_GDQUOT_BUF)));
	xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}
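
/*
 * Worked example (illustrative, assumes qi_dqperchunk == 30): for
 * id == 95 the first dquot in the chunk is
 *
 *	curid = 95 - (95 % 30) = 90
 *
 * so the buffer is initialized with ids 90..119, and id 95 itself sits
 * at offset (95 % 30) * sizeof(xfs_dqblk_t) within that buffer, which
 * is how xfs_qm_dqtobp() computes q_bufoffset below.
 */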

/*
 * Allocate a block and fill it with dquots.
 * This is called when the bmapi finds a hole.
 */
STATIC int
xfs_qm_dqalloc(
	xfs_trans_t	**tpp,
	xfs_mount_t	*mp,
	xfs_dquot_t	*dqp,
	xfs_inode_t	*quotip,
	xfs_fileoff_t	offset_fsb,
	xfs_buf_t	**O_bpp)
{
	xfs_fsblock_t	firstblock;
	xfs_bmap_free_t	flist;
	xfs_bmbt_irec_t	map;
	int		nmaps, error, committed;
	xfs_buf_t	*bp;
	xfs_trans_t	*tp = *tpp;

	ASSERT(tp != NULL);

	trace_xfs_dqalloc(dqp);

	/*
	 * Initialize the bmap freelist prior to calling bmapi code.
	 */
	xfs_bmap_init(&flist, &firstblock);
	xfs_ilock(quotip, XFS_ILOCK_EXCL);
	/*
	 * Return if this type of quota was turned off while we didn't
	 * have the inode lock.
	 */
	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
		return (ESRCH);
	}

	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
	nmaps = 1;
	error = xfs_bmapi_write(tp, quotip, offset_fsb,
				XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
				&firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
				&map, &nmaps, &flist);
	if (error)
		goto error0;
	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
	ASSERT(nmaps == 1);
	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
	       (map.br_startblock != HOLESTARTBLOCK));

	/*
	 * Keep track of the blkno to save a lookup later.
	 */
	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

	/* now we can just get the buffer (there's nothing to read yet) */
	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			       dqp->q_blkno,
			       mp->m_quotainfo->qi_dqchunklen,
			       0);

	error = xfs_buf_geterror(bp);
	if (error)
		goto error1;

	/*
	 * Make a chunk of dquots out of this buffer and log
	 * the entire thing.
	 */
	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);

	/*
	 * xfs_bmap_finish() may commit the current transaction and
	 * start a second transaction if the freelist is not empty.
	 *
	 * Since we still want to modify this buffer, we need to
	 * ensure that the buffer is not released on commit of
	 * the first transaction and ensure the buffer is added to the
	 * second transaction.
	 *
	 * If there is only one transaction then don't stop the buffer
	 * from being released when it commits later on.
	 */
	xfs_trans_bhold(tp, bp);

	error = xfs_bmap_finish(tpp, &flist, &committed);
	if (error)
		goto error1;

	if (committed) {
		tp = *tpp;
		xfs_trans_bjoin(tp, bp);
	} else {
		xfs_trans_bhold_release(tp, bp);
	}

	*O_bpp = bp;
	return 0;

error1:
	xfs_bmap_cancel(&flist);
error0:
	xfs_iunlock(quotip, XFS_ILOCK_EXCL);

	return (error);
}
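
/*
 * Caller-side sketch (illustrative, not part of the original source):
 * xfs_qm_dqtobp() below invokes this as
 *
 *	error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
 *			       dqp->q_fileoffset, &bp);
 *
 * and must reload tp = *tpp afterwards, because xfs_bmap_finish() may
 * have committed the original transaction and handed back a new one.
 */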

/*
 * Maps a dquot to the buffer containing its on-disk version.
 * This returns a ptr to the buffer containing the on-disk dquot
 * in the bpp param, and a ptr to the on-disk dquot within that buffer.
 */
STATIC int
xfs_qm_dqtobp(
	xfs_trans_t		**tpp,
	xfs_dquot_t		*dqp,
	xfs_disk_dquot_t	**O_ddpp,
	xfs_buf_t		**O_bpp,
	uint			flags)
{
	xfs_bmbt_irec_t map;
	int		nmaps = 1, error;
	xfs_buf_t	*bp;
	xfs_inode_t	*quotip = XFS_DQ_TO_QIP(dqp);
	xfs_mount_t	*mp = dqp->q_mount;
	xfs_disk_dquot_t *ddq;
	xfs_dqid_t	id = be32_to_cpu(dqp->q_core.d_id);
	xfs_trans_t	*tp = (tpp ? *tpp : NULL);

	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;

	xfs_ilock(quotip, XFS_ILOCK_SHARED);
	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
		/*
		 * Return if this type of quota was turned off while we
		 * didn't have the quota inode lock.
		 */
		xfs_iunlock(quotip, XFS_ILOCK_SHARED);
		return ESRCH;
	}

	/*
	 * Find the block map; no allocations yet.
	 */
	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
			       XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);

	xfs_iunlock(quotip, XFS_ILOCK_SHARED);
	if (error)
		return error;

	ASSERT(nmaps == 1);
	ASSERT(map.br_blockcount == 1);

	/*
	 * Offset of dquot in the (fixed sized) dquot chunk.
	 */
	dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
		sizeof(xfs_dqblk_t);

	ASSERT(map.br_startblock != DELAYSTARTBLOCK);
	if (map.br_startblock == HOLESTARTBLOCK) {
		/*
		 * We don't allocate unless we're asked to.
		 */
		if (!(flags & XFS_QMOPT_DQALLOC))
			return ENOENT;

		ASSERT(tp);
		error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
				       dqp->q_fileoffset, &bp);
		if (error)
			return error;
		tp = *tpp;
	} else {
		trace_xfs_dqtobp_read(dqp);

		/*
		 * Store the blkno etc so that we don't have to do the
		 * mapping all the time.
		 */
		dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

		error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
					   dqp->q_blkno,
					   mp->m_quotainfo->qi_dqchunklen,
					   0, &bp);
		if (error || !bp)
			return XFS_ERROR(error);
	}

	ASSERT(xfs_buf_islocked(bp));

	/*
	 * Calculate the location of the dquot inside the buffer.
	 */
	ddq = bp->b_addr + dqp->q_bufoffset;

	/*
	 * A simple sanity check in case we got a corrupted dquot...
	 */
	error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
			       flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
			       "dqtobp");
	if (error) {
		if (!(flags & XFS_QMOPT_DQREPAIR)) {
			xfs_trans_brelse(tp, bp);
			return XFS_ERROR(EIO);
		}
	}

	*O_bpp = bp;
	*O_ddpp = ddq;

	return (0);
}
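
/*
 * Illustrative sketch (not part of the original source): reading an
 * existing dquot without allocation uses a NULL transaction pointer:
 *
 *	struct xfs_buf *bp;
 *	struct xfs_disk_dquot *ddqp;
 *	struct xfs_trans *tp = NULL;
 *
 *	error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, 0);
 *
 * With flags == 0 a hole in the quota inode yields ENOENT; passing
 * XFS_QMOPT_DQALLOC instead allocates the missing block, which is why
 * xfs_qm_dqread() below only sets up a real transaction in that case.
 */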

/*
 * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
 * and release the buffer immediately.
 *
 * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if needed.
 */
int
xfs_qm_dqread(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type,
	uint			flags,
	struct xfs_dquot	**O_dqpp)
{
	struct xfs_dquot	*dqp;
	struct xfs_disk_dquot	*ddqp;
	struct xfs_buf		*bp;
	struct xfs_trans	*tp = NULL;
	int			error;
	int			cancelflags = 0;

	dqp = xfs_qm_dqinit(mp, id, type);

	trace_xfs_dqread(dqp);

	if (flags & XFS_QMOPT_DQALLOC) {
		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
				XFS_WRITE_LOG_RES(mp) +
				/*
				 * Round the chunklen up to the next multiple
				 * of 128 (buf log item chunk size).
				 */
				BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
				0,
				XFS_TRANS_PERM_LOG_RES,
				XFS_WRITE_LOG_COUNT);
		if (error)
			goto error1;
		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
	}

	/*
	 * Get a pointer to the on-disk dquot and the buffer containing it;
	 * dqp already knows its own type (GROUP/USER).
	 */
	error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
	if (error) {
		/*
		 * This can happen if quotas got turned off (ESRCH),
		 * or if the dquot didn't exist on disk and we ask to
		 * allocate (ENOENT).
		 */
		trace_xfs_dqread_fail(dqp);
		cancelflags |= XFS_TRANS_ABORT;
		goto error1;
	}

	/* copy everything from disk dquot to the incore dquot */
	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
	xfs_qm_dquot_logitem_init(dqp);

	/*
	 * Reservation counters are defined as reservation plus current usage
	 * to avoid having to add every time.
	 */
	dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
	dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);

	/* Mark the buf so that this will stay incore a little longer */
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);

	/*
	 * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
	 * so we need to release with xfs_trans_brelse().
	 * The strategy here is identical to that of inodes; we lock
	 * the dquot in xfs_qm_dqget() before making it accessible to
	 * others.  This is because dquots, like inodes, need a good level of
	 * concurrency, and we don't want to take locks on the entire buffers
	 * for dquot accesses.
	 * Note also that the dquot buffer may even be dirty at this point, if
	 * this particular dquot was repaired.  We still aren't afraid to
	 * brelse it because we have the changes incore.
	 */
	ASSERT(xfs_buf_islocked(bp));
	xfs_trans_brelse(tp, bp);

	if (tp) {
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		if (error)
			goto error0;
	}

	*O_dqpp = dqp;
	return error;

error1:
	if (tp)
		xfs_trans_cancel(tp, cancelflags);
error0:
	xfs_qm_dqdestroy(dqp);
	*O_dqpp = NULL;
	return error;
}

/*
 * Lookup a dquot in the incore dquot hashtable.  We keep two separate
 * hashtables for user and group dquots; and, these are global tables
 * inside the XQM, not per-filesystem tables.
 * The hash chain must be locked by the caller, and it is left locked
 * on return.  The returned dquot is locked.
 */
STATIC int
xfs_qm_dqlookup(
	xfs_mount_t	*mp,
	xfs_dqid_t	id,
	xfs_dqhash_t	*qh,
	xfs_dquot_t	**O_dqpp)
{
	xfs_dquot_t	*dqp;

	ASSERT(mutex_is_locked(&qh->qh_lock));

	/*
	 * Traverse the hashchain looking for a match.
	 */
	list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
		/*
		 * We already have the hashlock.  We don't need the
		 * dqlock to look at the id field of the dquot, since the
		 * id can't be modified without the hashlock anyway.
		 */
		if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
			continue;

		trace_xfs_dqlookup_found(dqp);

		xfs_dqlock(dqp);
		if (dqp->dq_flags & XFS_DQ_FREEING) {
			*O_dqpp = NULL;
			xfs_dqunlock(dqp);
			return -1;
		}

		dqp->q_nrefs++;

		/*
		 * Move the dquot to the front of the hashchain.
		 */
		list_move(&dqp->q_hashlist, &qh->qh_list);
		trace_xfs_dqlookup_done(dqp);
		*O_dqpp = dqp;
		return 0;
	}

	*O_dqpp = NULL;
	return 1;
}
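
/*
 * Return-value sketch (illustrative, not part of the original source):
 * callers dispatch on three cases, e.g.:
 *
 *	switch (xfs_qm_dqlookup(mp, id, h, &dqp)) {
 *	case -1:	- found but XFS_DQ_FREEING, back off and retry
 *	case 0:		- hit: dqp is locked with a reference taken
 *	default:	- miss: nothing on this chain, go read from disk
 *	}
 *
 * xfs_qm_dqget() below uses exactly this pattern around its restart
 * label.
 */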

/*
 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
 * locked dquot, doing an allocation (if requested) as needed.
 * When both an inode and an id are given, the inode's id takes precedence.
 * That is, if the id changes while we don't hold the ilock inside this
 * function, the new dquot is returned, not necessarily the one requested
 * in the id argument.
 */
int
xfs_qm_dqget(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,	  /* locked inode (optional) */
	xfs_dqid_t	id,	  /* uid/projid/gid depending on type */
	uint		type,	  /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
{
	xfs_dquot_t	*dqp;
	xfs_dqhash_t	*h;
	uint		version;
	int		error;

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
	if ((!XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
	    (!XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
	    (!XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
		return (ESRCH);
	}
	h = XFS_DQ_HASH(mp, id, type);

#ifdef DEBUG
	if (xfs_do_dqerror) {
		if ((xfs_dqerror_target == mp->m_ddev_targp) &&
		    (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
			xfs_debug(mp, "Returning error in dqget");
			return (EIO);
		}
	}

	ASSERT(type == XFS_DQ_USER ||
	       type == XFS_DQ_PROJ ||
	       type == XFS_DQ_GROUP);
	if (ip) {
		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
		if (type == XFS_DQ_USER)
			ASSERT(ip->i_udquot == NULL);
		else
			ASSERT(ip->i_gdquot == NULL);
	}
#endif

restart:
	mutex_lock(&h->qh_lock);

	/*
	 * Look in the cache (hashtable).
	 * The chain is kept locked during lookup.
	 */
	switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
	case -1:
		XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
		mutex_unlock(&h->qh_lock);
		delay(1);
		goto restart;
	case 0:
		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
		/*
		 * The dquot was found, moved to the front of the chain,
		 * taken off the freelist if it was on it, and locked
		 * at this point.  Just unlock the hashchain and return.
		 */
		ASSERT(*O_dqpp);
		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
		mutex_unlock(&h->qh_lock);
		trace_xfs_dqget_hit(*O_dqpp);
		return 0;	/* success */
	default:
		XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
		break;
	}

	/*
	 * Dquot cache miss.  We don't want to keep the inode lock across
	 * a (potential) disk read.  Also we don't want to deal with the lock
	 * ordering between quotainode and this inode.  OTOH, dropping the
	 * inode lock here means dealing with a chown that can happen before
	 * we re-acquire the lock.
	 */
	if (ip)
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	/*
	 * Save the hashchain version stamp, and unlock the chain, so that
	 * we don't keep the lock across a disk read.
	 */
	version = h->qh_version;
	mutex_unlock(&h->qh_lock);
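
	/*
	 * Illustrative note (not in the original source): qh_version is
	 * the optimistic-concurrency token here.  After the unlocked
	 * disk read below, "version != h->qh_version" means some other
	 * thread changed this hash chain in the meantime, so the chain
	 * must be searched again before the new dquot can be inserted.
	 */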

	error = xfs_qm_dqread(mp, id, type, flags, &dqp);

	if (ip)
		xfs_ilock(ip, XFS_ILOCK_EXCL);

	if (error)
		return error;

	/*
	 * Dquot lock comes after hashlock in the lock ordering.
	 */
	if (ip) {
		/*
		 * A dquot could be attached to this inode by now, since
		 * we had dropped the ilock.
		 */
		if (type == XFS_DQ_USER) {
			if (!XFS_IS_UQUOTA_ON(mp)) {
				/* inode stays locked on return */
				xfs_qm_dqdestroy(dqp);
				return XFS_ERROR(ESRCH);
			}
			if (ip->i_udquot) {
				xfs_qm_dqdestroy(dqp);
				dqp = ip->i_udquot;
				xfs_dqlock(dqp);
				goto dqret;
			}
		} else {
			if (!XFS_IS_OQUOTA_ON(mp)) {
				/* inode stays locked on return */
				xfs_qm_dqdestroy(dqp);
				return XFS_ERROR(ESRCH);
			}
			if (ip->i_gdquot) {
				xfs_qm_dqdestroy(dqp);
				dqp = ip->i_gdquot;
				xfs_dqlock(dqp);
				goto dqret;
			}
		}
	}

	/*
	 * Hashlock comes after ilock in lock order.
	 */
	mutex_lock(&h->qh_lock);
	if (version != h->qh_version) {
		xfs_dquot_t *tmpdqp;
		/*
		 * Now, see if somebody else put the dquot in the
		 * hashtable before us.  This can happen because we didn't
		 * keep the hashchain lock.  We don't have to worry about
		 * lock order between the two dquots here since dqp isn't
		 * on any findable lists yet.
		 */
		switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
		case 0:
		case -1:
			/*
			 * Duplicate found, either in cache or on its way out.
			 * Just throw away the new dquot and start over.
			 */
			if (tmpdqp)
				xfs_qm_dqput(tmpdqp);
			mutex_unlock(&h->qh_lock);
			xfs_qm_dqdestroy(dqp);
			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
			goto restart;
		default:
			break;
		}
	}

	/*
	 * Put the dquot at the beginning of the hash-chain and mp's list.
	 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
	 */
	ASSERT(mutex_is_locked(&h->qh_lock));
	dqp->q_hash = h;
	list_add(&dqp->q_hashlist, &h->qh_list);
	h->qh_version++;

	/*
	 * Attach this dquot to this filesystem's list of all dquots,
	 * kept inside the mount structure in the m_quotainfo field.
	 */
	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);

	/*
	 * We return a locked dquot to the caller, with a reference taken.
	 */
	xfs_dqlock(dqp);
	dqp->q_nrefs = 1;

	list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
	mp->m_quotainfo->qi_dquots++;
	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
	mutex_unlock(&h->qh_lock);
dqret:
	ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
	trace_xfs_dqget_miss(dqp);
	*O_dqpp = dqp;
	return (0);
}
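
/*
 * Usage sketch (illustrative, not part of the original source): looking
 * up a user dquot by id without an inode, then dropping the reference:
 *
 *	struct xfs_dquot *dqp;
 *	int error;
 *
 *	error = xfs_qm_dqget(mp, NULL, id, XFS_DQ_USER,
 *			     XFS_QMOPT_DQALLOC, &dqp);
 *	if (!error) {
 *		... dqp is locked with one reference held ...
 *		xfs_qm_dqput(dqp);	- unlocks and drops the reference
 *	}
 */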

/*
 * Release a reference to the dquot (decrement ref-count)
 * and unlock it.  If there is a group quota attached to this
 * dquot, carefully release that too without tripping over
 * deadlocks'n'stuff.
 */
void
xfs_qm_dqput(
	struct xfs_dquot	*dqp)
{
	struct xfs_dquot	*gdqp;

	ASSERT(dqp->q_nrefs > 0);
	ASSERT(XFS_DQ_IS_LOCKED(dqp));

	trace_xfs_dqput(dqp);

recurse:
	if (--dqp->q_nrefs > 0) {
		xfs_dqunlock(dqp);
		return;
	}

	trace_xfs_dqput_free(dqp);

	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
	if (list_empty(&dqp->q_freelist)) {
		list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
		xfs_Gqm->qm_dqfrlist_cnt++;
	}
	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);

	/*
	 * If we just added a udquot to the freelist, then we want to release
	 * the gdquot reference that it (probably) has.  Otherwise it'll keep
	 * the gdquot from getting reclaimed.
	 */
	gdqp = dqp->q_gdquot;
	if (gdqp) {
		xfs_dqlock(gdqp);
		dqp->q_gdquot = NULL;
	}
	xfs_dqunlock(dqp);

	/*
	 * If we had a group quota hint, release it now.
	 */
	if (gdqp) {
		dqp = gdqp;
		goto recurse;
	}
}

/*
 * Release a dquot.  Flush it if dirty, then dqput() it.
 * The dquot must not be locked.
 */
void
xfs_qm_dqrele(
	xfs_dquot_t	*dqp)
{
	if (!dqp)
		return;

	trace_xfs_dqrele(dqp);

	xfs_dqlock(dqp);
	/*
	 * We don't care to flush it if the dquot is dirty here.
	 * That will create stutters that we want to avoid.
	 * Instead we do a delayed write when we try to reclaim
	 * a dirty dquot.  Also xfs_sync will take part of the burden...
	 */
	xfs_qm_dqput(dqp);
}

/*
 * This is the dquot flushing I/O completion routine.  It is called
 * from interrupt level when the buffer containing the dquot is
 * flushed to disk.  It is responsible for removing the dquot logitem
 * from the AIL if it has not been re-logged, and unlocking the dquot's
 * flush lock.  This behavior is very similar to that of inodes.
 */
STATIC void
xfs_qm_dqflush_done(
	struct xfs_buf		*bp,
	struct xfs_log_item	*lip)
{
	xfs_dq_logitem_t	*qip = (struct xfs_dq_logitem *)lip;
	xfs_dquot_t		*dqp = qip->qli_dquot;
	struct xfs_ail		*ailp = lip->li_ailp;

	/*
	 * We only want to pull the item from the AIL if its
	 * location in the log has not changed since we started the flush.
	 * Thus, we only bother if the dquot's lsn has
	 * not changed.  First we check the lsn outside the lock
	 * since it's cheaper, and then we recheck while
	 * holding the lock before removing the dquot from the AIL.
	 */
	if ((lip->li_flags & XFS_LI_IN_AIL) &&
	    lip->li_lsn == qip->qli_flush_lsn) {

		/* xfs_trans_ail_delete() drops the AIL lock. */
		spin_lock(&ailp->xa_lock);
		if (lip->li_lsn == qip->qli_flush_lsn)
			xfs_trans_ail_delete(ailp, lip);
		else
			spin_unlock(&ailp->xa_lock);
	}

	/*
	 * Release the dq's flush lock since we're done with it.
	 */
	xfs_dqfunlock(dqp);
}
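
/*
 * Illustrative note (not in the original source): the lsn is compared
 * twice above on purpose.  The first, unlocked check is a cheap way to
 * skip taking xa_lock in the common relogged case; only on a match do
 * we take the lock and re-check before deleting.  A relog between the
 * two checks moves li_lsn forward, and the item is then left in the
 * AIL for a later flush to retire.
 */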

/*
 * Write a modified dquot to disk.
 * The dquot must be locked and the flush lock held by the caller.
 * The flush lock will not be unlocked until the dquot reaches the disk,
 * but the dquot is free to be unlocked and modified by the caller
 * in the interim.  The dquot is still locked on return.  This behavior
 * is identical to that of inodes.
 */
int
xfs_qm_dqflush(
	xfs_dquot_t		*dqp,
	uint			flags)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_buf		*bp;
	struct xfs_disk_dquot	*ddqp;
	int			error;

	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	ASSERT(!completion_done(&dqp->q_flush));

	trace_xfs_dqflush(dqp);

	/*
	 * If not dirty, or it's pinned and we are not supposed to block, nada.
	 */
	if (!XFS_DQ_IS_DIRTY(dqp) ||
	    ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
		xfs_dqfunlock(dqp);
		return 0;
	}
	xfs_qm_dqunpin_wait(dqp);

	/*
	 * This may have been unpinned because the filesystem is shutting
	 * down forcibly.  If that's the case we must not write this dquot
	 * to disk, because the log record didn't make it to disk!
	 */
	if (XFS_FORCED_SHUTDOWN(mp)) {
		dqp->dq_flags &= ~XFS_DQ_DIRTY;
		xfs_dqfunlock(dqp);
		return XFS_ERROR(EIO);
	}

	/*
	 * Get the buffer containing the on-disk dquot.
	 */
	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
				   mp->m_quotainfo->qi_dqchunklen, 0, &bp);
	if (error) {
		ASSERT(error != ENOENT);
		xfs_dqfunlock(dqp);
		return error;
	}

	/*
	 * Calculate the location of the dquot inside the buffer.
	 */
	ddqp = bp->b_addr + dqp->q_bufoffset;

	/*
	 * A simple sanity check in case we got a corrupted dquot.
	 */
	error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
			       XFS_QMOPT_DOWARN, "dqflush (incore copy)");
	if (error) {
		xfs_buf_relse(bp);
		xfs_dqfunlock(dqp);
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return XFS_ERROR(EIO);
	}

	/* This is the only portion of data that needs to persist */
	memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));

	/*
	 * Clear the dirty field and remember the flush lsn for later use.
	 */
	dqp->dq_flags &= ~XFS_DQ_DIRTY;

	xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
			       &dqp->q_logitem.qli_item.li_lsn);

	/*
	 * Attach an iodone routine so that we can remove this dquot from the
	 * AIL and release the flush lock once the dquot is synced to disk.
	 */
	xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
			      &dqp->q_logitem.qli_item);

	/*
	 * If the buffer is pinned then push on the log so we won't
	 * get stuck waiting in the write for too long.
	 */
	if (xfs_buf_ispinned(bp)) {
		trace_xfs_dqflush_force(dqp);
		xfs_log_force(mp, 0);
	}

	if (flags & SYNC_WAIT)
		error = xfs_bwrite(bp);
	else
		xfs_buf_delwri_queue(bp);

	xfs_buf_relse(bp);

	trace_xfs_dqflush_done(dqp);

	/*
	 * dqp is still locked, but the caller is free to unlock it now.
	 */
	return error;
}

void
xfs_dqunlock(
	xfs_dquot_t	*dqp)
{
	xfs_dqunlock_nonotify(dqp);
	if (dqp->q_logitem.qli_dquot == dqp) {
		xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
					&dqp->q_logitem.qli_item);
	}
}

/*
 * Lock two xfs_dquot structures.
 *
 * To avoid deadlocks we always lock the quota structure with
 * the lower id first.
 */
void
xfs_dqlock2(
	xfs_dquot_t	*d1,
	xfs_dquot_t	*d2)
{
	if (d1 && d2) {
		ASSERT(d1 != d2);
		if (be32_to_cpu(d1->q_core.d_id) >
		    be32_to_cpu(d2->q_core.d_id)) {
			mutex_lock(&d2->q_qlock);
			mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
		} else {
			mutex_lock(&d1->q_qlock);
			mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
		}
	} else if (d1) {
		mutex_lock(&d1->q_qlock);
	} else if (d2) {
		mutex_lock(&d2->q_qlock);
	}
}
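
/*
 * Usage sketch (illustrative, not part of the original source):
 * transferring counts between two dquots locks both in a stable order
 * regardless of argument order:
 *
 *	xfs_dqlock2(udqp, gdqp);
 *	... adjust the q_core counters of both ...
 *	xfs_dqunlock(udqp);
 *	xfs_dqunlock(gdqp);
 *
 * Either pointer may be NULL, in which case only the other dquot is
 * locked.
 */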

/*
 * Take a dquot out of the mount's dqlist as well as the hashlist.  This is
 * called via unmount as well as quotaoff, and the purge will always succeed.
 */
void
xfs_qm_dqpurge(
	struct xfs_dquot	*dqp)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_dqhash	*qh = dqp->q_hash;

	xfs_dqlock(dqp);

	/*
	 * If we're turning off quotas, we have to make sure that, for
	 * example, we don't delete quota disk blocks while dquots are
	 * in the process of getting written to those disk blocks.
	 * This dquot might well be on the AIL, and we can't leave it there
	 * if we're turning off quotas.  Basically, we need this flush
	 * lock, and are willing to block on it.
	 */
	if (!xfs_dqflock_nowait(dqp)) {
		/*
		 * Block on the flush lock after nudging the dquot buffer,
		 * if it is incore.
		 */
		xfs_dqflock_pushbuf_wait(dqp);
	}

	/*
	 * If we are turning this type of quotas off, we don't care
	 * about the dirty metadata sitting in this dquot.  OTOH, if
	 * we're unmounting, we do care, so we flush it and wait.
	 */
	if (XFS_DQ_IS_DIRTY(dqp)) {
		int	error;

		/*
		 * We don't care about getting disk errors here.  We need
		 * to purge this dquot anyway, so we go ahead regardless.
		 */
		error = xfs_qm_dqflush(dqp, SYNC_WAIT);
		if (error)
			xfs_warn(mp, "%s: dquot %p flush failed",
				__func__, dqp);
		xfs_dqflock(dqp);
	}

	ASSERT(atomic_read(&dqp->q_pincount) == 0);
	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));

	xfs_dqfunlock(dqp);
	xfs_dqunlock(dqp);

	mutex_lock(&qh->qh_lock);
	list_del_init(&dqp->q_hashlist);
	qh->qh_version++;
	mutex_unlock(&qh->qh_lock);

	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
	list_del_init(&dqp->q_mplist);
	mp->m_quotainfo->qi_dqreclaims++;
	mp->m_quotainfo->qi_dquots--;
	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);

	/*
	 * We move dquots to the freelist as soon as their reference count
	 * hits zero, so it really should be on the freelist here.
	 */
	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
	ASSERT(!list_empty(&dqp->q_freelist));
	list_del_init(&dqp->q_freelist);
	xfs_Gqm->qm_dqfrlist_cnt--;
	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);

	xfs_qm_dqdestroy(dqp);
}

/*
 * Give the buffer a little push if it is incore and
 * wait on the flush lock.
 */
void
xfs_dqflock_pushbuf_wait(
	xfs_dquot_t	*dqp)
{
	xfs_mount_t	*mp = dqp->q_mount;
	xfs_buf_t	*bp;

	/*
	 * Check to see if the dquot has been flushed as a delayed
	 * write.  If so, grab its buffer and send it
	 * out immediately.  We'll be able to acquire
	 * the flush lock when the I/O completes.
	 */
	bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
			mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
	if (!bp)
		goto out_lock;

	if (XFS_BUF_ISDELAYWRITE(bp)) {
		if (xfs_buf_ispinned(bp))
			xfs_log_force(mp, 0);
		xfs_buf_delwri_promote(bp);
		wake_up_process(bp->b_target->bt_task);
	}
	xfs_buf_relse(bp);
out_lock:
	xfs_dqflock(dqp);
}
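
/*
 * Illustrative note (not in the original source): the xfs_incore()
 * lookup above is opportunistic.  XBF_TRYLOCK means a buffer that is
 * already locked for I/O is simply skipped (NULL is returned), and we
 * fall straight through to xfs_dqflock(), which blocks until the
 * current flush lock holder finishes the write and
 * xfs_qm_dqflush_done() releases the lock.
 */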