1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_inode.h" 14 #include "xfs_trans.h" 15 #include "xfs_btree.h" 16 #include "xfs_rmap_btree.h" 17 #include "xfs_trace.h" 18 #include "xfs_rmap.h" 19 #include "xfs_alloc.h" 20 #include "xfs_bit.h" 21 #include <linux/fsmap.h> 22 #include "xfs_fsmap.h" 23 #include "xfs_refcount.h" 24 #include "xfs_refcount_btree.h" 25 #include "xfs_alloc_btree.h" 26 #include "xfs_rtbitmap.h" 27 #include "xfs_ag.h" 28 #include "xfs_rtgroup.h" 29 30 /* Convert an xfs_fsmap to an fsmap. */ 31 static void 32 xfs_fsmap_from_internal( 33 struct fsmap *dest, 34 struct xfs_fsmap *src) 35 { 36 dest->fmr_device = src->fmr_device; 37 dest->fmr_flags = src->fmr_flags; 38 dest->fmr_physical = BBTOB(src->fmr_physical); 39 dest->fmr_owner = src->fmr_owner; 40 dest->fmr_offset = BBTOB(src->fmr_offset); 41 dest->fmr_length = BBTOB(src->fmr_length); 42 dest->fmr_reserved[0] = 0; 43 dest->fmr_reserved[1] = 0; 44 dest->fmr_reserved[2] = 0; 45 } 46 47 /* Convert an fsmap to an xfs_fsmap. */ 48 static void 49 xfs_fsmap_to_internal( 50 struct xfs_fsmap *dest, 51 struct fsmap *src) 52 { 53 dest->fmr_device = src->fmr_device; 54 dest->fmr_flags = src->fmr_flags; 55 dest->fmr_physical = BTOBBT(src->fmr_physical); 56 dest->fmr_owner = src->fmr_owner; 57 dest->fmr_offset = BTOBBT(src->fmr_offset); 58 dest->fmr_length = BTOBBT(src->fmr_length); 59 } 60 61 /* Convert an fsmap owner into an rmapbt owner. 
*/
/*
 * If FMR_OF_SPECIAL_OWNER is not set in src->fmr_flags the owner is stored
 * in dest->rm_owner unchanged.  Otherwise the special fsmap owner code is
 * mapped to the corresponding XFS_RMAP_OWN_* value; 0 and -1ULL are passed
 * through so they can serve as the lowest/highest possible owner in a key.
 *
 * Returns 0 on success, or -EINVAL (leaving dest untouched) for
 * XFS_FMR_OWN_DEFECTIVE and for any owner code that is not recognised.
 */
static int
xfs_fsmap_owner_to_rmap(
	struct xfs_rmap_irec	*dest,
	const struct xfs_fsmap	*src)
{
	uint64_t		rmap_owner;

	if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
		dest->rm_owner = src->fmr_owner;
		return 0;
	}

	switch (src->fmr_owner) {
	case 0:			/* "lowest owner id possible" */
	case -1ULL:		/* "highest owner id possible" */
		rmap_owner = src->fmr_owner;
		break;
	case XFS_FMR_OWN_FREE:
		rmap_owner = XFS_RMAP_OWN_NULL;
		break;
	case XFS_FMR_OWN_UNKNOWN:
		rmap_owner = XFS_RMAP_OWN_UNKNOWN;
		break;
	case XFS_FMR_OWN_FS:
		rmap_owner = XFS_RMAP_OWN_FS;
		break;
	case XFS_FMR_OWN_LOG:
		rmap_owner = XFS_RMAP_OWN_LOG;
		break;
	case XFS_FMR_OWN_AG:
		rmap_owner = XFS_RMAP_OWN_AG;
		break;
	case XFS_FMR_OWN_INOBT:
		rmap_owner = XFS_RMAP_OWN_INOBT;
		break;
	case XFS_FMR_OWN_INODES:
		rmap_owner = XFS_RMAP_OWN_INODES;
		break;
	case XFS_FMR_OWN_REFC:
		rmap_owner = XFS_RMAP_OWN_REFC;
		break;
	case XFS_FMR_OWN_COW:
		rmap_owner = XFS_RMAP_OWN_COW;
		break;
	case XFS_FMR_OWN_DEFECTIVE:	/* not implemented */
		/* fall through */
	default:
		return -EINVAL;
	}

	/* Only a successfully translated owner is written back. */
	dest->rm_owner = rmap_owner;
	return 0;
}

/* Convert an rmapbt owner into an fsmap owner.
*/ 113 static int 114 xfs_fsmap_owner_from_frec( 115 struct xfs_fsmap *dest, 116 const struct xfs_fsmap_irec *frec) 117 { 118 dest->fmr_flags = 0; 119 if (!XFS_RMAP_NON_INODE_OWNER(frec->owner)) { 120 dest->fmr_owner = frec->owner; 121 return 0; 122 } 123 dest->fmr_flags |= FMR_OF_SPECIAL_OWNER; 124 125 switch (frec->owner) { 126 case XFS_RMAP_OWN_FS: 127 dest->fmr_owner = XFS_FMR_OWN_FS; 128 break; 129 case XFS_RMAP_OWN_LOG: 130 dest->fmr_owner = XFS_FMR_OWN_LOG; 131 break; 132 case XFS_RMAP_OWN_AG: 133 dest->fmr_owner = XFS_FMR_OWN_AG; 134 break; 135 case XFS_RMAP_OWN_INOBT: 136 dest->fmr_owner = XFS_FMR_OWN_INOBT; 137 break; 138 case XFS_RMAP_OWN_INODES: 139 dest->fmr_owner = XFS_FMR_OWN_INODES; 140 break; 141 case XFS_RMAP_OWN_REFC: 142 dest->fmr_owner = XFS_FMR_OWN_REFC; 143 break; 144 case XFS_RMAP_OWN_COW: 145 dest->fmr_owner = XFS_FMR_OWN_COW; 146 break; 147 case XFS_RMAP_OWN_NULL: /* "free" */ 148 dest->fmr_owner = XFS_FMR_OWN_FREE; 149 break; 150 default: 151 ASSERT(0); 152 return -EFSCORRUPTED; 153 } 154 return 0; 155 } 156 157 /* getfsmap query state */ 158 struct xfs_getfsmap_info { 159 struct xfs_fsmap_head *head; 160 struct fsmap *fsmap_recs; /* mapping records */ 161 struct xfs_buf *agf_bp; /* AGF, for refcount queries */ 162 struct xfs_group *group; /* group info, if applicable */ 163 xfs_daddr_t next_daddr; /* next daddr we expect */ 164 /* daddr of low fsmap key when we're using the rtbitmap */ 165 xfs_daddr_t low_daddr; 166 /* daddr of high fsmap key, or the last daddr on the device */ 167 xfs_daddr_t end_daddr; 168 u64 missing_owner; /* owner of holes */ 169 u32 dev; /* device id */ 170 /* 171 * Low rmap key for the query. If low.rm_blockcount is nonzero, this 172 * is the second (or later) call to retrieve the recordset in pieces. 173 * xfs_getfsmap_rec_before_start will compare all records retrieved 174 * by the rmapbt query to filter out any records that start before 175 * the last record. 
176 */ 177 struct xfs_rmap_irec low; 178 struct xfs_rmap_irec high; /* high rmap key */ 179 bool last; /* last extent? */ 180 }; 181 182 /* Associate a device with a getfsmap handler. */ 183 struct xfs_getfsmap_dev { 184 u32 dev; 185 int (*fn)(struct xfs_trans *tp, 186 const struct xfs_fsmap *keys, 187 struct xfs_getfsmap_info *info); 188 sector_t nr_sectors; 189 }; 190 191 /* Compare two getfsmap device handlers. */ 192 static int 193 xfs_getfsmap_dev_compare( 194 const void *p1, 195 const void *p2) 196 { 197 const struct xfs_getfsmap_dev *d1 = p1; 198 const struct xfs_getfsmap_dev *d2 = p2; 199 200 return d1->dev - d2->dev; 201 } 202 203 /* Decide if this mapping is shared. */ 204 STATIC int 205 xfs_getfsmap_is_shared( 206 struct xfs_trans *tp, 207 struct xfs_getfsmap_info *info, 208 const struct xfs_fsmap_irec *frec, 209 bool *stat) 210 { 211 struct xfs_mount *mp = tp->t_mountp; 212 struct xfs_btree_cur *cur; 213 xfs_agblock_t fbno; 214 xfs_extlen_t flen; 215 int error; 216 217 *stat = false; 218 if (!xfs_has_reflink(mp)) 219 return 0; 220 /* rt files will have no perag structure */ 221 if (!info->group) 222 return 0; 223 224 /* Are there any shared blocks here? 
*/ 225 flen = 0; 226 cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, 227 to_perag(info->group)); 228 229 error = xfs_refcount_find_shared(cur, frec->rec_key, 230 XFS_BB_TO_FSBT(mp, frec->len_daddr), &fbno, &flen, 231 false); 232 233 xfs_btree_del_cursor(cur, error); 234 if (error) 235 return error; 236 237 *stat = flen > 0; 238 return 0; 239 } 240 241 static inline void 242 xfs_getfsmap_format( 243 struct xfs_mount *mp, 244 struct xfs_fsmap *xfm, 245 struct xfs_getfsmap_info *info) 246 { 247 struct fsmap *rec; 248 249 trace_xfs_getfsmap_mapping(mp, xfm); 250 251 rec = &info->fsmap_recs[info->head->fmh_entries++]; 252 xfs_fsmap_from_internal(rec, xfm); 253 } 254 255 static inline bool 256 xfs_getfsmap_frec_before_start( 257 struct xfs_getfsmap_info *info, 258 const struct xfs_fsmap_irec *frec) 259 { 260 if (info->low_daddr != XFS_BUF_DADDR_NULL) 261 return frec->start_daddr < info->low_daddr; 262 if (info->low.rm_blockcount) { 263 struct xfs_rmap_irec rec = { 264 .rm_startblock = frec->rec_key, 265 .rm_owner = frec->owner, 266 .rm_flags = frec->rm_flags, 267 }; 268 269 return xfs_rmap_compare(&rec, &info->low) < 0; 270 } 271 272 return false; 273 } 274 275 /* 276 * Format a reverse mapping for getfsmap, having translated rm_startblock 277 * into the appropriate daddr units. Pass in a nonzero @len_daddr if the 278 * length could be larger than rm_blockcount in struct xfs_rmap_irec. 279 */ 280 STATIC int 281 xfs_getfsmap_helper( 282 struct xfs_trans *tp, 283 struct xfs_getfsmap_info *info, 284 const struct xfs_fsmap_irec *frec) 285 { 286 struct xfs_fsmap fmr; 287 struct xfs_mount *mp = tp->t_mountp; 288 bool shared; 289 int error = 0; 290 291 if (fatal_signal_pending(current)) 292 return -EINTR; 293 294 /* 295 * Filter out records that start before our startpoint, if the 296 * caller requested that. 297 */ 298 if (xfs_getfsmap_frec_before_start(info, frec)) 299 goto out; 300 301 /* Are we just counting mappings? 
*/ 302 if (info->head->fmh_count == 0) { 303 if (info->head->fmh_entries == UINT_MAX) 304 return -ECANCELED; 305 306 if (frec->start_daddr > info->next_daddr) 307 info->head->fmh_entries++; 308 309 if (info->last) 310 return 0; 311 312 info->head->fmh_entries++; 313 goto out; 314 } 315 316 /* 317 * If the record starts past the last physical block we saw, 318 * then we've found a gap. Report the gap as being owned by 319 * whatever the caller specified is the missing owner. 320 */ 321 if (frec->start_daddr > info->next_daddr) { 322 if (info->head->fmh_entries >= info->head->fmh_count) 323 return -ECANCELED; 324 325 fmr.fmr_device = info->dev; 326 fmr.fmr_physical = info->next_daddr; 327 fmr.fmr_owner = info->missing_owner; 328 fmr.fmr_offset = 0; 329 fmr.fmr_length = frec->start_daddr - info->next_daddr; 330 fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; 331 xfs_getfsmap_format(mp, &fmr, info); 332 } 333 334 if (info->last) 335 goto out; 336 337 /* Fill out the extent we found */ 338 if (info->head->fmh_entries >= info->head->fmh_count) 339 return -ECANCELED; 340 341 trace_xfs_fsmap_mapping(mp, info->dev, 342 info->group ? 
info->group->xg_gno : NULLAGNUMBER, 343 frec); 344 345 fmr.fmr_device = info->dev; 346 fmr.fmr_physical = frec->start_daddr; 347 error = xfs_fsmap_owner_from_frec(&fmr, frec); 348 if (error) 349 return error; 350 fmr.fmr_offset = XFS_FSB_TO_BB(mp, frec->offset); 351 fmr.fmr_length = frec->len_daddr; 352 if (frec->rm_flags & XFS_RMAP_UNWRITTEN) 353 fmr.fmr_flags |= FMR_OF_PREALLOC; 354 if (frec->rm_flags & XFS_RMAP_ATTR_FORK) 355 fmr.fmr_flags |= FMR_OF_ATTR_FORK; 356 if (frec->rm_flags & XFS_RMAP_BMBT_BLOCK) 357 fmr.fmr_flags |= FMR_OF_EXTENT_MAP; 358 if (fmr.fmr_flags == 0) { 359 error = xfs_getfsmap_is_shared(tp, info, frec, &shared); 360 if (error) 361 return error; 362 if (shared) 363 fmr.fmr_flags |= FMR_OF_SHARED; 364 } 365 366 xfs_getfsmap_format(mp, &fmr, info); 367 out: 368 info->next_daddr = max(info->next_daddr, 369 frec->start_daddr + frec->len_daddr); 370 return 0; 371 } 372 373 static inline int 374 xfs_getfsmap_group_helper( 375 struct xfs_getfsmap_info *info, 376 struct xfs_trans *tp, 377 struct xfs_group *xg, 378 xfs_agblock_t startblock, 379 xfs_extlen_t blockcount, 380 struct xfs_fsmap_irec *frec) 381 { 382 /* 383 * For an info->last query, we're looking for a gap between the last 384 * mapping emitted and the high key specified by userspace. If the 385 * user's query spans less than 1 fsblock, then info->high and 386 * info->low will have the same rm_startblock, which causes rec_daddr 387 * and next_daddr to be the same. Therefore, use the end_daddr that 388 * we calculated from userspace's high key to synthesize the record. 389 * Note that if the btree query found a mapping, there won't be a gap. 
390 */ 391 if (info->last) 392 frec->start_daddr = info->end_daddr + 1; 393 else 394 frec->start_daddr = xfs_gbno_to_daddr(xg, startblock); 395 396 frec->len_daddr = XFS_FSB_TO_BB(xg->xg_mount, blockcount); 397 return xfs_getfsmap_helper(tp, info, frec); 398 } 399 400 /* Transform a rmapbt irec into a fsmap */ 401 STATIC int 402 xfs_getfsmap_rmapbt_helper( 403 struct xfs_btree_cur *cur, 404 const struct xfs_rmap_irec *rec, 405 void *priv) 406 { 407 struct xfs_fsmap_irec frec = { 408 .owner = rec->rm_owner, 409 .offset = rec->rm_offset, 410 .rm_flags = rec->rm_flags, 411 .rec_key = rec->rm_startblock, 412 }; 413 struct xfs_getfsmap_info *info = priv; 414 415 return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group, 416 rec->rm_startblock, rec->rm_blockcount, &frec); 417 } 418 419 /* Transform a bnobt irec into a fsmap */ 420 STATIC int 421 xfs_getfsmap_datadev_bnobt_helper( 422 struct xfs_btree_cur *cur, 423 const struct xfs_alloc_rec_incore *rec, 424 void *priv) 425 { 426 struct xfs_fsmap_irec frec = { 427 .owner = XFS_RMAP_OWN_NULL, /* "free" */ 428 .rec_key = rec->ar_startblock, 429 }; 430 struct xfs_getfsmap_info *info = priv; 431 432 return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group, 433 rec->ar_startblock, rec->ar_blockcount, &frec); 434 } 435 436 /* Set rmap flags based on the getfsmap flags */ 437 static void 438 xfs_getfsmap_set_irec_flags( 439 struct xfs_rmap_irec *irec, 440 const struct xfs_fsmap *fmr) 441 { 442 irec->rm_flags = 0; 443 if (fmr->fmr_flags & FMR_OF_ATTR_FORK) 444 irec->rm_flags |= XFS_RMAP_ATTR_FORK; 445 if (fmr->fmr_flags & FMR_OF_EXTENT_MAP) 446 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK; 447 if (fmr->fmr_flags & FMR_OF_PREALLOC) 448 irec->rm_flags |= XFS_RMAP_UNWRITTEN; 449 } 450 451 static inline bool 452 rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) 453 { 454 if (!xfs_has_reflink(mp)) 455 return true; 456 if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner)) 457 return true; 458 if (r->rm_flags & 
(XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | 459 XFS_RMAP_UNWRITTEN)) 460 return true; 461 return false; 462 } 463 464 /* Execute a getfsmap query against the regular data device. */ 465 STATIC int 466 __xfs_getfsmap_datadev( 467 struct xfs_trans *tp, 468 const struct xfs_fsmap *keys, 469 struct xfs_getfsmap_info *info, 470 int (*query_fn)(struct xfs_trans *, 471 struct xfs_getfsmap_info *, 472 struct xfs_btree_cur **, 473 void *), 474 void *priv) 475 { 476 struct xfs_mount *mp = tp->t_mountp; 477 struct xfs_perag *pag = NULL; 478 struct xfs_btree_cur *bt_cur = NULL; 479 xfs_fsblock_t start_fsb; 480 xfs_fsblock_t end_fsb; 481 xfs_agnumber_t start_ag, end_ag; 482 uint64_t eofs; 483 int error = 0; 484 485 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 486 if (keys[0].fmr_physical >= eofs) 487 return 0; 488 start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical); 489 end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 490 491 /* 492 * Convert the fsmap low/high keys to AG based keys. Initialize 493 * low to the fsmap low key and max out the high key to the end 494 * of the AG. 495 */ 496 info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); 497 error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); 498 if (error) 499 return error; 500 info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length); 501 xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); 502 503 /* Adjust the low key if we are continuing from where we left off. 
*/ 504 if (info->low.rm_blockcount == 0) { 505 /* No previous record from which to continue */ 506 } else if (rmap_not_shareable(mp, &info->low)) { 507 /* Last record seen was an unshareable extent */ 508 info->low.rm_owner = 0; 509 info->low.rm_offset = 0; 510 511 start_fsb += info->low.rm_blockcount; 512 if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs) 513 return 0; 514 } else { 515 /* Last record seen was a shareable file data extent */ 516 info->low.rm_offset += info->low.rm_blockcount; 517 } 518 info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); 519 520 info->high.rm_startblock = -1U; 521 info->high.rm_owner = ULLONG_MAX; 522 info->high.rm_offset = ULLONG_MAX; 523 info->high.rm_blockcount = 0; 524 info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; 525 526 start_ag = XFS_FSB_TO_AGNO(mp, start_fsb); 527 end_ag = XFS_FSB_TO_AGNO(mp, end_fsb); 528 529 while ((pag = xfs_perag_next_range(mp, pag, start_ag, end_ag))) { 530 /* 531 * Set the AG high key from the fsmap high key if this 532 * is the last AG that we're querying. 
533 */ 534 info->group = pag_group(pag); 535 if (pag_agno(pag) == end_ag) { 536 info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp, 537 end_fsb); 538 info->high.rm_offset = XFS_BB_TO_FSBT(mp, 539 keys[1].fmr_offset); 540 error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); 541 if (error) 542 break; 543 xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); 544 } 545 546 if (bt_cur) { 547 xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR); 548 bt_cur = NULL; 549 xfs_trans_brelse(tp, info->agf_bp); 550 info->agf_bp = NULL; 551 } 552 553 error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp); 554 if (error) 555 break; 556 557 trace_xfs_fsmap_low_group_key(mp, info->dev, pag_agno(pag), 558 &info->low); 559 trace_xfs_fsmap_high_group_key(mp, info->dev, pag_agno(pag), 560 &info->high); 561 562 error = query_fn(tp, info, &bt_cur, priv); 563 if (error) 564 break; 565 566 /* 567 * Set the AG low key to the start of the AG prior to 568 * moving on to the next AG. 569 */ 570 if (pag_agno(pag) == start_ag) 571 memset(&info->low, 0, sizeof(info->low)); 572 573 /* 574 * If this is the last AG, report any gap at the end of it 575 * before we drop the reference to the perag when the loop 576 * terminates. 577 */ 578 if (pag_agno(pag) == end_ag) { 579 info->last = true; 580 error = query_fn(tp, info, &bt_cur, priv); 581 if (error) 582 break; 583 } 584 info->group = NULL; 585 } 586 587 if (bt_cur) 588 xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR : 589 XFS_BTREE_NOERROR); 590 if (info->agf_bp) { 591 xfs_trans_brelse(tp, info->agf_bp); 592 info->agf_bp = NULL; 593 } 594 if (info->group) { 595 xfs_perag_rele(pag); 596 info->group = NULL; 597 } else if (pag) { 598 /* loop termination case */ 599 xfs_perag_rele(pag); 600 } 601 602 return error; 603 } 604 605 /* Actually query the rmap btree. 
*/ 606 STATIC int 607 xfs_getfsmap_datadev_rmapbt_query( 608 struct xfs_trans *tp, 609 struct xfs_getfsmap_info *info, 610 struct xfs_btree_cur **curpp, 611 void *priv) 612 { 613 /* Report any gap at the end of the last AG. */ 614 if (info->last) 615 return xfs_getfsmap_rmapbt_helper(*curpp, &info->high, info); 616 617 /* Allocate cursor for this AG and query_range it. */ 618 *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp, 619 to_perag(info->group)); 620 return xfs_rmap_query_range(*curpp, &info->low, &info->high, 621 xfs_getfsmap_rmapbt_helper, info); 622 } 623 624 /* Execute a getfsmap query against the regular data device rmapbt. */ 625 STATIC int 626 xfs_getfsmap_datadev_rmapbt( 627 struct xfs_trans *tp, 628 const struct xfs_fsmap *keys, 629 struct xfs_getfsmap_info *info) 630 { 631 info->missing_owner = XFS_FMR_OWN_FREE; 632 return __xfs_getfsmap_datadev(tp, keys, info, 633 xfs_getfsmap_datadev_rmapbt_query, NULL); 634 } 635 636 /* Actually query the bno btree. */ 637 STATIC int 638 xfs_getfsmap_datadev_bnobt_query( 639 struct xfs_trans *tp, 640 struct xfs_getfsmap_info *info, 641 struct xfs_btree_cur **curpp, 642 void *priv) 643 { 644 struct xfs_alloc_rec_incore *key = priv; 645 646 /* Report any gap at the end of the last AG. */ 647 if (info->last) 648 return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info); 649 650 /* Allocate cursor for this AG and query_range it. */ 651 *curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp, 652 to_perag(info->group)); 653 key->ar_startblock = info->low.rm_startblock; 654 key[1].ar_startblock = info->high.rm_startblock; 655 return xfs_alloc_query_range(*curpp, key, &key[1], 656 xfs_getfsmap_datadev_bnobt_helper, info); 657 } 658 659 /* Execute a getfsmap query against the regular data device's bnobt. 
*/ 660 STATIC int 661 xfs_getfsmap_datadev_bnobt( 662 struct xfs_trans *tp, 663 const struct xfs_fsmap *keys, 664 struct xfs_getfsmap_info *info) 665 { 666 struct xfs_alloc_rec_incore akeys[2]; 667 668 memset(akeys, 0, sizeof(akeys)); 669 info->missing_owner = XFS_FMR_OWN_UNKNOWN; 670 return __xfs_getfsmap_datadev(tp, keys, info, 671 xfs_getfsmap_datadev_bnobt_query, &akeys[0]); 672 } 673 674 /* Execute a getfsmap query against the log device. */ 675 STATIC int 676 xfs_getfsmap_logdev( 677 struct xfs_trans *tp, 678 const struct xfs_fsmap *keys, 679 struct xfs_getfsmap_info *info) 680 { 681 struct xfs_fsmap_irec frec = { 682 .start_daddr = 0, 683 .rec_key = 0, 684 .owner = XFS_RMAP_OWN_LOG, 685 }; 686 struct xfs_mount *mp = tp->t_mountp; 687 xfs_fsblock_t start_fsb, end_fsb; 688 uint64_t eofs; 689 690 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 691 if (keys[0].fmr_physical >= eofs) 692 return 0; 693 start_fsb = XFS_BB_TO_FSBT(mp, 694 keys[0].fmr_physical + keys[0].fmr_length); 695 end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 696 697 /* Adjust the low key if we are continuing from where we left off. */ 698 if (keys[0].fmr_length > 0) 699 info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); 700 701 trace_xfs_fsmap_low_linear_key(mp, info->dev, start_fsb); 702 trace_xfs_fsmap_high_linear_key(mp, info->dev, end_fsb); 703 704 if (start_fsb > 0) 705 return 0; 706 707 /* Fabricate an rmap entry for the external log device. 
*/ 708 frec.len_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 709 return xfs_getfsmap_helper(tp, info, &frec); 710 } 711 712 #ifdef CONFIG_XFS_RT 713 /* Transform a rtbitmap "record" into a fsmap */ 714 STATIC int 715 xfs_getfsmap_rtdev_rtbitmap_helper( 716 struct xfs_rtgroup *rtg, 717 struct xfs_trans *tp, 718 const struct xfs_rtalloc_rec *rec, 719 void *priv) 720 { 721 struct xfs_fsmap_irec frec = { 722 .owner = XFS_RMAP_OWN_NULL, /* "free" */ 723 }; 724 struct xfs_mount *mp = rtg_mount(rtg); 725 struct xfs_getfsmap_info *info = priv; 726 xfs_rtblock_t start_rtb = 727 xfs_rtx_to_rtb(rtg, rec->ar_startext); 728 uint64_t rtbcount = 729 xfs_rtbxlen_to_blen(mp, rec->ar_extcount); 730 731 /* 732 * For an info->last query, we're looking for a gap between the last 733 * mapping emitted and the high key specified by userspace. If the 734 * user's query spans less than 1 fsblock, then info->high and 735 * info->low will have the same rm_startblock, which causes rec_daddr 736 * and next_daddr to be the same. Therefore, use the end_daddr that 737 * we calculated from userspace's high key to synthesize the record. 738 * Note that if the btree query found a mapping, there won't be a gap. 739 */ 740 if (info->last) 741 frec.start_daddr = info->end_daddr + 1; 742 else 743 frec.start_daddr = xfs_rtb_to_daddr(mp, start_rtb); 744 745 frec.len_daddr = XFS_FSB_TO_BB(mp, rtbcount); 746 return xfs_getfsmap_helper(tp, info, &frec); 747 } 748 749 /* Execute a getfsmap query against the realtime device rtbitmap. 
*/ 750 STATIC int 751 xfs_getfsmap_rtdev_rtbitmap( 752 struct xfs_trans *tp, 753 const struct xfs_fsmap *keys, 754 struct xfs_getfsmap_info *info) 755 { 756 struct xfs_mount *mp = tp->t_mountp; 757 xfs_rtblock_t start_rtbno, end_rtbno; 758 xfs_rtxnum_t start_rtx, end_rtx; 759 xfs_rgnumber_t start_rgno, end_rgno; 760 struct xfs_rtgroup *rtg = NULL; 761 uint64_t eofs; 762 int error; 763 764 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 765 if (keys[0].fmr_physical >= eofs) 766 return 0; 767 768 info->missing_owner = XFS_FMR_OWN_UNKNOWN; 769 770 /* Adjust the low key if we are continuing from where we left off. */ 771 start_rtbno = xfs_daddr_to_rtb(mp, 772 keys[0].fmr_physical + keys[0].fmr_length); 773 if (keys[0].fmr_length > 0) { 774 info->low_daddr = xfs_rtb_to_daddr(mp, start_rtbno); 775 if (info->low_daddr >= eofs) 776 return 0; 777 } 778 start_rtx = xfs_rtb_to_rtx(mp, start_rtbno); 779 start_rgno = xfs_rtb_to_rgno(mp, start_rtbno); 780 781 end_rtbno = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical)); 782 end_rgno = xfs_rtb_to_rgno(mp, end_rtbno); 783 784 trace_xfs_fsmap_low_linear_key(mp, info->dev, start_rtbno); 785 trace_xfs_fsmap_high_linear_key(mp, info->dev, end_rtbno); 786 787 end_rtx = -1ULL; 788 789 while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) { 790 if (rtg_rgno(rtg) == end_rgno) 791 end_rtx = xfs_rtb_to_rtx(mp, 792 end_rtbno + mp->m_sb.sb_rextsize - 1); 793 794 info->group = rtg_group(rtg); 795 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED); 796 error = xfs_rtalloc_query_range(rtg, tp, start_rtx, end_rtx, 797 xfs_getfsmap_rtdev_rtbitmap_helper, info); 798 if (error) 799 break; 800 801 /* 802 * Report any gaps at the end of the rtbitmap by simulating a 803 * zero-length free extent starting at the rtx after the end 804 * of the query range. 
805 */ 806 if (rtg_rgno(rtg) == end_rgno) { 807 struct xfs_rtalloc_rec ahigh = { 808 .ar_startext = min(end_rtx + 1, 809 rtg->rtg_extents), 810 }; 811 812 info->last = true; 813 error = xfs_getfsmap_rtdev_rtbitmap_helper(rtg, tp, 814 &ahigh, info); 815 if (error) 816 break; 817 } 818 819 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED); 820 info->group = NULL; 821 start_rtx = 0; 822 } 823 824 /* loop termination case */ 825 if (rtg) { 826 if (info->group) { 827 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED); 828 info->group = NULL; 829 } 830 xfs_rtgroup_rele(rtg); 831 } 832 833 return error; 834 } 835 #endif /* CONFIG_XFS_RT */ 836 837 /* Do we recognize the device? */ 838 STATIC bool 839 xfs_getfsmap_is_valid_device( 840 struct xfs_mount *mp, 841 struct xfs_fsmap *fm) 842 { 843 if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || 844 fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev)) 845 return true; 846 if (mp->m_logdev_targp && 847 fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev)) 848 return true; 849 if (mp->m_rtdev_targp && 850 fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev)) 851 return true; 852 return false; 853 } 854 855 /* Ensure that the low key is less than the high key. 
*/ 856 STATIC bool 857 xfs_getfsmap_check_keys( 858 struct xfs_fsmap *low_key, 859 struct xfs_fsmap *high_key) 860 { 861 if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { 862 if (low_key->fmr_offset) 863 return false; 864 } 865 if (high_key->fmr_flags != -1U && 866 (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | 867 FMR_OF_EXTENT_MAP))) { 868 if (high_key->fmr_offset && high_key->fmr_offset != -1ULL) 869 return false; 870 } 871 if (high_key->fmr_length && high_key->fmr_length != -1ULL) 872 return false; 873 874 if (low_key->fmr_device > high_key->fmr_device) 875 return false; 876 if (low_key->fmr_device < high_key->fmr_device) 877 return true; 878 879 if (low_key->fmr_physical > high_key->fmr_physical) 880 return false; 881 if (low_key->fmr_physical < high_key->fmr_physical) 882 return true; 883 884 if (low_key->fmr_owner > high_key->fmr_owner) 885 return false; 886 if (low_key->fmr_owner < high_key->fmr_owner) 887 return true; 888 889 if (low_key->fmr_offset > high_key->fmr_offset) 890 return false; 891 if (low_key->fmr_offset < high_key->fmr_offset) 892 return true; 893 894 return false; 895 } 896 897 /* 898 * There are only two devices if we didn't configure RT devices at build time. 899 */ 900 #ifdef CONFIG_XFS_RT 901 #define XFS_GETFSMAP_DEVS 3 902 #else 903 #define XFS_GETFSMAP_DEVS 2 904 #endif /* CONFIG_XFS_RT */ 905 906 /* 907 * Get filesystem's extents as described in head, and format for output. Fills 908 * in the supplied records array until there are no more reverse mappings to 909 * return or head.fmh_entries == head.fmh_count. In the second case, this 910 * function returns -ECANCELED to indicate that more records would have been 911 * returned. 912 * 913 * Key to Confusion 914 * ---------------- 915 * There are multiple levels of keys and counters at work here: 916 * xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in; 917 * these reflect fs-wide sector addrs. 
918 * dkeys -- fmh_keys used to query each device; 919 * these are fmh_keys but w/ the low key 920 * bumped up by fmr_length. 921 * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this 922 * is how we detect gaps in the fsmap 923 records and report them. 924 * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from 925 * dkeys; used to query the metadata. 926 */ 927 STATIC int 928 xfs_getfsmap( 929 struct xfs_mount *mp, 930 struct xfs_fsmap_head *head, 931 struct fsmap *fsmap_recs) 932 { 933 struct xfs_trans *tp = NULL; 934 struct xfs_fsmap dkeys[2]; /* per-dev keys */ 935 struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; 936 struct xfs_getfsmap_info info = { 937 .fsmap_recs = fsmap_recs, 938 .head = head, 939 }; 940 bool use_rmap; 941 int i; 942 int error = 0; 943 944 if (head->fmh_iflags & ~FMH_IF_VALID) 945 return -EINVAL; 946 if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) || 947 !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) 948 return -EINVAL; 949 if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1])) 950 return -EINVAL; 951 952 use_rmap = xfs_has_rmapbt(mp) && 953 has_capability_noaudit(current, CAP_SYS_ADMIN); 954 head->fmh_entries = 0; 955 956 /* Set up our device handlers. 
*/ 957 memset(handlers, 0, sizeof(handlers)); 958 handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 959 handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); 960 if (use_rmap) 961 handlers[0].fn = xfs_getfsmap_datadev_rmapbt; 962 else 963 handlers[0].fn = xfs_getfsmap_datadev_bnobt; 964 if (mp->m_logdev_targp != mp->m_ddev_targp) { 965 handlers[1].nr_sectors = XFS_FSB_TO_BB(mp, 966 mp->m_sb.sb_logblocks); 967 handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); 968 handlers[1].fn = xfs_getfsmap_logdev; 969 } 970 #ifdef CONFIG_XFS_RT 971 if (mp->m_rtdev_targp) { 972 handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 973 handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); 974 handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; 975 } 976 #endif /* CONFIG_XFS_RT */ 977 978 xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev), 979 xfs_getfsmap_dev_compare); 980 981 /* 982 * To continue where we left off, we allow userspace to use the 983 * last mapping from a previous call as the low key of the next. 984 * This is identified by a non-zero length in the low key. We 985 * have to increment the low key in this scenario to ensure we 986 * don't return the same mapping again, and instead return the 987 * very next mapping. 988 * 989 * If the low key mapping refers to file data, the same physical 990 * blocks could be mapped to several other files/offsets. 991 * According to rmapbt record ordering, the minimal next 992 * possible record for the block range is the next starting 993 * offset in the same inode. Therefore, each fsmap backend bumps 994 * the file offset to continue the search appropriately. For 995 * all other low key mapping types (attr blocks, metadata), each 996 * fsmap backend bumps the physical offset as there can be no 997 * other mapping for the same physical block range. 
998 */ 999 dkeys[0] = head->fmh_keys[0]; 1000 memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); 1001 1002 info.next_daddr = head->fmh_keys[0].fmr_physical + 1003 head->fmh_keys[0].fmr_length; 1004 1005 /* For each device we support... */ 1006 for (i = 0; i < XFS_GETFSMAP_DEVS; i++) { 1007 /* Is this device within the range the user asked for? */ 1008 if (!handlers[i].fn) 1009 continue; 1010 if (head->fmh_keys[0].fmr_device > handlers[i].dev) 1011 continue; 1012 if (head->fmh_keys[1].fmr_device < handlers[i].dev) 1013 break; 1014 1015 /* 1016 * If this device number matches the high key, we have to pass 1017 * the high key to the handler to limit the query results, and 1018 * set the end_daddr so that we can synthesize records at the 1019 * end of the query range or device. 1020 */ 1021 if (handlers[i].dev == head->fmh_keys[1].fmr_device) { 1022 dkeys[1] = head->fmh_keys[1]; 1023 info.end_daddr = min(handlers[i].nr_sectors - 1, 1024 dkeys[1].fmr_physical); 1025 } else { 1026 info.end_daddr = handlers[i].nr_sectors - 1; 1027 } 1028 1029 /* 1030 * If the device number exceeds the low key, zero out the low 1031 * key so that we get everything from the beginning. 1032 */ 1033 if (handlers[i].dev > head->fmh_keys[0].fmr_device) 1034 memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); 1035 1036 /* 1037 * Grab an empty transaction so that we can use its recursive 1038 * buffer locking abilities to detect cycles in the rmapbt 1039 * without deadlocking. 
1040 */ 1041 error = xfs_trans_alloc_empty(mp, &tp); 1042 if (error) 1043 break; 1044 1045 info.dev = handlers[i].dev; 1046 info.last = false; 1047 info.group = NULL; 1048 info.low_daddr = XFS_BUF_DADDR_NULL; 1049 info.low.rm_blockcount = 0; 1050 error = handlers[i].fn(tp, dkeys, &info); 1051 if (error) 1052 break; 1053 xfs_trans_cancel(tp); 1054 tp = NULL; 1055 info.next_daddr = 0; 1056 } 1057 1058 if (tp) 1059 xfs_trans_cancel(tp); 1060 head->fmh_oflags = FMH_OF_DEV_T; 1061 return error; 1062 } 1063 1064 int 1065 xfs_ioc_getfsmap( 1066 struct xfs_inode *ip, 1067 struct fsmap_head __user *arg) 1068 { 1069 struct xfs_fsmap_head xhead = {0}; 1070 struct fsmap_head head; 1071 struct fsmap *recs; 1072 unsigned int count; 1073 __u32 last_flags = 0; 1074 bool done = false; 1075 int error; 1076 1077 if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) 1078 return -EFAULT; 1079 if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || 1080 memchr_inv(head.fmh_keys[0].fmr_reserved, 0, 1081 sizeof(head.fmh_keys[0].fmr_reserved)) || 1082 memchr_inv(head.fmh_keys[1].fmr_reserved, 0, 1083 sizeof(head.fmh_keys[1].fmr_reserved))) 1084 return -EINVAL; 1085 1086 /* 1087 * Use an internal memory buffer so that we don't have to copy fsmap 1088 * data to userspace while holding locks. Start by trying to allocate 1089 * up to 128k for the buffer, but fall back to a single page if needed. 
1090 */ 1091 count = min_t(unsigned int, head.fmh_count, 1092 131072 / sizeof(struct fsmap)); 1093 recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); 1094 if (!recs) { 1095 count = min_t(unsigned int, head.fmh_count, 1096 PAGE_SIZE / sizeof(struct fsmap)); 1097 recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); 1098 if (!recs) 1099 return -ENOMEM; 1100 } 1101 1102 xhead.fmh_iflags = head.fmh_iflags; 1103 xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); 1104 xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); 1105 1106 trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); 1107 trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); 1108 1109 head.fmh_entries = 0; 1110 do { 1111 struct fsmap __user *user_recs; 1112 struct fsmap *last_rec; 1113 1114 user_recs = &arg->fmh_recs[head.fmh_entries]; 1115 xhead.fmh_entries = 0; 1116 xhead.fmh_count = min_t(unsigned int, count, 1117 head.fmh_count - head.fmh_entries); 1118 1119 /* Run query, record how many entries we got. */ 1120 error = xfs_getfsmap(ip->i_mount, &xhead, recs); 1121 switch (error) { 1122 case 0: 1123 /* 1124 * There are no more records in the result set. Copy 1125 * whatever we got to userspace and break out. 1126 */ 1127 done = true; 1128 break; 1129 case -ECANCELED: 1130 /* 1131 * The internal memory buffer is full. Copy whatever 1132 * records we got to userspace and go again if we have 1133 * not yet filled the userspace buffer. 1134 */ 1135 error = 0; 1136 break; 1137 default: 1138 goto out_free; 1139 } 1140 head.fmh_entries += xhead.fmh_entries; 1141 head.fmh_oflags = xhead.fmh_oflags; 1142 1143 /* 1144 * If the caller wanted a record count or there aren't any 1145 * new records to return, we're done. 1146 */ 1147 if (head.fmh_count == 0 || xhead.fmh_entries == 0) 1148 break; 1149 1150 /* Copy all the records we got out to userspace. 
*/ 1151 if (copy_to_user(user_recs, recs, 1152 xhead.fmh_entries * sizeof(struct fsmap))) { 1153 error = -EFAULT; 1154 goto out_free; 1155 } 1156 1157 /* Remember the last record flags we copied to userspace. */ 1158 last_rec = &recs[xhead.fmh_entries - 1]; 1159 last_flags = last_rec->fmr_flags; 1160 1161 /* Set up the low key for the next iteration. */ 1162 xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); 1163 trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); 1164 } while (!done && head.fmh_entries < head.fmh_count); 1165 1166 /* 1167 * If there are no more records in the query result set and we're not 1168 * in counting mode, mark the last record returned with the LAST flag. 1169 */ 1170 if (done && head.fmh_count > 0 && head.fmh_entries > 0) { 1171 struct fsmap __user *user_rec; 1172 1173 last_flags |= FMR_OF_LAST; 1174 user_rec = &arg->fmh_recs[head.fmh_entries - 1]; 1175 1176 if (copy_to_user(&user_rec->fmr_flags, &last_flags, 1177 sizeof(last_flags))) { 1178 error = -EFAULT; 1179 goto out_free; 1180 } 1181 } 1182 1183 /* copy back header */ 1184 if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { 1185 error = -EFAULT; 1186 goto out_free; 1187 } 1188 1189 out_free: 1190 kvfree(recs); 1191 return error; 1192 } 1193