1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_inode.h" 14 #include "xfs_trans.h" 15 #include "xfs_btree.h" 16 #include "xfs_rmap_btree.h" 17 #include "xfs_trace.h" 18 #include "xfs_rmap.h" 19 #include "xfs_alloc.h" 20 #include "xfs_bit.h" 21 #include <linux/fsmap.h> 22 #include "xfs_fsmap.h" 23 #include "xfs_refcount.h" 24 #include "xfs_refcount_btree.h" 25 #include "xfs_alloc_btree.h" 26 #include "xfs_rtbitmap.h" 27 #include "xfs_ag.h" 28 29 /* Convert an xfs_fsmap to an fsmap. */ 30 static void 31 xfs_fsmap_from_internal( 32 struct fsmap *dest, 33 struct xfs_fsmap *src) 34 { 35 dest->fmr_device = src->fmr_device; 36 dest->fmr_flags = src->fmr_flags; 37 dest->fmr_physical = BBTOB(src->fmr_physical); 38 dest->fmr_owner = src->fmr_owner; 39 dest->fmr_offset = BBTOB(src->fmr_offset); 40 dest->fmr_length = BBTOB(src->fmr_length); 41 dest->fmr_reserved[0] = 0; 42 dest->fmr_reserved[1] = 0; 43 dest->fmr_reserved[2] = 0; 44 } 45 46 /* Convert an fsmap to an xfs_fsmap. */ 47 static void 48 xfs_fsmap_to_internal( 49 struct xfs_fsmap *dest, 50 struct fsmap *src) 51 { 52 dest->fmr_device = src->fmr_device; 53 dest->fmr_flags = src->fmr_flags; 54 dest->fmr_physical = BTOBBT(src->fmr_physical); 55 dest->fmr_owner = src->fmr_owner; 56 dest->fmr_offset = BTOBBT(src->fmr_offset); 57 dest->fmr_length = BTOBBT(src->fmr_length); 58 } 59 60 /* Convert an fsmap owner into an rmapbt owner. 
*/ 61 static int 62 xfs_fsmap_owner_to_rmap( 63 struct xfs_rmap_irec *dest, 64 const struct xfs_fsmap *src) 65 { 66 if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) { 67 dest->rm_owner = src->fmr_owner; 68 return 0; 69 } 70 71 switch (src->fmr_owner) { 72 case 0: /* "lowest owner id possible" */ 73 case -1ULL: /* "highest owner id possible" */ 74 dest->rm_owner = src->fmr_owner; 75 break; 76 case XFS_FMR_OWN_FREE: 77 dest->rm_owner = XFS_RMAP_OWN_NULL; 78 break; 79 case XFS_FMR_OWN_UNKNOWN: 80 dest->rm_owner = XFS_RMAP_OWN_UNKNOWN; 81 break; 82 case XFS_FMR_OWN_FS: 83 dest->rm_owner = XFS_RMAP_OWN_FS; 84 break; 85 case XFS_FMR_OWN_LOG: 86 dest->rm_owner = XFS_RMAP_OWN_LOG; 87 break; 88 case XFS_FMR_OWN_AG: 89 dest->rm_owner = XFS_RMAP_OWN_AG; 90 break; 91 case XFS_FMR_OWN_INOBT: 92 dest->rm_owner = XFS_RMAP_OWN_INOBT; 93 break; 94 case XFS_FMR_OWN_INODES: 95 dest->rm_owner = XFS_RMAP_OWN_INODES; 96 break; 97 case XFS_FMR_OWN_REFC: 98 dest->rm_owner = XFS_RMAP_OWN_REFC; 99 break; 100 case XFS_FMR_OWN_COW: 101 dest->rm_owner = XFS_RMAP_OWN_COW; 102 break; 103 case XFS_FMR_OWN_DEFECTIVE: /* not implemented */ 104 /* fall through */ 105 default: 106 return -EINVAL; 107 } 108 return 0; 109 } 110 111 /* Convert an rmapbt owner into an fsmap owner. 
*/ 112 static int 113 xfs_fsmap_owner_from_rmap( 114 struct xfs_fsmap *dest, 115 const struct xfs_rmap_irec *src) 116 { 117 dest->fmr_flags = 0; 118 if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) { 119 dest->fmr_owner = src->rm_owner; 120 return 0; 121 } 122 dest->fmr_flags |= FMR_OF_SPECIAL_OWNER; 123 124 switch (src->rm_owner) { 125 case XFS_RMAP_OWN_FS: 126 dest->fmr_owner = XFS_FMR_OWN_FS; 127 break; 128 case XFS_RMAP_OWN_LOG: 129 dest->fmr_owner = XFS_FMR_OWN_LOG; 130 break; 131 case XFS_RMAP_OWN_AG: 132 dest->fmr_owner = XFS_FMR_OWN_AG; 133 break; 134 case XFS_RMAP_OWN_INOBT: 135 dest->fmr_owner = XFS_FMR_OWN_INOBT; 136 break; 137 case XFS_RMAP_OWN_INODES: 138 dest->fmr_owner = XFS_FMR_OWN_INODES; 139 break; 140 case XFS_RMAP_OWN_REFC: 141 dest->fmr_owner = XFS_FMR_OWN_REFC; 142 break; 143 case XFS_RMAP_OWN_COW: 144 dest->fmr_owner = XFS_FMR_OWN_COW; 145 break; 146 case XFS_RMAP_OWN_NULL: /* "free" */ 147 dest->fmr_owner = XFS_FMR_OWN_FREE; 148 break; 149 default: 150 ASSERT(0); 151 return -EFSCORRUPTED; 152 } 153 return 0; 154 } 155 156 /* getfsmap query state */ 157 struct xfs_getfsmap_info { 158 struct xfs_fsmap_head *head; 159 struct fsmap *fsmap_recs; /* mapping records */ 160 struct xfs_buf *agf_bp; /* AGF, for refcount queries */ 161 struct xfs_perag *pag; /* AG info, if applicable */ 162 xfs_daddr_t next_daddr; /* next daddr we expect */ 163 /* daddr of low fsmap key when we're using the rtbitmap */ 164 xfs_daddr_t low_daddr; 165 xfs_daddr_t end_daddr; /* daddr of high fsmap key */ 166 u64 missing_owner; /* owner of holes */ 167 u32 dev; /* device id */ 168 /* 169 * Low rmap key for the query. If low.rm_blockcount is nonzero, this 170 * is the second (or later) call to retrieve the recordset in pieces. 171 * xfs_getfsmap_rec_before_start will compare all records retrieved 172 * by the rmapbt query to filter out any records that start before 173 * the last record. 
174 */ 175 struct xfs_rmap_irec low; 176 struct xfs_rmap_irec high; /* high rmap key */ 177 bool last; /* last extent? */ 178 }; 179 180 /* Associate a device with a getfsmap handler. */ 181 struct xfs_getfsmap_dev { 182 u32 dev; 183 int (*fn)(struct xfs_trans *tp, 184 const struct xfs_fsmap *keys, 185 struct xfs_getfsmap_info *info); 186 sector_t nr_sectors; 187 }; 188 189 /* Compare two getfsmap device handlers. */ 190 static int 191 xfs_getfsmap_dev_compare( 192 const void *p1, 193 const void *p2) 194 { 195 const struct xfs_getfsmap_dev *d1 = p1; 196 const struct xfs_getfsmap_dev *d2 = p2; 197 198 return d1->dev - d2->dev; 199 } 200 201 /* Decide if this mapping is shared. */ 202 STATIC int 203 xfs_getfsmap_is_shared( 204 struct xfs_trans *tp, 205 struct xfs_getfsmap_info *info, 206 const struct xfs_rmap_irec *rec, 207 bool *stat) 208 { 209 struct xfs_mount *mp = tp->t_mountp; 210 struct xfs_btree_cur *cur; 211 xfs_agblock_t fbno; 212 xfs_extlen_t flen; 213 int error; 214 215 *stat = false; 216 if (!xfs_has_reflink(mp)) 217 return 0; 218 /* rt files will have no perag structure */ 219 if (!info->pag) 220 return 0; 221 222 /* Are there any shared blocks here? 
*/ 223 flen = 0; 224 cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, info->pag); 225 226 error = xfs_refcount_find_shared(cur, rec->rm_startblock, 227 rec->rm_blockcount, &fbno, &flen, false); 228 229 xfs_btree_del_cursor(cur, error); 230 if (error) 231 return error; 232 233 *stat = flen > 0; 234 return 0; 235 } 236 237 static inline void 238 xfs_getfsmap_format( 239 struct xfs_mount *mp, 240 struct xfs_fsmap *xfm, 241 struct xfs_getfsmap_info *info) 242 { 243 struct fsmap *rec; 244 245 trace_xfs_getfsmap_mapping(mp, xfm); 246 247 rec = &info->fsmap_recs[info->head->fmh_entries++]; 248 xfs_fsmap_from_internal(rec, xfm); 249 } 250 251 static inline bool 252 xfs_getfsmap_rec_before_start( 253 struct xfs_getfsmap_info *info, 254 const struct xfs_rmap_irec *rec, 255 xfs_daddr_t rec_daddr) 256 { 257 if (info->low_daddr != XFS_BUF_DADDR_NULL) 258 return rec_daddr < info->low_daddr; 259 if (info->low.rm_blockcount) 260 return xfs_rmap_compare(rec, &info->low) < 0; 261 return false; 262 } 263 264 /* 265 * Format a reverse mapping for getfsmap, having translated rm_startblock 266 * into the appropriate daddr units. Pass in a nonzero @len_daddr if the 267 * length could be larger than rm_blockcount in struct xfs_rmap_irec. 268 */ 269 STATIC int 270 xfs_getfsmap_helper( 271 struct xfs_trans *tp, 272 struct xfs_getfsmap_info *info, 273 const struct xfs_rmap_irec *rec, 274 xfs_daddr_t rec_daddr, 275 xfs_daddr_t len_daddr) 276 { 277 struct xfs_fsmap fmr; 278 struct xfs_mount *mp = tp->t_mountp; 279 bool shared; 280 int error; 281 282 if (fatal_signal_pending(current)) 283 return -EINTR; 284 285 if (len_daddr == 0) 286 len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount); 287 288 /* 289 * Filter out records that start before our startpoint, if the 290 * caller requested that. 
291 */ 292 if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) { 293 rec_daddr += len_daddr; 294 if (info->next_daddr < rec_daddr) 295 info->next_daddr = rec_daddr; 296 return 0; 297 } 298 299 /* 300 * For an info->last query, we're looking for a gap between the last 301 * mapping emitted and the high key specified by userspace. If the 302 * user's query spans less than 1 fsblock, then info->high and 303 * info->low will have the same rm_startblock, which causes rec_daddr 304 * and next_daddr to be the same. Therefore, use the end_daddr that 305 * we calculated from userspace's high key to synthesize the record. 306 * Note that if the btree query found a mapping, there won't be a gap. 307 */ 308 if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) 309 rec_daddr = info->end_daddr; 310 311 /* Are we just counting mappings? */ 312 if (info->head->fmh_count == 0) { 313 if (info->head->fmh_entries == UINT_MAX) 314 return -ECANCELED; 315 316 if (rec_daddr > info->next_daddr) 317 info->head->fmh_entries++; 318 319 if (info->last) 320 return 0; 321 322 info->head->fmh_entries++; 323 324 rec_daddr += len_daddr; 325 if (info->next_daddr < rec_daddr) 326 info->next_daddr = rec_daddr; 327 return 0; 328 } 329 330 /* 331 * If the record starts past the last physical block we saw, 332 * then we've found a gap. Report the gap as being owned by 333 * whatever the caller specified is the missing owner. 
334 */ 335 if (rec_daddr > info->next_daddr) { 336 if (info->head->fmh_entries >= info->head->fmh_count) 337 return -ECANCELED; 338 339 fmr.fmr_device = info->dev; 340 fmr.fmr_physical = info->next_daddr; 341 fmr.fmr_owner = info->missing_owner; 342 fmr.fmr_offset = 0; 343 fmr.fmr_length = rec_daddr - info->next_daddr; 344 fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; 345 xfs_getfsmap_format(mp, &fmr, info); 346 } 347 348 if (info->last) 349 goto out; 350 351 /* Fill out the extent we found */ 352 if (info->head->fmh_entries >= info->head->fmh_count) 353 return -ECANCELED; 354 355 trace_xfs_fsmap_mapping(mp, info->dev, 356 info->pag ? info->pag->pag_agno : NULLAGNUMBER, rec); 357 358 fmr.fmr_device = info->dev; 359 fmr.fmr_physical = rec_daddr; 360 error = xfs_fsmap_owner_from_rmap(&fmr, rec); 361 if (error) 362 return error; 363 fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset); 364 fmr.fmr_length = len_daddr; 365 if (rec->rm_flags & XFS_RMAP_UNWRITTEN) 366 fmr.fmr_flags |= FMR_OF_PREALLOC; 367 if (rec->rm_flags & XFS_RMAP_ATTR_FORK) 368 fmr.fmr_flags |= FMR_OF_ATTR_FORK; 369 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) 370 fmr.fmr_flags |= FMR_OF_EXTENT_MAP; 371 if (fmr.fmr_flags == 0) { 372 error = xfs_getfsmap_is_shared(tp, info, rec, &shared); 373 if (error) 374 return error; 375 if (shared) 376 fmr.fmr_flags |= FMR_OF_SHARED; 377 } 378 379 xfs_getfsmap_format(mp, &fmr, info); 380 out: 381 rec_daddr += len_daddr; 382 if (info->next_daddr < rec_daddr) 383 info->next_daddr = rec_daddr; 384 return 0; 385 } 386 387 /* Transform a rmapbt irec into a fsmap */ 388 STATIC int 389 xfs_getfsmap_datadev_helper( 390 struct xfs_btree_cur *cur, 391 const struct xfs_rmap_irec *rec, 392 void *priv) 393 { 394 struct xfs_mount *mp = cur->bc_mp; 395 struct xfs_getfsmap_info *info = priv; 396 xfs_fsblock_t fsb; 397 xfs_daddr_t rec_daddr; 398 399 fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock); 400 rec_daddr = XFS_FSB_TO_DADDR(mp, fsb); 401 402 return 
xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0); 403 } 404 405 /* Transform a bnobt irec into a fsmap */ 406 STATIC int 407 xfs_getfsmap_datadev_bnobt_helper( 408 struct xfs_btree_cur *cur, 409 const struct xfs_alloc_rec_incore *rec, 410 void *priv) 411 { 412 struct xfs_mount *mp = cur->bc_mp; 413 struct xfs_getfsmap_info *info = priv; 414 struct xfs_rmap_irec irec; 415 xfs_daddr_t rec_daddr; 416 417 rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_ag.pag->pag_agno, 418 rec->ar_startblock); 419 420 irec.rm_startblock = rec->ar_startblock; 421 irec.rm_blockcount = rec->ar_blockcount; 422 irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ 423 irec.rm_offset = 0; 424 irec.rm_flags = 0; 425 426 return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0); 427 } 428 429 /* Set rmap flags based on the getfsmap flags */ 430 static void 431 xfs_getfsmap_set_irec_flags( 432 struct xfs_rmap_irec *irec, 433 const struct xfs_fsmap *fmr) 434 { 435 irec->rm_flags = 0; 436 if (fmr->fmr_flags & FMR_OF_ATTR_FORK) 437 irec->rm_flags |= XFS_RMAP_ATTR_FORK; 438 if (fmr->fmr_flags & FMR_OF_EXTENT_MAP) 439 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK; 440 if (fmr->fmr_flags & FMR_OF_PREALLOC) 441 irec->rm_flags |= XFS_RMAP_UNWRITTEN; 442 } 443 444 static inline bool 445 rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) 446 { 447 if (!xfs_has_reflink(mp)) 448 return true; 449 if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner)) 450 return true; 451 if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | 452 XFS_RMAP_UNWRITTEN)) 453 return true; 454 return false; 455 } 456 457 /* Execute a getfsmap query against the regular data device. 
 *
 * This is the common driver shared by the rmapbt and bnobt backends.  It
 * converts the fsmap keys into per-AG rmap keys stored in @info, then walks
 * every AG from the one containing the low key to the one containing the
 * high key, calling @query_fn once per AG and once more (with info->last
 * set) on the final AG to report any trailing gap.
 *
 * @query_fn receives the transaction, @info, a pointer to the cursor slot
 * owned by this function, and @priv unchanged.
 *
 * Returns 0 immediately if the low key lies at or beyond the end of the
 * data device; otherwise 0 or the first error encountered.
 */
STATIC int
__xfs_getfsmap_datadev(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info,
	int				(*query_fn)(struct xfs_trans *,
						    struct xfs_getfsmap_info *,
						    struct xfs_btree_cur **,
						    void *),
	void				*priv)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_perag		*pag;
	struct xfs_btree_cur		*bt_cur = NULL;
	xfs_fsblock_t			start_fsb;
	xfs_fsblock_t			end_fsb;
	xfs_agnumber_t			start_ag;
	xfs_agnumber_t			end_ag;
	uint64_t			eofs;
	int				error = 0;

	/* Nothing to report if the query starts past the end of the device. */
	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
	/* The high key is clamped to the last sector of the device. */
	end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));

	/*
	 * Convert the fsmap low/high keys to AG based keys.  Initialize
	 * low to the fsmap low key and max out the high key to the end
	 * of the AG.
	 */
	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
	if (error)
		return error;
	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);

	/* Adjust the low key if we are continuing from where we left off. */
	if (info->low.rm_blockcount == 0) {
		/* No previous record from which to continue */
	} else if (rmap_not_shareable(mp, &info->low)) {
		/* Last record seen was an unshareable extent */
		info->low.rm_owner = 0;
		info->low.rm_offset = 0;

		/* Skip past the whole extent; nothing else can map it. */
		start_fsb += info->low.rm_blockcount;
		if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
			return 0;
	} else {
		/* Last record seen was a shareable file data extent */
		info->low.rm_offset += info->low.rm_blockcount;
	}
	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);

	/* Until the last AG is reached, the high key matches everything. */
	info->high.rm_startblock = -1U;
	info->high.rm_owner = ULLONG_MAX;
	info->high.rm_offset = ULLONG_MAX;
	info->high.rm_blockcount = 0;
	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;

	start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
	end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);

	for_each_perag_range(mp, start_ag, end_ag, pag) {
		/*
		 * Set the AG high key from the fsmap high key if this
		 * is the last AG that we're querying.
		 */
		info->pag = pag;
		if (pag->pag_agno == end_ag) {
			info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
					end_fsb);
			info->high.rm_offset = XFS_BB_TO_FSBT(mp,
					keys[1].fmr_offset);
			error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
			if (error)
				break;
			xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
		}

		/* Drop the previous AG's cursor and AGF before moving on. */
		if (bt_cur) {
			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
			bt_cur = NULL;
			xfs_trans_brelse(tp, info->agf_bp);
			info->agf_bp = NULL;
		}

		error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp);
		if (error)
			break;

		trace_xfs_fsmap_low_key(mp, info->dev, pag->pag_agno,
				&info->low);
		trace_xfs_fsmap_high_key(mp, info->dev, pag->pag_agno,
				&info->high);

		error = query_fn(tp, info, &bt_cur, priv);
		if (error)
			break;

		/*
		 * Set the AG low key to the start of the AG prior to
		 * moving on to the next AG.
		 */
		if (pag->pag_agno == start_ag)
			memset(&info->low, 0, sizeof(info->low));

		/*
		 * If this is the last AG, report any gap at the end of it
		 * before we drop the reference to the perag when the loop
		 * terminates.
		 */
		if (pag->pag_agno == end_ag) {
			info->last = true;
			error = query_fn(tp, info, &bt_cur, priv);
			if (error)
				break;
		}
		info->pag = NULL;
	}

	/* Tear down whatever the last iteration (or an early break) left. */
	if (bt_cur)
		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
							 XFS_BTREE_NOERROR);
	if (info->agf_bp) {
		xfs_trans_brelse(tp, info->agf_bp);
		info->agf_bp = NULL;
	}
	/* info->pag is still set only if we broke out of the loop body. */
	if (info->pag) {
		xfs_perag_rele(info->pag);
		info->pag = NULL;
	} else if (pag) {
		/* loop termination case */
		xfs_perag_rele(pag);
	}

	return error;
}

/*
 * Actually query the rmap btree.
 *
 * On the normal pass a new rmapbt cursor is stored in *curpp and the range
 * info->low..info->high is queried.  On the info->last pass the existing
 * cursor in *curpp is reused and info->high is fed to the helper as a
 * pseudo-record so the trailing gap is reported.
 */
STATIC int
xfs_getfsmap_datadev_rmapbt_query(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	struct xfs_btree_cur		**curpp,
	void				*priv)
{
	/* Report any gap at the end of the last AG. */
	if (info->last)
		return xfs_getfsmap_datadev_helper(*curpp, &info->high, info);

	/* Allocate cursor for this AG and query_range it. */
	*curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
			info->pag);
	return xfs_rmap_query_range(*curpp, &info->low, &info->high,
			xfs_getfsmap_datadev_helper, info);
}

/*
 * Execute a getfsmap query against the regular data device rmapbt.
 * Gaps between rmapbt records are reported as XFS_FMR_OWN_FREE.
 */
STATIC int
xfs_getfsmap_datadev_rmapbt(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	info->missing_owner = XFS_FMR_OWN_FREE;
	return __xfs_getfsmap_datadev(tp, keys, info,
			xfs_getfsmap_datadev_rmapbt_query, NULL);
}

/*
 * Actually query the bno btree.
*/ 631 STATIC int 632 xfs_getfsmap_datadev_bnobt_query( 633 struct xfs_trans *tp, 634 struct xfs_getfsmap_info *info, 635 struct xfs_btree_cur **curpp, 636 void *priv) 637 { 638 struct xfs_alloc_rec_incore *key = priv; 639 640 /* Report any gap at the end of the last AG. */ 641 if (info->last) 642 return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info); 643 644 /* Allocate cursor for this AG and query_range it. */ 645 *curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp, 646 info->pag); 647 key->ar_startblock = info->low.rm_startblock; 648 key[1].ar_startblock = info->high.rm_startblock; 649 return xfs_alloc_query_range(*curpp, key, &key[1], 650 xfs_getfsmap_datadev_bnobt_helper, info); 651 } 652 653 /* Execute a getfsmap query against the regular data device's bnobt. */ 654 STATIC int 655 xfs_getfsmap_datadev_bnobt( 656 struct xfs_trans *tp, 657 const struct xfs_fsmap *keys, 658 struct xfs_getfsmap_info *info) 659 { 660 struct xfs_alloc_rec_incore akeys[2]; 661 662 memset(akeys, 0, sizeof(akeys)); 663 info->missing_owner = XFS_FMR_OWN_UNKNOWN; 664 return __xfs_getfsmap_datadev(tp, keys, info, 665 xfs_getfsmap_datadev_bnobt_query, &akeys[0]); 666 } 667 668 /* Execute a getfsmap query against the log device. */ 669 STATIC int 670 xfs_getfsmap_logdev( 671 struct xfs_trans *tp, 672 const struct xfs_fsmap *keys, 673 struct xfs_getfsmap_info *info) 674 { 675 struct xfs_mount *mp = tp->t_mountp; 676 struct xfs_rmap_irec rmap; 677 xfs_daddr_t rec_daddr, len_daddr; 678 xfs_fsblock_t start_fsb, end_fsb; 679 uint64_t eofs; 680 681 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 682 if (keys[0].fmr_physical >= eofs) 683 return 0; 684 start_fsb = XFS_BB_TO_FSBT(mp, 685 keys[0].fmr_physical + keys[0].fmr_length); 686 end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 687 688 /* Adjust the low key if we are continuing from where we left off. 
*/ 689 if (keys[0].fmr_length > 0) 690 info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); 691 692 trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb); 693 trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb); 694 695 if (start_fsb > 0) 696 return 0; 697 698 /* Fabricate an rmap entry for the external log device. */ 699 rmap.rm_startblock = 0; 700 rmap.rm_blockcount = mp->m_sb.sb_logblocks; 701 rmap.rm_owner = XFS_RMAP_OWN_LOG; 702 rmap.rm_offset = 0; 703 rmap.rm_flags = 0; 704 705 rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock); 706 len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount); 707 return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr); 708 } 709 710 #ifdef CONFIG_XFS_RT 711 /* Transform a rtbitmap "record" into a fsmap */ 712 STATIC int 713 xfs_getfsmap_rtdev_rtbitmap_helper( 714 struct xfs_mount *mp, 715 struct xfs_trans *tp, 716 const struct xfs_rtalloc_rec *rec, 717 void *priv) 718 { 719 struct xfs_getfsmap_info *info = priv; 720 struct xfs_rmap_irec irec; 721 xfs_rtblock_t rtbno; 722 xfs_daddr_t rec_daddr, len_daddr; 723 724 rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); 725 rec_daddr = XFS_FSB_TO_BB(mp, rtbno); 726 irec.rm_startblock = rtbno; 727 728 rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount); 729 len_daddr = XFS_FSB_TO_BB(mp, rtbno); 730 irec.rm_blockcount = rtbno; 731 732 irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ 733 irec.rm_offset = 0; 734 irec.rm_flags = 0; 735 736 return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr); 737 } 738 739 /* Execute a getfsmap query against the realtime device rtbitmap. 
*/ 740 STATIC int 741 xfs_getfsmap_rtdev_rtbitmap( 742 struct xfs_trans *tp, 743 const struct xfs_fsmap *keys, 744 struct xfs_getfsmap_info *info) 745 { 746 747 struct xfs_rtalloc_rec ahigh = { 0 }; 748 struct xfs_mount *mp = tp->t_mountp; 749 xfs_rtblock_t start_rtb; 750 xfs_rtblock_t end_rtb; 751 xfs_rtxnum_t high; 752 uint64_t eofs; 753 int error; 754 755 eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents)); 756 if (keys[0].fmr_physical >= eofs) 757 return 0; 758 start_rtb = XFS_BB_TO_FSBT(mp, 759 keys[0].fmr_physical + keys[0].fmr_length); 760 end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 761 762 info->missing_owner = XFS_FMR_OWN_UNKNOWN; 763 764 /* Adjust the low key if we are continuing from where we left off. */ 765 if (keys[0].fmr_length > 0) { 766 info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb); 767 if (info->low_daddr >= eofs) 768 return 0; 769 } 770 771 trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb); 772 trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb); 773 774 xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP); 775 776 /* 777 * Set up query parameters to return free rtextents covering the range 778 * we want. 779 */ 780 high = xfs_rtb_to_rtxup(mp, end_rtb); 781 error = xfs_rtalloc_query_range(mp, tp, xfs_rtb_to_rtx(mp, start_rtb), 782 high, xfs_getfsmap_rtdev_rtbitmap_helper, info); 783 if (error) 784 goto err; 785 786 /* 787 * Report any gaps at the end of the rtbitmap by simulating a null 788 * rmap starting at the block after the end of the query range. 789 */ 790 info->last = true; 791 ahigh.ar_startext = min(mp->m_sb.sb_rextents, high); 792 793 error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info); 794 if (error) 795 goto err; 796 err: 797 xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP); 798 return error; 799 } 800 #endif /* CONFIG_XFS_RT */ 801 802 /* Do we recognize the device? 
*/ 803 STATIC bool 804 xfs_getfsmap_is_valid_device( 805 struct xfs_mount *mp, 806 struct xfs_fsmap *fm) 807 { 808 if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || 809 fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev)) 810 return true; 811 if (mp->m_logdev_targp && 812 fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev)) 813 return true; 814 if (mp->m_rtdev_targp && 815 fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev)) 816 return true; 817 return false; 818 } 819 820 /* Ensure that the low key is less than the high key. */ 821 STATIC bool 822 xfs_getfsmap_check_keys( 823 struct xfs_fsmap *low_key, 824 struct xfs_fsmap *high_key) 825 { 826 if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { 827 if (low_key->fmr_offset) 828 return false; 829 } 830 if (high_key->fmr_flags != -1U && 831 (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | 832 FMR_OF_EXTENT_MAP))) { 833 if (high_key->fmr_offset && high_key->fmr_offset != -1ULL) 834 return false; 835 } 836 if (high_key->fmr_length && high_key->fmr_length != -1ULL) 837 return false; 838 839 if (low_key->fmr_device > high_key->fmr_device) 840 return false; 841 if (low_key->fmr_device < high_key->fmr_device) 842 return true; 843 844 if (low_key->fmr_physical > high_key->fmr_physical) 845 return false; 846 if (low_key->fmr_physical < high_key->fmr_physical) 847 return true; 848 849 if (low_key->fmr_owner > high_key->fmr_owner) 850 return false; 851 if (low_key->fmr_owner < high_key->fmr_owner) 852 return true; 853 854 if (low_key->fmr_offset > high_key->fmr_offset) 855 return false; 856 if (low_key->fmr_offset < high_key->fmr_offset) 857 return true; 858 859 return false; 860 } 861 862 /* 863 * There are only two devices if we didn't configure RT devices at build time. 
 */
#ifdef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	3
#else
#define XFS_GETFSMAP_DEVS	2
#endif /* CONFIG_XFS_RT */

/*
 * Get filesystem's extents as described in head, and format for output. Fills
 * in the supplied records array until there are no more reverse mappings to
 * return or head.fmh_entries == head.fmh_count.  In the second case, this
 * function returns -ECANCELED to indicate that more records would have been
 * returned.
 *
 * Returns -EINVAL if the input flags, either key's device, or the key
 * ordering is invalid; otherwise 0, -ECANCELED, or the first error from
 * transaction allocation or a device handler.  head->fmh_oflags is set to
 * FMH_OF_DEV_T once the keys have been validated.
 *
 * Key to Confusion
 * ----------------
 * There are multiple levels of keys and counters at work here:
 * xfs_fsmap_head.fmh_keys	-- low and high fsmap keys passed in;
 *				   these reflect fs-wide sector addrs.
 * dkeys			-- fmh_keys used to query each device;
 *				   these are fmh_keys but w/ the low key
 *				   bumped up by fmr_length.
 * xfs_getfsmap_info.next_daddr	-- next disk addr we expect to see; this
 *				   is how we detect gaps in the fsmap
 *				   records and report them.
 * xfs_getfsmap_info.low/high	-- per-AG low/high keys computed from
 *				   dkeys; used to query the metadata.
 */
STATIC int
xfs_getfsmap(
	struct xfs_mount		*mp,
	struct xfs_fsmap_head		*head,
	struct fsmap			*fsmap_recs)
{
	struct xfs_trans		*tp = NULL;
	struct xfs_fsmap		dkeys[2];	/* per-dev keys */
	struct xfs_getfsmap_dev		handlers[XFS_GETFSMAP_DEVS];
	struct xfs_getfsmap_info	info = { NULL };
	bool				use_rmap;
	int				i;
	int				error = 0;

	if (head->fmh_iflags & ~FMH_IF_VALID)
		return -EINVAL;
	if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
		return -EINVAL;
	if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
		return -EINVAL;

	/* The rmapbt backend is only used for CAP_SYS_ADMIN callers. */
	use_rmap = xfs_has_rmapbt(mp) &&
		   has_capability_noaudit(current, CAP_SYS_ADMIN);
	head->fmh_entries = 0;

	/* Set up our device handlers. */
	memset(handlers, 0, sizeof(handlers));
	handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
	if (use_rmap)
		handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
	else
		handlers[0].fn = xfs_getfsmap_datadev_bnobt;
	/* Only an external log gets its own handler. */
	if (mp->m_logdev_targp != mp->m_ddev_targp) {
		handlers[1].nr_sectors = XFS_FSB_TO_BB(mp,
						       mp->m_sb.sb_logblocks);
		handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
		handlers[1].fn = xfs_getfsmap_logdev;
	}
#ifdef CONFIG_XFS_RT
	if (mp->m_rtdev_targp) {
		handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
		handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
		handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
	}
#endif /* CONFIG_XFS_RT */

	/* Order the handlers by device id; unused slots have a NULL fn. */
	xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
			xfs_getfsmap_dev_compare);

	/*
	 * To continue where we left off, we allow userspace to use the
	 * last mapping from a previous call as the low key of the next.
	 * This is identified by a non-zero length in the low key.  We
	 * have to increment the low key in this scenario to ensure we
	 * don't return the same mapping again, and instead return the
	 * very next mapping.
	 *
	 * If the low key mapping refers to file data, the same physical
	 * blocks could be mapped to several other files/offsets.
	 * According to rmapbt record ordering, the minimal next
	 * possible record for the block range is the next starting
	 * offset in the same inode. Therefore, each fsmap backend bumps
	 * the file offset to continue the search appropriately.  For
	 * all other low key mapping types (attr blocks, metadata), each
	 * fsmap backend bumps the physical offset as there can be no
	 * other mapping for the same physical block range.
	 */
	dkeys[0] = head->fmh_keys[0];
	/* The per-device high key defaults to all ones. */
	memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));

	info.next_daddr = head->fmh_keys[0].fmr_physical +
			  head->fmh_keys[0].fmr_length;
	info.end_daddr = XFS_BUF_DADDR_NULL;
	info.fsmap_recs = fsmap_recs;
	info.head = head;

	/* For each device we support... */
	for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
		/* Is this device within the range the user asked for? */
		if (!handlers[i].fn)
			continue;
		if (head->fmh_keys[0].fmr_device > handlers[i].dev)
			continue;
		if (head->fmh_keys[1].fmr_device < handlers[i].dev)
			break;

		/*
		 * If this device number matches the high key, we have
		 * to pass the high key to the handler to limit the
		 * query results.  If the device number exceeds the
		 * low key, zero out the low key so that we get
		 * everything from the beginning.
		 */
		if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
			dkeys[1] = head->fmh_keys[1];
			info.end_daddr = min(handlers[i].nr_sectors - 1,
					     dkeys[1].fmr_physical);
		}
		if (handlers[i].dev > head->fmh_keys[0].fmr_device)
			memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));

		/*
		 * Grab an empty transaction so that we can use its recursive
		 * buffer locking abilities to detect cycles in the rmapbt
		 * without deadlocking.
		 */
		error = xfs_trans_alloc_empty(mp, &tp);
		if (error)
			break;

		/* Reset the per-device query state before each handler. */
		info.dev = handlers[i].dev;
		info.last = false;
		info.pag = NULL;
		info.low_daddr = XFS_BUF_DADDR_NULL;
		info.low.rm_blockcount = 0;
		error = handlers[i].fn(tp, dkeys, &info);
		if (error)
			break;
		xfs_trans_cancel(tp);
		tp = NULL;
		/* The next device starts reporting from its first sector. */
		info.next_daddr = 0;
	}

	/* A handler error leaves the transaction for us to cancel here. */
	if (tp)
		xfs_trans_cancel(tp);
	head->fmh_oflags = FMH_OF_DEV_T;
	return error;
}

/*
 * Handle the getfsmap ioctl request described by the userspace header @arg.
 *
 * The header is copied in and rejected with -EINVAL if any reserved field of
 * the header or of either key is nonzero.  Mappings are gathered into a
 * kernel buffer by xfs_getfsmap() and copied out in batches until the result
 * set is exhausted or the caller's array is full.  Finally the header, with
 * the updated entry count and output flags, is copied back.
 *
 * Returns 0 on success, -EFAULT if a user copy fails, -ENOMEM if no buffer
 * can be allocated, or an error from xfs_getfsmap().
 */
int
xfs_ioc_getfsmap(
	struct xfs_inode	*ip,
	struct fsmap_head	__user *arg)
{
	struct xfs_fsmap_head	xhead = {0};
	struct fsmap_head	head;
	struct fsmap		*recs;
	unsigned int		count;
	__u32			last_flags = 0;
	bool			done = false;
	int			error;

	if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
		return -EFAULT;
	if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
	    memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
		       sizeof(head.fmh_keys[0].fmr_reserved)) ||
	    memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
		       sizeof(head.fmh_keys[1].fmr_reserved)))
		return -EINVAL;

	/*
	 * Use an internal memory buffer so that we don't have to copy fsmap
	 * data to userspace while holding locks.  Start by trying to allocate
	 * up to 128k for the buffer, but fall back to a single page if needed.
	 */
	count = min_t(unsigned int, head.fmh_count,
			131072 / sizeof(struct fsmap));
	recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
	if (!recs) {
		count = min_t(unsigned int, head.fmh_count,
				PAGE_SIZE / sizeof(struct fsmap));
		recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
		if (!recs)
			return -ENOMEM;
	}

	xhead.fmh_iflags = head.fmh_iflags;
	xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
	xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);

	trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
	trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);

	head.fmh_entries = 0;
	do {
		struct fsmap __user	*user_recs;
		struct fsmap		*last_rec;

		/* Each batch lands right after the records already copied. */
		user_recs = &arg->fmh_recs[head.fmh_entries];
		xhead.fmh_entries = 0;
		xhead.fmh_count = min_t(unsigned int, count,
					head.fmh_count - head.fmh_entries);

		/* Run query, record how many entries we got. */
		error = xfs_getfsmap(ip->i_mount, &xhead, recs);
		switch (error) {
		case 0:
			/*
			 * There are no more records in the result set.  Copy
			 * whatever we got to userspace and break out.
			 */
			done = true;
			break;
		case -ECANCELED:
			/*
			 * The internal memory buffer is full.  Copy whatever
			 * records we got to userspace and go again if we have
			 * not yet filled the userspace buffer.
			 */
			error = 0;
			break;
		default:
			goto out_free;
		}
		head.fmh_entries += xhead.fmh_entries;
		head.fmh_oflags = xhead.fmh_oflags;

		/*
		 * If the caller wanted a record count or there aren't any
		 * new records to return, we're done.
		 */
		if (head.fmh_count == 0 || xhead.fmh_entries == 0)
			break;

		/* Copy all the records we got out to userspace. */
		if (copy_to_user(user_recs, recs,
				 xhead.fmh_entries * sizeof(struct fsmap))) {
			error = -EFAULT;
			goto out_free;
		}

		/* Remember the last record flags we copied to userspace. */
		last_rec = &recs[xhead.fmh_entries - 1];
		last_flags = last_rec->fmr_flags;

		/* Set up the low key for the next iteration. */
		xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
		trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
	} while (!done && head.fmh_entries < head.fmh_count);

	/*
	 * If there are no more records in the query result set and we're not
	 * in counting mode, mark the last record returned with the LAST flag.
	 */
	if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
		struct fsmap __user	*user_rec;

		last_flags |= FMR_OF_LAST;
		user_rec = &arg->fmh_recs[head.fmh_entries - 1];

		/* Only the flags word of that record is rewritten. */
		if (copy_to_user(&user_rec->fmr_flags, &last_flags,
					sizeof(last_flags))) {
			error = -EFAULT;
			goto out_free;
		}
	}

	/* copy back header */
	if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
		error = -EFAULT;
		goto out_free;
	}

out_free:
	kvfree(recs);
	return error;
}