1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_inode.h" 14 #include "xfs_trans.h" 15 #include "xfs_btree.h" 16 #include "xfs_rmap_btree.h" 17 #include "xfs_trace.h" 18 #include "xfs_rmap.h" 19 #include "xfs_alloc.h" 20 #include "xfs_bit.h" 21 #include <linux/fsmap.h> 22 #include "xfs_fsmap.h" 23 #include "xfs_refcount.h" 24 #include "xfs_refcount_btree.h" 25 #include "xfs_alloc_btree.h" 26 #include "xfs_rtbitmap.h" 27 #include "xfs_ag.h" 28 #include "xfs_rtgroup.h" 29 #include "xfs_rtrmap_btree.h" 30 #include "xfs_rtrefcount_btree.h" 31 32 /* Convert an xfs_fsmap to an fsmap. */ 33 static void 34 xfs_fsmap_from_internal( 35 struct fsmap *dest, 36 struct xfs_fsmap *src) 37 { 38 dest->fmr_device = src->fmr_device; 39 dest->fmr_flags = src->fmr_flags; 40 dest->fmr_physical = BBTOB(src->fmr_physical); 41 dest->fmr_owner = src->fmr_owner; 42 dest->fmr_offset = BBTOB(src->fmr_offset); 43 dest->fmr_length = BBTOB(src->fmr_length); 44 dest->fmr_reserved[0] = 0; 45 dest->fmr_reserved[1] = 0; 46 dest->fmr_reserved[2] = 0; 47 } 48 49 /* Convert an fsmap to an xfs_fsmap. */ 50 static void 51 xfs_fsmap_to_internal( 52 struct xfs_fsmap *dest, 53 struct fsmap *src) 54 { 55 dest->fmr_device = src->fmr_device; 56 dest->fmr_flags = src->fmr_flags; 57 dest->fmr_physical = BTOBBT(src->fmr_physical); 58 dest->fmr_owner = src->fmr_owner; 59 dest->fmr_offset = BTOBBT(src->fmr_offset); 60 dest->fmr_length = BTOBBT(src->fmr_length); 61 } 62 63 /* Convert an fsmap owner into an rmapbt owner. 
*/
static int
xfs_fsmap_owner_to_rmap(
	struct xfs_rmap_irec	*dest,
	const struct xfs_fsmap	*src)
{
	uint64_t		rmap_owner;

	/* Without the special-owner flag the owner value is used verbatim. */
	if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
		dest->rm_owner = src->fmr_owner;
		return 0;
	}

	/*
	 * Pick the rmap owner code matching the special fsmap owner.  Owners
	 * we do not recognise (or do not implement) are rejected without
	 * touching @dest.
	 */
	switch (src->fmr_owner) {
	case XFS_FMR_OWN_FREE:
		rmap_owner = XFS_RMAP_OWN_NULL;
		break;
	case XFS_FMR_OWN_UNKNOWN:
		rmap_owner = XFS_RMAP_OWN_UNKNOWN;
		break;
	case XFS_FMR_OWN_FS:
		rmap_owner = XFS_RMAP_OWN_FS;
		break;
	case XFS_FMR_OWN_LOG:
		rmap_owner = XFS_RMAP_OWN_LOG;
		break;
	case XFS_FMR_OWN_AG:
		rmap_owner = XFS_RMAP_OWN_AG;
		break;
	case XFS_FMR_OWN_INOBT:
		rmap_owner = XFS_RMAP_OWN_INOBT;
		break;
	case XFS_FMR_OWN_INODES:
		rmap_owner = XFS_RMAP_OWN_INODES;
		break;
	case XFS_FMR_OWN_REFC:
		rmap_owner = XFS_RMAP_OWN_REFC;
		break;
	case XFS_FMR_OWN_COW:
		rmap_owner = XFS_RMAP_OWN_COW;
		break;
	case 0:			/* "lowest owner id possible" */
	case -1ULL:		/* "highest owner id possible" */
		rmap_owner = src->fmr_owner;
		break;
	case XFS_FMR_OWN_DEFECTIVE:	/* not implemented */
		/* fall through */
	default:
		return -EINVAL;
	}

	dest->rm_owner = rmap_owner;
	return 0;
}

/* Convert an rmapbt owner into an fsmap owner.
*/ 115 static int 116 xfs_fsmap_owner_from_frec( 117 struct xfs_fsmap *dest, 118 const struct xfs_fsmap_irec *frec) 119 { 120 dest->fmr_flags = 0; 121 if (!XFS_RMAP_NON_INODE_OWNER(frec->owner)) { 122 dest->fmr_owner = frec->owner; 123 return 0; 124 } 125 dest->fmr_flags |= FMR_OF_SPECIAL_OWNER; 126 127 switch (frec->owner) { 128 case XFS_RMAP_OWN_FS: 129 dest->fmr_owner = XFS_FMR_OWN_FS; 130 break; 131 case XFS_RMAP_OWN_LOG: 132 dest->fmr_owner = XFS_FMR_OWN_LOG; 133 break; 134 case XFS_RMAP_OWN_AG: 135 dest->fmr_owner = XFS_FMR_OWN_AG; 136 break; 137 case XFS_RMAP_OWN_INOBT: 138 dest->fmr_owner = XFS_FMR_OWN_INOBT; 139 break; 140 case XFS_RMAP_OWN_INODES: 141 dest->fmr_owner = XFS_FMR_OWN_INODES; 142 break; 143 case XFS_RMAP_OWN_REFC: 144 dest->fmr_owner = XFS_FMR_OWN_REFC; 145 break; 146 case XFS_RMAP_OWN_COW: 147 dest->fmr_owner = XFS_FMR_OWN_COW; 148 break; 149 case XFS_RMAP_OWN_NULL: /* "free" */ 150 dest->fmr_owner = XFS_FMR_OWN_FREE; 151 break; 152 default: 153 ASSERT(0); 154 return -EFSCORRUPTED; 155 } 156 return 0; 157 } 158 159 /* getfsmap query state */ 160 struct xfs_getfsmap_info { 161 struct xfs_fsmap_head *head; 162 struct fsmap *fsmap_recs; /* mapping records */ 163 struct xfs_buf *agf_bp; /* AGF, for refcount queries */ 164 struct xfs_group *group; /* group info, if applicable */ 165 xfs_daddr_t next_daddr; /* next daddr we expect */ 166 /* daddr of low fsmap key when we're using the rtbitmap */ 167 xfs_daddr_t low_daddr; 168 /* daddr of high fsmap key, or the last daddr on the device */ 169 xfs_daddr_t end_daddr; 170 u64 missing_owner; /* owner of holes */ 171 u32 dev; /* device id */ 172 /* 173 * Low rmap key for the query. If low.rm_blockcount is nonzero, this 174 * is the second (or later) call to retrieve the recordset in pieces. 175 * xfs_getfsmap_rec_before_start will compare all records retrieved 176 * by the rmapbt query to filter out any records that start before 177 * the last record. 
178 */ 179 struct xfs_rmap_irec low; 180 struct xfs_rmap_irec high; /* high rmap key */ 181 bool last; /* last extent? */ 182 }; 183 184 /* Associate a device with a getfsmap handler. */ 185 struct xfs_getfsmap_dev { 186 u32 dev; 187 int (*fn)(struct xfs_trans *tp, 188 const struct xfs_fsmap *keys, 189 struct xfs_getfsmap_info *info); 190 sector_t nr_sectors; 191 }; 192 193 /* Compare two getfsmap device handlers. */ 194 static int 195 xfs_getfsmap_dev_compare( 196 const void *p1, 197 const void *p2) 198 { 199 const struct xfs_getfsmap_dev *d1 = p1; 200 const struct xfs_getfsmap_dev *d2 = p2; 201 202 return d1->dev - d2->dev; 203 } 204 205 /* Decide if this mapping is shared. */ 206 STATIC int 207 xfs_getfsmap_is_shared( 208 struct xfs_trans *tp, 209 struct xfs_getfsmap_info *info, 210 const struct xfs_fsmap_irec *frec, 211 bool *stat) 212 { 213 struct xfs_mount *mp = tp->t_mountp; 214 struct xfs_btree_cur *cur; 215 xfs_agblock_t fbno; 216 xfs_extlen_t flen = 0; 217 int error; 218 219 *stat = false; 220 if (!xfs_has_reflink(mp) || !info->group) 221 return 0; 222 223 if (info->group->xg_type == XG_TYPE_RTG) 224 cur = xfs_rtrefcountbt_init_cursor(tp, to_rtg(info->group)); 225 else 226 cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, 227 to_perag(info->group)); 228 229 /* Are there any shared blocks here? 
*/ 230 error = xfs_refcount_find_shared(cur, frec->rec_key, 231 XFS_BB_TO_FSBT(mp, frec->len_daddr), &fbno, &flen, 232 false); 233 234 xfs_btree_del_cursor(cur, error); 235 if (error) 236 return error; 237 238 *stat = flen > 0; 239 return 0; 240 } 241 242 static inline void 243 xfs_getfsmap_format( 244 struct xfs_mount *mp, 245 struct xfs_fsmap *xfm, 246 struct xfs_getfsmap_info *info) 247 { 248 struct fsmap *rec; 249 250 trace_xfs_getfsmap_mapping(mp, xfm); 251 252 rec = &info->fsmap_recs[info->head->fmh_entries++]; 253 xfs_fsmap_from_internal(rec, xfm); 254 } 255 256 static inline bool 257 xfs_getfsmap_frec_before_start( 258 struct xfs_getfsmap_info *info, 259 const struct xfs_fsmap_irec *frec) 260 { 261 if (info->low_daddr != XFS_BUF_DADDR_NULL) 262 return frec->start_daddr < info->low_daddr; 263 if (info->low.rm_blockcount) { 264 struct xfs_rmap_irec rec = { 265 .rm_startblock = frec->rec_key, 266 .rm_owner = frec->owner, 267 .rm_flags = frec->rm_flags, 268 }; 269 270 return xfs_rmap_compare(&rec, &info->low) < 0; 271 } 272 273 return false; 274 } 275 276 /* 277 * Format a reverse mapping for getfsmap, having translated rm_startblock 278 * into the appropriate daddr units. Pass in a nonzero @len_daddr if the 279 * length could be larger than rm_blockcount in struct xfs_rmap_irec. 280 */ 281 STATIC int 282 xfs_getfsmap_helper( 283 struct xfs_trans *tp, 284 struct xfs_getfsmap_info *info, 285 const struct xfs_fsmap_irec *frec) 286 { 287 struct xfs_fsmap fmr; 288 struct xfs_mount *mp = tp->t_mountp; 289 bool shared; 290 int error = 0; 291 292 if (fatal_signal_pending(current)) 293 return -EINTR; 294 295 /* 296 * Filter out records that start before our startpoint, if the 297 * caller requested that. 298 */ 299 if (xfs_getfsmap_frec_before_start(info, frec)) 300 goto out; 301 302 /* Are we just counting mappings? 
*/ 303 if (info->head->fmh_count == 0) { 304 if (info->head->fmh_entries == UINT_MAX) 305 return -ECANCELED; 306 307 if (frec->start_daddr > info->next_daddr) 308 info->head->fmh_entries++; 309 310 if (info->last) 311 return 0; 312 313 info->head->fmh_entries++; 314 goto out; 315 } 316 317 /* 318 * If the record starts past the last physical block we saw, 319 * then we've found a gap. Report the gap as being owned by 320 * whatever the caller specified is the missing owner. 321 */ 322 if (frec->start_daddr > info->next_daddr) { 323 if (info->head->fmh_entries >= info->head->fmh_count) 324 return -ECANCELED; 325 326 fmr.fmr_device = info->dev; 327 fmr.fmr_physical = info->next_daddr; 328 fmr.fmr_owner = info->missing_owner; 329 fmr.fmr_offset = 0; 330 fmr.fmr_length = frec->start_daddr - info->next_daddr; 331 fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; 332 xfs_getfsmap_format(mp, &fmr, info); 333 } 334 335 if (info->last) 336 goto out; 337 338 /* Fill out the extent we found */ 339 if (info->head->fmh_entries >= info->head->fmh_count) 340 return -ECANCELED; 341 342 trace_xfs_fsmap_mapping(mp, info->dev, 343 info->group ? 
info->group->xg_gno : NULLAGNUMBER, 344 frec); 345 346 fmr.fmr_device = info->dev; 347 fmr.fmr_physical = frec->start_daddr; 348 error = xfs_fsmap_owner_from_frec(&fmr, frec); 349 if (error) 350 return error; 351 fmr.fmr_offset = XFS_FSB_TO_BB(mp, frec->offset); 352 fmr.fmr_length = frec->len_daddr; 353 if (frec->rm_flags & XFS_RMAP_UNWRITTEN) 354 fmr.fmr_flags |= FMR_OF_PREALLOC; 355 if (frec->rm_flags & XFS_RMAP_ATTR_FORK) 356 fmr.fmr_flags |= FMR_OF_ATTR_FORK; 357 if (frec->rm_flags & XFS_RMAP_BMBT_BLOCK) 358 fmr.fmr_flags |= FMR_OF_EXTENT_MAP; 359 if (fmr.fmr_flags == 0) { 360 error = xfs_getfsmap_is_shared(tp, info, frec, &shared); 361 if (error) 362 return error; 363 if (shared) 364 fmr.fmr_flags |= FMR_OF_SHARED; 365 } 366 367 xfs_getfsmap_format(mp, &fmr, info); 368 out: 369 info->next_daddr = max(info->next_daddr, 370 frec->start_daddr + frec->len_daddr); 371 return 0; 372 } 373 374 static inline int 375 xfs_getfsmap_group_helper( 376 struct xfs_getfsmap_info *info, 377 struct xfs_trans *tp, 378 struct xfs_group *xg, 379 xfs_agblock_t startblock, 380 xfs_extlen_t blockcount, 381 struct xfs_fsmap_irec *frec) 382 { 383 /* 384 * For an info->last query, we're looking for a gap between the last 385 * mapping emitted and the high key specified by userspace. If the 386 * user's query spans less than 1 fsblock, then info->high and 387 * info->low will have the same rm_startblock, which causes rec_daddr 388 * and next_daddr to be the same. Therefore, use the end_daddr that 389 * we calculated from userspace's high key to synthesize the record. 390 * Note that if the btree query found a mapping, there won't be a gap. 
391 */ 392 if (info->last) 393 frec->start_daddr = info->end_daddr + 1; 394 else 395 frec->start_daddr = xfs_gbno_to_daddr(xg, startblock); 396 397 frec->len_daddr = XFS_FSB_TO_BB(xg->xg_mount, blockcount); 398 return xfs_getfsmap_helper(tp, info, frec); 399 } 400 401 /* Transform a rmapbt irec into a fsmap */ 402 STATIC int 403 xfs_getfsmap_rmapbt_helper( 404 struct xfs_btree_cur *cur, 405 const struct xfs_rmap_irec *rec, 406 void *priv) 407 { 408 struct xfs_fsmap_irec frec = { 409 .owner = rec->rm_owner, 410 .offset = rec->rm_offset, 411 .rm_flags = rec->rm_flags, 412 .rec_key = rec->rm_startblock, 413 }; 414 struct xfs_getfsmap_info *info = priv; 415 416 return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group, 417 rec->rm_startblock, rec->rm_blockcount, &frec); 418 } 419 420 /* Transform a bnobt irec into a fsmap */ 421 STATIC int 422 xfs_getfsmap_datadev_bnobt_helper( 423 struct xfs_btree_cur *cur, 424 const struct xfs_alloc_rec_incore *rec, 425 void *priv) 426 { 427 struct xfs_fsmap_irec frec = { 428 .owner = XFS_RMAP_OWN_NULL, /* "free" */ 429 .rec_key = rec->ar_startblock, 430 }; 431 struct xfs_getfsmap_info *info = priv; 432 433 return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group, 434 rec->ar_startblock, rec->ar_blockcount, &frec); 435 } 436 437 /* Set rmap flags based on the getfsmap flags */ 438 static void 439 xfs_getfsmap_set_irec_flags( 440 struct xfs_rmap_irec *irec, 441 const struct xfs_fsmap *fmr) 442 { 443 irec->rm_flags = 0; 444 if (fmr->fmr_flags & FMR_OF_ATTR_FORK) 445 irec->rm_flags |= XFS_RMAP_ATTR_FORK; 446 if (fmr->fmr_flags & FMR_OF_EXTENT_MAP) 447 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK; 448 if (fmr->fmr_flags & FMR_OF_PREALLOC) 449 irec->rm_flags |= XFS_RMAP_UNWRITTEN; 450 } 451 452 static inline bool 453 rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) 454 { 455 if (!xfs_has_reflink(mp)) 456 return true; 457 if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner)) 458 return true; 459 if (r->rm_flags & 
(XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | 460 XFS_RMAP_UNWRITTEN)) 461 return true; 462 return false; 463 } 464 465 /* Execute a getfsmap query against the regular data device. */ 466 STATIC int 467 __xfs_getfsmap_datadev( 468 struct xfs_trans *tp, 469 const struct xfs_fsmap *keys, 470 struct xfs_getfsmap_info *info, 471 int (*query_fn)(struct xfs_trans *, 472 struct xfs_getfsmap_info *, 473 struct xfs_btree_cur **, 474 void *), 475 void *priv) 476 { 477 struct xfs_mount *mp = tp->t_mountp; 478 struct xfs_perag *pag = NULL; 479 struct xfs_btree_cur *bt_cur = NULL; 480 xfs_fsblock_t start_fsb; 481 xfs_fsblock_t end_fsb; 482 xfs_agnumber_t start_ag, end_ag; 483 uint64_t eofs; 484 int error = 0; 485 486 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 487 if (keys[0].fmr_physical >= eofs) 488 return 0; 489 start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical); 490 end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 491 492 /* 493 * Convert the fsmap low/high keys to AG based keys. Initialize 494 * low to the fsmap low key and max out the high key to the end 495 * of the AG. 496 */ 497 info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); 498 error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); 499 if (error) 500 return error; 501 info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length); 502 xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); 503 504 /* Adjust the low key if we are continuing from where we left off. 
*/ 505 if (info->low.rm_blockcount == 0) { 506 /* No previous record from which to continue */ 507 } else if (rmap_not_shareable(mp, &info->low)) { 508 /* Last record seen was an unshareable extent */ 509 info->low.rm_owner = 0; 510 info->low.rm_offset = 0; 511 512 start_fsb += info->low.rm_blockcount; 513 if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs) 514 return 0; 515 } else { 516 /* Last record seen was a shareable file data extent */ 517 info->low.rm_offset += info->low.rm_blockcount; 518 } 519 info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); 520 521 info->high.rm_startblock = -1U; 522 info->high.rm_owner = ULLONG_MAX; 523 info->high.rm_offset = ULLONG_MAX; 524 info->high.rm_blockcount = 0; 525 info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; 526 527 start_ag = XFS_FSB_TO_AGNO(mp, start_fsb); 528 end_ag = XFS_FSB_TO_AGNO(mp, end_fsb); 529 530 while ((pag = xfs_perag_next_range(mp, pag, start_ag, end_ag))) { 531 /* 532 * Set the AG high key from the fsmap high key if this 533 * is the last AG that we're querying. 
534 */ 535 info->group = pag_group(pag); 536 if (pag_agno(pag) == end_ag) { 537 info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp, 538 end_fsb); 539 info->high.rm_offset = XFS_BB_TO_FSBT(mp, 540 keys[1].fmr_offset); 541 error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); 542 if (error) 543 break; 544 xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); 545 } 546 547 if (bt_cur) { 548 xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR); 549 bt_cur = NULL; 550 xfs_trans_brelse(tp, info->agf_bp); 551 info->agf_bp = NULL; 552 } 553 554 error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp); 555 if (error) 556 break; 557 558 trace_xfs_fsmap_low_group_key(mp, info->dev, pag_agno(pag), 559 &info->low); 560 trace_xfs_fsmap_high_group_key(mp, info->dev, pag_agno(pag), 561 &info->high); 562 563 error = query_fn(tp, info, &bt_cur, priv); 564 if (error) 565 break; 566 567 /* 568 * Set the AG low key to the start of the AG prior to 569 * moving on to the next AG. 570 */ 571 if (pag_agno(pag) == start_ag) 572 memset(&info->low, 0, sizeof(info->low)); 573 574 /* 575 * If this is the last AG, report any gap at the end of it 576 * before we drop the reference to the perag when the loop 577 * terminates. 578 */ 579 if (pag_agno(pag) == end_ag) { 580 info->last = true; 581 error = query_fn(tp, info, &bt_cur, priv); 582 if (error) 583 break; 584 } 585 info->group = NULL; 586 } 587 588 if (bt_cur) 589 xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR : 590 XFS_BTREE_NOERROR); 591 if (info->agf_bp) { 592 xfs_trans_brelse(tp, info->agf_bp); 593 info->agf_bp = NULL; 594 } 595 if (info->group) { 596 xfs_perag_rele(pag); 597 info->group = NULL; 598 } else if (pag) { 599 /* loop termination case */ 600 xfs_perag_rele(pag); 601 } 602 603 return error; 604 } 605 606 /* Actually query the rmap btree. 
*/ 607 STATIC int 608 xfs_getfsmap_datadev_rmapbt_query( 609 struct xfs_trans *tp, 610 struct xfs_getfsmap_info *info, 611 struct xfs_btree_cur **curpp, 612 void *priv) 613 { 614 /* Report any gap at the end of the last AG. */ 615 if (info->last) 616 return xfs_getfsmap_rmapbt_helper(*curpp, &info->high, info); 617 618 /* Allocate cursor for this AG and query_range it. */ 619 *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp, 620 to_perag(info->group)); 621 return xfs_rmap_query_range(*curpp, &info->low, &info->high, 622 xfs_getfsmap_rmapbt_helper, info); 623 } 624 625 /* Execute a getfsmap query against the regular data device rmapbt. */ 626 STATIC int 627 xfs_getfsmap_datadev_rmapbt( 628 struct xfs_trans *tp, 629 const struct xfs_fsmap *keys, 630 struct xfs_getfsmap_info *info) 631 { 632 info->missing_owner = XFS_FMR_OWN_FREE; 633 return __xfs_getfsmap_datadev(tp, keys, info, 634 xfs_getfsmap_datadev_rmapbt_query, NULL); 635 } 636 637 /* Actually query the bno btree. */ 638 STATIC int 639 xfs_getfsmap_datadev_bnobt_query( 640 struct xfs_trans *tp, 641 struct xfs_getfsmap_info *info, 642 struct xfs_btree_cur **curpp, 643 void *priv) 644 { 645 struct xfs_alloc_rec_incore *key = priv; 646 647 /* Report any gap at the end of the last AG. */ 648 if (info->last) 649 return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info); 650 651 /* Allocate cursor for this AG and query_range it. */ 652 *curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp, 653 to_perag(info->group)); 654 key->ar_startblock = info->low.rm_startblock; 655 key[1].ar_startblock = info->high.rm_startblock; 656 return xfs_alloc_query_range(*curpp, key, &key[1], 657 xfs_getfsmap_datadev_bnobt_helper, info); 658 } 659 660 /* Execute a getfsmap query against the regular data device's bnobt. 
*/ 661 STATIC int 662 xfs_getfsmap_datadev_bnobt( 663 struct xfs_trans *tp, 664 const struct xfs_fsmap *keys, 665 struct xfs_getfsmap_info *info) 666 { 667 struct xfs_alloc_rec_incore akeys[2]; 668 669 memset(akeys, 0, sizeof(akeys)); 670 info->missing_owner = XFS_FMR_OWN_UNKNOWN; 671 return __xfs_getfsmap_datadev(tp, keys, info, 672 xfs_getfsmap_datadev_bnobt_query, &akeys[0]); 673 } 674 675 /* Execute a getfsmap query against the log device. */ 676 STATIC int 677 xfs_getfsmap_logdev( 678 struct xfs_trans *tp, 679 const struct xfs_fsmap *keys, 680 struct xfs_getfsmap_info *info) 681 { 682 struct xfs_fsmap_irec frec = { 683 .start_daddr = 0, 684 .rec_key = 0, 685 .owner = XFS_RMAP_OWN_LOG, 686 }; 687 struct xfs_mount *mp = tp->t_mountp; 688 xfs_fsblock_t start_fsb, end_fsb; 689 uint64_t eofs; 690 691 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 692 if (keys[0].fmr_physical >= eofs) 693 return 0; 694 start_fsb = XFS_BB_TO_FSBT(mp, 695 keys[0].fmr_physical + keys[0].fmr_length); 696 end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 697 698 /* Adjust the low key if we are continuing from where we left off. */ 699 if (keys[0].fmr_length > 0) 700 info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); 701 702 trace_xfs_fsmap_low_linear_key(mp, info->dev, start_fsb); 703 trace_xfs_fsmap_high_linear_key(mp, info->dev, end_fsb); 704 705 if (start_fsb > 0) 706 return 0; 707 708 /* Fabricate an rmap entry for the external log device. 
*/ 709 frec.len_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 710 return xfs_getfsmap_helper(tp, info, &frec); 711 } 712 713 #ifdef CONFIG_XFS_RT 714 /* Transform a rtbitmap "record" into a fsmap */ 715 STATIC int 716 xfs_getfsmap_rtdev_rtbitmap_helper( 717 struct xfs_rtgroup *rtg, 718 struct xfs_trans *tp, 719 const struct xfs_rtalloc_rec *rec, 720 void *priv) 721 { 722 struct xfs_fsmap_irec frec = { 723 .owner = XFS_RMAP_OWN_NULL, /* "free" */ 724 }; 725 struct xfs_mount *mp = rtg_mount(rtg); 726 struct xfs_getfsmap_info *info = priv; 727 xfs_rtblock_t start_rtb = 728 xfs_rtx_to_rtb(rtg, rec->ar_startext); 729 uint64_t rtbcount = 730 xfs_rtbxlen_to_blen(mp, rec->ar_extcount); 731 732 /* 733 * For an info->last query, we're looking for a gap between the last 734 * mapping emitted and the high key specified by userspace. If the 735 * user's query spans less than 1 fsblock, then info->high and 736 * info->low will have the same rm_startblock, which causes rec_daddr 737 * and next_daddr to be the same. Therefore, use the end_daddr that 738 * we calculated from userspace's high key to synthesize the record. 739 * Note that if the btree query found a mapping, there won't be a gap. 740 */ 741 if (info->last) 742 frec.start_daddr = info->end_daddr + 1; 743 else 744 frec.start_daddr = xfs_rtb_to_daddr(mp, start_rtb); 745 746 frec.len_daddr = XFS_FSB_TO_BB(mp, rtbcount); 747 return xfs_getfsmap_helper(tp, info, &frec); 748 } 749 750 /* Execute a getfsmap query against the realtime device rtbitmap. 
*/ 751 STATIC int 752 xfs_getfsmap_rtdev_rtbitmap( 753 struct xfs_trans *tp, 754 const struct xfs_fsmap *keys, 755 struct xfs_getfsmap_info *info) 756 { 757 struct xfs_mount *mp = tp->t_mountp; 758 xfs_rtblock_t start_rtbno, end_rtbno; 759 xfs_rtxnum_t start_rtx, end_rtx; 760 xfs_rgnumber_t start_rgno, end_rgno; 761 struct xfs_rtgroup *rtg = NULL; 762 uint64_t eofs; 763 int error; 764 765 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 766 if (keys[0].fmr_physical >= eofs) 767 return 0; 768 769 info->missing_owner = XFS_FMR_OWN_UNKNOWN; 770 771 /* Adjust the low key if we are continuing from where we left off. */ 772 start_rtbno = xfs_daddr_to_rtb(mp, 773 keys[0].fmr_physical + keys[0].fmr_length); 774 if (keys[0].fmr_length > 0) { 775 info->low_daddr = xfs_rtb_to_daddr(mp, start_rtbno); 776 if (info->low_daddr >= eofs) 777 return 0; 778 } 779 start_rtx = xfs_rtb_to_rtx(mp, start_rtbno); 780 start_rgno = xfs_rtb_to_rgno(mp, start_rtbno); 781 782 end_rtbno = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical)); 783 end_rgno = xfs_rtb_to_rgno(mp, end_rtbno); 784 785 trace_xfs_fsmap_low_linear_key(mp, info->dev, start_rtbno); 786 trace_xfs_fsmap_high_linear_key(mp, info->dev, end_rtbno); 787 788 end_rtx = -1ULL; 789 790 while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) { 791 if (rtg_rgno(rtg) == end_rgno) 792 end_rtx = xfs_rtb_to_rtx(mp, 793 end_rtbno + mp->m_sb.sb_rextsize - 1); 794 795 info->group = rtg_group(rtg); 796 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED); 797 error = xfs_rtalloc_query_range(rtg, tp, start_rtx, end_rtx, 798 xfs_getfsmap_rtdev_rtbitmap_helper, info); 799 if (error) 800 break; 801 802 /* 803 * Report any gaps at the end of the rtbitmap by simulating a 804 * zero-length free extent starting at the rtx after the end 805 * of the query range. 
806 */ 807 if (rtg_rgno(rtg) == end_rgno) { 808 struct xfs_rtalloc_rec ahigh = { 809 .ar_startext = min(end_rtx + 1, 810 rtg->rtg_extents), 811 }; 812 813 info->last = true; 814 error = xfs_getfsmap_rtdev_rtbitmap_helper(rtg, tp, 815 &ahigh, info); 816 if (error) 817 break; 818 } 819 820 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED); 821 info->group = NULL; 822 start_rtx = 0; 823 } 824 825 /* loop termination case */ 826 if (rtg) { 827 if (info->group) { 828 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED); 829 info->group = NULL; 830 } 831 xfs_rtgroup_rele(rtg); 832 } 833 834 return error; 835 } 836 837 /* Transform a realtime rmapbt record into a fsmap */ 838 STATIC int 839 xfs_getfsmap_rtdev_rmapbt_helper( 840 struct xfs_btree_cur *cur, 841 const struct xfs_rmap_irec *rec, 842 void *priv) 843 { 844 struct xfs_fsmap_irec frec = { 845 .owner = rec->rm_owner, 846 .offset = rec->rm_offset, 847 .rm_flags = rec->rm_flags, 848 .rec_key = rec->rm_startblock, 849 }; 850 struct xfs_getfsmap_info *info = priv; 851 852 return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group, 853 rec->rm_startblock, rec->rm_blockcount, &frec); 854 } 855 856 /* Actually query the rtrmap btree. */ 857 STATIC int 858 xfs_getfsmap_rtdev_rmapbt_query( 859 struct xfs_trans *tp, 860 struct xfs_getfsmap_info *info, 861 struct xfs_btree_cur **curpp) 862 { 863 struct xfs_rtgroup *rtg = to_rtg(info->group); 864 865 /* Query the rtrmapbt */ 866 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT); 867 *curpp = xfs_rtrmapbt_init_cursor(tp, rtg); 868 return xfs_rmap_query_range(*curpp, &info->low, &info->high, 869 xfs_getfsmap_rtdev_rmapbt_helper, info); 870 } 871 872 /* Execute a getfsmap query against the realtime device rmapbt. 
*/
STATIC int
xfs_getfsmap_rtdev_rmapbt(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_rtgroup		*rtg = NULL;
	struct xfs_btree_cur		*bt_cur = NULL;
	xfs_rtblock_t			start_rtb;
	xfs_rtblock_t			end_rtb;
	xfs_rgnumber_t			start_rg, end_rg;
	uint64_t			eofs;
	int				error = 0;

	/* Nothing to report if the low key lies beyond the rt device. */
	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_rtb = xfs_daddr_to_rtb(mp, keys[0].fmr_physical);
	end_rtb = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));

	/* Gaps between rtrmapbt records are reported as free space. */
	info->missing_owner = XFS_FMR_OWN_FREE;

	/*
	 * Convert the fsmap low/high keys to rtgroup based keys.  Initialize
	 * low to the fsmap low key and max out the high key to the end
	 * of the rtgroup.
	 */
	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
	if (error)
		return error;
	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);

	/* Adjust the low key if we are continuing from where we left off. */
	if (info->low.rm_blockcount == 0) {
		/* No previous record from which to continue */
	} else if (rmap_not_shareable(mp, &info->low)) {
		/* Last record seen was an unshareable extent */
		info->low.rm_owner = 0;
		info->low.rm_offset = 0;

		start_rtb += info->low.rm_blockcount;
		if (xfs_rtb_to_daddr(mp, start_rtb) >= eofs)
			return 0;
	} else {
		/* Last record seen was a shareable file data extent */
		info->low.rm_offset += info->low.rm_blockcount;
	}
	info->low.rm_startblock = xfs_rtb_to_rgbno(mp, start_rtb);

	info->high.rm_startblock = -1U;
	info->high.rm_owner = ULLONG_MAX;
	info->high.rm_offset = ULLONG_MAX;
	info->high.rm_blockcount = 0;
	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;

	start_rg = xfs_rtb_to_rgno(mp, start_rtb);
	end_rg = xfs_rtb_to_rgno(mp, end_rtb);

	while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rg, end_rg))) {
		/*
		 * Set the rtgroup high key from the fsmap high key if this
		 * is the last rtgroup that we're querying.
		 */
		info->group = rtg_group(rtg);
		if (rtg_rgno(rtg) == end_rg) {
			info->high.rm_startblock =
				xfs_rtb_to_rgbno(mp, end_rtb);
			info->high.rm_offset =
				XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
			error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
			if (error)
				break;
			xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
		}

		/*
		 * Unlock the previous rtgroup and drop its cursor; the lock
		 * was taken by xfs_getfsmap_rtdev_rmapbt_query.
		 */
		if (bt_cur) {
			xfs_rtgroup_unlock(to_rtg(bt_cur->bc_group),
					XFS_RTGLOCK_RMAP |
					XFS_RTGLOCK_REFCOUNT);
			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
			bt_cur = NULL;
		}

		trace_xfs_fsmap_low_group_key(mp, info->dev, rtg_rgno(rtg),
				&info->low);
		trace_xfs_fsmap_high_group_key(mp, info->dev, rtg_rgno(rtg),
				&info->high);

		error = xfs_getfsmap_rtdev_rmapbt_query(tp, info, &bt_cur);
		if (error)
			break;

		/*
		 * Set the rtgroup low key to the start of the rtgroup prior to
		 * moving on to the next rtgroup.
		 */
		if (rtg_rgno(rtg) == start_rg)
			memset(&info->low, 0, sizeof(info->low));

		/*
		 * If this is the last rtgroup, report any gap at the end of it
		 * before we drop the reference to the rtgroup when the loop
		 * terminates.
		 */
		if (rtg_rgno(rtg) == end_rg) {
			info->last = true;
			error = xfs_getfsmap_rtdev_rmapbt_helper(bt_cur,
					&info->high, info);
			if (error)
				break;
		}
		info->group = NULL;
	}

	/*
	 * A live cursor means the group it belongs to is still locked; unlock
	 * that group before deleting the cursor.
	 */
	if (bt_cur) {
		xfs_rtgroup_unlock(to_rtg(bt_cur->bc_group),
				XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT);
		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
							 XFS_BTREE_NOERROR);
	}

	/* loop termination case */
	if (rtg) {
		info->group = NULL;
		xfs_rtgroup_rele(rtg);
	}

	return error;
}
#endif /* CONFIG_XFS_RT */

/* Do we recognize the device?
*/ 1008 STATIC bool 1009 xfs_getfsmap_is_valid_device( 1010 struct xfs_mount *mp, 1011 struct xfs_fsmap *fm) 1012 { 1013 if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || 1014 fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev)) 1015 return true; 1016 if (mp->m_logdev_targp && 1017 fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev)) 1018 return true; 1019 if (mp->m_rtdev_targp && 1020 fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev)) 1021 return true; 1022 return false; 1023 } 1024 1025 /* Ensure that the low key is less than the high key. */ 1026 STATIC bool 1027 xfs_getfsmap_check_keys( 1028 struct xfs_fsmap *low_key, 1029 struct xfs_fsmap *high_key) 1030 { 1031 if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { 1032 if (low_key->fmr_offset) 1033 return false; 1034 } 1035 if (high_key->fmr_flags != -1U && 1036 (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | 1037 FMR_OF_EXTENT_MAP))) { 1038 if (high_key->fmr_offset && high_key->fmr_offset != -1ULL) 1039 return false; 1040 } 1041 if (high_key->fmr_length && high_key->fmr_length != -1ULL) 1042 return false; 1043 1044 if (low_key->fmr_device > high_key->fmr_device) 1045 return false; 1046 if (low_key->fmr_device < high_key->fmr_device) 1047 return true; 1048 1049 if (low_key->fmr_physical > high_key->fmr_physical) 1050 return false; 1051 if (low_key->fmr_physical < high_key->fmr_physical) 1052 return true; 1053 1054 if (low_key->fmr_owner > high_key->fmr_owner) 1055 return false; 1056 if (low_key->fmr_owner < high_key->fmr_owner) 1057 return true; 1058 1059 if (low_key->fmr_offset > high_key->fmr_offset) 1060 return false; 1061 if (low_key->fmr_offset < high_key->fmr_offset) 1062 return true; 1063 1064 return false; 1065 } 1066 1067 /* 1068 * There are only two devices if we didn't configure RT devices at build time. 
*/
#ifdef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	3
#else
#define XFS_GETFSMAP_DEVS	2
#endif /* CONFIG_XFS_RT */

/*
 * Get filesystem's extents as described in head, and format for output. Fills
 * in the supplied records array until there are no more reverse mappings to
 * return or head.fmh_entries == head.fmh_count.  In the second case, this
 * function returns -ECANCELED to indicate that more records would have been
 * returned.
 *
 * Key to Confusion
 * ----------------
 * There are multiple levels of keys and counters at work here:
 * xfs_fsmap_head.fmh_keys	-- low and high fsmap keys passed in;
 *				   these reflect fs-wide sector addrs.
 * dkeys			-- fmh_keys used to query each device;
 *				   these are fmh_keys but w/ the low key
 *				   bumped up by fmr_length.
 * xfs_getfsmap_info.next_daddr	-- next disk addr we expect to see; this
 *				   is how we detect gaps in the fsmap
 *				   records and report them.
 * xfs_getfsmap_info.low/high	-- per-AG low/high keys computed from
 *				   dkeys; used to query the metadata.
1096 */ 1097 STATIC int 1098 xfs_getfsmap( 1099 struct xfs_mount *mp, 1100 struct xfs_fsmap_head *head, 1101 struct fsmap *fsmap_recs) 1102 { 1103 struct xfs_trans *tp = NULL; 1104 struct xfs_fsmap dkeys[2]; /* per-dev keys */ 1105 struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; 1106 struct xfs_getfsmap_info info = { 1107 .fsmap_recs = fsmap_recs, 1108 .head = head, 1109 }; 1110 bool use_rmap; 1111 int i; 1112 int error = 0; 1113 1114 if (head->fmh_iflags & ~FMH_IF_VALID) 1115 return -EINVAL; 1116 if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) || 1117 !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) 1118 return -EINVAL; 1119 if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1])) 1120 return -EINVAL; 1121 1122 use_rmap = xfs_has_rmapbt(mp) && 1123 has_capability_noaudit(current, CAP_SYS_ADMIN); 1124 head->fmh_entries = 0; 1125 1126 /* Set up our device handlers. */ 1127 memset(handlers, 0, sizeof(handlers)); 1128 handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 1129 handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); 1130 if (use_rmap) 1131 handlers[0].fn = xfs_getfsmap_datadev_rmapbt; 1132 else 1133 handlers[0].fn = xfs_getfsmap_datadev_bnobt; 1134 if (mp->m_logdev_targp != mp->m_ddev_targp) { 1135 handlers[1].nr_sectors = XFS_FSB_TO_BB(mp, 1136 mp->m_sb.sb_logblocks); 1137 handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); 1138 handlers[1].fn = xfs_getfsmap_logdev; 1139 } 1140 #ifdef CONFIG_XFS_RT 1141 if (mp->m_rtdev_targp) { 1142 handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 1143 handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); 1144 if (use_rmap) 1145 handlers[2].fn = xfs_getfsmap_rtdev_rmapbt; 1146 else 1147 handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; 1148 } 1149 #endif /* CONFIG_XFS_RT */ 1150 1151 xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev), 1152 xfs_getfsmap_dev_compare); 1153 1154 /* 1155 * To continue where we left off, we 
allow userspace to use the 1156 * last mapping from a previous call as the low key of the next. 1157 * This is identified by a non-zero length in the low key. We 1158 * have to increment the low key in this scenario to ensure we 1159 * don't return the same mapping again, and instead return the 1160 * very next mapping. 1161 * 1162 * If the low key mapping refers to file data, the same physical 1163 * blocks could be mapped to several other files/offsets. 1164 * According to rmapbt record ordering, the minimal next 1165 * possible record for the block range is the next starting 1166 * offset in the same inode. Therefore, each fsmap backend bumps 1167 * the file offset to continue the search appropriately. For 1168 * all other low key mapping types (attr blocks, metadata), each 1169 * fsmap backend bumps the physical offset as there can be no 1170 * other mapping for the same physical block range. 1171 */ 1172 dkeys[0] = head->fmh_keys[0]; 1173 memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); 1174 1175 info.next_daddr = head->fmh_keys[0].fmr_physical + 1176 head->fmh_keys[0].fmr_length; 1177 1178 /* For each device we support... */ 1179 for (i = 0; i < XFS_GETFSMAP_DEVS; i++) { 1180 /* Is this device within the range the user asked for? */ 1181 if (!handlers[i].fn) 1182 continue; 1183 if (head->fmh_keys[0].fmr_device > handlers[i].dev) 1184 continue; 1185 if (head->fmh_keys[1].fmr_device < handlers[i].dev) 1186 break; 1187 1188 /* 1189 * If this device number matches the high key, we have to pass 1190 * the high key to the handler to limit the query results, and 1191 * set the end_daddr so that we can synthesize records at the 1192 * end of the query range or device. 
1193 */ 1194 if (handlers[i].dev == head->fmh_keys[1].fmr_device) { 1195 dkeys[1] = head->fmh_keys[1]; 1196 info.end_daddr = min(handlers[i].nr_sectors - 1, 1197 dkeys[1].fmr_physical); 1198 } else { 1199 info.end_daddr = handlers[i].nr_sectors - 1; 1200 } 1201 1202 /* 1203 * If the device number exceeds the low key, zero out the low 1204 * key so that we get everything from the beginning. 1205 */ 1206 if (handlers[i].dev > head->fmh_keys[0].fmr_device) 1207 memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); 1208 1209 /* 1210 * Grab an empty transaction so that we can use its recursive 1211 * buffer locking abilities to detect cycles in the rmapbt 1212 * without deadlocking. 1213 */ 1214 error = xfs_trans_alloc_empty(mp, &tp); 1215 if (error) 1216 break; 1217 1218 info.dev = handlers[i].dev; 1219 info.last = false; 1220 info.group = NULL; 1221 info.low_daddr = XFS_BUF_DADDR_NULL; 1222 info.low.rm_blockcount = 0; 1223 error = handlers[i].fn(tp, dkeys, &info); 1224 if (error) 1225 break; 1226 xfs_trans_cancel(tp); 1227 tp = NULL; 1228 info.next_daddr = 0; 1229 } 1230 1231 if (tp) 1232 xfs_trans_cancel(tp); 1233 head->fmh_oflags = FMH_OF_DEV_T; 1234 return error; 1235 } 1236 1237 int 1238 xfs_ioc_getfsmap( 1239 struct xfs_inode *ip, 1240 struct fsmap_head __user *arg) 1241 { 1242 struct xfs_fsmap_head xhead = {0}; 1243 struct fsmap_head head; 1244 struct fsmap *recs; 1245 unsigned int count; 1246 __u32 last_flags = 0; 1247 bool done = false; 1248 int error; 1249 1250 if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) 1251 return -EFAULT; 1252 if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || 1253 memchr_inv(head.fmh_keys[0].fmr_reserved, 0, 1254 sizeof(head.fmh_keys[0].fmr_reserved)) || 1255 memchr_inv(head.fmh_keys[1].fmr_reserved, 0, 1256 sizeof(head.fmh_keys[1].fmr_reserved))) 1257 return -EINVAL; 1258 1259 /* 1260 * Use an internal memory buffer so that we don't have to copy fsmap 1261 * data to userspace while holding locks. 
Start by trying to allocate 1262 * up to 128k for the buffer, but fall back to a single page if needed. 1263 */ 1264 count = min_t(unsigned int, head.fmh_count, 1265 131072 / sizeof(struct fsmap)); 1266 recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); 1267 if (!recs) { 1268 count = min_t(unsigned int, head.fmh_count, 1269 PAGE_SIZE / sizeof(struct fsmap)); 1270 recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); 1271 if (!recs) 1272 return -ENOMEM; 1273 } 1274 1275 xhead.fmh_iflags = head.fmh_iflags; 1276 xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); 1277 xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); 1278 1279 trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); 1280 trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); 1281 1282 head.fmh_entries = 0; 1283 do { 1284 struct fsmap __user *user_recs; 1285 struct fsmap *last_rec; 1286 1287 user_recs = &arg->fmh_recs[head.fmh_entries]; 1288 xhead.fmh_entries = 0; 1289 xhead.fmh_count = min_t(unsigned int, count, 1290 head.fmh_count - head.fmh_entries); 1291 1292 /* Run query, record how many entries we got. */ 1293 error = xfs_getfsmap(ip->i_mount, &xhead, recs); 1294 switch (error) { 1295 case 0: 1296 /* 1297 * There are no more records in the result set. Copy 1298 * whatever we got to userspace and break out. 1299 */ 1300 done = true; 1301 break; 1302 case -ECANCELED: 1303 /* 1304 * The internal memory buffer is full. Copy whatever 1305 * records we got to userspace and go again if we have 1306 * not yet filled the userspace buffer. 1307 */ 1308 error = 0; 1309 break; 1310 default: 1311 goto out_free; 1312 } 1313 head.fmh_entries += xhead.fmh_entries; 1314 head.fmh_oflags = xhead.fmh_oflags; 1315 1316 /* 1317 * If the caller wanted a record count or there aren't any 1318 * new records to return, we're done. 1319 */ 1320 if (head.fmh_count == 0 || xhead.fmh_entries == 0) 1321 break; 1322 1323 /* Copy all the records we got out to userspace. 
*/ 1324 if (copy_to_user(user_recs, recs, 1325 xhead.fmh_entries * sizeof(struct fsmap))) { 1326 error = -EFAULT; 1327 goto out_free; 1328 } 1329 1330 /* Remember the last record flags we copied to userspace. */ 1331 last_rec = &recs[xhead.fmh_entries - 1]; 1332 last_flags = last_rec->fmr_flags; 1333 1334 /* Set up the low key for the next iteration. */ 1335 xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); 1336 trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); 1337 } while (!done && head.fmh_entries < head.fmh_count); 1338 1339 /* 1340 * If there are no more records in the query result set and we're not 1341 * in counting mode, mark the last record returned with the LAST flag. 1342 */ 1343 if (done && head.fmh_count > 0 && head.fmh_entries > 0) { 1344 struct fsmap __user *user_rec; 1345 1346 last_flags |= FMR_OF_LAST; 1347 user_rec = &arg->fmh_recs[head.fmh_entries - 1]; 1348 1349 if (copy_to_user(&user_rec->fmr_flags, &last_flags, 1350 sizeof(last_flags))) { 1351 error = -EFAULT; 1352 goto out_free; 1353 } 1354 } 1355 1356 /* copy back header */ 1357 if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { 1358 error = -EFAULT; 1359 goto out_free; 1360 } 1361 1362 out_free: 1363 kvfree(recs); 1364 return error; 1365 } 1366