1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_inode.h" 14 #include "xfs_btree.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_ialloc_btree.h" 17 #include "xfs_iwalk.h" 18 #include "xfs_itable.h" 19 #include "xfs_error.h" 20 #include "xfs_icache.h" 21 #include "xfs_health.h" 22 #include "xfs_trans.h" 23 24 /* 25 * Bulk Stat 26 * ========= 27 * 28 * Use the inode walking functions to fill out struct xfs_bulkstat for every 29 * allocated inode, then pass the stat information to some externally provided 30 * iteration function. 31 */ 32 33 struct xfs_bstat_chunk { 34 bulkstat_one_fmt_pf formatter; 35 struct xfs_ibulk *breq; 36 struct xfs_bulkstat *buf; 37 }; 38 39 static inline bool 40 want_metadir_file( 41 struct xfs_inode *ip, 42 struct xfs_ibulk *breq) 43 { 44 return xfs_is_metadir_inode(ip) && (breq->flags & XFS_IBULK_METADIR); 45 } 46 47 /* 48 * Fill out the bulkstat info for a single inode and report it somewhere. 49 * 50 * bc->breq->lastino is effectively the inode cursor as we walk through the 51 * filesystem. Therefore, we update it any time we need to move the cursor 52 * forward, regardless of whether or not we're sending any bstat information 53 * back to userspace. If the inode is internal metadata or, has been freed 54 * out from under us, we just simply keep going. 55 * 56 * However, if any other type of error happens we want to stop right where we 57 * are so that userspace will call back with exact number of the bad inode and 58 * we can send back an error code. 59 * 60 * Note that if the formatter tells us there's no space left in the buffer we 61 * move the cursor forward and abort the walk. 62 */ 63 STATIC int 64 xfs_bulkstat_one_int( 65 struct xfs_mount *mp, 66 struct mnt_idmap *idmap, 67 struct xfs_trans *tp, 68 xfs_ino_t ino, 69 struct xfs_bstat_chunk *bc) 70 { 71 struct user_namespace *sb_userns = mp->m_super->s_user_ns; 72 struct xfs_inode *ip; /* incore inode pointer */ 73 struct inode *inode; 74 struct xfs_bulkstat *buf = bc->buf; 75 xfs_extnum_t nextents; 76 int error = -EINVAL; 77 vfsuid_t vfsuid; 78 vfsgid_t vfsgid; 79 80 error = xfs_iget(mp, tp, ino, 81 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), 82 XFS_ILOCK_SHARED, &ip); 83 if (error == -ENOENT || error == -EINVAL) 84 goto out_advance; 85 if (error) 86 goto out; 87 88 /* Reload the incore unlinked list to avoid failure in inodegc. */ 89 if (xfs_inode_unlinked_incomplete(ip)) { 90 error = xfs_inode_reload_unlinked_bucket(tp, ip); 91 if (error) { 92 xfs_iunlock(ip, XFS_ILOCK_SHARED); 93 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 94 xfs_irele(ip); 95 return error; 96 } 97 } 98 99 ASSERT(ip != NULL); 100 ASSERT(ip->i_imap.im_blkno != 0); 101 inode = VFS_I(ip); 102 vfsuid = i_uid_into_vfsuid(idmap, inode); 103 vfsgid = i_gid_into_vfsgid(idmap, inode); 104 105 /* 106 * If caller wants files from the metadata directories, push out the 107 * bare minimum information for enabling scrub. 108 */ 109 if (want_metadir_file(ip, bc->breq)) { 110 memset(buf, 0, sizeof(*buf)); 111 buf->bs_ino = ino; 112 buf->bs_gen = inode->i_generation; 113 buf->bs_mode = inode->i_mode & S_IFMT; 114 xfs_bulkstat_health(ip, buf); 115 buf->bs_version = XFS_BULKSTAT_VERSION_V5; 116 xfs_iunlock(ip, XFS_ILOCK_SHARED); 117 xfs_irele(ip); 118 119 error = bc->formatter(bc->breq, buf); 120 if (!error || error == -ECANCELED) 121 goto out_advance; 122 goto out; 123 } 124 125 /* If this is a private inode, don't leak its details to userspace. */ 126 if (IS_PRIVATE(inode) || xfs_is_sb_inum(mp, ino)) { 127 xfs_iunlock(ip, XFS_ILOCK_SHARED); 128 xfs_irele(ip); 129 error = -EINVAL; 130 goto out_advance; 131 } 132 133 /* xfs_iget returns the following without needing 134 * further change. 135 */ 136 buf->bs_projectid = ip->i_projid; 137 buf->bs_ino = ino; 138 buf->bs_uid = from_kuid(sb_userns, vfsuid_into_kuid(vfsuid)); 139 buf->bs_gid = from_kgid(sb_userns, vfsgid_into_kgid(vfsgid)); 140 buf->bs_size = ip->i_disk_size; 141 142 buf->bs_nlink = inode->i_nlink; 143 buf->bs_atime = inode_get_atime_sec(inode); 144 buf->bs_atime_nsec = inode_get_atime_nsec(inode); 145 buf->bs_mtime = inode_get_mtime_sec(inode); 146 buf->bs_mtime_nsec = inode_get_mtime_nsec(inode); 147 buf->bs_ctime = inode_get_ctime_sec(inode); 148 buf->bs_ctime_nsec = inode_get_ctime_nsec(inode); 149 buf->bs_gen = inode->i_generation; 150 buf->bs_mode = inode->i_mode; 151 152 buf->bs_xflags = xfs_ip2xflags(ip); 153 buf->bs_extsize_blks = ip->i_extsize; 154 155 nextents = xfs_ifork_nextents(&ip->i_df); 156 if (!(bc->breq->flags & XFS_IBULK_NREXT64)) 157 buf->bs_extents = min(nextents, XFS_MAX_EXTCNT_DATA_FORK_SMALL); 158 else 159 buf->bs_extents64 = nextents; 160 161 xfs_bulkstat_health(ip, buf); 162 buf->bs_aextents = xfs_ifork_nextents(&ip->i_af); 163 buf->bs_forkoff = xfs_inode_fork_boff(ip); 164 buf->bs_version = XFS_BULKSTAT_VERSION_V5; 165 166 if (xfs_has_v3inodes(mp)) { 167 buf->bs_btime = ip->i_crtime.tv_sec; 168 buf->bs_btime_nsec = ip->i_crtime.tv_nsec; 169 if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 170 buf->bs_cowextsize_blks = ip->i_cowextsize; 171 } 172 173 switch (ip->i_df.if_format) { 174 case XFS_DINODE_FMT_DEV: 175 buf->bs_rdev = sysv_encode_dev(inode->i_rdev); 176 buf->bs_blksize = BLKDEV_IOSIZE; 177 buf->bs_blocks = 0; 178 break; 179 case XFS_DINODE_FMT_LOCAL: 180 buf->bs_rdev = 0; 181 buf->bs_blksize = mp->m_sb.sb_blocksize; 182 buf->bs_blocks = 0; 183 break; 184 case XFS_DINODE_FMT_EXTENTS: 185 case XFS_DINODE_FMT_BTREE: 186 buf->bs_rdev = 0; 187 buf->bs_blksize = mp->m_sb.sb_blocksize; 188 buf->bs_blocks = ip->i_nblocks + ip->i_delayed_blks; 189 break; 190 } 191 xfs_iunlock(ip, XFS_ILOCK_SHARED); 192 xfs_irele(ip); 193 194 error = bc->formatter(bc->breq, buf); 195 if (error == -ECANCELED) 196 goto out_advance; 197 if (error) 198 goto out; 199 200 out_advance: 201 /* 202 * Advance the cursor to the inode that comes after the one we just 203 * looked at. We want the caller to move along if the bulkstat 204 * information was copied successfully; if we tried to grab the inode 205 * but it's no longer allocated; or if it's internal metadata. 206 */ 207 bc->breq->startino = ino + 1; 208 out: 209 return error; 210 } 211 212 /* Bulkstat a single inode. */ 213 int 214 xfs_bulkstat_one( 215 struct xfs_ibulk *breq, 216 bulkstat_one_fmt_pf formatter) 217 { 218 struct xfs_bstat_chunk bc = { 219 .formatter = formatter, 220 .breq = breq, 221 }; 222 struct xfs_trans *tp; 223 int error; 224 225 if (breq->idmap != &nop_mnt_idmap) { 226 xfs_warn_ratelimited(breq->mp, 227 "bulkstat not supported inside of idmapped mounts."); 228 return -EINVAL; 229 } 230 231 ASSERT(breq->icount == 1); 232 233 bc.buf = kzalloc(sizeof(struct xfs_bulkstat), 234 GFP_KERNEL | __GFP_RETRY_MAYFAIL); 235 if (!bc.buf) 236 return -ENOMEM; 237 238 /* 239 * Grab an empty transaction so that we can use its recursive buffer 240 * locking abilities to detect cycles in the inobt without deadlocking. 241 */ 242 error = xfs_trans_alloc_empty(breq->mp, &tp); 243 if (error) 244 goto out; 245 246 error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp, 247 breq->startino, &bc); 248 xfs_trans_cancel(tp); 249 out: 250 kfree(bc.buf); 251 252 /* 253 * If we reported one inode to userspace then we abort because we hit 254 * the end of the buffer. Don't leak that back to userspace. 255 */ 256 if (error == -ECANCELED) 257 error = 0; 258 259 return error; 260 } 261 262 static int 263 xfs_bulkstat_iwalk( 264 struct xfs_mount *mp, 265 struct xfs_trans *tp, 266 xfs_ino_t ino, 267 void *data) 268 { 269 struct xfs_bstat_chunk *bc = data; 270 int error; 271 272 error = xfs_bulkstat_one_int(mp, bc->breq->idmap, tp, ino, data); 273 /* bulkstat just skips over missing inodes */ 274 if (error == -ENOENT || error == -EINVAL) 275 return 0; 276 return error; 277 } 278 279 /* 280 * Check the incoming lastino parameter. 281 * 282 * We allow any inode value that could map to physical space inside the 283 * filesystem because if there are no inodes there, bulkstat moves on to the 284 * next chunk. In other words, the magic agino value of zero takes us to the 285 * first chunk in the AG, and an agino value past the end of the AG takes us to 286 * the first chunk in the next AG. 287 * 288 * Therefore we can end early if the requested inode is beyond the end of the 289 * filesystem or doesn't map properly. 290 */ 291 static inline bool 292 xfs_bulkstat_already_done( 293 struct xfs_mount *mp, 294 xfs_ino_t startino) 295 { 296 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 297 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino); 298 299 return agno >= mp->m_sb.sb_agcount || 300 startino != XFS_AGINO_TO_INO(mp, agno, agino); 301 } 302 303 /* Return stat information in bulk (by-inode) for the filesystem. */ 304 int 305 xfs_bulkstat( 306 struct xfs_ibulk *breq, 307 bulkstat_one_fmt_pf formatter) 308 { 309 struct xfs_bstat_chunk bc = { 310 .formatter = formatter, 311 .breq = breq, 312 }; 313 struct xfs_trans *tp; 314 unsigned int iwalk_flags = 0; 315 int error; 316 317 if (breq->idmap != &nop_mnt_idmap) { 318 xfs_warn_ratelimited(breq->mp, 319 "bulkstat not supported inside of idmapped mounts."); 320 return -EINVAL; 321 } 322 if (xfs_bulkstat_already_done(breq->mp, breq->startino)) 323 return 0; 324 325 bc.buf = kzalloc(sizeof(struct xfs_bulkstat), 326 GFP_KERNEL | __GFP_RETRY_MAYFAIL); 327 if (!bc.buf) 328 return -ENOMEM; 329 330 /* 331 * Grab an empty transaction so that we can use its recursive buffer 332 * locking abilities to detect cycles in the inobt without deadlocking. 333 */ 334 error = xfs_trans_alloc_empty(breq->mp, &tp); 335 if (error) 336 goto out; 337 338 if (breq->flags & XFS_IBULK_SAME_AG) 339 iwalk_flags |= XFS_IWALK_SAME_AG; 340 341 error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags, 342 xfs_bulkstat_iwalk, breq->icount, &bc); 343 xfs_trans_cancel(tp); 344 out: 345 kfree(bc.buf); 346 347 /* 348 * We found some inodes, so clear the error status and return them. 349 * The lastino pointer will point directly at the inode that triggered 350 * any error that occurred, so on the next call the error will be 351 * triggered again and propagated to userspace as there will be no 352 * formatted inodes in the buffer. 353 */ 354 if (breq->ocount > 0) 355 error = 0; 356 357 return error; 358 } 359 360 /* Convert bulkstat (v5) to bstat (v1). */ 361 void 362 xfs_bulkstat_to_bstat( 363 struct xfs_mount *mp, 364 struct xfs_bstat *bs1, 365 const struct xfs_bulkstat *bstat) 366 { 367 /* memset is needed here because of padding holes in the structure. */ 368 memset(bs1, 0, sizeof(struct xfs_bstat)); 369 bs1->bs_ino = bstat->bs_ino; 370 bs1->bs_mode = bstat->bs_mode; 371 bs1->bs_nlink = bstat->bs_nlink; 372 bs1->bs_uid = bstat->bs_uid; 373 bs1->bs_gid = bstat->bs_gid; 374 bs1->bs_rdev = bstat->bs_rdev; 375 bs1->bs_blksize = bstat->bs_blksize; 376 bs1->bs_size = bstat->bs_size; 377 bs1->bs_atime.tv_sec = bstat->bs_atime; 378 bs1->bs_mtime.tv_sec = bstat->bs_mtime; 379 bs1->bs_ctime.tv_sec = bstat->bs_ctime; 380 bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec; 381 bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec; 382 bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec; 383 bs1->bs_blocks = bstat->bs_blocks; 384 bs1->bs_xflags = bstat->bs_xflags; 385 bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks); 386 bs1->bs_extents = bstat->bs_extents; 387 bs1->bs_gen = bstat->bs_gen; 388 bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF; 389 bs1->bs_forkoff = bstat->bs_forkoff; 390 bs1->bs_projid_hi = bstat->bs_projectid >> 16; 391 bs1->bs_sick = bstat->bs_sick; 392 bs1->bs_checked = bstat->bs_checked; 393 bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks); 394 bs1->bs_dmevmask = 0; 395 bs1->bs_dmstate = 0; 396 bs1->bs_aextents = bstat->bs_aextents; 397 } 398 399 struct xfs_inumbers_chunk { 400 inumbers_fmt_pf formatter; 401 struct xfs_ibulk *breq; 402 }; 403 404 /* 405 * INUMBERS 406 * ======== 407 * This is how we export inode btree records to userspace, so that XFS tools 408 * can figure out where inodes are allocated. 409 */ 410 411 /* 412 * Format the inode group structure and report it somewhere. 413 * 414 * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk 415 * through the filesystem so we move it forward unless there was a runtime 416 * error. If the formatter tells us the buffer is now full we also move the 417 * cursor forward and abort the walk. 418 */ 419 STATIC int 420 xfs_inumbers_walk( 421 struct xfs_mount *mp, 422 struct xfs_trans *tp, 423 xfs_agnumber_t agno, 424 const struct xfs_inobt_rec_incore *irec, 425 void *data) 426 { 427 struct xfs_inumbers inogrp = { 428 .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino), 429 .xi_alloccount = irec->ir_count - irec->ir_freecount, 430 .xi_allocmask = ~irec->ir_free, 431 .xi_version = XFS_INUMBERS_VERSION_V5, 432 }; 433 struct xfs_inumbers_chunk *ic = data; 434 int error; 435 436 error = ic->formatter(ic->breq, &inogrp); 437 if (error && error != -ECANCELED) 438 return error; 439 440 ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + 441 XFS_INODES_PER_CHUNK; 442 return error; 443 } 444 445 /* 446 * Return inode number table for the filesystem. 447 */ 448 int 449 xfs_inumbers( 450 struct xfs_ibulk *breq, 451 inumbers_fmt_pf formatter) 452 { 453 struct xfs_inumbers_chunk ic = { 454 .formatter = formatter, 455 .breq = breq, 456 }; 457 struct xfs_trans *tp; 458 int error = 0; 459 460 if (xfs_bulkstat_already_done(breq->mp, breq->startino)) 461 return 0; 462 463 /* 464 * Grab an empty transaction so that we can use its recursive buffer 465 * locking abilities to detect cycles in the inobt without deadlocking. 466 */ 467 error = xfs_trans_alloc_empty(breq->mp, &tp); 468 if (error) 469 goto out; 470 471 error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, 472 xfs_inumbers_walk, breq->icount, &ic); 473 xfs_trans_cancel(tp); 474 out: 475 476 /* 477 * We found some inode groups, so clear the error status and return 478 * them. The lastino pointer will point directly at the inode that 479 * triggered any error that occurred, so on the next call the error 480 * will be triggered again and propagated to userspace as there will be 481 * no formatted inode groups in the buffer. 482 */ 483 if (breq->ocount > 0) 484 error = 0; 485 486 return error; 487 } 488 489 /* Convert an inumbers (v5) struct to a inogrp (v1) struct. */ 490 void 491 xfs_inumbers_to_inogrp( 492 struct xfs_inogrp *ig1, 493 const struct xfs_inumbers *ig) 494 { 495 /* memset is needed here because of padding holes in the structure. */ 496 memset(ig1, 0, sizeof(struct xfs_inogrp)); 497 ig1->xi_startino = ig->xi_startino; 498 ig1->xi_alloccount = ig->xi_alloccount; 499 ig1->xi_allocmask = ig->xi_allocmask; 500 } 501