// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_error.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/readdir.h"

/* Call a function for every entry in a shortform directory. */
STATIC int
xchk_dir_walk_sf(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_name		name = {
		.name		= ".",
		.len		= 1,
		.type		= XFS_DIR3_FT_DIR,
	};
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_dir2_sf_entry *sfep;
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	xfs_ino_t		ino;
	xfs_dir2_dataptr_t	dapos;
	unsigned int		i;
	int			error;

	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
	ASSERT(sfp != NULL);

	/* dot entry */
	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
			geo->data_entry_offset);

	error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
	if (error)
		return error;

	/* dotdot entry */
	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
			geo->data_entry_offset +
			xfs_dir2_data_entsize(mp, sizeof(".") - 1));
	ino = xfs_dir2_sf_get_parent_ino(sfp);
	name.name = "..";
	name.len = 2;

	error = dirent_fn(sc, dp, dapos, &name, ino, priv);
	if (error)
		return error;

	/* iterate everything else */
	sfep = xfs_dir2_sf_firstentry(sfp);
	for (i = 0; i < sfp->count; i++) {
		dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
				xfs_dir2_sf_get_offset(sfep));
		ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
		name.name = sfep->name;
		name.len = sfep->namelen;
		name.type = xfs_dir2_sf_get_ftype(mp, sfep);

		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
		if (error)
			return error;

		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
	}

	return 0;
}

/* Call a function for every entry in a block directory. */
STATIC int
xchk_dir_walk_block(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_buf		*bp;
	unsigned int		off, next_off, end;
	int			error;

	error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
	if (error)
		return error;

	/* Walk each directory entry. */
	end = xfs_dir3_data_end_offset(geo, bp->b_addr);
	for (off = geo->data_entry_offset; off < end; off = next_off) {
		struct xfs_name			name = { };
		struct xfs_dir2_data_unused	*dup = bp->b_addr + off;
		struct xfs_dir2_data_entry	*dep = bp->b_addr + off;
		xfs_ino_t			ino;
		xfs_dir2_dataptr_t		dapos;

		/* Skip an empty entry. */
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			next_off = off + be16_to_cpu(dup->length);
			continue;
		}

		/* Otherwise, find the next entry and report it. */
		next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
		if (next_off > end)
			break;

		dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
		ino = be64_to_cpu(dep->inumber);
		name.name = dep->name;
		name.len = dep->namelen;
		name.type = xfs_dir2_data_get_ftype(mp, dep);

		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
		if (error)
			break;
	}

	xfs_trans_brelse(sc->tp, bp);
	return error;
}

/* Read a leaf-format directory buffer. */
STATIC int
xchk_read_leaf_dir_buf(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	struct xfs_da_geometry	*geo,
	xfs_dir2_off_t		*curoff,
	struct xfs_buf		**bpp)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	map;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
	xfs_dablk_t		last_da;
	xfs_dablk_t		map_off;
	xfs_dir2_off_t		new_off;

	*bpp = NULL;

	/*
	 * Look for mapped directory blocks at or above the current offset.
	 * Truncate down to the nearest directory block to start the scanning
	 * operation.
	 */
	last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
	map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));

	if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
		return 0;
	if (map.br_startoff >= last_da)
		return 0;
	xfs_trim_extent(&map, map_off, last_da - map_off);

	/* Read the directory block of that first mapping. */
	new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
	if (new_off > *curoff)
		*curoff = new_off;

	return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
}

/* Call a function for every entry in a leaf directory. */
STATIC int
xchk_dir_walk_leaf(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_buf		*bp = NULL;
	xfs_dir2_off_t		curoff = 0;
	unsigned int		offset = 0;
	int			error;

	/* Iterate every directory offset in this directory. */
	while (curoff < XFS_DIR2_LEAF_OFFSET) {
		struct xfs_name			name = { };
		struct xfs_dir2_data_unused	*dup;
		struct xfs_dir2_data_entry	*dep;
		xfs_ino_t			ino;
		unsigned int			length;
		xfs_dir2_dataptr_t		dapos;

		/*
		 * If we have no buffer, or we're off the end of the
		 * current buffer, need to get another one.
		 */
		if (!bp || offset >= geo->blksize) {
			if (bp) {
				xfs_trans_brelse(sc->tp, bp);
				bp = NULL;
			}

			error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
					&bp);
			if (error || !bp)
				break;

			/*
			 * Find our position in the block.
			 */
			offset = geo->data_entry_offset;
			curoff += geo->data_entry_offset;
		}

		/* Skip an empty entry. */
		dup = bp->b_addr + offset;
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			length = be16_to_cpu(dup->length);
			offset += length;
			curoff += length;
			continue;
		}

		/* Otherwise, find the next entry and report it. */
		dep = bp->b_addr + offset;
		length = xfs_dir2_data_entsize(mp, dep->namelen);

		dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
		ino = be64_to_cpu(dep->inumber);
		name.name = dep->name;
		name.len = dep->namelen;
		name.type = xfs_dir2_data_get_ftype(mp, dep);

		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
		if (error)
			break;

		/* Advance to the next entry. */
		offset += length;
		curoff += length;
	}

	if (bp)
		xfs_trans_brelse(sc->tp, bp);
	return error;
}

/*
 * Call a function for every entry in a directory.
 *
 * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.
 */
int
xchk_dir_walk(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_da_args	args = {
		.dp		= dp,
		.geo		= dp->i_mount->m_dir_geo,
		.trans		= sc->tp,
		.owner		= dp->i_ino,
	};
	int			error;

	if (xfs_is_shutdown(dp->i_mount))
		return -EIO;

	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
	xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	switch (xfs_dir2_format(&args, &error)) {
	case XFS_DIR2_FMT_SF:
		return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
	case XFS_DIR2_FMT_BLOCK:
		return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
	case XFS_DIR2_FMT_LEAF:
	case XFS_DIR2_FMT_NODE:
		return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
	default:
		return error;
	}
}

/*
 * Look up the inode number for an exact name in a directory.
 *
 * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.  Names are not
 * checked for correctness.
 */
int
xchk_dir_lookup(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	const struct xfs_name	*name,
	xfs_ino_t		*ino)
{
	struct xfs_da_args	args = {
		.dp		= dp,
		.geo		= dp->i_mount->m_dir_geo,
		.trans		= sc->tp,
		.name		= name->name,
		.namelen	= name->len,
		.filetype	= name->type,
		.hashval	= xfs_dir2_hashname(dp->i_mount, name),
		.whichfork	= XFS_DATA_FORK,
		.op_flags	= XFS_DA_OP_OKNOENT,
		.owner		= dp->i_ino,
	};
	int			error;

	if (xfs_is_shutdown(dp->i_mount))
		return -EIO;

	/*
	 * A temporary directory's block headers are written with the owner
	 * set to sc->ip, so we must switch the owner here for the lookup.
	 */
	if (dp == sc->tempip)
		args.owner = sc->ip->i_ino;

	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
	xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	error = xfs_dir_lookup_args(&args);
	if (!error)
		*ino = args.inumber;
	return error;
}

/*
 * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
 * state.  The caller may have a transaction, so we must use trylock for both
 * IOLOCKs.
 */
static inline unsigned int
xchk_dir_trylock_both(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
		return 0;

	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
		goto parent_iolock;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
		goto parent_ilock;

	return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;

parent_ilock:
	xchk_iunlock(sc, XFS_ILOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
parent_iolock:
	xchk_iunlock(sc, XFS_IOLOCK_EXCL);
	return 0;
}

/*
 * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
 * (@sc->ip) and the inode at the other end (@ip) of a directory or parent
 * pointer link so that we can check that link.
 *
 * We do not know ahead of time that the directory tree is /not/ corrupt, so we
 * cannot use the "lock two inodes" functions because we do not know that there
 * is not a racing thread trying to take the locks in opposite order.  First
 * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
 * of @ip to synchronize with the VFS.  Next, take ILOCK_EXCL of the scrub
 * target and @ip to synchronize with XFS.
 *
 * If the trylocks succeed, *lockmode will be set to the locks held for @ip;
 * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
 * be returned.  If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
 * XCHK_TRY_HARDER was set.  Returns -EINTR if the process has been killed.
 */
int
xchk_dir_trylock_for_pptrs(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	unsigned int		*lockmode)
{
	unsigned int		nr;
	int			error = 0;

	ASSERT(sc->ilock_flags == 0);

	for (nr = 0; nr < HZ; nr++) {
		*lockmode = xchk_dir_trylock_both(sc, ip);
		if (*lockmode)
			return 0;

		if (xchk_should_terminate(sc, &error))
			return error;

		delay(1);
	}

	if (sc->flags & XCHK_TRY_HARDER) {
		xchk_set_incomplete(sc);
		return -ETIMEDOUT;
	}

	return -EDEADLOCK;
}
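
/*
 * Illustrative usage sketch, not part of the original file: a minimal
 * xchk_dirent_fn callback that counts directory entries via xchk_dir_walk().
 * The helper names xchk_count_dirent and xchk_count_dirents are hypothetical;
 * the callback signature mirrors the dirent_fn invocations above, and the
 * caller must hold the directory ILOCK as noted in the xchk_dir_walk comment.
 */
STATIC int
xchk_count_dirent(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xfs_dir2_dataptr_t	dapos,
	const struct xfs_name	*name,
	xfs_ino_t		ino,
	void			*priv)
{
	unsigned long long	*nr = priv;

	/* Tally every entry reported by the walk, including "." and "..". */
	(*nr)++;
	return 0;
}

/* Count every entry in @dp; a sketch of how a scrubber might drive the walk. */
STATIC int
xchk_count_dirents(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	unsigned long long	*nr)
{
	*nr = 0;
	return xchk_dir_walk(sc, dp, xchk_count_dirent, nr);
}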