1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_inode.h" 14 #include "xfs_icache.h" 15 #include "xfs_dir2.h" 16 #include "xfs_dir2_priv.h" 17 #include "scrub/scrub.h" 18 #include "scrub/common.h" 19 20 /* Set us up to scrub parents. */ 21 int 22 xchk_setup_parent( 23 struct xfs_scrub *sc, 24 struct xfs_inode *ip) 25 { 26 return xchk_setup_inode_contents(sc, ip, 0); 27 } 28 29 /* Parent pointers */ 30 31 /* Look for an entry in a parent pointing to this inode. */ 32 33 struct xchk_parent_ctx { 34 struct dir_context dc; 35 struct xfs_scrub *sc; 36 xfs_ino_t ino; 37 xfs_nlink_t nlink; 38 bool cancelled; 39 }; 40 41 /* Look for a single entry in a directory pointing to an inode. */ 42 STATIC int 43 xchk_parent_actor( 44 struct dir_context *dc, 45 const char *name, 46 int namelen, 47 loff_t pos, 48 u64 ino, 49 unsigned type) 50 { 51 struct xchk_parent_ctx *spc; 52 int error = 0; 53 54 spc = container_of(dc, struct xchk_parent_ctx, dc); 55 if (spc->ino == ino) 56 spc->nlink++; 57 58 /* 59 * If we're facing a fatal signal, bail out. Store the cancellation 60 * status separately because the VFS readdir code squashes error codes 61 * into short directory reads. 62 */ 63 if (xchk_should_terminate(spc->sc, &error)) 64 spc->cancelled = true; 65 66 return error; 67 } 68 69 /* Count the number of dentries in the parent dir that point to this inode. */ 70 STATIC int 71 xchk_parent_count_parent_dentries( 72 struct xfs_scrub *sc, 73 struct xfs_inode *parent, 74 xfs_nlink_t *nlink) 75 { 76 struct xchk_parent_ctx spc = { 77 .dc.actor = xchk_parent_actor, 78 .ino = sc->ip->i_ino, 79 .sc = sc, 80 }; 81 size_t bufsize; 82 loff_t oldpos; 83 uint lock_mode; 84 int error = 0; 85 86 /* 87 * If there are any blocks, read-ahead block 0 as we're almost 88 * certain to have the next operation be a read there. This is 89 * how we guarantee that the parent's extent map has been loaded, 90 * if there is one. 91 */ 92 lock_mode = xfs_ilock_data_map_shared(parent); 93 if (parent->i_df.if_nextents > 0) 94 error = xfs_dir3_data_readahead(parent, 0, 0); 95 xfs_iunlock(parent, lock_mode); 96 if (error) 97 return error; 98 99 /* 100 * Iterate the parent dir to confirm that there is 101 * exactly one entry pointing back to the inode being 102 * scanned. 103 */ 104 bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, 105 parent->i_d.di_size); 106 oldpos = 0; 107 while (true) { 108 error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); 109 if (error) 110 goto out; 111 if (spc.cancelled) { 112 error = -EAGAIN; 113 goto out; 114 } 115 if (oldpos == spc.dc.pos) 116 break; 117 oldpos = spc.dc.pos; 118 } 119 *nlink = spc.nlink; 120 out: 121 return error; 122 } 123 124 /* 125 * Given the inode number of the alleged parent of the inode being 126 * scrubbed, try to validate that the parent has exactly one directory 127 * entry pointing back to the inode being scrubbed. 128 */ 129 STATIC int 130 xchk_parent_validate( 131 struct xfs_scrub *sc, 132 xfs_ino_t dnum, 133 bool *try_again) 134 { 135 struct xfs_mount *mp = sc->mp; 136 struct xfs_inode *dp = NULL; 137 xfs_nlink_t expected_nlink; 138 xfs_nlink_t nlink; 139 int error = 0; 140 141 *try_again = false; 142 143 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 144 goto out; 145 146 /* '..' must not point to ourselves. */ 147 if (sc->ip->i_ino == dnum) { 148 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 149 goto out; 150 } 151 152 /* 153 * If we're an unlinked directory, the parent /won't/ have a link 154 * to us. Otherwise, it should have one link. 155 */ 156 expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; 157 158 /* 159 * Grab this parent inode. We release the inode before we 160 * cancel the scrub transaction. Since we're don't know a 161 * priori that releasing the inode won't trigger eofblocks 162 * cleanup (which allocates what would be a nested transaction) 163 * if the parent pointer erroneously points to a file, we 164 * can't use DONTCACHE here because DONTCACHE inodes can trigger 165 * immediate inactive cleanup of the inode. 166 * 167 * If _iget returns -EINVAL then the parent inode number is garbage 168 * and the directory is corrupt. If the _iget returns -EFSCORRUPTED 169 * or -EFSBADCRC then the parent is corrupt which is a cross 170 * referencing error. Any other error is an operational error. 171 */ 172 error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); 173 if (error == -EINVAL) { 174 error = -EFSCORRUPTED; 175 xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); 176 goto out; 177 } 178 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) 179 goto out; 180 if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { 181 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 182 goto out_rele; 183 } 184 185 /* 186 * We prefer to keep the inode locked while we lock and search 187 * its alleged parent for a forward reference. If we can grab 188 * the iolock, validate the pointers and we're done. We must 189 * use nowait here to avoid an ABBA deadlock on the parent and 190 * the child inodes. 191 */ 192 if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { 193 error = xchk_parent_count_parent_dentries(sc, dp, &nlink); 194 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, 195 &error)) 196 goto out_unlock; 197 if (nlink != expected_nlink) 198 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 199 goto out_unlock; 200 } 201 202 /* 203 * The game changes if we get here. We failed to lock the parent, 204 * so we're going to try to verify both pointers while only holding 205 * one lock so as to avoid deadlocking with something that's actually 206 * trying to traverse down the directory tree. 207 */ 208 xfs_iunlock(sc->ip, sc->ilock_flags); 209 sc->ilock_flags = 0; 210 error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED); 211 if (error) 212 goto out_rele; 213 214 /* Go looking for our dentry. */ 215 error = xchk_parent_count_parent_dentries(sc, dp, &nlink); 216 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) 217 goto out_unlock; 218 219 /* Drop the parent lock, relock this inode. */ 220 xfs_iunlock(dp, XFS_IOLOCK_SHARED); 221 error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL); 222 if (error) 223 goto out_rele; 224 sc->ilock_flags = XFS_IOLOCK_EXCL; 225 226 /* 227 * If we're an unlinked directory, the parent /won't/ have a link 228 * to us. Otherwise, it should have one link. We have to re-set 229 * it here because we dropped the lock on sc->ip. 230 */ 231 expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; 232 233 /* Look up '..' to see if the inode changed. */ 234 error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); 235 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 236 goto out_rele; 237 238 /* Drat, parent changed. Try again! */ 239 if (dnum != dp->i_ino) { 240 xfs_irele(dp); 241 *try_again = true; 242 return 0; 243 } 244 xfs_irele(dp); 245 246 /* 247 * '..' didn't change, so check that there was only one entry 248 * for us in the parent. 249 */ 250 if (nlink != expected_nlink) 251 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 252 return error; 253 254 out_unlock: 255 xfs_iunlock(dp, XFS_IOLOCK_SHARED); 256 out_rele: 257 xfs_irele(dp); 258 out: 259 return error; 260 } 261 262 /* Scrub a parent pointer. */ 263 int 264 xchk_parent( 265 struct xfs_scrub *sc) 266 { 267 struct xfs_mount *mp = sc->mp; 268 xfs_ino_t dnum; 269 bool try_again; 270 int tries = 0; 271 int error = 0; 272 273 /* 274 * If we're a directory, check that the '..' link points up to 275 * a directory that has one entry pointing to us. 276 */ 277 if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) 278 return -ENOENT; 279 280 /* We're not a special inode, are we? */ 281 if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { 282 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 283 goto out; 284 } 285 286 /* 287 * The VFS grabs a read or write lock via i_rwsem before it reads 288 * or writes to a directory. If we've gotten this far we've 289 * already obtained IOLOCK_EXCL, which (since 4.10) is the same as 290 * getting a write lock on i_rwsem. Therefore, it is safe for us 291 * to drop the ILOCK here in order to do directory lookups. 292 */ 293 sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); 294 xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); 295 296 /* Look up '..' */ 297 error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); 298 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 299 goto out; 300 if (!xfs_verify_dir_ino(mp, dnum)) { 301 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 302 goto out; 303 } 304 305 /* Is this the root dir? Then '..' must point to itself. */ 306 if (sc->ip == mp->m_rootip) { 307 if (sc->ip->i_ino != mp->m_sb.sb_rootino || 308 sc->ip->i_ino != dnum) 309 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 310 goto out; 311 } 312 313 do { 314 error = xchk_parent_validate(sc, dnum, &try_again); 315 if (error) 316 goto out; 317 } while (try_again && ++tries < 20); 318 319 /* 320 * We gave it our best shot but failed, so mark this scrub 321 * incomplete. Userspace can decide if it wants to try again. 322 */ 323 if (try_again && tries == 20) 324 xchk_set_incomplete(sc); 325 out: 326 /* 327 * If we failed to lock the parent inode even after a retry, just mark 328 * this scrub incomplete and return. 329 */ 330 if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) { 331 error = 0; 332 xchk_set_incomplete(sc); 333 } 334 return error; 335 } 336