xref: /linux/fs/xfs/scrub/attr_repair.c (revision 1e58a8ccf2597c9259a8e71a2bffac5e11e12ea0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_da_format.h"
20 #include "xfs_da_btree.h"
21 #include "xfs_dir2.h"
22 #include "xfs_attr.h"
23 #include "xfs_attr_leaf.h"
24 #include "xfs_attr_sf.h"
25 #include "xfs_attr_remote.h"
26 #include "xfs_bmap.h"
27 #include "xfs_bmap_util.h"
28 #include "xfs_exchmaps.h"
29 #include "xfs_exchrange.h"
30 #include "xfs_acl.h"
31 #include "scrub/xfs_scrub.h"
32 #include "scrub/scrub.h"
33 #include "scrub/common.h"
34 #include "scrub/trace.h"
35 #include "scrub/repair.h"
36 #include "scrub/tempfile.h"
37 #include "scrub/tempexch.h"
38 #include "scrub/xfile.h"
39 #include "scrub/xfarray.h"
40 #include "scrub/xfblob.h"
41 #include "scrub/attr.h"
42 #include "scrub/reap.h"
43 #include "scrub/attr_repair.h"
44 
45 /*
46  * Extended Attribute Repair
47  * =========================
48  *
49  * We repair extended attributes by reading the attr leaf blocks looking for
50  * attribute entries that look salvageable (name passes verifiers, value can
51  * be retrieved, etc).  Each extended attribute worth salvaging is stashed in
52  * memory, and the stashed entries are periodically replayed into a temporary
53  * file to constrain memory use.  Batching the construction of the temporary
54  * extended attribute structure in this fashion reduces lock cycling of the
55  * file being repaired and the temporary file.
56  *
57  * When salvaging completes, the remaining stashed attributes are replayed to
58  * the temporary file.  An atomic file contents exchange is used to commit the
59  * new xattr blocks to the file being repaired.  This will disrupt attrmulti
60  * cursors.
61  */
62 
63 struct xrep_xattr_key {
64 	/* Cookie for retrieval of the xattr name. */
65 	xfblob_cookie		name_cookie;
66 
67 	/* Cookie for retrieval of the xattr value. */
68 	xfblob_cookie		value_cookie;
69 
70 	/* XFS_ATTR_* flags */
71 	int			flags;
72 
73 	/* Length of the value and name. */
74 	uint32_t		valuelen;
75 	uint16_t		namelen;
76 };
77 
78 /*
79  * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write
80  * them to the temp file.
81  */
82 #define XREP_XATTR_MAX_STASH_BYTES	(PAGE_SIZE * 8)
83 
84 struct xrep_xattr {
85 	struct xfs_scrub	*sc;
86 
87 	/* Information for exchanging attr fork mappings at the end. */
88 	struct xrep_tempexch	tx;
89 
90 	/* xattr keys */
91 	struct xfarray		*xattr_records;
92 
93 	/* xattr values */
94 	struct xfblob		*xattr_blobs;
95 
96 	/* Number of attributes that we are salvaging. */
97 	unsigned long long	attrs_found;
98 };
99 
100 /* Set up to recreate the extended attributes. */
101 int
102 xrep_setup_xattr(
103 	struct xfs_scrub	*sc)
104 {
105 	return xrep_tempfile_create(sc, S_IFREG);
106 }
107 
108 /*
109  * Decide if we want to salvage this attribute.  We don't bother with
110  * incomplete or oversized keys or values.  The @value parameter can be null
111  * for remote attrs.
112  */
113 STATIC int
114 xrep_xattr_want_salvage(
115 	struct xrep_xattr	*rx,
116 	unsigned int		attr_flags,
117 	const void		*name,
118 	int			namelen,
119 	const void		*value,
120 	int			valuelen)
121 {
122 	if (attr_flags & XFS_ATTR_INCOMPLETE)
123 		return false;
124 	if (namelen > XATTR_NAME_MAX || namelen <= 0)
125 		return false;
126 	if (!xfs_attr_namecheck(name, namelen))
127 		return false;
128 	if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
129 		return false;
130 	if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1)
131 		return false;
132 	return true;
133 }
134 
135 /* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
136 STATIC int
137 xrep_xattr_salvage_key(
138 	struct xrep_xattr	*rx,
139 	int			flags,
140 	unsigned char		*name,
141 	int			namelen,
142 	unsigned char		*value,
143 	int			valuelen)
144 {
145 	struct xrep_xattr_key	key = {
146 		.valuelen	= valuelen,
147 		.flags		= flags & XFS_ATTR_NSP_ONDISK_MASK,
148 	};
149 	unsigned int		i = 0;
150 	int			error = 0;
151 
152 	if (xchk_should_terminate(rx->sc, &error))
153 		return error;
154 
155 	/*
156 	 * Truncate the name to the first character that would trip namecheck.
157 	 * If we no longer have a name after that, ignore this attribute.
158 	 */
159 	while (i < namelen && name[i] != 0)
160 		i++;
161 	if (i == 0)
162 		return 0;
163 	key.namelen = i;
164 
165 	trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, key.namelen,
166 			valuelen);
167 
168 	error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
169 			key.namelen);
170 	if (error)
171 		return error;
172 
173 	error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value,
174 			key.valuelen);
175 	if (error)
176 		return error;
177 
178 	error = xfarray_append(rx->xattr_records, &key);
179 	if (error)
180 		return error;
181 
182 	rx->attrs_found++;
183 	return 0;
184 }
185 
186 /*
187  * Record a shortform extended attribute key & value for later reinsertion
188  * into the inode.
189  */
190 STATIC int
191 xrep_xattr_salvage_sf_attr(
192 	struct xrep_xattr		*rx,
193 	struct xfs_attr_sf_hdr		*hdr,
194 	struct xfs_attr_sf_entry	*sfe)
195 {
196 	struct xfs_scrub		*sc = rx->sc;
197 	struct xchk_xattr_buf		*ab = sc->buf;
198 	unsigned char			*name = sfe->nameval;
199 	unsigned char			*value = &sfe->nameval[sfe->namelen];
200 
201 	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr,
202 			sfe->namelen))
203 		return 0;
204 
205 	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr,
206 			sfe->valuelen))
207 		return 0;
208 
209 	if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval,
210 			sfe->namelen, value, sfe->valuelen))
211 		return 0;
212 
213 	return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
214 			sfe->namelen, value, sfe->valuelen);
215 }
216 
217 /*
218  * Record a local format extended attribute key & value for later reinsertion
219  * into the inode.
220  */
221 STATIC int
222 xrep_xattr_salvage_local_attr(
223 	struct xrep_xattr		*rx,
224 	struct xfs_attr_leaf_entry	*ent,
225 	unsigned int			nameidx,
226 	const char			*buf_end,
227 	struct xfs_attr_leaf_name_local	*lentry)
228 {
229 	struct xchk_xattr_buf		*ab = rx->sc->buf;
230 	unsigned char			*value;
231 	unsigned int			valuelen;
232 	unsigned int			namesize;
233 
234 	/*
235 	 * Decode the leaf local entry format.  If something seems wrong, we
236 	 * junk the attribute.
237 	 */
238 	value = &lentry->nameval[lentry->namelen];
239 	valuelen = be16_to_cpu(lentry->valuelen);
240 	namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
241 	if ((char *)lentry + namesize > buf_end)
242 		return 0;
243 	if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval,
244 			lentry->namelen, value, valuelen))
245 		return 0;
246 	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
247 		return 0;
248 
249 	/* Try to save this attribute. */
250 	return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
251 			lentry->namelen, value, valuelen);
252 }
253 
254 /*
255  * Record a remote format extended attribute key & value for later reinsertion
256  * into the inode.
257  */
258 STATIC int
259 xrep_xattr_salvage_remote_attr(
260 	struct xrep_xattr		*rx,
261 	struct xfs_attr_leaf_entry	*ent,
262 	unsigned int			nameidx,
263 	const char			*buf_end,
264 	struct xfs_attr_leaf_name_remote *rentry,
265 	unsigned int			ent_idx,
266 	struct xfs_buf			*leaf_bp)
267 {
268 	struct xchk_xattr_buf		*ab = rx->sc->buf;
269 	struct xfs_da_args		args = {
270 		.trans			= rx->sc->tp,
271 		.dp			= rx->sc->ip,
272 		.index			= ent_idx,
273 		.geo			= rx->sc->mp->m_attr_geo,
274 		.owner			= rx->sc->ip->i_ino,
275 		.attr_filter		= ent->flags & XFS_ATTR_NSP_ONDISK_MASK,
276 		.namelen		= rentry->namelen,
277 		.name			= rentry->name,
278 		.value			= ab->value,
279 		.valuelen		= be32_to_cpu(rentry->valuelen),
280 	};
281 	unsigned int			namesize;
282 	int				error;
283 
284 	/*
285 	 * Decode the leaf remote entry format.  If something seems wrong, we
286 	 * junk the attribute.  Note that we should never find a zero-length
287 	 * remote attribute value.
288 	 */
289 	namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
290 	if ((char *)rentry + namesize > buf_end)
291 		return 0;
292 	if (args.valuelen == 0 ||
293 	    !xrep_xattr_want_salvage(rx, ent->flags, rentry->name,
294 			rentry->namelen, NULL, args.valuelen))
295 		return 0;
296 	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
297 		return 0;
298 
299 	/*
300 	 * Enlarge the buffer (if needed) to hold the value that we're trying
301 	 * to salvage from the old extended attribute data.
302 	 */
303 	error = xchk_setup_xattr_buf(rx->sc, args.valuelen);
304 	if (error == -ENOMEM)
305 		error = -EDEADLOCK;
306 	if (error)
307 		return error;
308 
309 	/* Look up the remote value and stash it for reconstruction. */
310 	error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
311 	if (error || args.rmtblkno == 0)
312 		goto err_free;
313 
314 	error = xfs_attr_rmtval_get(&args);
315 	if (error)
316 		goto err_free;
317 
318 	/* Try to save this attribute. */
319 	error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
320 			rentry->namelen, ab->value, args.valuelen);
321 err_free:
322 	/* remote value was garbage, junk it */
323 	if (error == -EFSBADCRC || error == -EFSCORRUPTED)
324 		error = 0;
325 	return error;
326 }
327 
328 /* Extract every xattr key that we can from this attr fork block. */
329 STATIC int
330 xrep_xattr_recover_leaf(
331 	struct xrep_xattr		*rx,
332 	struct xfs_buf			*bp)
333 {
334 	struct xfs_attr3_icleaf_hdr	leafhdr;
335 	struct xfs_scrub		*sc = rx->sc;
336 	struct xfs_mount		*mp = sc->mp;
337 	struct xfs_attr_leafblock	*leaf;
338 	struct xfs_attr_leaf_name_local	*lentry;
339 	struct xfs_attr_leaf_name_remote *rentry;
340 	struct xfs_attr_leaf_entry	*ent;
341 	struct xfs_attr_leaf_entry	*entries;
342 	struct xchk_xattr_buf		*ab = rx->sc->buf;
343 	char				*buf_end;
344 	size_t				off;
345 	unsigned int			nameidx;
346 	unsigned int			hdrsize;
347 	int				i;
348 	int				error = 0;
349 
350 	bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
351 
352 	/* Check the leaf header */
353 	leaf = bp->b_addr;
354 	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
355 	hdrsize = xfs_attr3_leaf_hdr_size(leaf);
356 	xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize);
357 	entries = xfs_attr3_leaf_entryp(leaf);
358 
359 	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
360 	for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
361 		if (xchk_should_terminate(sc, &error))
362 			return error;
363 
364 		/* Skip key if it conflicts with something else? */
365 		off = (char *)ent - (char *)leaf;
366 		if (!xchk_xattr_set_map(sc, ab->usedmap, off,
367 				sizeof(xfs_attr_leaf_entry_t)))
368 			continue;
369 
370 		/* Check the name information. */
371 		nameidx = be16_to_cpu(ent->nameidx);
372 		if (nameidx < leafhdr.firstused ||
373 		    nameidx >= mp->m_attr_geo->blksize)
374 			continue;
375 
376 		if (ent->flags & XFS_ATTR_LOCAL) {
377 			lentry = xfs_attr3_leaf_name_local(leaf, i);
378 			error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
379 					buf_end, lentry);
380 		} else {
381 			rentry = xfs_attr3_leaf_name_remote(leaf, i);
382 			error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
383 					buf_end, rentry, i, bp);
384 		}
385 		if (error)
386 			return error;
387 	}
388 
389 	return 0;
390 }
391 
392 /* Try to recover shortform attrs. */
393 STATIC int
394 xrep_xattr_recover_sf(
395 	struct xrep_xattr		*rx)
396 {
397 	struct xfs_scrub		*sc = rx->sc;
398 	struct xchk_xattr_buf		*ab = sc->buf;
399 	struct xfs_attr_sf_hdr		*hdr;
400 	struct xfs_attr_sf_entry	*sfe;
401 	struct xfs_attr_sf_entry	*next;
402 	struct xfs_ifork		*ifp;
403 	unsigned char			*end;
404 	int				i;
405 	int				error = 0;
406 
407 	ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK);
408 	hdr = ifp->if_data;
409 
410 	bitmap_zero(ab->usedmap, ifp->if_bytes);
411 	end = (unsigned char *)ifp->if_data + ifp->if_bytes;
412 	xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr));
413 
414 	sfe = xfs_attr_sf_firstentry(hdr);
415 	if ((unsigned char *)sfe > end)
416 		return 0;
417 
418 	for (i = 0; i < hdr->count; i++) {
419 		if (xchk_should_terminate(sc, &error))
420 			return error;
421 
422 		next = xfs_attr_sf_nextentry(sfe);
423 		if ((unsigned char *)next > end)
424 			break;
425 
426 		if (xchk_xattr_set_map(sc, ab->usedmap,
427 				(char *)sfe - (char *)hdr,
428 				sizeof(struct xfs_attr_sf_entry))) {
429 			/*
430 			 * No conflicts with the sf entry; let's save this
431 			 * attribute.
432 			 */
433 			error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe);
434 			if (error)
435 				return error;
436 		}
437 
438 		sfe = next;
439 	}
440 
441 	return 0;
442 }
443 
444 /*
445  * Try to return a buffer of xattr data for a given physical extent.
446  *
447  * Because the buffer cache get function complains if it finds a buffer
448  * matching the block number but not matching the length, we must be careful to
449  * look for incore buffers (up to the maximum length of a remote value) that
450  * could be hiding anywhere in the physical range.  If we find an incore
451  * buffer, we can pass that to the caller.  Optionally, read a single block and
452  * pass that back.
453  *
454  * Note the subtlety that remote attr value blocks for which there is no incore
455  * buffer will be passed to the callback one block at a time.  These buffers
456  * will not have any ops attached and must be staled to prevent aliasing with
457  * multiblock buffers once we drop the ILOCK.
458  */
459 STATIC int
460 xrep_xattr_find_buf(
461 	struct xfs_mount	*mp,
462 	xfs_fsblock_t		fsbno,
463 	xfs_extlen_t		max_len,
464 	bool			can_read,
465 	struct xfs_buf		**bpp)
466 {
467 	struct xrep_bufscan	scan = {
468 		.daddr		= XFS_FSB_TO_DADDR(mp, fsbno),
469 		.max_sectors	= xrep_bufscan_max_sectors(mp, max_len),
470 		.daddr_step	= XFS_FSB_TO_BB(mp, 1),
471 	};
472 	struct xfs_buf		*bp;
473 
474 	while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
475 		*bpp = bp;
476 		return 0;
477 	}
478 
479 	if (!can_read) {
480 		*bpp = NULL;
481 		return 0;
482 	}
483 
484 	return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1),
485 			XBF_TRYLOCK, bpp, NULL);
486 }
487 
488 /*
489  * Deal with a buffer that we found during our walk of the attr fork.
490  *
491  * Attribute leaf and node blocks are simple -- they're a single block, so we
492  * can walk them one at a time and we never have to worry about discontiguous
493  * multiblock buffers like we do for directories.
494  *
495  * Unfortunately, remote attr blocks add a lot of complexity here.  Each disk
496  * block is totally self contained, in the sense that the v5 header provides no
497  * indication that there could be more data in the next block.  The incore
498  * buffers can span multiple blocks, though they never cross extent records.
499  * However, they don't necessarily start or end on an extent record boundary.
500  * Therefore, we need a special buffer find function to walk the buffer cache
501  * for us.
502  *
503  * The caller must hold the ILOCK on the file being repaired.  We use
504  * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't
505  * own the block and don't want to hang the system on a potentially garbage
506  * buffer.
507  */
508 STATIC int
509 xrep_xattr_recover_block(
510 	struct xrep_xattr	*rx,
511 	xfs_dablk_t		dabno,
512 	xfs_fsblock_t		fsbno,
513 	xfs_extlen_t		max_len,
514 	xfs_extlen_t		*actual_len)
515 {
516 	struct xfs_da_blkinfo	*info;
517 	struct xfs_buf		*bp;
518 	int			error;
519 
520 	error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp);
521 	if (error)
522 		return error;
523 	info = bp->b_addr;
524 	*actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length);
525 
526 	trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
527 			be16_to_cpu(info->magic));
528 
529 	/*
530 	 * If the buffer has the right magic number for an attr leaf block and
531 	 * passes a structure check (we don't care about checksums), salvage
532 	 * as much as we can from the block. */
533 	if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
534 	    xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) &&
535 	    xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL)
536 		error = xrep_xattr_recover_leaf(rx, bp);
537 
538 	/*
539 	 * If the buffer didn't already have buffer ops set, it was read in by
540 	 * the _find_buf function and could very well be /part/ of a multiblock
541 	 * remote block.  Mark it stale so that it doesn't hang around in
542 	 * memory to cause problems.
543 	 */
544 	if (bp->b_ops == NULL)
545 		xfs_buf_stale(bp);
546 
547 	xfs_buf_relse(bp);
548 	return error;
549 }
550 
551 /* Insert one xattr key/value. */
552 STATIC int
553 xrep_xattr_insert_rec(
554 	struct xrep_xattr		*rx,
555 	const struct xrep_xattr_key	*key)
556 {
557 	struct xfs_da_args		args = {
558 		.dp			= rx->sc->tempip,
559 		.attr_filter		= key->flags,
560 		.attr_flags		= XATTR_CREATE,
561 		.namelen		= key->namelen,
562 		.valuelen		= key->valuelen,
563 		.owner			= rx->sc->ip->i_ino,
564 	};
565 	struct xchk_xattr_buf		*ab = rx->sc->buf;
566 	int				error;
567 
568 	/*
569 	 * Grab pointers to the scrub buffer so that we can use them to insert
570 	 * attrs into the temp file.
571 	 */
572 	args.name = ab->name;
573 	args.value = ab->value;
574 
575 	/*
576 	 * The attribute name is stored near the end of the in-core buffer,
577 	 * though we reserve one more byte to ensure null termination.
578 	 */
579 	ab->name[XATTR_NAME_MAX] = 0;
580 
581 	error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name,
582 			key->namelen);
583 	if (error)
584 		return error;
585 
586 	error = xfblob_free(rx->xattr_blobs, key->name_cookie);
587 	if (error)
588 		return error;
589 
590 	error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value,
591 			key->valuelen);
592 	if (error)
593 		return error;
594 
595 	error = xfblob_free(rx->xattr_blobs, key->value_cookie);
596 	if (error)
597 		return error;
598 
599 	ab->name[key->namelen] = 0;
600 
601 	trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, ab->name,
602 			key->namelen, key->valuelen);
603 
604 	/*
605 	 * xfs_attr_set creates and commits its own transaction.  If the attr
606 	 * already exists, we'll just drop it during the rebuild.
607 	 */
608 	error = xfs_attr_set(&args);
609 	if (error == -EEXIST)
610 		error = 0;
611 
612 	return error;
613 }
614 
615 /*
616  * Periodically flush salvaged attributes to the temporary file.  This is done
617  * to reduce the memory requirements of the xattr rebuild because files can
618  * contain millions of attributes.
619  */
620 STATIC int
621 xrep_xattr_flush_stashed(
622 	struct xrep_xattr	*rx)
623 {
624 	xfarray_idx_t		array_cur;
625 	int			error;
626 
627 	/*
628 	 * Entering this function, the scrub context has a reference to the
629 	 * inode being repaired, the temporary file, and a scrub transaction
630 	 * that we use during xattr salvaging to avoid livelocking if there
631 	 * are cycles in the xattr structures.  We hold ILOCK_EXCL on both
632 	 * the inode being repaired, though it is not ijoined to the scrub
633 	 * transaction.
634 	 *
635 	 * To constrain kernel memory use, we occasionally flush salvaged
636 	 * xattrs from the xfarray and xfblob structures into the temporary
637 	 * file in preparation for exchanging the xattr structures at the end.
638 	 * Updating the temporary file requires a transaction, so we commit the
639 	 * scrub transaction and drop the two ILOCKs so that xfs_attr_set can
640 	 * allocate whatever transaction it wants.
641 	 *
642 	 * We still hold IOLOCK_EXCL on the inode being repaired, which
643 	 * prevents anyone from modifying the damaged xattr data while we
644 	 * repair it.
645 	 */
646 	error = xrep_trans_commit(rx->sc);
647 	if (error)
648 		return error;
649 	xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
650 
651 	/*
652 	 * Take the IOLOCK of the temporary file while we modify xattrs.  This
653 	 * isn't strictly required because the temporary file is never revealed
654 	 * to userspace, but we follow the same locking rules.  We still hold
655 	 * sc->ip's IOLOCK.
656 	 */
657 	error = xrep_tempfile_iolock_polled(rx->sc);
658 	if (error)
659 		return error;
660 
661 	/* Add all the salvaged attrs to the temporary file. */
662 	foreach_xfarray_idx(rx->xattr_records, array_cur) {
663 		struct xrep_xattr_key	key;
664 
665 		error = xfarray_load(rx->xattr_records, array_cur, &key);
666 		if (error)
667 			return error;
668 
669 		error = xrep_xattr_insert_rec(rx, &key);
670 		if (error)
671 			return error;
672 	}
673 
674 	/* Empty out both arrays now that we've added the entries. */
675 	xfarray_truncate(rx->xattr_records);
676 	xfblob_truncate(rx->xattr_blobs);
677 
678 	xrep_tempfile_iounlock(rx->sc);
679 
680 	/* Recreate the salvage transaction and relock the inode. */
681 	error = xchk_trans_alloc(rx->sc, 0);
682 	if (error)
683 		return error;
684 	xchk_ilock(rx->sc, XFS_ILOCK_EXCL);
685 	return 0;
686 }
687 
688 /* Decide if we've stashed too much xattr data in memory. */
689 static inline bool
690 xrep_xattr_want_flush_stashed(
691 	struct xrep_xattr	*rx)
692 {
693 	unsigned long long	bytes;
694 
695 	bytes = xfarray_bytes(rx->xattr_records) +
696 		xfblob_bytes(rx->xattr_blobs);
697 	return bytes > XREP_XATTR_MAX_STASH_BYTES;
698 }
699 
700 /* Extract as many attribute keys and values as we can. */
701 STATIC int
702 xrep_xattr_recover(
703 	struct xrep_xattr	*rx)
704 {
705 	struct xfs_bmbt_irec	got;
706 	struct xfs_scrub	*sc = rx->sc;
707 	struct xfs_da_geometry	*geo = sc->mp->m_attr_geo;
708 	xfs_fileoff_t		offset;
709 	xfs_extlen_t		len;
710 	xfs_dablk_t		dabno;
711 	int			nmap;
712 	int			error;
713 
714 	/*
715 	 * Iterate each xattr leaf block in the attr fork to scan them for any
716 	 * attributes that we might salvage.
717 	 */
718 	for (offset = 0;
719 	     offset < XFS_MAX_FILEOFF;
720 	     offset = got.br_startoff + got.br_blockcount) {
721 		nmap = 1;
722 		error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
723 				&got, &nmap, XFS_BMAPI_ATTRFORK);
724 		if (error)
725 			return error;
726 		if (nmap != 1)
727 			return -EFSCORRUPTED;
728 		if (!xfs_bmap_is_written_extent(&got))
729 			continue;
730 
731 		for (dabno = round_up(got.br_startoff, geo->fsbcount);
732 		     dabno < got.br_startoff + got.br_blockcount;
733 		     dabno += len) {
734 			xfs_fileoff_t	curr_offset = dabno - got.br_startoff;
735 			xfs_extlen_t	maxlen;
736 
737 			if (xchk_should_terminate(rx->sc, &error))
738 				return error;
739 
740 			maxlen = min_t(xfs_filblks_t, INT_MAX,
741 					got.br_blockcount - curr_offset);
742 			error = xrep_xattr_recover_block(rx, dabno,
743 					curr_offset + got.br_startblock,
744 					maxlen, &len);
745 			if (error)
746 				return error;
747 
748 			if (xrep_xattr_want_flush_stashed(rx)) {
749 				error = xrep_xattr_flush_stashed(rx);
750 				if (error)
751 					return error;
752 			}
753 		}
754 	}
755 
756 	return 0;
757 }
758 
759 /*
760  * Reset the extended attribute fork to a state where we can start re-adding
761  * the salvaged attributes.
762  */
763 STATIC int
764 xrep_xattr_fork_remove(
765 	struct xfs_scrub	*sc,
766 	struct xfs_inode	*ip)
767 {
768 	struct xfs_attr_sf_hdr	*hdr;
769 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
770 
771 	/*
772 	 * If the data fork is in btree format, we can't change di_forkoff
773 	 * because we could run afoul of the rule that the data fork isn't
774 	 * supposed to be in btree format if there's enough space in the fork
775 	 * that it could have used extents format.  Instead, reinitialize the
776 	 * attr fork to have a shortform structure with zero attributes.
777 	 */
778 	if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
779 		ifp->if_format = XFS_DINODE_FMT_LOCAL;
780 		hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
781 				XFS_ATTR_FORK);
782 		hdr->count = 0;
783 		hdr->totsize = cpu_to_be16(sizeof(*hdr));
784 		xfs_trans_log_inode(sc->tp, ip,
785 				XFS_ILOG_CORE | XFS_ILOG_ADATA);
786 		return 0;
787 	}
788 
789 	/* If we still have attr fork extents, something's wrong. */
790 	if (ifp->if_nextents != 0) {
791 		struct xfs_iext_cursor	icur;
792 		struct xfs_bmbt_irec	irec;
793 		unsigned int		i = 0;
794 
795 		xfs_emerg(sc->mp,
796 	"inode 0x%llx attr fork still has %llu attr extents, format %d?!",
797 				ip->i_ino, ifp->if_nextents, ifp->if_format);
798 		for_each_xfs_iext(ifp, &icur, &irec) {
799 			xfs_err(sc->mp,
800 	"[%u]: startoff %llu startblock %llu blockcount %llu state %u",
801 					i++, irec.br_startoff,
802 					irec.br_startblock, irec.br_blockcount,
803 					irec.br_state);
804 		}
805 		ASSERT(0);
806 		return -EFSCORRUPTED;
807 	}
808 
809 	xfs_attr_fork_remove(ip, sc->tp);
810 	return 0;
811 }
812 
813 /*
814  * Free all the attribute fork blocks of the file being repaired and delete the
815  * fork.  The caller must ILOCK the scrub file and join it to the transaction.
816  * This function returns with the inode joined to a clean transaction.
817  */
818 int
819 xrep_xattr_reset_fork(
820 	struct xfs_scrub	*sc)
821 {
822 	int			error;
823 
824 	trace_xrep_xattr_reset_fork(sc->ip, sc->ip);
825 
826 	/* Unmap all the attr blocks. */
827 	if (xfs_ifork_has_extents(&sc->ip->i_af)) {
828 		error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK);
829 		if (error)
830 			return error;
831 	}
832 
833 	error = xrep_xattr_fork_remove(sc, sc->ip);
834 	if (error)
835 		return error;
836 
837 	return xfs_trans_roll_inode(&sc->tp, sc->ip);
838 }
839 
840 /*
841  * Free all the attribute fork blocks of the temporary file and delete the attr
842  * fork.  The caller must ILOCK the tempfile and join it to the transaction.
843  * This function returns with the inode joined to a clean scrub transaction.
844  */
845 STATIC int
846 xrep_xattr_reset_tempfile_fork(
847 	struct xfs_scrub	*sc)
848 {
849 	int			error;
850 
851 	trace_xrep_xattr_reset_fork(sc->ip, sc->tempip);
852 
853 	/*
854 	 * Wipe out the attr fork of the temp file so that regular inode
855 	 * inactivation won't trip over the corrupt attr fork.
856 	 */
857 	if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
858 		error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
859 		if (error)
860 			return error;
861 	}
862 
863 	return xrep_xattr_fork_remove(sc, sc->tempip);
864 }
865 
866 /*
867  * Find all the extended attributes for this inode by scraping them out of the
868  * attribute key blocks by hand, and flushing them into the temp file.
869  * When we're done, free the staging memory before exchanging the xattr
870  * structures to reduce memory usage.
871  */
872 STATIC int
873 xrep_xattr_salvage_attributes(
874 	struct xrep_xattr	*rx)
875 {
876 	struct xfs_inode	*ip = rx->sc->ip;
877 	int			error;
878 
879 	/* Short format xattrs are easy! */
880 	if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
881 		error = xrep_xattr_recover_sf(rx);
882 		if (error)
883 			return error;
884 
885 		return xrep_xattr_flush_stashed(rx);
886 	}
887 
888 	/*
889 	 * For non-inline xattr structures, the salvage function scans the
890 	 * buffer cache looking for potential attr leaf blocks.  The scan
891 	 * requires the ability to lock any buffer found and runs independently
892 	 * of any transaction <-> buffer item <-> buffer linkage.  Therefore,
893 	 * roll the transaction to ensure there are no buffers joined.  We hold
894 	 * the ILOCK independently of the transaction.
895 	 */
896 	error = xfs_trans_roll(&rx->sc->tp);
897 	if (error)
898 		return error;
899 
900 	error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
901 	if (error)
902 		return error;
903 
904 	error = xrep_xattr_recover(rx);
905 	if (error)
906 		return error;
907 
908 	return xrep_xattr_flush_stashed(rx);
909 }
910 
911 /*
912  * Prepare both inodes' attribute forks for an exchange.  Promote the tempfile
913  * from short format to leaf format, and if the file being repaired has a short
914  * format attr fork, turn it into an empty extent list.
915  */
916 STATIC int
917 xrep_xattr_swap_prep(
918 	struct xfs_scrub	*sc,
919 	bool			temp_local,
920 	bool			ip_local)
921 {
922 	int			error;
923 
924 	/*
925 	 * If the tempfile's attributes are in shortform format, convert that
926 	 * to a single leaf extent so that we can use the atomic mapping
927 	 * exchange.
928 	 */
929 	if (temp_local) {
930 		struct xfs_da_args	args = {
931 			.dp		= sc->tempip,
932 			.geo		= sc->mp->m_attr_geo,
933 			.whichfork	= XFS_ATTR_FORK,
934 			.trans		= sc->tp,
935 			.total		= 1,
936 			.owner		= sc->ip->i_ino,
937 		};
938 
939 		error = xfs_attr_shortform_to_leaf(&args);
940 		if (error)
941 			return error;
942 
943 		/*
944 		 * Roll the deferred log items to get us back to a clean
945 		 * transaction.
946 		 */
947 		error = xfs_defer_finish(&sc->tp);
948 		if (error)
949 			return error;
950 	}
951 
952 	/*
953 	 * If the file being repaired had a shortform attribute fork, convert
954 	 * that to an empty extent list in preparation for the atomic mapping
955 	 * exchange.
956 	 */
957 	if (ip_local) {
958 		struct xfs_ifork	*ifp;
959 
960 		ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
961 
962 		xfs_idestroy_fork(ifp);
963 		ifp->if_format = XFS_DINODE_FMT_EXTENTS;
964 		ifp->if_nextents = 0;
965 		ifp->if_bytes = 0;
966 		ifp->if_data = NULL;
967 		ifp->if_height = 0;
968 
969 		xfs_trans_log_inode(sc->tp, sc->ip,
970 				XFS_ILOG_CORE | XFS_ILOG_ADATA);
971 	}
972 
973 	return 0;
974 }
975 
976 /* Exchange the temporary file's attribute fork with the one being repaired. */
977 STATIC int
978 xrep_xattr_swap(
979 	struct xfs_scrub	*sc,
980 	struct xrep_tempexch	*tx)
981 {
982 	bool			ip_local, temp_local;
983 	int			error = 0;
984 
985 	ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
986 	temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
987 
988 	/*
989 	 * If the both files have a local format attr fork and the rebuilt
990 	 * xattr data would fit in the repaired file's attr fork, just copy
991 	 * the contents from the tempfile and declare ourselves done.
992 	 */
993 	if (ip_local && temp_local) {
994 		int	forkoff;
995 		int	newsize;
996 
997 		newsize = xfs_attr_sf_totsize(sc->tempip);
998 		forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize);
999 		if (forkoff > 0) {
1000 			sc->ip->i_forkoff = forkoff;
1001 			xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK);
1002 			return 0;
1003 		}
1004 	}
1005 
1006 	/* Otherwise, make sure both attr forks are in block-mapping mode. */
1007 	error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
1008 	if (error)
1009 		return error;
1010 
1011 	return xrep_tempexch_contents(sc, tx);
1012 }
1013 
1014 /*
1015  * Exchange the new extended attribute data (which we created in the tempfile)
1016  * with the file being repaired.
1017  */
1018 STATIC int
1019 xrep_xattr_rebuild_tree(
1020 	struct xrep_xattr	*rx)
1021 {
1022 	struct xfs_scrub	*sc = rx->sc;
1023 	int			error;
1024 
1025 	/*
1026 	 * If we didn't find any attributes to salvage, repair the file by
1027 	 * zapping its attr fork.
1028 	 */
1029 	if (rx->attrs_found == 0) {
1030 		xfs_trans_ijoin(sc->tp, sc->ip, 0);
1031 		error = xrep_xattr_reset_fork(sc);
1032 		if (error)
1033 			return error;
1034 
1035 		goto forget_acls;
1036 	}
1037 
1038 	trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip);
1039 
1040 	/*
1041 	 * Commit the repair transaction and drop the ILOCKs so that we can use
1042 	 * the atomic file content exchange helper functions to compute the
1043 	 * correct resource reservations.
1044 	 *
1045 	 * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr
1046 	 * modifications, but there's nothing to prevent userspace from reading
1047 	 * the attributes until we're ready for the exchange operation.  Reads
1048 	 * will return -EIO without shutting down the fs, so we're ok with
1049 	 * that.
1050 	 */
1051 	error = xrep_trans_commit(sc);
1052 	if (error)
1053 		return error;
1054 
1055 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
1056 
1057 	/*
1058 	 * Take the IOLOCK on the temporary file so that we can run xattr
1059 	 * operations with the same locks held as we would for a normal file.
1060 	 * We still hold sc->ip's IOLOCK.
1061 	 */
1062 	error = xrep_tempfile_iolock_polled(rx->sc);
1063 	if (error)
1064 		return error;
1065 
1066 	/* Allocate exchange transaction and lock both inodes. */
1067 	error = xrep_tempexch_trans_alloc(rx->sc, XFS_ATTR_FORK, &rx->tx);
1068 	if (error)
1069 		return error;
1070 
1071 	/*
1072 	 * Exchange the blocks mapped by the tempfile's attr fork with the file
1073 	 * being repaired.  The old attr blocks will then be attached to the
1074 	 * tempfile, so reap its attr fork.
1075 	 */
1076 	error = xrep_xattr_swap(sc, &rx->tx);
1077 	if (error)
1078 		return error;
1079 
1080 	error = xrep_xattr_reset_tempfile_fork(sc);
1081 	if (error)
1082 		return error;
1083 
1084 	/*
1085 	 * Roll to get a transaction without any inodes joined to it.  Then we
1086 	 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
1087 	 * the scrub target file.
1088 	 */
1089 	error = xfs_trans_roll(&sc->tp);
1090 	if (error)
1091 		return error;
1092 
1093 	xrep_tempfile_iunlock(sc);
1094 	xrep_tempfile_iounlock(sc);
1095 
1096 forget_acls:
1097 	/* Invalidate cached ACLs now that we've reloaded all the xattrs. */
1098 	xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE);
1099 	xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT);
1100 	return 0;
1101 }
1102 
1103 /* Tear down all the incore scan stuff we created. */
1104 STATIC void
1105 xrep_xattr_teardown(
1106 	struct xrep_xattr	*rx)
1107 {
1108 	xfblob_destroy(rx->xattr_blobs);
1109 	xfarray_destroy(rx->xattr_records);
1110 	kfree(rx);
1111 }
1112 
1113 /* Set up the filesystem scan so we can regenerate extended attributes. */
1114 STATIC int
1115 xrep_xattr_setup_scan(
1116 	struct xfs_scrub	*sc,
1117 	struct xrep_xattr	**rxp)
1118 {
1119 	struct xrep_xattr	*rx;
1120 	char			*descr;
1121 	int			max_len;
1122 	int			error;
1123 
1124 	rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS);
1125 	if (!rx)
1126 		return -ENOMEM;
1127 	rx->sc = sc;
1128 
1129 	/*
1130 	 * Allocate enough memory to handle loading local attr values from the
1131 	 * xfblob data while flushing stashed attrs to the temporary file.
1132 	 * We only realloc the buffer when salvaging remote attr values.
1133 	 */
1134 	max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
1135 	error = xchk_setup_xattr_buf(rx->sc, max_len);
1136 	if (error == -ENOMEM)
1137 		error = -EDEADLOCK;
1138 	if (error)
1139 		goto out_rx;
1140 
1141 	/* Set up some staging for salvaged attribute keys and values */
1142 	descr = xchk_xfile_ino_descr(sc, "xattr keys");
1143 	error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
1144 			&rx->xattr_records);
1145 	kfree(descr);
1146 	if (error)
1147 		goto out_rx;
1148 
1149 	descr = xchk_xfile_ino_descr(sc, "xattr names");
1150 	error = xfblob_create(descr, &rx->xattr_blobs);
1151 	kfree(descr);
1152 	if (error)
1153 		goto out_keys;
1154 
1155 	*rxp = rx;
1156 	return 0;
1157 out_keys:
1158 	xfarray_destroy(rx->xattr_records);
1159 out_rx:
1160 	kfree(rx);
1161 	return error;
1162 }
1163 
1164 /*
1165  * Repair the extended attribute metadata.
1166  *
1167  * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
1168  * The buffer cache in XFS can't handle aliased multiblock buffers, so this
1169  * might misbehave if the attr fork is crosslinked with other filesystem
1170  * metadata.
1171  */
1172 int
1173 xrep_xattr(
1174 	struct xfs_scrub	*sc)
1175 {
1176 	struct xrep_xattr	*rx = NULL;
1177 	int			error;
1178 
1179 	if (!xfs_inode_hasattr(sc->ip))
1180 		return -ENOENT;
1181 
1182 	/* The rmapbt is required to reap the old attr fork. */
1183 	if (!xfs_has_rmapbt(sc->mp))
1184 		return -EOPNOTSUPP;
1185 
1186 	error = xrep_xattr_setup_scan(sc, &rx);
1187 	if (error)
1188 		return error;
1189 
1190 	ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1191 
1192 	error = xrep_xattr_salvage_attributes(rx);
1193 	if (error)
1194 		goto out_scan;
1195 
1196 	/* Last chance to abort before we start committing fixes. */
1197 	if (xchk_should_terminate(sc, &error))
1198 		goto out_scan;
1199 
1200 	error = xrep_xattr_rebuild_tree(rx);
1201 	if (error)
1202 		goto out_scan;
1203 
1204 out_scan:
1205 	xrep_xattr_teardown(rx);
1206 	return error;
1207 }
1208