xref: /linux/fs/xfs/scrub/attr_repair.c (revision 429508c84d95811dd1300181dfe84743caff9a38)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_da_format.h"
20 #include "xfs_da_btree.h"
21 #include "xfs_dir2.h"
22 #include "xfs_attr.h"
23 #include "xfs_attr_leaf.h"
24 #include "xfs_attr_sf.h"
25 #include "xfs_attr_remote.h"
26 #include "xfs_bmap.h"
27 #include "xfs_bmap_util.h"
28 #include "xfs_exchmaps.h"
29 #include "xfs_exchrange.h"
30 #include "xfs_acl.h"
31 #include "xfs_parent.h"
32 #include "scrub/xfs_scrub.h"
33 #include "scrub/scrub.h"
34 #include "scrub/common.h"
35 #include "scrub/trace.h"
36 #include "scrub/repair.h"
37 #include "scrub/tempfile.h"
38 #include "scrub/tempexch.h"
39 #include "scrub/xfile.h"
40 #include "scrub/xfarray.h"
41 #include "scrub/xfblob.h"
42 #include "scrub/attr.h"
43 #include "scrub/reap.h"
44 #include "scrub/attr_repair.h"
45 
46 /*
47  * Extended Attribute Repair
48  * =========================
49  *
 * We repair extended attributes by reading the attr leaf blocks looking for
 * attribute entries that look salvageable (name passes verifiers, value can
52  * be retrieved, etc).  Each extended attribute worth salvaging is stashed in
53  * memory, and the stashed entries are periodically replayed into a temporary
54  * file to constrain memory use.  Batching the construction of the temporary
55  * extended attribute structure in this fashion reduces lock cycling of the
56  * file being repaired and the temporary file.
57  *
58  * When salvaging completes, the remaining stashed attributes are replayed to
59  * the temporary file.  An atomic file contents exchange is used to commit the
60  * new xattr blocks to the file being repaired.  This will disrupt attrmulti
61  * cursors.
62  */
63 
/*
 * In-memory record describing one salvaged extended attribute.  The name and
 * value bytes are kept in an xfblob; this record only remembers the cookies
 * needed to retrieve them, plus their lengths and the attr namespace flags.
 */
struct xrep_xattr_key {
	/* Cookie for retrieval of the xattr name. */
	xfblob_cookie		name_cookie;

	/* Cookie for retrieval of the xattr value. */
	xfblob_cookie		value_cookie;

	/*
	 * XFS_ATTR_* flags.  The salvage code masks the ondisk flags with
	 * XFS_ATTR_NSP_ONDISK_MASK before storing them here.
	 */
	int			flags;

	/* Length of the value and name. */
	uint32_t		valuelen;
	uint16_t		namelen;
};
78 
/*
 * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write
 * them to the temp file.  The flush decision compares this limit against the
 * combined byte usage of both stash structures.
 */
#define XREP_XATTR_MAX_STASH_BYTES	(PAGE_SIZE * 8)
84 
/*
 * Working state for one extended attribute repair.  It tracks the salvaged
 * attributes that have not yet been written to the temporary file, and the
 * parent pointer updates that were captured while the repair was running.
 */
struct xrep_xattr {
	/* Scrub context that owns this repair. */
	struct xfs_scrub	*sc;

	/* Information for exchanging attr fork mappings at the end. */
	struct xrep_tempexch	tx;

	/* xattr keys (array of struct xrep_xattr_key) */
	struct xfarray		*xattr_records;

	/* xattr names and values, looked up via the cookies in each key */
	struct xfblob		*xattr_blobs;

	/* Number of attributes that we are salvaging. */
	unsigned long long	attrs_found;

	/* Can we flush stashed attrs to the tempfile? */
	bool			can_flush;

	/* Did the live update fail, and hence the repair is now out of date? */
	bool			live_update_aborted;

	/* Lock protecting parent pointer updates */
	struct mutex		lock;

	/* Fixed-size array of xrep_xattr_pptr structures. */
	struct xfarray		*pptr_recs;

	/* Blobs containing parent pointer names. */
	struct xfblob		*pptr_names;

	/* Hook to capture parent pointer updates. */
	struct xfs_dir_hook	dhook;

	/* Scratch buffer for capturing parent pointers. */
	struct xfs_da_args	pptr_args;

	/* Name buffer */
	struct xfs_name		xname;
	char			namebuf[MAXNAMELEN];
};
125 
/*
 * Action codes for a stashed parent pointer update; one of these is stored in
 * the action field of struct xrep_xattr_pptr.
 */

/* Create a parent pointer in the tempfile. */
#define XREP_XATTR_PPTR_ADD	(1)

/* Remove a parent pointer from the tempfile. */
#define XREP_XATTR_PPTR_REMOVE	(2)
131 
/*
 * A stashed parent pointer update.  The name bytes are kept in a separate
 * blob store and are found again through name_cookie.
 */
struct xrep_xattr_pptr {
	/* Cookie for retrieval of the pptr name. */
	xfblob_cookie		name_cookie;

	/* Parent pointer record. */
	struct xfs_parent_rec	pptr_rec;

	/* Length of the pptr name. */
	uint8_t			namelen;

	/* XREP_XATTR_PPTR_{ADD,REMOVE} */
	uint8_t			action;
};
146 
147 /* Set up to recreate the extended attributes. */
148 int
149 xrep_setup_xattr(
150 	struct xfs_scrub	*sc)
151 {
152 	if (xfs_has_parent(sc->mp))
153 		xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
154 
155 	return xrep_tempfile_create(sc, S_IFREG);
156 }
157 
158 /*
159  * Decide if we want to salvage this attribute.  We don't bother with
160  * incomplete or oversized keys or values.  The @value parameter can be null
161  * for remote attrs.
162  */
163 STATIC int
164 xrep_xattr_want_salvage(
165 	struct xrep_xattr	*rx,
166 	unsigned int		attr_flags,
167 	const void		*name,
168 	int			namelen,
169 	const void		*value,
170 	int			valuelen)
171 {
172 	if (attr_flags & XFS_ATTR_INCOMPLETE)
173 		return false;
174 	if (namelen > XATTR_NAME_MAX || namelen <= 0)
175 		return false;
176 	if (!xfs_attr_namecheck(attr_flags, name, namelen))
177 		return false;
178 	if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
179 		return false;
180 	if (attr_flags & XFS_ATTR_PARENT)
181 		return xfs_parent_valuecheck(rx->sc->mp, value, valuelen);
182 
183 	return true;
184 }
185 
186 /* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
187 STATIC int
188 xrep_xattr_salvage_key(
189 	struct xrep_xattr	*rx,
190 	int			flags,
191 	unsigned char		*name,
192 	int			namelen,
193 	unsigned char		*value,
194 	int			valuelen)
195 {
196 	struct xrep_xattr_key	key = {
197 		.valuelen	= valuelen,
198 		.flags		= flags & XFS_ATTR_NSP_ONDISK_MASK,
199 	};
200 	unsigned int		i = 0;
201 	int			error = 0;
202 
203 	if (xchk_should_terminate(rx->sc, &error))
204 		return error;
205 
206 	/*
207 	 * Truncate the name to the first character that would trip namecheck.
208 	 * If we no longer have a name after that, ignore this attribute.
209 	 */
210 	if (flags & XFS_ATTR_PARENT) {
211 		key.namelen = namelen;
212 
213 		trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name,
214 				key.namelen, value, valuelen);
215 	} else {
216 		while (i < namelen && name[i] != 0)
217 			i++;
218 		if (i == 0)
219 			return 0;
220 		key.namelen = i;
221 
222 		trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name,
223 				key.namelen, valuelen);
224 	}
225 
226 	error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
227 			key.namelen);
228 	if (error)
229 		return error;
230 
231 	error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value,
232 			key.valuelen);
233 	if (error)
234 		return error;
235 
236 	error = xfarray_append(rx->xattr_records, &key);
237 	if (error)
238 		return error;
239 
240 	rx->attrs_found++;
241 	return 0;
242 }
243 
244 /*
245  * Record a shortform extended attribute key & value for later reinsertion
246  * into the inode.
247  */
248 STATIC int
249 xrep_xattr_salvage_sf_attr(
250 	struct xrep_xattr		*rx,
251 	struct xfs_attr_sf_hdr		*hdr,
252 	struct xfs_attr_sf_entry	*sfe)
253 {
254 	struct xfs_scrub		*sc = rx->sc;
255 	struct xchk_xattr_buf		*ab = sc->buf;
256 	unsigned char			*name = sfe->nameval;
257 	unsigned char			*value = &sfe->nameval[sfe->namelen];
258 
259 	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr,
260 			sfe->namelen))
261 		return 0;
262 
263 	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr,
264 			sfe->valuelen))
265 		return 0;
266 
267 	if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval,
268 			sfe->namelen, value, sfe->valuelen))
269 		return 0;
270 
271 	return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
272 			sfe->namelen, value, sfe->valuelen);
273 }
274 
275 /*
276  * Record a local format extended attribute key & value for later reinsertion
277  * into the inode.
278  */
279 STATIC int
280 xrep_xattr_salvage_local_attr(
281 	struct xrep_xattr		*rx,
282 	struct xfs_attr_leaf_entry	*ent,
283 	unsigned int			nameidx,
284 	const char			*buf_end,
285 	struct xfs_attr_leaf_name_local	*lentry)
286 {
287 	struct xchk_xattr_buf		*ab = rx->sc->buf;
288 	unsigned char			*value;
289 	unsigned int			valuelen;
290 	unsigned int			namesize;
291 
292 	/*
293 	 * Decode the leaf local entry format.  If something seems wrong, we
294 	 * junk the attribute.
295 	 */
296 	value = &lentry->nameval[lentry->namelen];
297 	valuelen = be16_to_cpu(lentry->valuelen);
298 	namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
299 	if ((char *)lentry + namesize > buf_end)
300 		return 0;
301 	if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval,
302 			lentry->namelen, value, valuelen))
303 		return 0;
304 	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
305 		return 0;
306 
307 	/* Try to save this attribute. */
308 	return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
309 			lentry->namelen, value, valuelen);
310 }
311 
312 /*
313  * Record a remote format extended attribute key & value for later reinsertion
314  * into the inode.
315  */
316 STATIC int
317 xrep_xattr_salvage_remote_attr(
318 	struct xrep_xattr		*rx,
319 	struct xfs_attr_leaf_entry	*ent,
320 	unsigned int			nameidx,
321 	const char			*buf_end,
322 	struct xfs_attr_leaf_name_remote *rentry,
323 	unsigned int			ent_idx,
324 	struct xfs_buf			*leaf_bp)
325 {
326 	struct xchk_xattr_buf		*ab = rx->sc->buf;
327 	struct xfs_da_args		args = {
328 		.trans			= rx->sc->tp,
329 		.dp			= rx->sc->ip,
330 		.index			= ent_idx,
331 		.geo			= rx->sc->mp->m_attr_geo,
332 		.owner			= rx->sc->ip->i_ino,
333 		.attr_filter		= ent->flags & XFS_ATTR_NSP_ONDISK_MASK,
334 		.namelen		= rentry->namelen,
335 		.name			= rentry->name,
336 		.value			= ab->value,
337 		.valuelen		= be32_to_cpu(rentry->valuelen),
338 	};
339 	unsigned int			namesize;
340 	int				error;
341 
342 	/*
343 	 * Decode the leaf remote entry format.  If something seems wrong, we
344 	 * junk the attribute.  Note that we should never find a zero-length
345 	 * remote attribute value.
346 	 */
347 	namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
348 	if ((char *)rentry + namesize > buf_end)
349 		return 0;
350 	if (args.valuelen == 0 ||
351 	    !xrep_xattr_want_salvage(rx, ent->flags, rentry->name,
352 			rentry->namelen, NULL, args.valuelen))
353 		return 0;
354 	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
355 		return 0;
356 
357 	/*
358 	 * Enlarge the buffer (if needed) to hold the value that we're trying
359 	 * to salvage from the old extended attribute data.
360 	 */
361 	error = xchk_setup_xattr_buf(rx->sc, args.valuelen);
362 	if (error == -ENOMEM)
363 		error = -EDEADLOCK;
364 	if (error)
365 		return error;
366 
367 	/* Look up the remote value and stash it for reconstruction. */
368 	error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
369 	if (error || args.rmtblkno == 0)
370 		goto err_free;
371 
372 	error = xfs_attr_rmtval_get(&args);
373 	if (error)
374 		goto err_free;
375 
376 	/* Try to save this attribute. */
377 	error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
378 			rentry->namelen, ab->value, args.valuelen);
379 err_free:
380 	/* remote value was garbage, junk it */
381 	if (error == -EFSBADCRC || error == -EFSCORRUPTED)
382 		error = 0;
383 	return error;
384 }
385 
386 /* Extract every xattr key that we can from this attr fork block. */
387 STATIC int
388 xrep_xattr_recover_leaf(
389 	struct xrep_xattr		*rx,
390 	struct xfs_buf			*bp)
391 {
392 	struct xfs_attr3_icleaf_hdr	leafhdr;
393 	struct xfs_scrub		*sc = rx->sc;
394 	struct xfs_mount		*mp = sc->mp;
395 	struct xfs_attr_leafblock	*leaf;
396 	struct xfs_attr_leaf_name_local	*lentry;
397 	struct xfs_attr_leaf_name_remote *rentry;
398 	struct xfs_attr_leaf_entry	*ent;
399 	struct xfs_attr_leaf_entry	*entries;
400 	struct xchk_xattr_buf		*ab = rx->sc->buf;
401 	char				*buf_end;
402 	size_t				off;
403 	unsigned int			nameidx;
404 	unsigned int			hdrsize;
405 	int				i;
406 	int				error = 0;
407 
408 	bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
409 
410 	/* Check the leaf header */
411 	leaf = bp->b_addr;
412 	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
413 	hdrsize = xfs_attr3_leaf_hdr_size(leaf);
414 	xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize);
415 	entries = xfs_attr3_leaf_entryp(leaf);
416 
417 	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
418 	for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
419 		if (xchk_should_terminate(sc, &error))
420 			return error;
421 
422 		/* Skip key if it conflicts with something else? */
423 		off = (char *)ent - (char *)leaf;
424 		if (!xchk_xattr_set_map(sc, ab->usedmap, off,
425 				sizeof(xfs_attr_leaf_entry_t)))
426 			continue;
427 
428 		/* Check the name information. */
429 		nameidx = be16_to_cpu(ent->nameidx);
430 		if (nameidx < leafhdr.firstused ||
431 		    nameidx >= mp->m_attr_geo->blksize)
432 			continue;
433 
434 		if (ent->flags & XFS_ATTR_LOCAL) {
435 			lentry = xfs_attr3_leaf_name_local(leaf, i);
436 			error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
437 					buf_end, lentry);
438 		} else {
439 			rentry = xfs_attr3_leaf_name_remote(leaf, i);
440 			error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
441 					buf_end, rentry, i, bp);
442 		}
443 		if (error)
444 			return error;
445 	}
446 
447 	return 0;
448 }
449 
450 /* Try to recover shortform attrs. */
451 STATIC int
452 xrep_xattr_recover_sf(
453 	struct xrep_xattr		*rx)
454 {
455 	struct xfs_scrub		*sc = rx->sc;
456 	struct xchk_xattr_buf		*ab = sc->buf;
457 	struct xfs_attr_sf_hdr		*hdr;
458 	struct xfs_attr_sf_entry	*sfe;
459 	struct xfs_attr_sf_entry	*next;
460 	struct xfs_ifork		*ifp;
461 	unsigned char			*end;
462 	int				i;
463 	int				error = 0;
464 
465 	ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK);
466 	hdr = ifp->if_data;
467 
468 	bitmap_zero(ab->usedmap, ifp->if_bytes);
469 	end = (unsigned char *)ifp->if_data + ifp->if_bytes;
470 	xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr));
471 
472 	sfe = xfs_attr_sf_firstentry(hdr);
473 	if ((unsigned char *)sfe > end)
474 		return 0;
475 
476 	for (i = 0; i < hdr->count; i++) {
477 		if (xchk_should_terminate(sc, &error))
478 			return error;
479 
480 		next = xfs_attr_sf_nextentry(sfe);
481 		if ((unsigned char *)next > end)
482 			break;
483 
484 		if (xchk_xattr_set_map(sc, ab->usedmap,
485 				(char *)sfe - (char *)hdr,
486 				sizeof(struct xfs_attr_sf_entry))) {
487 			/*
488 			 * No conflicts with the sf entry; let's save this
489 			 * attribute.
490 			 */
491 			error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe);
492 			if (error)
493 				return error;
494 		}
495 
496 		sfe = next;
497 	}
498 
499 	return 0;
500 }
501 
502 /*
503  * Try to return a buffer of xattr data for a given physical extent.
504  *
505  * Because the buffer cache get function complains if it finds a buffer
506  * matching the block number but not matching the length, we must be careful to
507  * look for incore buffers (up to the maximum length of a remote value) that
508  * could be hiding anywhere in the physical range.  If we find an incore
509  * buffer, we can pass that to the caller.  Optionally, read a single block and
510  * pass that back.
511  *
512  * Note the subtlety that remote attr value blocks for which there is no incore
513  * buffer will be passed to the callback one block at a time.  These buffers
514  * will not have any ops attached and must be staled to prevent aliasing with
515  * multiblock buffers once we drop the ILOCK.
516  */
517 STATIC int
518 xrep_xattr_find_buf(
519 	struct xfs_mount	*mp,
520 	xfs_fsblock_t		fsbno,
521 	xfs_extlen_t		max_len,
522 	bool			can_read,
523 	struct xfs_buf		**bpp)
524 {
525 	struct xrep_bufscan	scan = {
526 		.daddr		= XFS_FSB_TO_DADDR(mp, fsbno),
527 		.max_sectors	= xrep_bufscan_max_sectors(mp, max_len),
528 		.daddr_step	= XFS_FSB_TO_BB(mp, 1),
529 	};
530 	struct xfs_buf		*bp;
531 
532 	while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
533 		*bpp = bp;
534 		return 0;
535 	}
536 
537 	if (!can_read) {
538 		*bpp = NULL;
539 		return 0;
540 	}
541 
542 	return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1),
543 			XBF_TRYLOCK, bpp, NULL);
544 }
545 
546 /*
547  * Deal with a buffer that we found during our walk of the attr fork.
548  *
549  * Attribute leaf and node blocks are simple -- they're a single block, so we
550  * can walk them one at a time and we never have to worry about discontiguous
551  * multiblock buffers like we do for directories.
552  *
553  * Unfortunately, remote attr blocks add a lot of complexity here.  Each disk
554  * block is totally self contained, in the sense that the v5 header provides no
555  * indication that there could be more data in the next block.  The incore
556  * buffers can span multiple blocks, though they never cross extent records.
557  * However, they don't necessarily start or end on an extent record boundary.
558  * Therefore, we need a special buffer find function to walk the buffer cache
559  * for us.
560  *
561  * The caller must hold the ILOCK on the file being repaired.  We use
562  * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't
563  * own the block and don't want to hang the system on a potentially garbage
564  * buffer.
565  */
566 STATIC int
567 xrep_xattr_recover_block(
568 	struct xrep_xattr	*rx,
569 	xfs_dablk_t		dabno,
570 	xfs_fsblock_t		fsbno,
571 	xfs_extlen_t		max_len,
572 	xfs_extlen_t		*actual_len)
573 {
574 	struct xfs_da_blkinfo	*info;
575 	struct xfs_buf		*bp;
576 	int			error;
577 
578 	error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp);
579 	if (error)
580 		return error;
581 	info = bp->b_addr;
582 	*actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length);
583 
584 	trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
585 			be16_to_cpu(info->magic));
586 
587 	/*
588 	 * If the buffer has the right magic number for an attr leaf block and
589 	 * passes a structure check (we don't care about checksums), salvage
590 	 * as much as we can from the block. */
591 	if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
592 	    xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) &&
593 	    xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL)
594 		error = xrep_xattr_recover_leaf(rx, bp);
595 
596 	/*
597 	 * If the buffer didn't already have buffer ops set, it was read in by
598 	 * the _find_buf function and could very well be /part/ of a multiblock
599 	 * remote block.  Mark it stale so that it doesn't hang around in
600 	 * memory to cause problems.
601 	 */
602 	if (bp->b_ops == NULL)
603 		xfs_buf_stale(bp);
604 
605 	xfs_buf_relse(bp);
606 	return error;
607 }
608 
609 /* Insert one xattr key/value. */
610 STATIC int
611 xrep_xattr_insert_rec(
612 	struct xrep_xattr		*rx,
613 	const struct xrep_xattr_key	*key)
614 {
615 	struct xfs_da_args		args = {
616 		.dp			= rx->sc->tempip,
617 		.attr_filter		= key->flags,
618 		.namelen		= key->namelen,
619 		.valuelen		= key->valuelen,
620 		.owner			= rx->sc->ip->i_ino,
621 		.geo			= rx->sc->mp->m_attr_geo,
622 		.whichfork		= XFS_ATTR_FORK,
623 		.op_flags		= XFS_DA_OP_OKNOENT,
624 	};
625 	struct xchk_xattr_buf		*ab = rx->sc->buf;
626 	int				error;
627 
628 	/*
629 	 * Grab pointers to the scrub buffer so that we can use them to insert
630 	 * attrs into the temp file.
631 	 */
632 	args.name = ab->name;
633 	args.value = ab->value;
634 
635 	/*
636 	 * The attribute name is stored near the end of the in-core buffer,
637 	 * though we reserve one more byte to ensure null termination.
638 	 */
639 	ab->name[XATTR_NAME_MAX] = 0;
640 
641 	error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name,
642 			key->namelen);
643 	if (error)
644 		return error;
645 
646 	error = xfblob_free(rx->xattr_blobs, key->name_cookie);
647 	if (error)
648 		return error;
649 
650 	error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value,
651 			key->valuelen);
652 	if (error)
653 		return error;
654 
655 	error = xfblob_free(rx->xattr_blobs, key->value_cookie);
656 	if (error)
657 		return error;
658 
659 	ab->name[key->namelen] = 0;
660 
661 	if (key->flags & XFS_ATTR_PARENT) {
662 		trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags,
663 				ab->name, key->namelen, ab->value,
664 				key->valuelen);
665 		args.op_flags |= XFS_DA_OP_LOGGED;
666 	} else {
667 		trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags,
668 				ab->name, key->namelen, key->valuelen);
669 	}
670 
671 	/*
672 	 * xfs_attr_set creates and commits its own transaction.  If the attr
673 	 * already exists, we'll just drop it during the rebuild.
674 	 */
675 	xfs_attr_sethash(&args);
676 	error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE, false);
677 	if (error == -EEXIST)
678 		error = 0;
679 
680 	return error;
681 }
682 
683 /*
684  * Periodically flush salvaged attributes to the temporary file.  This is done
685  * to reduce the memory requirements of the xattr rebuild because files can
686  * contain millions of attributes.
687  */
688 STATIC int
689 xrep_xattr_flush_stashed(
690 	struct xrep_xattr	*rx)
691 {
692 	xfarray_idx_t		array_cur;
693 	int			error;
694 
695 	/*
696 	 * Entering this function, the scrub context has a reference to the
697 	 * inode being repaired, the temporary file, and a scrub transaction
698 	 * that we use during xattr salvaging to avoid livelocking if there
699 	 * are cycles in the xattr structures.  We hold ILOCK_EXCL on both
700 	 * the inode being repaired, though it is not ijoined to the scrub
701 	 * transaction.
702 	 *
703 	 * To constrain kernel memory use, we occasionally flush salvaged
704 	 * xattrs from the xfarray and xfblob structures into the temporary
705 	 * file in preparation for exchanging the xattr structures at the end.
706 	 * Updating the temporary file requires a transaction, so we commit the
707 	 * scrub transaction and drop the two ILOCKs so that xfs_attr_set can
708 	 * allocate whatever transaction it wants.
709 	 *
710 	 * We still hold IOLOCK_EXCL on the inode being repaired, which
711 	 * prevents anyone from modifying the damaged xattr data while we
712 	 * repair it.
713 	 */
714 	error = xrep_trans_commit(rx->sc);
715 	if (error)
716 		return error;
717 	xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
718 
719 	/*
720 	 * Take the IOLOCK of the temporary file while we modify xattrs.  This
721 	 * isn't strictly required because the temporary file is never revealed
722 	 * to userspace, but we follow the same locking rules.  We still hold
723 	 * sc->ip's IOLOCK.
724 	 */
725 	error = xrep_tempfile_iolock_polled(rx->sc);
726 	if (error)
727 		return error;
728 
729 	/* Add all the salvaged attrs to the temporary file. */
730 	foreach_xfarray_idx(rx->xattr_records, array_cur) {
731 		struct xrep_xattr_key	key;
732 
733 		error = xfarray_load(rx->xattr_records, array_cur, &key);
734 		if (error)
735 			return error;
736 
737 		error = xrep_xattr_insert_rec(rx, &key);
738 		if (error)
739 			return error;
740 	}
741 
742 	/* Empty out both arrays now that we've added the entries. */
743 	xfarray_truncate(rx->xattr_records);
744 	xfblob_truncate(rx->xattr_blobs);
745 
746 	xrep_tempfile_iounlock(rx->sc);
747 
748 	/* Recreate the salvage transaction and relock the inode. */
749 	error = xchk_trans_alloc(rx->sc, 0);
750 	if (error)
751 		return error;
752 	xchk_ilock(rx->sc, XFS_ILOCK_EXCL);
753 	return 0;
754 }
755 
756 /* Decide if we've stashed too much xattr data in memory. */
757 static inline bool
758 xrep_xattr_want_flush_stashed(
759 	struct xrep_xattr	*rx)
760 {
761 	unsigned long long	bytes;
762 
763 	if (!rx->can_flush)
764 		return false;
765 
766 	bytes = xfarray_bytes(rx->xattr_records) +
767 		xfblob_bytes(rx->xattr_blobs);
768 	return bytes > XREP_XATTR_MAX_STASH_BYTES;
769 }
770 
771 /*
772  * Did we observe rename changing parent pointer xattrs while we were flushing
773  * salvaged attrs?
774  */
775 static inline bool
776 xrep_xattr_saw_pptr_conflict(
777 	struct xrep_xattr	*rx)
778 {
779 	bool			ret;
780 
781 	ASSERT(rx->can_flush);
782 
783 	if (!xfs_has_parent(rx->sc->mp))
784 		return false;
785 
786 	xfs_assert_ilocked(rx->sc->ip, XFS_ILOCK_EXCL);
787 
788 	mutex_lock(&rx->lock);
789 	ret = xfarray_bytes(rx->pptr_recs) > 0;
790 	mutex_unlock(&rx->lock);
791 
792 	return ret;
793 }
794 
795 /*
796  * Reset the entire repair state back to initial conditions, now that we've
797  * detected a parent pointer update to the attr structure while we were
798  * flushing salvaged attrs.  See the locking notes in dir_repair.c for more
799  * information on why this is all necessary.
800  */
801 STATIC int
802 xrep_xattr_full_reset(
803 	struct xrep_xattr	*rx)
804 {
805 	struct xfs_scrub	*sc = rx->sc;
806 	struct xfs_attr_sf_hdr	*hdr;
807 	struct xfs_ifork	*ifp = &sc->tempip->i_af;
808 	int			error;
809 
810 	trace_xrep_xattr_full_reset(sc->ip, sc->tempip);
811 
812 	/* The temporary file's data fork had better not be in btree format. */
813 	if (sc->tempip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
814 		ASSERT(0);
815 		return -EIO;
816 	}
817 
818 	/*
819 	 * We begin in transaction context with sc->ip ILOCKed but not joined
820 	 * to the transaction.  To reset to the initial state, we must hold
821 	 * sc->ip's ILOCK to prevent rename from updating parent pointer
822 	 * information and the tempfile's ILOCK to clear its contents.
823 	 */
824 	xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
825 	xrep_tempfile_ilock_both(sc);
826 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
827 	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
828 
829 	/*
830 	 * Free all the blocks of the attr fork of the temp file, and reset
831 	 * it back to local format.
832 	 */
833 	if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
834 		error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
835 		if (error)
836 			return error;
837 
838 		ASSERT(ifp->if_bytes == 0);
839 		ifp->if_format = XFS_DINODE_FMT_LOCAL;
840 		xfs_idata_realloc(sc->tempip, sizeof(*hdr), XFS_ATTR_FORK);
841 	}
842 
843 	/* Reinitialize the attr fork to an empty shortform structure. */
844 	hdr = ifp->if_data;
845 	memset(hdr, 0, sizeof(*hdr));
846 	hdr->totsize = cpu_to_be16(sizeof(*hdr));
847 	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
848 
849 	/*
850 	 * Roll this transaction to commit our reset ondisk.  The tempfile
851 	 * should no longer be joined to the transaction, so we drop its ILOCK.
852 	 * This should leave us in transaction context with sc->ip ILOCKed but
853 	 * not joined to the transaction.
854 	 */
855 	error = xrep_roll_trans(sc);
856 	if (error)
857 		return error;
858 	xrep_tempfile_iunlock(sc);
859 
860 	/*
861 	 * Erase any accumulated parent pointer updates now that we've erased
862 	 * the tempfile's attr fork.  We're resetting the entire repair state
863 	 * back to where we were initially, except now we won't flush salvaged
864 	 * xattrs until the very end.
865 	 */
866 	mutex_lock(&rx->lock);
867 	xfarray_truncate(rx->pptr_recs);
868 	xfblob_truncate(rx->pptr_names);
869 	mutex_unlock(&rx->lock);
870 
871 	rx->can_flush = false;
872 	rx->attrs_found = 0;
873 
874 	ASSERT(xfarray_bytes(rx->xattr_records) == 0);
875 	ASSERT(xfblob_bytes(rx->xattr_blobs) == 0);
876 	return 0;
877 }
878 
879 /* Extract as many attribute keys and values as we can. */
880 STATIC int
881 xrep_xattr_recover(
882 	struct xrep_xattr	*rx)
883 {
884 	struct xfs_bmbt_irec	got;
885 	struct xfs_scrub	*sc = rx->sc;
886 	struct xfs_da_geometry	*geo = sc->mp->m_attr_geo;
887 	xfs_fileoff_t		offset;
888 	xfs_extlen_t		len;
889 	xfs_dablk_t		dabno;
890 	int			nmap;
891 	int			error;
892 
893 restart:
894 	/*
895 	 * Iterate each xattr leaf block in the attr fork to scan them for any
896 	 * attributes that we might salvage.
897 	 */
898 	for (offset = 0;
899 	     offset < XFS_MAX_FILEOFF;
900 	     offset = got.br_startoff + got.br_blockcount) {
901 		nmap = 1;
902 		error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
903 				&got, &nmap, XFS_BMAPI_ATTRFORK);
904 		if (error)
905 			return error;
906 		if (nmap != 1)
907 			return -EFSCORRUPTED;
908 		if (!xfs_bmap_is_written_extent(&got))
909 			continue;
910 
911 		for (dabno = round_up(got.br_startoff, geo->fsbcount);
912 		     dabno < got.br_startoff + got.br_blockcount;
913 		     dabno += len) {
914 			xfs_fileoff_t	curr_offset = dabno - got.br_startoff;
915 			xfs_extlen_t	maxlen;
916 
917 			if (xchk_should_terminate(rx->sc, &error))
918 				return error;
919 
920 			maxlen = min_t(xfs_filblks_t, INT_MAX,
921 					got.br_blockcount - curr_offset);
922 			error = xrep_xattr_recover_block(rx, dabno,
923 					curr_offset + got.br_startblock,
924 					maxlen, &len);
925 			if (error)
926 				return error;
927 
928 			if (xrep_xattr_want_flush_stashed(rx)) {
929 				error = xrep_xattr_flush_stashed(rx);
930 				if (error)
931 					return error;
932 
933 				if (xrep_xattr_saw_pptr_conflict(rx)) {
934 					error = xrep_xattr_full_reset(rx);
935 					if (error)
936 						return error;
937 
938 					goto restart;
939 				}
940 			}
941 		}
942 	}
943 
944 	return 0;
945 }
946 
947 /*
948  * Reset the extended attribute fork to a state where we can start re-adding
949  * the salvaged attributes.
950  */
951 STATIC int
952 xrep_xattr_fork_remove(
953 	struct xfs_scrub	*sc,
954 	struct xfs_inode	*ip)
955 {
956 	struct xfs_attr_sf_hdr	*hdr;
957 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
958 
959 	/*
960 	 * If the data fork is in btree format, we can't change di_forkoff
961 	 * because we could run afoul of the rule that the data fork isn't
962 	 * supposed to be in btree format if there's enough space in the fork
963 	 * that it could have used extents format.  Instead, reinitialize the
964 	 * attr fork to have a shortform structure with zero attributes.
965 	 */
966 	if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
967 		ifp->if_format = XFS_DINODE_FMT_LOCAL;
968 		hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
969 				XFS_ATTR_FORK);
970 		hdr->count = 0;
971 		hdr->totsize = cpu_to_be16(sizeof(*hdr));
972 		xfs_trans_log_inode(sc->tp, ip,
973 				XFS_ILOG_CORE | XFS_ILOG_ADATA);
974 		return 0;
975 	}
976 
977 	/* If we still have attr fork extents, something's wrong. */
978 	if (ifp->if_nextents != 0) {
979 		struct xfs_iext_cursor	icur;
980 		struct xfs_bmbt_irec	irec;
981 		unsigned int		i = 0;
982 
983 		xfs_emerg(sc->mp,
984 	"inode 0x%llx attr fork still has %llu attr extents, format %d?!",
985 				ip->i_ino, ifp->if_nextents, ifp->if_format);
986 		for_each_xfs_iext(ifp, &icur, &irec) {
987 			xfs_err(sc->mp,
988 	"[%u]: startoff %llu startblock %llu blockcount %llu state %u",
989 					i++, irec.br_startoff,
990 					irec.br_startblock, irec.br_blockcount,
991 					irec.br_state);
992 		}
993 		ASSERT(0);
994 		return -EFSCORRUPTED;
995 	}
996 
997 	xfs_attr_fork_remove(ip, sc->tp);
998 	return 0;
999 }
1000 
1001 /*
1002  * Free all the attribute fork blocks of the file being repaired and delete the
1003  * fork.  The caller must ILOCK the scrub file and join it to the transaction.
1004  * This function returns with the inode joined to a clean transaction.
1005  */
1006 int
1007 xrep_xattr_reset_fork(
1008 	struct xfs_scrub	*sc)
1009 {
1010 	int			error;
1011 
1012 	trace_xrep_xattr_reset_fork(sc->ip, sc->ip);
1013 
1014 	/* Unmap all the attr blocks. */
1015 	if (xfs_ifork_has_extents(&sc->ip->i_af)) {
1016 		error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK);
1017 		if (error)
1018 			return error;
1019 	}
1020 
1021 	error = xrep_xattr_fork_remove(sc, sc->ip);
1022 	if (error)
1023 		return error;
1024 
1025 	return xfs_trans_roll_inode(&sc->tp, sc->ip);
1026 }
1027 
1028 /*
1029  * Free all the attribute fork blocks of the temporary file and delete the attr
1030  * fork.  The caller must ILOCK the tempfile and join it to the transaction.
1031  * This function returns with the inode joined to a clean scrub transaction.
1032  */
1033 int
1034 xrep_xattr_reset_tempfile_fork(
1035 	struct xfs_scrub	*sc)
1036 {
1037 	int			error;
1038 
1039 	trace_xrep_xattr_reset_fork(sc->ip, sc->tempip);
1040 
1041 	/*
1042 	 * Wipe out the attr fork of the temp file so that regular inode
1043 	 * inactivation won't trip over the corrupt attr fork.
1044 	 */
1045 	if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
1046 		error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
1047 		if (error)
1048 			return error;
1049 	}
1050 
1051 	return xrep_xattr_fork_remove(sc, sc->tempip);
1052 }
1053 
1054 /*
1055  * Find all the extended attributes for this inode by scraping them out of the
1056  * attribute key blocks by hand, and flushing them into the temp file.
1057  * When we're done, free the staging memory before exchanging the xattr
1058  * structures to reduce memory usage.
1059  */
1060 STATIC int
1061 xrep_xattr_salvage_attributes(
1062 	struct xrep_xattr	*rx)
1063 {
1064 	struct xfs_inode	*ip = rx->sc->ip;
1065 	int			error;
1066 
1067 	/* Short format xattrs are easy! */
1068 	if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
1069 		error = xrep_xattr_recover_sf(rx);
1070 		if (error)
1071 			return error;
1072 
1073 		return xrep_xattr_flush_stashed(rx);
1074 	}
1075 
1076 	/*
1077 	 * For non-inline xattr structures, the salvage function scans the
1078 	 * buffer cache looking for potential attr leaf blocks.  The scan
1079 	 * requires the ability to lock any buffer found and runs independently
1080 	 * of any transaction <-> buffer item <-> buffer linkage.  Therefore,
1081 	 * roll the transaction to ensure there are no buffers joined.  We hold
1082 	 * the ILOCK independently of the transaction.
1083 	 */
1084 	error = xfs_trans_roll(&rx->sc->tp);
1085 	if (error)
1086 		return error;
1087 
1088 	error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
1089 	if (error)
1090 		return error;
1091 
1092 	error = xrep_xattr_recover(rx);
1093 	if (error)
1094 		return error;
1095 
1096 	return xrep_xattr_flush_stashed(rx);
1097 }
1098 
1099 /*
1100  * Add this stashed incore parent pointer to the temporary file.  The caller
1101  * must hold the tempdir's IOLOCK, must not hold any ILOCKs, and must not be in
1102  * transaction context.
1103  */
1104 STATIC int
1105 xrep_xattr_replay_pptr_update(
1106 	struct xrep_xattr		*rx,
1107 	const struct xfs_name		*xname,
1108 	struct xrep_xattr_pptr		*pptr)
1109 {
1110 	struct xfs_scrub		*sc = rx->sc;
1111 	int				error;
1112 
1113 	switch (pptr->action) {
1114 	case XREP_XATTR_PPTR_ADD:
1115 		/* Create parent pointer. */
1116 		trace_xrep_xattr_replay_parentadd(sc->tempip, xname,
1117 				&pptr->pptr_rec);
1118 
1119 		error = xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
1120 				&pptr->pptr_rec, &rx->pptr_args);
1121 		ASSERT(error != -EEXIST);
1122 		return error;
1123 	case XREP_XATTR_PPTR_REMOVE:
1124 		/* Remove parent pointer. */
1125 		trace_xrep_xattr_replay_parentremove(sc->tempip, xname,
1126 				&pptr->pptr_rec);
1127 
1128 		error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
1129 				&pptr->pptr_rec, &rx->pptr_args);
1130 		ASSERT(error != -ENOATTR);
1131 		return error;
1132 	}
1133 
1134 	ASSERT(0);
1135 	return -EIO;
1136 }
1137 
1138 /*
1139  * Flush stashed parent pointer updates that have been recorded by the scanner.
1140  * This is done to reduce the memory requirements of the xattr rebuild, since
1141  * files can have a lot of hardlinks and the fs can be busy.
1142  *
1143  * Caller must not hold transactions or ILOCKs.  Caller must hold the tempfile
1144  * IOLOCK.
1145  */
1146 STATIC int
1147 xrep_xattr_replay_pptr_updates(
1148 	struct xrep_xattr	*rx)
1149 {
1150 	xfarray_idx_t		array_cur;
1151 	int			error;
1152 
1153 	mutex_lock(&rx->lock);
1154 	foreach_xfarray_idx(rx->pptr_recs, array_cur) {
1155 		struct xrep_xattr_pptr	pptr;
1156 
1157 		error = xfarray_load(rx->pptr_recs, array_cur, &pptr);
1158 		if (error)
1159 			goto out_unlock;
1160 
1161 		error = xfblob_loadname(rx->pptr_names, pptr.name_cookie,
1162 				&rx->xname, pptr.namelen);
1163 		if (error)
1164 			goto out_unlock;
1165 		mutex_unlock(&rx->lock);
1166 
1167 		error = xrep_xattr_replay_pptr_update(rx, &rx->xname, &pptr);
1168 		if (error)
1169 			return error;
1170 
1171 		mutex_lock(&rx->lock);
1172 	}
1173 
1174 	/* Empty out both arrays now that we've added the entries. */
1175 	xfarray_truncate(rx->pptr_recs);
1176 	xfblob_truncate(rx->pptr_names);
1177 	mutex_unlock(&rx->lock);
1178 	return 0;
1179 out_unlock:
1180 	mutex_unlock(&rx->lock);
1181 	return error;
1182 }
1183 
1184 /*
1185  * Remember that we want to create a parent pointer in the tempfile.  These
1186  * stashed actions will be replayed later.
1187  */
1188 STATIC int
1189 xrep_xattr_stash_parentadd(
1190 	struct xrep_xattr	*rx,
1191 	const struct xfs_name	*name,
1192 	const struct xfs_inode	*dp)
1193 {
1194 	struct xrep_xattr_pptr	pptr = {
1195 		.action		= XREP_XATTR_PPTR_ADD,
1196 		.namelen	= name->len,
1197 	};
1198 	int			error;
1199 
1200 	trace_xrep_xattr_stash_parentadd(rx->sc->tempip, dp, name);
1201 
1202 	xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1203 	error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1204 	if (error)
1205 		return error;
1206 
1207 	return xfarray_append(rx->pptr_recs, &pptr);
1208 }
1209 
1210 /*
1211  * Remember that we want to remove a parent pointer from the tempfile.  These
1212  * stashed actions will be replayed later.
1213  */
1214 STATIC int
1215 xrep_xattr_stash_parentremove(
1216 	struct xrep_xattr	*rx,
1217 	const struct xfs_name	*name,
1218 	const struct xfs_inode	*dp)
1219 {
1220 	struct xrep_xattr_pptr	pptr = {
1221 		.action		= XREP_XATTR_PPTR_REMOVE,
1222 		.namelen	= name->len,
1223 	};
1224 	int			error;
1225 
1226 	trace_xrep_xattr_stash_parentremove(rx->sc->tempip, dp, name);
1227 
1228 	xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1229 	error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1230 	if (error)
1231 		return error;
1232 
1233 	return xfarray_append(rx->pptr_recs, &pptr);
1234 }
1235 
1236 /*
1237  * Capture dirent updates being made by other threads.  We will have to replay
1238  * the parent pointer updates before exchanging attr forks.
1239  */
1240 STATIC int
1241 xrep_xattr_live_dirent_update(
1242 	struct notifier_block		*nb,
1243 	unsigned long			action,
1244 	void				*data)
1245 {
1246 	struct xfs_dir_update_params	*p = data;
1247 	struct xrep_xattr		*rx;
1248 	struct xfs_scrub		*sc;
1249 	int				error;
1250 
1251 	rx = container_of(nb, struct xrep_xattr, dhook.dirent_hook.nb);
1252 	sc = rx->sc;
1253 
1254 	/*
1255 	 * This thread updated a dirent that points to the file that we're
1256 	 * repairing, so stash the update for replay against the temporary
1257 	 * file.
1258 	 */
1259 	if (p->ip->i_ino != sc->ip->i_ino)
1260 		return NOTIFY_DONE;
1261 
1262 	mutex_lock(&rx->lock);
1263 	if (p->delta > 0)
1264 		error = xrep_xattr_stash_parentadd(rx, p->name, p->dp);
1265 	else
1266 		error = xrep_xattr_stash_parentremove(rx, p->name, p->dp);
1267 	if (error)
1268 		rx->live_update_aborted = true;
1269 	mutex_unlock(&rx->lock);
1270 	return NOTIFY_DONE;
1271 }
1272 
1273 /*
1274  * Prepare both inodes' attribute forks for an exchange.  Promote the tempfile
1275  * from short format to leaf format, and if the file being repaired has a short
1276  * format attr fork, turn it into an empty extent list.
1277  */
1278 STATIC int
1279 xrep_xattr_swap_prep(
1280 	struct xfs_scrub	*sc,
1281 	bool			temp_local,
1282 	bool			ip_local)
1283 {
1284 	int			error;
1285 
1286 	/*
1287 	 * If the tempfile's attributes are in shortform format, convert that
1288 	 * to a single leaf extent so that we can use the atomic mapping
1289 	 * exchange.
1290 	 */
1291 	if (temp_local) {
1292 		struct xfs_da_args	args = {
1293 			.dp		= sc->tempip,
1294 			.geo		= sc->mp->m_attr_geo,
1295 			.whichfork	= XFS_ATTR_FORK,
1296 			.trans		= sc->tp,
1297 			.total		= 1,
1298 			.owner		= sc->ip->i_ino,
1299 		};
1300 
1301 		error = xfs_attr_shortform_to_leaf(&args);
1302 		if (error)
1303 			return error;
1304 
1305 		/*
1306 		 * Roll the deferred log items to get us back to a clean
1307 		 * transaction.
1308 		 */
1309 		error = xfs_defer_finish(&sc->tp);
1310 		if (error)
1311 			return error;
1312 	}
1313 
1314 	/*
1315 	 * If the file being repaired had a shortform attribute fork, convert
1316 	 * that to an empty extent list in preparation for the atomic mapping
1317 	 * exchange.
1318 	 */
1319 	if (ip_local) {
1320 		struct xfs_ifork	*ifp;
1321 
1322 		ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1323 
1324 		xfs_idestroy_fork(ifp);
1325 		ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1326 		ifp->if_nextents = 0;
1327 		ifp->if_bytes = 0;
1328 		ifp->if_data = NULL;
1329 		ifp->if_height = 0;
1330 
1331 		xfs_trans_log_inode(sc->tp, sc->ip,
1332 				XFS_ILOG_CORE | XFS_ILOG_ADATA);
1333 	}
1334 
1335 	return 0;
1336 }
1337 
1338 /* Exchange the temporary file's attribute fork with the one being repaired. */
1339 int
1340 xrep_xattr_swap(
1341 	struct xfs_scrub	*sc,
1342 	struct xrep_tempexch	*tx)
1343 {
1344 	bool			ip_local, temp_local;
1345 	int			error = 0;
1346 
1347 	ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1348 	temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1349 
1350 	/*
1351 	 * If the both files have a local format attr fork and the rebuilt
1352 	 * xattr data would fit in the repaired file's attr fork, just copy
1353 	 * the contents from the tempfile and declare ourselves done.
1354 	 */
1355 	if (ip_local && temp_local) {
1356 		int	forkoff;
1357 		int	newsize;
1358 
1359 		newsize = xfs_attr_sf_totsize(sc->tempip);
1360 		forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize);
1361 		if (forkoff > 0) {
1362 			sc->ip->i_forkoff = forkoff;
1363 			xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK);
1364 			return 0;
1365 		}
1366 	}
1367 
1368 	/* Otherwise, make sure both attr forks are in block-mapping mode. */
1369 	error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
1370 	if (error)
1371 		return error;
1372 
1373 	return xrep_tempexch_contents(sc, tx);
1374 }
1375 
1376 /*
1377  * Finish replaying stashed parent pointer updates, allocate a transaction for
1378  * exchanging extent mappings, and take the ILOCKs of both files before we
1379  * commit the new extended attribute structure.
1380  */
1381 STATIC int
1382 xrep_xattr_finalize_tempfile(
1383 	struct xrep_xattr	*rx)
1384 {
1385 	struct xfs_scrub	*sc = rx->sc;
1386 	int			error;
1387 
1388 	if (!xfs_has_parent(sc->mp))
1389 		return xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1390 
1391 	/*
1392 	 * Repair relies on the ILOCK to quiesce all possible xattr updates.
1393 	 * Replay all queued parent pointer updates into the tempfile before
1394 	 * exchanging the contents, even if that means dropping the ILOCKs and
1395 	 * the transaction.
1396 	 */
1397 	do {
1398 		error = xrep_xattr_replay_pptr_updates(rx);
1399 		if (error)
1400 			return error;
1401 
1402 		error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1403 		if (error)
1404 			return error;
1405 
1406 		if (xfarray_length(rx->pptr_recs) == 0)
1407 			break;
1408 
1409 		xchk_trans_cancel(sc);
1410 		xrep_tempfile_iunlock_both(sc);
1411 	} while (!xchk_should_terminate(sc, &error));
1412 	return error;
1413 }
1414 
1415 /*
1416  * Exchange the new extended attribute data (which we created in the tempfile)
1417  * with the file being repaired.
1418  */
1419 STATIC int
1420 xrep_xattr_rebuild_tree(
1421 	struct xrep_xattr	*rx)
1422 {
1423 	struct xfs_scrub	*sc = rx->sc;
1424 	int			error;
1425 
1426 	/*
1427 	 * If we didn't find any attributes to salvage, repair the file by
1428 	 * zapping its attr fork.
1429 	 */
1430 	if (rx->attrs_found == 0) {
1431 		xfs_trans_ijoin(sc->tp, sc->ip, 0);
1432 		error = xrep_xattr_reset_fork(sc);
1433 		if (error)
1434 			return error;
1435 
1436 		goto forget_acls;
1437 	}
1438 
1439 	trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip);
1440 
1441 	/*
1442 	 * Commit the repair transaction and drop the ILOCKs so that we can use
1443 	 * the atomic file content exchange helper functions to compute the
1444 	 * correct resource reservations.
1445 	 *
1446 	 * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr
1447 	 * modifications, but there's nothing to prevent userspace from reading
1448 	 * the attributes until we're ready for the exchange operation.  Reads
1449 	 * will return -EIO without shutting down the fs, so we're ok with
1450 	 * that.
1451 	 */
1452 	error = xrep_trans_commit(sc);
1453 	if (error)
1454 		return error;
1455 
1456 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
1457 
1458 	/*
1459 	 * Take the IOLOCK on the temporary file so that we can run xattr
1460 	 * operations with the same locks held as we would for a normal file.
1461 	 * We still hold sc->ip's IOLOCK.
1462 	 */
1463 	error = xrep_tempfile_iolock_polled(rx->sc);
1464 	if (error)
1465 		return error;
1466 
1467 	/*
1468 	 * Allocate transaction, lock inodes, and make sure that we've replayed
1469 	 * all the stashed parent pointer updates to the temp file.  After this
1470 	 * point, we're ready to exchange attr fork mappings.
1471 	 */
1472 	error = xrep_xattr_finalize_tempfile(rx);
1473 	if (error)
1474 		return error;
1475 
1476 	/*
1477 	 * Exchange the blocks mapped by the tempfile's attr fork with the file
1478 	 * being repaired.  The old attr blocks will then be attached to the
1479 	 * tempfile, so reap its attr fork.
1480 	 */
1481 	error = xrep_xattr_swap(sc, &rx->tx);
1482 	if (error)
1483 		return error;
1484 
1485 	error = xrep_xattr_reset_tempfile_fork(sc);
1486 	if (error)
1487 		return error;
1488 
1489 	/*
1490 	 * Roll to get a transaction without any inodes joined to it.  Then we
1491 	 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
1492 	 * the scrub target file.
1493 	 */
1494 	error = xfs_trans_roll(&sc->tp);
1495 	if (error)
1496 		return error;
1497 
1498 	xrep_tempfile_iunlock(sc);
1499 	xrep_tempfile_iounlock(sc);
1500 
1501 forget_acls:
1502 	/* Invalidate cached ACLs now that we've reloaded all the xattrs. */
1503 	xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE);
1504 	xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT);
1505 	return 0;
1506 }
1507 
1508 /* Tear down all the incore scan stuff we created. */
1509 STATIC void
1510 xrep_xattr_teardown(
1511 	struct xrep_xattr	*rx)
1512 {
1513 	if (xfs_has_parent(rx->sc->mp))
1514 		xfs_dir_hook_del(rx->sc->mp, &rx->dhook);
1515 	if (rx->pptr_names)
1516 		xfblob_destroy(rx->pptr_names);
1517 	if (rx->pptr_recs)
1518 		xfarray_destroy(rx->pptr_recs);
1519 	xfblob_destroy(rx->xattr_blobs);
1520 	xfarray_destroy(rx->xattr_records);
1521 	mutex_destroy(&rx->lock);
1522 	kfree(rx);
1523 }
1524 
1525 /* Set up the filesystem scan so we can regenerate extended attributes. */
1526 STATIC int
1527 xrep_xattr_setup_scan(
1528 	struct xfs_scrub	*sc,
1529 	struct xrep_xattr	**rxp)
1530 {
1531 	struct xrep_xattr	*rx;
1532 	char			*descr;
1533 	int			max_len;
1534 	int			error;
1535 
1536 	rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS);
1537 	if (!rx)
1538 		return -ENOMEM;
1539 	rx->sc = sc;
1540 	rx->can_flush = true;
1541 	rx->xname.name = rx->namebuf;
1542 
1543 	mutex_init(&rx->lock);
1544 
1545 	/*
1546 	 * Allocate enough memory to handle loading local attr values from the
1547 	 * xfblob data while flushing stashed attrs to the temporary file.
1548 	 * We only realloc the buffer when salvaging remote attr values.
1549 	 */
1550 	max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
1551 	error = xchk_setup_xattr_buf(rx->sc, max_len);
1552 	if (error == -ENOMEM)
1553 		error = -EDEADLOCK;
1554 	if (error)
1555 		goto out_rx;
1556 
1557 	/* Set up some staging for salvaged attribute keys and values */
1558 	descr = xchk_xfile_ino_descr(sc, "xattr keys");
1559 	error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
1560 			&rx->xattr_records);
1561 	kfree(descr);
1562 	if (error)
1563 		goto out_rx;
1564 
1565 	descr = xchk_xfile_ino_descr(sc, "xattr names");
1566 	error = xfblob_create(descr, &rx->xattr_blobs);
1567 	kfree(descr);
1568 	if (error)
1569 		goto out_keys;
1570 
1571 	if (xfs_has_parent(sc->mp)) {
1572 		ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
1573 
1574 		descr = xchk_xfile_ino_descr(sc,
1575 				"xattr retained parent pointer entries");
1576 		error = xfarray_create(descr, 0,
1577 				sizeof(struct xrep_xattr_pptr),
1578 				&rx->pptr_recs);
1579 		kfree(descr);
1580 		if (error)
1581 			goto out_values;
1582 
1583 		descr = xchk_xfile_ino_descr(sc,
1584 				"xattr retained parent pointer names");
1585 		error = xfblob_create(descr, &rx->pptr_names);
1586 		kfree(descr);
1587 		if (error)
1588 			goto out_pprecs;
1589 
1590 		xfs_dir_hook_setup(&rx->dhook, xrep_xattr_live_dirent_update);
1591 		error = xfs_dir_hook_add(sc->mp, &rx->dhook);
1592 		if (error)
1593 			goto out_ppnames;
1594 	}
1595 
1596 	*rxp = rx;
1597 	return 0;
1598 out_ppnames:
1599 	xfblob_destroy(rx->pptr_names);
1600 out_pprecs:
1601 	xfarray_destroy(rx->pptr_recs);
1602 out_values:
1603 	xfblob_destroy(rx->xattr_blobs);
1604 out_keys:
1605 	xfarray_destroy(rx->xattr_records);
1606 out_rx:
1607 	mutex_destroy(&rx->lock);
1608 	kfree(rx);
1609 	return error;
1610 }
1611 
1612 /*
1613  * Repair the extended attribute metadata.
1614  *
1615  * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
1616  * The buffer cache in XFS can't handle aliased multiblock buffers, so this
1617  * might misbehave if the attr fork is crosslinked with other filesystem
1618  * metadata.
1619  */
1620 int
1621 xrep_xattr(
1622 	struct xfs_scrub	*sc)
1623 {
1624 	struct xrep_xattr	*rx = NULL;
1625 	int			error;
1626 
1627 	if (!xfs_inode_hasattr(sc->ip))
1628 		return -ENOENT;
1629 
1630 	/* The rmapbt is required to reap the old attr fork. */
1631 	if (!xfs_has_rmapbt(sc->mp))
1632 		return -EOPNOTSUPP;
1633 	/* We require atomic file exchange range to rebuild anything. */
1634 	if (!xfs_has_exchange_range(sc->mp))
1635 		return -EOPNOTSUPP;
1636 
1637 	error = xrep_xattr_setup_scan(sc, &rx);
1638 	if (error)
1639 		return error;
1640 
1641 	ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1642 
1643 	error = xrep_xattr_salvage_attributes(rx);
1644 	if (error)
1645 		goto out_scan;
1646 
1647 	if (rx->live_update_aborted) {
1648 		error = -EIO;
1649 		goto out_scan;
1650 	}
1651 
1652 	/* Last chance to abort before we start committing fixes. */
1653 	if (xchk_should_terminate(sc, &error))
1654 		goto out_scan;
1655 
1656 	error = xrep_xattr_rebuild_tree(rx);
1657 	if (error)
1658 		goto out_scan;
1659 
1660 out_scan:
1661 	xrep_xattr_teardown(rx);
1662 	return error;
1663 }
1664