xref: /linux/fs/xfs/libxfs/xfs_attr.c (revision 5c35a02c545a7bbe77f3a1ae337d9e29beed079b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_mount.h"
14 #include "xfs_defer.h"
15 #include "xfs_da_format.h"
16 #include "xfs_da_btree.h"
17 #include "xfs_attr_sf.h"
18 #include "xfs_inode.h"
19 #include "xfs_alloc.h"
20 #include "xfs_trans.h"
21 #include "xfs_inode_item.h"
22 #include "xfs_bmap.h"
23 #include "xfs_bmap_util.h"
24 #include "xfs_bmap_btree.h"
25 #include "xfs_attr.h"
26 #include "xfs_attr_leaf.h"
27 #include "xfs_attr_remote.h"
28 #include "xfs_error.h"
29 #include "xfs_quota.h"
30 #include "xfs_trans_space.h"
31 #include "xfs_trace.h"
32 
33 /*
34  * xfs_attr.c
35  *
36  * Provide the external interfaces to manage attribute lists.
37  */
38 
39 /*========================================================================
40  * Function prototypes for the kernel.
41  *========================================================================*/
42 
43 /*
44  * Internal routine used when the attribute list fits inside the inode
 * (shortform).
45  */
46 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
47 
48 /*
49  * Internal routines used when the attribute list is a single leaf block.
50  */
51 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
52 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
53 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
54 
55 /*
56  * Internal routines used when the attribute list is more than one block
 * (node/Btree format).
 *
 * xfs_attr_fillstate() and xfs_attr_refillstate() are used by
 * xfs_attr_node_removename() to record the block numbers held in a
 * da_state and to get the buffers back after intervening calls have
 * released them.
57  */
58 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
59 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
60 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
61 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
62 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
63 
64 
65 STATIC int
66 xfs_attr_args_init(
67 	struct xfs_da_args	*args,
68 	struct xfs_inode	*dp,
69 	const unsigned char	*name,
70 	int			flags)
71 {
72 
73 	if (!name)
74 		return -EINVAL;
75 
76 	memset(args, 0, sizeof(*args));
77 	args->geo = dp->i_mount->m_attr_geo;
78 	args->whichfork = XFS_ATTR_FORK;
79 	args->dp = dp;
80 	args->flags = flags;
81 	args->name = name;
82 	args->namelen = strlen((const char *)name);
83 	if (args->namelen >= MAXNAMELEN)
84 		return -EFAULT;		/* match IRIX behaviour */
85 
86 	args->hashval = xfs_da_hashname(args->name, args->namelen);
87 	return 0;
88 }
89 
90 int
91 xfs_inode_hasattr(
92 	struct xfs_inode	*ip)
93 {
94 	if (!XFS_IFORK_Q(ip) ||
95 	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
96 	     ip->i_d.di_anextents == 0))
97 		return 0;
98 	return 1;
99 }
100 
101 /*========================================================================
102  * Overall external interface routines.
103  *========================================================================*/
104 
105 /* Retrieve an extended attribute and its value.  Must have ilock. */
106 int
107 xfs_attr_get_ilocked(
108 	struct xfs_inode	*ip,
109 	struct xfs_da_args	*args)
110 {
111 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
112 
113 	if (!xfs_inode_hasattr(ip))
114 		return -ENOATTR;
115 	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
116 		return xfs_attr_shortform_getvalue(args);
117 	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
118 		return xfs_attr_leaf_get(args);
119 	else
120 		return xfs_attr_node_get(args);
121 }
122 
123 /* Retrieve an extended attribute by name, and its value. */
124 int
125 xfs_attr_get(
126 	struct xfs_inode	*ip,
127 	const unsigned char	*name,
128 	unsigned char		*value,
129 	int			*valuelenp,
130 	int			flags)
131 {
132 	struct xfs_da_args	args;
133 	uint			lock_mode;
134 	int			error;
135 
136 	XFS_STATS_INC(ip->i_mount, xs_attr_get);
137 
138 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
139 		return -EIO;
140 
141 	error = xfs_attr_args_init(&args, ip, name, flags);
142 	if (error)
143 		return error;
144 
145 	args.value = value;
146 	args.valuelen = *valuelenp;
147 	/* Entirely possible to look up a name which doesn't exist */
148 	args.op_flags = XFS_DA_OP_OKNOENT;
149 
150 	lock_mode = xfs_ilock_attr_map_shared(ip);
151 	error = xfs_attr_get_ilocked(ip, &args);
152 	xfs_iunlock(ip, lock_mode);
153 
154 	*valuelenp = args.valuelen;
155 	return error == -EEXIST ? 0 : error;
156 }
157 
158 /*
159  * Calculate how many blocks we need for the new attribute,
160  */
161 STATIC int
162 xfs_attr_calc_size(
163 	struct xfs_da_args	*args,
164 	int			*local)
165 {
166 	struct xfs_mount	*mp = args->dp->i_mount;
167 	int			size;
168 	int			nblks;
169 
170 	/*
171 	 * Determine space new attribute will use, and if it would be
172 	 * "local" or "remote" (note: local != inline).
173 	 */
174 	size = xfs_attr_leaf_newentsize(args, local);
175 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
176 	if (*local) {
177 		if (size > (args->geo->blksize / 2)) {
178 			/* Double split possible */
179 			nblks *= 2;
180 		}
181 	} else {
182 		/*
183 		 * Out of line attribute, cannot double split, but
184 		 * make room for the attribute value itself.
185 		 */
186 		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
187 		nblks += dblocks;
188 		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
189 	}
190 
191 	return nblks;
192 }
193 
/*
 * Set the extended attribute @name on inode @dp to @value (@valuelen bytes).
 *
 * Sequence, as implemented below:
 *  1. Attach dquots and, if the inode has no attribute fork, add one
 *     (the inode must not be locked at that point).
 *  2. Allocate a permanent-log-reservation transaction whose size is
 *     derived from the block estimate of xfs_attr_calc_size(), take
 *     XFS_ILOCK_EXCL, reserve quota and join the inode.
 *  3. If the fork is local format, or extents format with no extents, try
 *     a shortform add.  Anything but -ENOSPC (success or failure) commits
 *     the transaction and returns from there.  On -ENOSPC the shortform
 *     list is converted to a leaf and the transaction is rolled with the
 *     leaf buffer held.
 *  4. Add the name via the leaf or node code, then commit.
 *
 * @flags: ATTR_ROOT allows reserved blocks to be used for the transaction
 * and forces the quota reservation; ATTR_KERNOTIME suppresses the
 * XFS_ICHGTIME_CHG timestamp update.  ATTR_CREATE/ATTR_REPLACE are acted
 * on by the add routines this function calls.
 *
 * Returns 0 on success or a negative errno.  The ILOCK is dropped before
 * returning on every path that took it.
 */
194 int
195 xfs_attr_set(
196 	struct xfs_inode	*dp,
197 	const unsigned char	*name,
198 	unsigned char		*value,
199 	int			valuelen,
200 	int			flags)
201 {
202 	struct xfs_mount	*mp = dp->i_mount;
203 	struct xfs_buf		*leaf_bp = NULL;
204 	struct xfs_da_args	args;
205 	struct xfs_defer_ops	dfops;
206 	struct xfs_trans_res	tres;
207 	xfs_fsblock_t		firstblock;
208 	int			rsvd = (flags & ATTR_ROOT) != 0;
209 	int			error, err2, local;
210 
211 	XFS_STATS_INC(mp, xs_attr_set);
212 
213 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
214 		return -EIO;
215 
216 	error = xfs_attr_args_init(&args, dp, name, flags);
217 	if (error)
218 		return error;
219 
220 	args.value = value;
221 	args.valuelen = valuelen;
222 	args.firstblock = &firstblock;
223 	args.dfops = &dfops;
224 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
225 	args.total = xfs_attr_calc_size(&args, &local);
226 
227 	error = xfs_qm_dqattach(dp);
228 	if (error)
229 		return error;
230 
231 	/*
232 	 * If the inode doesn't have an attribute fork, add one.
233 	 * (inode must not be locked when we call this routine)
234 	 */
235 	if (XFS_IFORK_Q(dp) == 0) {
236 		int sf_size = sizeof(xfs_attr_sf_hdr_t) +
237 			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
238 
239 		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
240 		if (error)
241 			return error;
242 	}
243 
	/*
	 * Build a per-call log reservation: the fixed tr_attrsetm part plus
	 * the tr_attrsetrt part scaled by the block estimate in args.total.
	 */
244 	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
245 			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
246 	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
247 	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
248 
249 	/*
250 	 * Root fork attributes can use reserved data blocks for this
251 	 * operation if necessary
252 	 */
253 	error = xfs_trans_alloc(mp, &tres, args.total, 0,
254 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
255 	if (error)
256 		return error;
257 
258 	xfs_ilock(dp, XFS_ILOCK_EXCL);
259 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
260 				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
261 				       XFS_QMOPT_RES_REGBLKS);
262 	if (error) {
263 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
264 		xfs_trans_cancel(args.trans);
265 		return error;
266 	}
267 
268 	xfs_trans_ijoin(args.trans, dp, 0);
269 
270 	/*
271 	 * If the attribute list is non-existent or a shortform list,
272 	 * upgrade it to a single-leaf-block attribute list.
273 	 */
274 	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
275 	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
276 	     dp->i_d.di_anextents == 0)) {
277 
278 		/*
279 		 * Build initial attribute list (if required).
280 		 */
281 		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
282 			xfs_attr_shortform_create(&args);
283 
284 		/*
285 		 * Try to add the attr to the attribute list in
286 		 * the inode.
287 		 */
288 		error = xfs_attr_shortform_addname(&args);
289 		if (error != -ENOSPC) {
290 			/*
291 			 * Commit the shortform mods, and we're done.
292 			 * NOTE: this is also the error path (EEXIST, etc).
293 			 */
294 			ASSERT(args.trans != NULL);
295 
296 			/*
297 			 * If this is a synchronous mount, make sure that
298 			 * the transaction goes to disk before returning
299 			 * to the user.
300 			 */
301 			if (mp->m_flags & XFS_MOUNT_WSYNC)
302 				xfs_trans_set_sync(args.trans);
303 
304 			if (!error && (flags & ATTR_KERNOTIME) == 0) {
305 				xfs_trans_ichgtime(args.trans, dp,
306 							XFS_ICHGTIME_CHG);
307 			}
308 			err2 = xfs_trans_commit(args.trans);
309 			xfs_iunlock(dp, XFS_ILOCK_EXCL);
310 
			/* The shortform add error wins over a commit error. */
311 			return error ? error : err2;
312 		}
313 
314 		/*
315 		 * It won't fit in the shortform, transform to a leaf block.
316 		 * GROT: another possible req'mt for a double-split btree op.
317 		 */
318 		xfs_defer_init(args.dfops, args.firstblock);
319 		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
320 		if (error)
321 			goto out_defer_cancel;
322 		/*
323 		 * Prevent the leaf buffer from being unlocked so that a
324 		 * concurrent AIL push cannot grab the half-baked leaf
325 		 * buffer and run into problems with the write verifier.
326 		 */
327 		xfs_trans_bhold(args.trans, leaf_bp);
328 		xfs_defer_bjoin(args.dfops, leaf_bp);
329 		xfs_defer_ijoin(args.dfops, dp);
330 		error = xfs_defer_finish(&args.trans, args.dfops);
331 		if (error)
332 			goto out_defer_cancel;
333 
334 		/*
335 		 * Commit the leaf transformation.  We'll need another (linked)
336 		 * transaction to add the new attribute to the leaf, which
337 		 * means that we have to hold & join the leaf buffer here too.
338 		 */
339 		error = xfs_trans_roll_inode(&args.trans, dp);
340 		if (error)
341 			goto out;
342 		xfs_trans_bjoin(args.trans, leaf_bp);
		/*
		 * The buffer is now joined to the new transaction; clear the
		 * local pointer so the out: path does not brelse it.
		 */
343 		leaf_bp = NULL;
344 	}
345 
346 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
347 		error = xfs_attr_leaf_addname(&args);
348 	else
349 		error = xfs_attr_node_addname(&args);
350 	if (error)
351 		goto out;
352 
353 	/*
354 	 * If this is a synchronous mount, make sure that the
355 	 * transaction goes to disk before returning to the user.
356 	 */
357 	if (mp->m_flags & XFS_MOUNT_WSYNC)
358 		xfs_trans_set_sync(args.trans);
359 
360 	if ((flags & ATTR_KERNOTIME) == 0)
361 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
362 
363 	/*
364 	 * Commit the last in the sequence of transactions.
365 	 */
366 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
367 	error = xfs_trans_commit(args.trans);
368 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
369 
370 	return error;
371 
	/*
	 * Error unwinding: cancel pending deferred ops (if that is where we
	 * failed), release a still-held leaf buffer, cancel the transaction
	 * if there is one, and drop the ILOCK.
	 */
372 out_defer_cancel:
373 	xfs_defer_cancel(&dfops);
374 out:
375 	if (leaf_bp)
376 		xfs_trans_brelse(args.trans, leaf_bp);
377 	if (args.trans)
378 		xfs_trans_cancel(args.trans);
379 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
380 	return error;
381 }
382 
383 /*
384  * Generic handler routine to remove a name from an attribute list.
385  * Transitions attribute list from Btree to shortform as necessary.
386  */
387 int
388 xfs_attr_remove(
389 	struct xfs_inode	*dp,
390 	const unsigned char	*name,
391 	int			flags)
392 {
393 	struct xfs_mount	*mp = dp->i_mount;
394 	struct xfs_da_args	args;
395 	struct xfs_defer_ops	dfops;
396 	xfs_fsblock_t		firstblock;
397 	int			error;
398 
399 	XFS_STATS_INC(mp, xs_attr_remove);
400 
401 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
402 		return -EIO;
403 
404 	error = xfs_attr_args_init(&args, dp, name, flags);
405 	if (error)
406 		return error;
407 
408 	args.firstblock = &firstblock;
409 	args.dfops = &dfops;
410 
411 	/*
412 	 * we have no control over the attribute names that userspace passes us
413 	 * to remove, so we have to allow the name lookup prior to attribute
414 	 * removal to fail.
415 	 */
416 	args.op_flags = XFS_DA_OP_OKNOENT;
417 
418 	error = xfs_qm_dqattach(dp);
419 	if (error)
420 		return error;
421 
422 	/*
423 	 * Root fork attributes can use reserved data blocks for this
424 	 * operation if necessary
425 	 */
426 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
427 			XFS_ATTRRM_SPACE_RES(mp), 0,
428 			(flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
429 			&args.trans);
430 	if (error)
431 		return error;
432 
433 	xfs_ilock(dp, XFS_ILOCK_EXCL);
434 	/*
435 	 * No need to make quota reservations here. We expect to release some
436 	 * blocks not allocate in the common case.
437 	 */
438 	xfs_trans_ijoin(args.trans, dp, 0);
439 
440 	if (!xfs_inode_hasattr(dp)) {
441 		error = -ENOATTR;
442 	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
443 		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
444 		error = xfs_attr_shortform_remove(&args);
445 	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
446 		error = xfs_attr_leaf_removename(&args);
447 	} else {
448 		error = xfs_attr_node_removename(&args);
449 	}
450 
451 	if (error)
452 		goto out;
453 
454 	/*
455 	 * If this is a synchronous mount, make sure that the
456 	 * transaction goes to disk before returning to the user.
457 	 */
458 	if (mp->m_flags & XFS_MOUNT_WSYNC)
459 		xfs_trans_set_sync(args.trans);
460 
461 	if ((flags & ATTR_KERNOTIME) == 0)
462 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
463 
464 	/*
465 	 * Commit the last in the sequence of transactions.
466 	 */
467 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
468 	error = xfs_trans_commit(args.trans);
469 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
470 
471 	return error;
472 
473 out:
474 	if (args.trans)
475 		xfs_trans_cancel(args.trans);
476 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
477 	return error;
478 }
479 
480 /*========================================================================
481  * External routines when attribute list is inside the inode
482  *========================================================================*/
483 
484 /*
485  * Add a name to the shortform attribute list structure
486  * This is the external routine.
487  */
488 STATIC int
489 xfs_attr_shortform_addname(xfs_da_args_t *args)
490 {
491 	int newsize, forkoff, retval;
492 
493 	trace_xfs_attr_sf_addname(args);
494 
495 	retval = xfs_attr_shortform_lookup(args);
496 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
497 		return retval;
498 	} else if (retval == -EEXIST) {
499 		if (args->flags & ATTR_CREATE)
500 			return retval;
501 		retval = xfs_attr_shortform_remove(args);
502 		if (retval)
503 			return retval;
504 		/*
505 		 * Since we have removed the old attr, clear ATTR_REPLACE so
506 		 * that the leaf format add routine won't trip over the attr
507 		 * not being around.
508 		 */
509 		args->flags &= ~ATTR_REPLACE;
510 	}
511 
512 	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
513 	    args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
514 		return -ENOSPC;
515 
516 	newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
517 	newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
518 
519 	forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
520 	if (!forkoff)
521 		return -ENOSPC;
522 
523 	xfs_attr_shortform_add(args, forkoff);
524 	return 0;
525 }
526 
527 
528 /*========================================================================
529  * External routines when attribute list is one block
530  *========================================================================*/
531 
532 /*
533  * Add a name to the leaf attribute list structure
534  *
535  * This leaf block cannot have a "remote" value, we only call this routine
536  * if bmap_one_block() says there is only one block (ie: no remote blks).
 *
 * NOTE(review): the statement above does not match the body, which does
 * handle an out-of-line value for the attribute being added (see the
 * args->rmtblkno > 0 checks and the xfs_attr_rmtval_set() call below).
 * Confirm what is intended and correct this comment.
 *
 * Flow: read leaf block 0 and look the name up.  A miss with ATTR_REPLACE
 * returns -ENOATTR and a hit with ATTR_CREATE returns -EEXIST.  A hit
 * otherwise turns the operation into an atomic rename: the location of
 * the old entry is saved in the *2 fields of @args and the old entry is
 * removed only after the new one is in place.  If the new entry does not
 * fit (-ENOSPC) the leaf is converted to node format and the work is
 * handed to xfs_attr_node_addname().
 *
 * The transaction in args->trans is rolled several times along the way.
 * Returns 0 or a negative errno.
537  */
538 STATIC int
539 xfs_attr_leaf_addname(xfs_da_args_t *args)
540 {
541 	xfs_inode_t *dp;
542 	struct xfs_buf *bp;
543 	int retval, error, forkoff;
544 
545 	trace_xfs_attr_leaf_addname(args);
546 
547 	/*
548 	 * Read the (only) block in the attribute list in.
549 	 */
550 	dp = args->dp;
551 	args->blkno = 0;
552 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
553 	if (error)
554 		return error;
555 
556 	/*
557 	 * Look up the given attribute in the leaf block.  Figure out if
558 	 * the given flags produce an error or call for an atomic rename.
559 	 */
560 	retval = xfs_attr3_leaf_lookup_int(bp, args);
561 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
562 		xfs_trans_brelse(args->trans, bp);
563 		return retval;
564 	} else if (retval == -EEXIST) {
565 		if (args->flags & ATTR_CREATE) {	/* pure create op */
566 			xfs_trans_brelse(args->trans, bp);
567 			return retval;
568 		}
569 
570 		trace_xfs_attr_leaf_replace(args);
571 
572 		/* save the attribute state for later removal*/
573 		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
574 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
575 		args->index2 = args->index;
576 		args->rmtblkno2 = args->rmtblkno;
577 		args->rmtblkcnt2 = args->rmtblkcnt;
578 		args->rmtvaluelen2 = args->rmtvaluelen;
579 
580 		/*
581 		 * clear the remote attr state now that it is saved so that the
582 		 * values reflect the state of the attribute we are about to
583 		 * add, not the attribute we just found and will remove later.
584 		 */
585 		args->rmtblkno = 0;
586 		args->rmtblkcnt = 0;
587 		args->rmtvaluelen = 0;
588 	}
589 
590 	/*
591 	 * Add the attribute to the leaf block, transitioning to a Btree
592 	 * if required.
593 	 */
594 	retval = xfs_attr3_leaf_add(bp, args);
595 	if (retval == -ENOSPC) {
596 		/*
597 		 * Promote the attribute list to the Btree format, then
598 		 * Commit that transaction so that the node_addname() call
599 		 * can manage its own transactions.
600 		 */
601 		xfs_defer_init(args->dfops, args->firstblock);
602 		error = xfs_attr3_leaf_to_node(args);
603 		if (error)
604 			goto out_defer_cancel;
605 		xfs_defer_ijoin(args->dfops, dp);
606 		error = xfs_defer_finish(&args->trans, args->dfops);
607 		if (error)
608 			goto out_defer_cancel;
609 
610 		/*
611 		 * Commit the current trans (including the inode) and start
612 		 * a new one.
613 		 */
614 		error = xfs_trans_roll_inode(&args->trans, dp);
615 		if (error)
616 			return error;
617 
618 		/*
619 		 * Fob the whole rest of the problem off on the Btree code.
620 		 */
621 		error = xfs_attr_node_addname(args);
622 		return error;
623 	}
624 
625 	/*
626 	 * Commit the transaction that added the attr name so that
627 	 * later routines can manage their own transactions.
628 	 */
629 	error = xfs_trans_roll_inode(&args->trans, dp);
630 	if (error)
631 		return error;
632 
633 	/*
634 	 * If there was an out-of-line value, allocate the blocks we
635 	 * identified for its storage and copy the value.  This is done
636 	 * after we create the attribute so that we don't overflow the
637 	 * maximum size of a transaction and/or hit a deadlock.
638 	 */
639 	if (args->rmtblkno > 0) {
640 		error = xfs_attr_rmtval_set(args);
641 		if (error)
642 			return error;
643 	}
644 
645 	/*
646 	 * If this is an atomic rename operation, we must "flip" the
647 	 * incomplete flags on the "new" and "old" attribute/value pairs
648 	 * so that one disappears and one appears atomically.  Then we
649 	 * must remove the "old" attribute/value pair.
650 	 */
651 	if (args->op_flags & XFS_DA_OP_RENAME) {
652 		/*
653 		 * In a separate transaction, set the incomplete flag on the
654 		 * "old" attr and clear the incomplete flag on the "new" attr.
655 		 */
656 		error = xfs_attr3_leaf_flipflags(args);
657 		if (error)
658 			return error;
659 
660 		/*
661 		 * Dismantle the "old" attribute/value pair by removing
662 		 * a "remote" value (if it exists).
663 		 */
		/* Point @args back at the old entry saved in the *2 fields. */
664 		args->index = args->index2;
665 		args->blkno = args->blkno2;
666 		args->rmtblkno = args->rmtblkno2;
667 		args->rmtblkcnt = args->rmtblkcnt2;
668 		args->rmtvaluelen = args->rmtvaluelen2;
669 		if (args->rmtblkno) {
670 			error = xfs_attr_rmtval_remove(args);
671 			if (error)
672 				return error;
673 		}
674 
675 		/*
676 		 * Read in the block containing the "old" attr, then
677 		 * remove the "old" attr from that block (neat, huh!)
678 		 */
679 		error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
680 					   -1, &bp);
681 		if (error)
682 			return error;
683 
684 		xfs_attr3_leaf_remove(bp, args);
685 
686 		/*
687 		 * If the result is small enough, shrink it all into the inode.
688 		 */
689 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
690 			xfs_defer_init(args->dfops, args->firstblock);
691 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
692 			/* bp is gone due to xfs_da_shrink_inode */
693 			if (error)
694 				goto out_defer_cancel;
695 			xfs_defer_ijoin(args->dfops, dp);
696 			error = xfs_defer_finish(&args->trans, args->dfops);
697 			if (error)
698 				goto out_defer_cancel;
699 		}
700 
701 		/*
702 		 * Commit the remove and start the next trans in series.
703 		 */
704 		error = xfs_trans_roll_inode(&args->trans, dp);
705 
706 	} else if (args->rmtblkno > 0) {
707 		/*
708 		 * Added a "remote" value, just clear the incomplete flag.
709 		 */
710 		error = xfs_attr3_leaf_clearflag(args);
711 	}
712 	return error;
	/* Failure while deferred ops were pending: cancel them first. */
713 out_defer_cancel:
714 	xfs_defer_cancel(args->dfops);
715 	return error;
716 }
717 
718 /*
719  * Remove a name from the leaf attribute list structure
720  *
721  * This leaf block cannot have a "remote" value, we only call this routine
722  * if bmap_one_block() says there is only one block (ie: no remote blks).
723  */
724 STATIC int
725 xfs_attr_leaf_removename(xfs_da_args_t *args)
726 {
727 	xfs_inode_t *dp;
728 	struct xfs_buf *bp;
729 	int error, forkoff;
730 
731 	trace_xfs_attr_leaf_removename(args);
732 
733 	/*
734 	 * Remove the attribute.
735 	 */
736 	dp = args->dp;
737 	args->blkno = 0;
738 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
739 	if (error)
740 		return error;
741 
742 	error = xfs_attr3_leaf_lookup_int(bp, args);
743 	if (error == -ENOATTR) {
744 		xfs_trans_brelse(args->trans, bp);
745 		return error;
746 	}
747 
748 	xfs_attr3_leaf_remove(bp, args);
749 
750 	/*
751 	 * If the result is small enough, shrink it all into the inode.
752 	 */
753 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
754 		xfs_defer_init(args->dfops, args->firstblock);
755 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
756 		/* bp is gone due to xfs_da_shrink_inode */
757 		if (error)
758 			goto out_defer_cancel;
759 		xfs_defer_ijoin(args->dfops, dp);
760 		error = xfs_defer_finish(&args->trans, args->dfops);
761 		if (error)
762 			goto out_defer_cancel;
763 	}
764 	return 0;
765 out_defer_cancel:
766 	xfs_defer_cancel(args->dfops);
767 	return error;
768 }
769 
770 /*
771  * Look up a name in a leaf attribute list structure.
772  *
773  * This leaf block cannot have a "remote" value, we only call this routine
774  * if bmap_one_block() says there is only one block (ie: no remote blks).
775  */
776 STATIC int
777 xfs_attr_leaf_get(xfs_da_args_t *args)
778 {
779 	struct xfs_buf *bp;
780 	int error;
781 
782 	trace_xfs_attr_leaf_get(args);
783 
784 	args->blkno = 0;
785 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
786 	if (error)
787 		return error;
788 
789 	error = xfs_attr3_leaf_lookup_int(bp, args);
790 	if (error != -EEXIST)  {
791 		xfs_trans_brelse(args->trans, bp);
792 		return error;
793 	}
794 	error = xfs_attr3_leaf_getvalue(bp, args);
795 	xfs_trans_brelse(args->trans, bp);
796 	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
797 		error = xfs_attr_rmtval_get(args);
798 	}
799 	return error;
800 }
801 
802 /*========================================================================
803  * External routines when attribute list size > geo->blksize
804  *========================================================================*/
805 
806 /*
807  * Add a name to a Btree-format attribute list.
808  *
809  * This will involve walking down the Btree, and may involve splitting
810  * leaf nodes and even splitting intermediate nodes up to and including
811  * the root node (a special case of an intermediate node).
812  *
813  * "Remote" attribute values confuse the issue and atomic rename operations
814  * add a whole extra layer of confusion on top of that.
 *
 * Return value: 0 on success or a negative errno.  Two lookup outcomes are
 * passed straight back through the out: label with error == 0 and the
 * result in retval: -ENOATTR when ATTR_REPLACE is set and the name is
 * missing, and -EEXIST when ATTR_CREATE is set and the name exists.
 *
 * State lifetime: the da_state is allocated at restart: (and again for the
 * rename-path lookup) and is freed and set to NULL before each transaction
 * roll; out: frees it only if it is still non-NULL.
815  */
816 STATIC int
817 xfs_attr_node_addname(xfs_da_args_t *args)
818 {
819 	xfs_da_state_t *state;
820 	xfs_da_state_blk_t *blk;
821 	xfs_inode_t *dp;
822 	xfs_mount_t *mp;
823 	int retval, error;
824 
825 	trace_xfs_attr_node_addname(args);
826 
827 	/*
828 	 * Fill in bucket of arguments/results/context to carry around.
829 	 */
830 	dp = args->dp;
831 	mp = dp->i_mount;
	/* Re-entered after a single leaf has been converted to node format. */
832 restart:
833 	state = xfs_da_state_alloc();
834 	state->args = args;
835 	state->mp = mp;
836 
837 	/*
838 	 * Search to see if name already exists, and get back a pointer
839 	 * to where it should go.
840 	 */
841 	error = xfs_da3_node_lookup_int(state, &retval);
842 	if (error)
843 		goto out;
844 	blk = &state->path.blk[ state->path.active-1 ];
845 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
846 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
847 		goto out;
848 	} else if (retval == -EEXIST) {
849 		if (args->flags & ATTR_CREATE)
850 			goto out;
851 
852 		trace_xfs_attr_node_replace(args);
853 
854 		/* save the attribute state for later removal*/
855 		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
856 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
857 		args->index2 = args->index;
858 		args->rmtblkno2 = args->rmtblkno;
859 		args->rmtblkcnt2 = args->rmtblkcnt;
860 		args->rmtvaluelen2 = args->rmtvaluelen;
861 
862 		/*
863 		 * clear the remote attr state now that it is saved so that the
864 		 * values reflect the state of the attribute we are about to
865 		 * add, not the attribute we just found and will remove later.
866 		 */
867 		args->rmtblkno = 0;
868 		args->rmtblkcnt = 0;
869 		args->rmtvaluelen = 0;
870 	}
871 
872 	retval = xfs_attr3_leaf_add(blk->bp, state->args);
873 	if (retval == -ENOSPC) {
874 		if (state->path.active == 1) {
875 			/*
876 			 * It's really a single leaf node, but it had
877 			 * out-of-line values so it looked like it *might*
878 			 * have been a b-tree.
879 			 */
880 			xfs_da_state_free(state);
881 			state = NULL;
882 			xfs_defer_init(args->dfops, args->firstblock);
883 			error = xfs_attr3_leaf_to_node(args);
884 			if (error)
885 				goto out_defer_cancel;
886 			xfs_defer_ijoin(args->dfops, dp);
887 			error = xfs_defer_finish(&args->trans, args->dfops);
888 			if (error)
889 				goto out_defer_cancel;
890 
891 			/*
892 			 * Commit the node conversion and start the next
893 			 * trans in the chain.
894 			 */
895 			error = xfs_trans_roll_inode(&args->trans, dp);
896 			if (error)
897 				goto out;
898 
899 			goto restart;
900 		}
901 
902 		/*
903 		 * Split as many Btree elements as required.
904 		 * This code tracks the new and old attr's location
905 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
906 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
907 		 */
908 		xfs_defer_init(args->dfops, args->firstblock);
909 		error = xfs_da3_split(state);
910 		if (error)
911 			goto out_defer_cancel;
912 		xfs_defer_ijoin(args->dfops, dp);
913 		error = xfs_defer_finish(&args->trans, args->dfops);
914 		if (error)
915 			goto out_defer_cancel;
916 	} else {
917 		/*
918 		 * Addition succeeded, update Btree hashvals.
919 		 */
920 		xfs_da3_fixhashpath(state, &state->path);
921 	}
922 
923 	/*
924 	 * Kill the state structure, we're done with it and need to
925 	 * allow the buffers to come back later.
926 	 */
927 	xfs_da_state_free(state);
928 	state = NULL;
929 
930 	/*
931 	 * Commit the leaf addition or btree split and start the next
932 	 * trans in the chain.
933 	 */
934 	error = xfs_trans_roll_inode(&args->trans, dp);
935 	if (error)
936 		goto out;
937 
938 	/*
939 	 * If there was an out-of-line value, allocate the blocks we
940 	 * identified for its storage and copy the value.  This is done
941 	 * after we create the attribute so that we don't overflow the
942 	 * maximum size of a transaction and/or hit a deadlock.
943 	 */
944 	if (args->rmtblkno > 0) {
945 		error = xfs_attr_rmtval_set(args);
		/*
		 * state is NULL here, so returning directly has the same
		 * effect as "goto out" (nothing to free).
		 */
946 		if (error)
947 			return error;
948 	}
949 
950 	/*
951 	 * If this is an atomic rename operation, we must "flip" the
952 	 * incomplete flags on the "new" and "old" attribute/value pairs
953 	 * so that one disappears and one appears atomically.  Then we
954 	 * must remove the "old" attribute/value pair.
955 	 */
956 	if (args->op_flags & XFS_DA_OP_RENAME) {
957 		/*
958 		 * In a separate transaction, set the incomplete flag on the
959 		 * "old" attr and clear the incomplete flag on the "new" attr.
960 		 */
961 		error = xfs_attr3_leaf_flipflags(args);
962 		if (error)
963 			goto out;
964 
965 		/*
966 		 * Dismantle the "old" attribute/value pair by removing
967 		 * a "remote" value (if it exists).
968 		 */
969 		args->index = args->index2;
970 		args->blkno = args->blkno2;
971 		args->rmtblkno = args->rmtblkno2;
972 		args->rmtblkcnt = args->rmtblkcnt2;
973 		args->rmtvaluelen = args->rmtvaluelen2;
974 		if (args->rmtblkno) {
975 			error = xfs_attr_rmtval_remove(args);
			/* state is still NULL here; see the note above. */
976 			if (error)
977 				return error;
978 		}
979 
980 		/*
981 		 * Re-find the "old" attribute entry after any split ops.
982 		 * The INCOMPLETE flag means that we will find the "old"
983 		 * attr, not the "new" one.
984 		 */
985 		args->flags |= XFS_ATTR_INCOMPLETE;
986 		state = xfs_da_state_alloc();
987 		state->args = args;
988 		state->mp = mp;
989 		state->inleaf = 0;
990 		error = xfs_da3_node_lookup_int(state, &retval);
991 		if (error)
992 			goto out;
993 
994 		/*
995 		 * Remove the name and update the hashvals in the tree.
996 		 */
997 		blk = &state->path.blk[ state->path.active-1 ];
998 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
		/*
		 * NOTE(review): the result of xfs_attr3_leaf_remove() is
		 * stored in error, but the collapse test below checks retval,
		 * which still holds the lookup result from just above.
		 * xfs_attr_node_removename() stores the leaf_remove result in
		 * retval and tests that instead.  Confirm which is intended.
		 */
999 		error = xfs_attr3_leaf_remove(blk->bp, args);
1000 		xfs_da3_fixhashpath(state, &state->path);
1001 
1002 		/*
1003 		 * Check to see if the tree needs to be collapsed.
1004 		 */
1005 		if (retval && (state->path.active > 1)) {
1006 			xfs_defer_init(args->dfops, args->firstblock);
1007 			error = xfs_da3_join(state);
1008 			if (error)
1009 				goto out_defer_cancel;
1010 			xfs_defer_ijoin(args->dfops, dp);
1011 			error = xfs_defer_finish(&args->trans, args->dfops);
1012 			if (error)
1013 				goto out_defer_cancel;
1014 		}
1015 
1016 		/*
1017 		 * Commit and start the next trans in the chain.
1018 		 */
1019 		error = xfs_trans_roll_inode(&args->trans, dp);
1020 		if (error)
1021 			goto out;
1022 
1023 	} else if (args->rmtblkno > 0) {
1024 		/*
1025 		 * Added a "remote" value, just clear the incomplete flag.
1026 		 */
1027 		error = xfs_attr3_leaf_clearflag(args);
1028 		if (error)
1029 			goto out;
1030 	}
	/* Success: make sure neither stale value leaks out below. */
1031 	retval = error = 0;
1032 
	/*
	 * Common exit: free the state if one is still allocated, then
	 * return error if set, otherwise retval.
	 */
1033 out:
1034 	if (state)
1035 		xfs_da_state_free(state);
1036 	if (error)
1037 		return error;
1038 	return retval;
1039 out_defer_cancel:
1040 	xfs_defer_cancel(args->dfops);
1041 	goto out;
1042 }
1043 
1044 /*
1045  * Remove a name from a B-tree attribute list.
1046  *
1047  * This will involve walking down the Btree, and may involve joining
1048  * leaf nodes and even joining intermediate nodes up to and including
1049  * the root node (a special case of an intermediate node).
1050  */
1051 STATIC int
1052 xfs_attr_node_removename(xfs_da_args_t *args)
1053 {
1054 	xfs_da_state_t *state;
1055 	xfs_da_state_blk_t *blk;
1056 	xfs_inode_t *dp;
1057 	struct xfs_buf *bp;
1058 	int retval, error, forkoff;
1059 
1060 	trace_xfs_attr_node_removename(args);
1061 
1062 	/*
1063 	 * Tie a string around our finger to remind us where we are.
1064 	 */
1065 	dp = args->dp;
1066 	state = xfs_da_state_alloc();
1067 	state->args = args;
1068 	state->mp = dp->i_mount;
1069 
1070 	/*
1071 	 * Search to see if name exists, and get back a pointer to it.
1072 	 */
1073 	error = xfs_da3_node_lookup_int(state, &retval);
1074 	if (error || (retval != -EEXIST)) {
1075 		if (error == 0)
1076 			error = retval;
1077 		goto out;
1078 	}
1079 
1080 	/*
1081 	 * If there is an out-of-line value, de-allocate the blocks.
1082 	 * This is done before we remove the attribute so that we don't
1083 	 * overflow the maximum size of a transaction and/or hit a deadlock.
1084 	 */
1085 	blk = &state->path.blk[ state->path.active-1 ];
1086 	ASSERT(blk->bp != NULL);
1087 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1088 	if (args->rmtblkno > 0) {
1089 		/*
1090 		 * Fill in disk block numbers in the state structure
1091 		 * so that we can get the buffers back after we commit
1092 		 * several transactions in the following calls.
1093 		 */
1094 		error = xfs_attr_fillstate(state);
1095 		if (error)
1096 			goto out;
1097 
1098 		/*
1099 		 * Mark the attribute as INCOMPLETE, then bunmapi() the
1100 		 * remote value.
1101 		 */
1102 		error = xfs_attr3_leaf_setflag(args);
1103 		if (error)
1104 			goto out;
1105 		error = xfs_attr_rmtval_remove(args);
1106 		if (error)
1107 			goto out;
1108 
1109 		/*
1110 		 * Refill the state structure with buffers, the prior calls
1111 		 * released our buffers.
1112 		 */
1113 		error = xfs_attr_refillstate(state);
1114 		if (error)
1115 			goto out;
1116 	}
1117 
1118 	/*
1119 	 * Remove the name and update the hashvals in the tree.
1120 	 */
1121 	blk = &state->path.blk[ state->path.active-1 ];
1122 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1123 	retval = xfs_attr3_leaf_remove(blk->bp, args);
1124 	xfs_da3_fixhashpath(state, &state->path);
1125 
1126 	/*
1127 	 * Check to see if the tree needs to be collapsed.
1128 	 */
1129 	if (retval && (state->path.active > 1)) {
1130 		xfs_defer_init(args->dfops, args->firstblock);
1131 		error = xfs_da3_join(state);
1132 		if (error)
1133 			goto out_defer_cancel;
1134 		xfs_defer_ijoin(args->dfops, dp);
1135 		error = xfs_defer_finish(&args->trans, args->dfops);
1136 		if (error)
1137 			goto out_defer_cancel;
1138 		/*
1139 		 * Commit the Btree join operation and start a new trans.
1140 		 */
1141 		error = xfs_trans_roll_inode(&args->trans, dp);
1142 		if (error)
1143 			goto out;
1144 	}
1145 
1146 	/*
1147 	 * If the result is small enough, push it all into the inode.
1148 	 */
1149 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1150 		/*
1151 		 * Have to get rid of the copy of this dabuf in the state.
1152 		 */
1153 		ASSERT(state->path.active == 1);
1154 		ASSERT(state->path.blk[0].bp);
1155 		state->path.blk[0].bp = NULL;
1156 
1157 		error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1158 		if (error)
1159 			goto out;
1160 
1161 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1162 			xfs_defer_init(args->dfops, args->firstblock);
1163 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1164 			/* bp is gone due to xfs_da_shrink_inode */
1165 			if (error)
1166 				goto out_defer_cancel;
1167 			xfs_defer_ijoin(args->dfops, dp);
1168 			error = xfs_defer_finish(&args->trans, args->dfops);
1169 			if (error)
1170 				goto out_defer_cancel;
1171 		} else
1172 			xfs_trans_brelse(args->trans, bp);
1173 	}
1174 	error = 0;
1175 
1176 out:
1177 	xfs_da_state_free(state);
1178 	return error;
1179 out_defer_cancel:
1180 	xfs_defer_cancel(args->dfops);
1181 	goto out;
1182 }
1183 
1184 /*
1185  * Fill in the disk block numbers in the state structure for the buffers
1186  * that are attached to the state structure.
1187  * This is done so that we can quickly reattach ourselves to those buffers
1188  * after some set of transaction commits have released these buffers.
1189  */
1190 STATIC int
1191 xfs_attr_fillstate(xfs_da_state_t *state)
1192 {
1193 	xfs_da_state_path_t *path;
1194 	xfs_da_state_blk_t *blk;
1195 	int level;
1196 
1197 	trace_xfs_attr_fillstate(state->args);
1198 
1199 	/*
1200 	 * Roll down the "path" in the state structure, storing the on-disk
1201 	 * block number for those buffers in the "path".
1202 	 */
1203 	path = &state->path;
1204 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1205 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1206 		if (blk->bp) {
1207 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1208 			blk->bp = NULL;
1209 		} else {
1210 			blk->disk_blkno = 0;
1211 		}
1212 	}
1213 
1214 	/*
1215 	 * Roll down the "altpath" in the state structure, storing the on-disk
1216 	 * block number for those buffers in the "altpath".
1217 	 */
1218 	path = &state->altpath;
1219 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1220 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1221 		if (blk->bp) {
1222 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1223 			blk->bp = NULL;
1224 		} else {
1225 			blk->disk_blkno = 0;
1226 		}
1227 	}
1228 
1229 	return 0;
1230 }
1231 
1232 /*
1233  * Reattach the buffers to the state structure based on the disk block
1234  * numbers stored in the state structure.
1235  * This is done after some set of transaction commits have released those
1236  * buffers from our grip.
1237  */
1238 STATIC int
1239 xfs_attr_refillstate(xfs_da_state_t *state)
1240 {
1241 	xfs_da_state_path_t *path;
1242 	xfs_da_state_blk_t *blk;
1243 	int level, error;
1244 
1245 	trace_xfs_attr_refillstate(state->args);
1246 
1247 	/*
1248 	 * Roll down the "path" in the state structure, storing the on-disk
1249 	 * block number for those buffers in the "path".
1250 	 */
1251 	path = &state->path;
1252 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1253 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1254 		if (blk->disk_blkno) {
1255 			error = xfs_da3_node_read(state->args->trans,
1256 						state->args->dp,
1257 						blk->blkno, blk->disk_blkno,
1258 						&blk->bp, XFS_ATTR_FORK);
1259 			if (error)
1260 				return error;
1261 		} else {
1262 			blk->bp = NULL;
1263 		}
1264 	}
1265 
1266 	/*
1267 	 * Roll down the "altpath" in the state structure, storing the on-disk
1268 	 * block number for those buffers in the "altpath".
1269 	 */
1270 	path = &state->altpath;
1271 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1272 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1273 		if (blk->disk_blkno) {
1274 			error = xfs_da3_node_read(state->args->trans,
1275 						state->args->dp,
1276 						blk->blkno, blk->disk_blkno,
1277 						&blk->bp, XFS_ATTR_FORK);
1278 			if (error)
1279 				return error;
1280 		} else {
1281 			blk->bp = NULL;
1282 		}
1283 	}
1284 
1285 	return 0;
1286 }
1287 
1288 /*
1289  * Look up a filename in a node attribute list.
1290  *
1291  * This routine gets called for any attribute fork that has more than one
1292  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1293  * "remote" values taking up more blocks.
1294  */
1295 STATIC int
1296 xfs_attr_node_get(xfs_da_args_t *args)
1297 {
1298 	xfs_da_state_t *state;
1299 	xfs_da_state_blk_t *blk;
1300 	int error, retval;
1301 	int i;
1302 
1303 	trace_xfs_attr_node_get(args);
1304 
1305 	state = xfs_da_state_alloc();
1306 	state->args = args;
1307 	state->mp = args->dp->i_mount;
1308 
1309 	/*
1310 	 * Search to see if name exists, and get back a pointer to it.
1311 	 */
1312 	error = xfs_da3_node_lookup_int(state, &retval);
1313 	if (error) {
1314 		retval = error;
1315 	} else if (retval == -EEXIST) {
1316 		blk = &state->path.blk[ state->path.active-1 ];
1317 		ASSERT(blk->bp != NULL);
1318 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1319 
1320 		/*
1321 		 * Get the value, local or "remote"
1322 		 */
1323 		retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1324 		if (!retval && (args->rmtblkno > 0)
1325 		    && !(args->flags & ATTR_KERNOVAL)) {
1326 			retval = xfs_attr_rmtval_get(args);
1327 		}
1328 	}
1329 
1330 	/*
1331 	 * If not in a transaction, we have to release all the buffers.
1332 	 */
1333 	for (i = 0; i < state->path.active; i++) {
1334 		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1335 		state->path.blk[i].bp = NULL;
1336 	}
1337 
1338 	xfs_da_state_free(state);
1339 	return retval;
1340 }
1341