xref: /linux/fs/ocfs2/xattr.c (revision 5138c936c2c82c9be8883921854bc6f7e1177d8c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * xattr.c
4  *
5  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
6  *
7  * CREDITS:
8  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
9  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
10  */
11 
12 #include <linux/capability.h>
13 #include <linux/fs.h>
14 #include <linux/types.h>
15 #include <linux/slab.h>
16 #include <linux/highmem.h>
17 #include <linux/pagemap.h>
18 #include <linux/uio.h>
19 #include <linux/sched.h>
20 #include <linux/splice.h>
21 #include <linux/mount.h>
22 #include <linux/writeback.h>
23 #include <linux/falloc.h>
24 #include <linux/sort.h>
25 #include <linux/init.h>
26 #include <linux/module.h>
27 #include <linux/string.h>
28 #include <linux/security.h>
29 
30 #include <cluster/masklog.h>
31 
32 #include "ocfs2.h"
33 #include "alloc.h"
34 #include "blockcheck.h"
35 #include "dlmglue.h"
36 #include "file.h"
37 #include "symlink.h"
38 #include "sysfile.h"
39 #include "inode.h"
40 #include "journal.h"
41 #include "ocfs2_fs.h"
42 #include "suballoc.h"
43 #include "uptodate.h"
44 #include "buffer_head_io.h"
45 #include "super.h"
46 #include "xattr.h"
47 #include "refcounttree.h"
48 #include "acl.h"
49 #include "ocfs2_trace.h"
50 
/*
 * Template for an xattr value root holding exactly one extent record.
 * TRAILING_OVERLAP() overlays the single ocfs2_extent_rec on the
 * flexible l_recs[] array at the tail of ocfs2_xattr_value_root.
 */
struct ocfs2_xattr_def_value_root {
	/* Must be last as it ends in a flexible-array member. */
	TRAILING_OVERLAP(struct ocfs2_xattr_value_root, xv, xr_list.l_recs,
		struct ocfs2_extent_rec		er;
	);
};
/* The overlay must put 'er' exactly where l_recs[0] would begin. */
static_assert(offsetof(struct ocfs2_xattr_def_value_root, xv.xr_list.l_recs) ==
	      offsetof(struct ocfs2_xattr_def_value_root, er));
59 
/*
 * In-memory handle on one on-disk xattr bucket: a run of contiguous
 * blocks whose first block starts with an ocfs2_xattr_header.
 */
struct ocfs2_xattr_bucket {
	/* The inode these xattrs are associated with */
	struct inode *bu_inode;

	/* The actual buffers that make up the bucket */
	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

	/* How many blocks make up one bucket for this filesystem */
	int bu_blocks;
};
70 
/*
 * State carried through an xattr set operation: the journal handle,
 * the metadata and data allocators, and the deferred-free list.
 */
struct ocfs2_xattr_set_ctxt {
	handle_t *handle;
	struct ocfs2_alloc_context *meta_ac;
	struct ocfs2_alloc_context *data_ac;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	/* Nonzero when a partial failure means the set must be aborted. */
	int set_abort;
};
78 
/* Size of a value root describing an externally stored value. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/* Values up to this many bytes are stored inline next to the name. */
#define OCFS2_XATTR_INLINE_SIZE	80
/* Bytes kept free between the entry list and the name/value area. */
#define OCFS2_XATTR_HEADER_GAP	4
/* Space usable for xattrs stored directly inside the inode body. */
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
/* Space usable for xattrs inside a separate xattr block of @ptr's sb. */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)

/* Default value root: an empty extent list with room for one record. */
static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};
93 
/* Handlers advertised to the VFS for this filesystem. */
const struct xattr_handler * const ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
	&ocfs2_xattr_trusted_handler,
	&ocfs2_xattr_security_handler,
	NULL
};

/* Maps an on-disk name index to the handler owning that namespace. */
static const struct xattr_handler * const ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
	[OCFS2_XATTR_INDEX_USER]		= &ocfs2_xattr_user_handler,
	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]	= &nop_posix_acl_access,
	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]	= &nop_posix_acl_default,
	[OCFS2_XATTR_INDEX_TRUSTED]		= &ocfs2_xattr_trusted_handler,
	[OCFS2_XATTR_INDEX_SECURITY]		= &ocfs2_xattr_security_handler,
};
108 
/* Describes the xattr a set/lookup operation is working with. */
struct ocfs2_xattr_info {
	int		xi_name_index;
	const char	*xi_name;
	int		xi_name_len;
	const void	*xi_value;
	size_t		xi_value_len;
};
116 
/* Records where an xattr lookup found (or would place) an entry. */
struct ocfs2_xattr_search {
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh points to the buffer head of the block holding the
	 * extended attributes.  When the attributes live inside the
	 * inode itself, xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	struct ocfs2_xattr_header *header;
	struct ocfs2_xattr_bucket *bucket;
	void *base;
	void *end;
	struct ocfs2_xattr_entry *here;
	int not_found;
};
131 
/* Operations on struct ocfs2_xa_loc */
struct ocfs2_xa_loc;
struct ocfs2_xa_loc_operations {
	/*
	 * Journal functions
	 */
	int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
				  int type);
	void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);

	/*
	 * Return a pointer to the appropriate buffer in loc->xl_storage
	 * at the given offset from loc->xl_header.
	 */
	void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);

	/* Can we reuse the existing entry for the new value? */
	int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
			     struct ocfs2_xattr_info *xi);

	/* How much space is needed for the new value? */
	int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
			       struct ocfs2_xattr_info *xi);

	/*
	 * Return the offset of the first name+value pair.  This is
	 * the start of our downward-filling free space.
	 */
	int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);

	/*
	 * Remove the name+value at this location.  Do whatever is
	 * appropriate with the remaining name+value pairs.
	 */
	void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);

	/* Fill xl_entry with a new entry */
	void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);

	/* Add name+value storage to an entry */
	void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);

	/*
	 * Initialize the value buf's access and bh fields for this entry.
	 * ocfs2_xa_fill_value_buf() will handle the xv pointer.
	 */
	void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
				   struct ocfs2_xattr_value_buf *vb);
};
181 
/*
 * Describes an xattr entry location.  This is a memory structure
 * tracking the on-disk structure; the xl_ops vtable abstracts over
 * the different kinds of backing storage (inode body, block, bucket).
 */
struct ocfs2_xa_loc {
	/* This xattr belongs to this inode */
	struct inode *xl_inode;

	/* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
	struct ocfs2_xattr_header *xl_header;

	/* Bytes from xl_header to the end of the storage */
	int xl_size;

	/*
	 * The ocfs2_xattr_entry this location describes.  If this is
	 * NULL, this location describes the on-disk structure where it
	 * would have been.
	 */
	struct ocfs2_xattr_entry *xl_entry;

	/*
	 * Internal housekeeping
	 */

	/* Buffer(s) containing this entry */
	void *xl_storage;

	/* Operations on the storage backing this location */
	const struct ocfs2_xa_loc_operations *xl_ops;
};
213 
214 /*
215  * Convenience functions to calculate how much space is needed for a
216  * given name+value pair
217  */
218 static int namevalue_size(int name_len, uint64_t value_len)
219 {
220 	if (value_len > OCFS2_XATTR_INLINE_SIZE)
221 		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
222 	else
223 		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
224 }
225 
226 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
227 {
228 	return namevalue_size(xi->xi_name_len, xi->xi_value_len);
229 }
230 
231 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
232 {
233 	u64 value_len = le64_to_cpu(xe->xe_value_size);
234 
235 	BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
236 	       ocfs2_xattr_is_local(xe));
237 	return namevalue_size(xe->xe_name_len, value_len);
238 }
239 
240 
241 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
242 					     struct ocfs2_xattr_header *xh,
243 					     int index,
244 					     int *block_off,
245 					     int *new_offset);
246 
247 static int ocfs2_xattr_block_find(struct inode *inode,
248 				  int name_index,
249 				  const char *name,
250 				  struct ocfs2_xattr_search *xs);
251 static int ocfs2_xattr_index_block_find(struct inode *inode,
252 					struct buffer_head *root_bh,
253 					int name_index,
254 					const char *name,
255 					struct ocfs2_xattr_search *xs);
256 
257 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
258 					struct buffer_head *blk_bh,
259 					char *buffer,
260 					size_t buffer_size);
261 
262 static int ocfs2_xattr_create_index_block(struct inode *inode,
263 					  struct ocfs2_xattr_search *xs,
264 					  struct ocfs2_xattr_set_ctxt *ctxt);
265 
266 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
267 					     struct ocfs2_xattr_info *xi,
268 					     struct ocfs2_xattr_search *xs,
269 					     struct ocfs2_xattr_set_ctxt *ctxt);
270 
271 typedef int (xattr_tree_rec_func)(struct inode *inode,
272 				  struct buffer_head *root_bh,
273 				  u64 blkno, u32 cpos, u32 len, void *para);
274 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
275 					   struct buffer_head *root_bh,
276 					   xattr_tree_rec_func *rec_func,
277 					   void *para);
278 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
279 					struct ocfs2_xattr_bucket *bucket,
280 					void *para);
281 static int ocfs2_rm_xattr_cluster(struct inode *inode,
282 				  struct buffer_head *root_bh,
283 				  u64 blkno,
284 				  u32 cpos,
285 				  u32 len,
286 				  void *para);
287 
288 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
289 				  u64 src_blk, u64 last_blk, u64 to_blk,
290 				  unsigned int start_bucket,
291 				  u32 *first_hash);
292 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
293 					struct ocfs2_dinode *di,
294 					struct ocfs2_xattr_info *xi,
295 					struct ocfs2_xattr_search *xis,
296 					struct ocfs2_xattr_search *xbs,
297 					struct ocfs2_refcount_tree **ref_tree,
298 					int *meta_need,
299 					int *credits);
300 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
301 					   struct ocfs2_xattr_bucket *bucket,
302 					   int offset,
303 					   struct ocfs2_xattr_value_root **xv,
304 					   struct buffer_head **bh);
305 
306 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
307 {
308 	return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
309 }
310 
311 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
312 {
313 	return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
314 }
315 
/* Bucket accessors: first block number, nth block's data, and the
 * ocfs2_xattr_header that starts the bucket's block 0. */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
319 
320 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
321 {
322 	struct ocfs2_xattr_bucket *bucket;
323 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
324 
325 	BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
326 
327 	bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
328 	if (bucket) {
329 		bucket->bu_inode = inode;
330 		bucket->bu_blocks = blks;
331 	}
332 
333 	return bucket;
334 }
335 
336 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
337 {
338 	int i;
339 
340 	for (i = 0; i < bucket->bu_blocks; i++) {
341 		brelse(bucket->bu_bhs[i]);
342 		bucket->bu_bhs[i] = NULL;
343 	}
344 }
345 
346 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
347 {
348 	if (bucket) {
349 		ocfs2_xattr_bucket_relse(bucket);
350 		bucket->bu_inode = NULL;
351 		kfree(bucket);
352 	}
353 }
354 
355 /*
356  * A bucket that has never been written to disk doesn't need to be
357  * read.  We just need the buffer_heads.  Don't call this for
358  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
359  * them fully.
360  */
361 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
362 				   u64 xb_blkno, int new)
363 {
364 	int i, rc = 0;
365 
366 	for (i = 0; i < bucket->bu_blocks; i++) {
367 		bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
368 					      xb_blkno + i);
369 		if (!bucket->bu_bhs[i]) {
370 			rc = -ENOMEM;
371 			mlog_errno(rc);
372 			break;
373 		}
374 
375 		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
376 					   bucket->bu_bhs[i])) {
377 			if (new)
378 				ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
379 							      bucket->bu_bhs[i]);
380 			else {
381 				set_buffer_uptodate(bucket->bu_bhs[i]);
382 				ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
383 							  bucket->bu_bhs[i]);
384 			}
385 		}
386 	}
387 
388 	if (rc)
389 		ocfs2_xattr_bucket_relse(bucket);
390 	return rc;
391 }
392 
393 /* Read the xattr bucket at xb_blkno */
394 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
395 				   u64 xb_blkno)
396 {
397 	int rc;
398 
399 	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
400 			       bucket->bu_blocks, bucket->bu_bhs, 0,
401 			       NULL);
402 	if (!rc) {
403 		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
404 		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
405 						 bucket->bu_bhs,
406 						 bucket->bu_blocks,
407 						 &bucket_xh(bucket)->xh_check);
408 		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
409 		if (rc)
410 			mlog_errno(rc);
411 	}
412 
413 	if (rc)
414 		ocfs2_xattr_bucket_relse(bucket);
415 	return rc;
416 }
417 
418 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
419 					     struct ocfs2_xattr_bucket *bucket,
420 					     int type)
421 {
422 	int i, rc = 0;
423 
424 	for (i = 0; i < bucket->bu_blocks; i++) {
425 		rc = ocfs2_journal_access(handle,
426 					  INODE_CACHE(bucket->bu_inode),
427 					  bucket->bu_bhs[i], type);
428 		if (rc) {
429 			mlog_errno(rc);
430 			break;
431 		}
432 	}
433 
434 	return rc;
435 }
436 
437 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
438 					     struct ocfs2_xattr_bucket *bucket)
439 {
440 	int i;
441 
442 	spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
443 	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
444 				   bucket->bu_bhs, bucket->bu_blocks,
445 				   &bucket_xh(bucket)->xh_check);
446 	spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
447 
448 	for (i = 0; i < bucket->bu_blocks; i++)
449 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
450 }
451 
452 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
453 					 struct ocfs2_xattr_bucket *src)
454 {
455 	int i;
456 	int blocksize = src->bu_inode->i_sb->s_blocksize;
457 
458 	BUG_ON(dest->bu_blocks != src->bu_blocks);
459 	BUG_ON(dest->bu_inode != src->bu_inode);
460 
461 	for (i = 0; i < src->bu_blocks; i++) {
462 		memcpy(bucket_block(dest, i), bucket_block(src, i),
463 		       blocksize);
464 	}
465 }
466 
467 static int ocfs2_validate_xattr_block(struct super_block *sb,
468 				      struct buffer_head *bh)
469 {
470 	int rc;
471 	struct ocfs2_xattr_block *xb =
472 		(struct ocfs2_xattr_block *)bh->b_data;
473 
474 	trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
475 
476 	BUG_ON(!buffer_uptodate(bh));
477 
478 	/*
479 	 * If the ecc fails, we return the error but otherwise
480 	 * leave the filesystem running.  We know any error is
481 	 * local to this block.
482 	 */
483 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
484 	if (rc)
485 		return rc;
486 
487 	/*
488 	 * Errors after here are fatal
489 	 */
490 
491 	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
492 		return ocfs2_error(sb,
493 				   "Extended attribute block #%llu has bad signature %.*s\n",
494 				   (unsigned long long)bh->b_blocknr, 7,
495 				   xb->xb_signature);
496 	}
497 
498 	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
499 		return ocfs2_error(sb,
500 				   "Extended attribute block #%llu has an invalid xb_blkno of %llu\n",
501 				   (unsigned long long)bh->b_blocknr,
502 				   (unsigned long long)le64_to_cpu(xb->xb_blkno));
503 	}
504 
505 	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
506 		return ocfs2_error(sb,
507 				   "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n",
508 				   (unsigned long long)bh->b_blocknr,
509 				   le32_to_cpu(xb->xb_fs_generation));
510 	}
511 
512 	return 0;
513 }
514 
515 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
516 				  struct buffer_head **bh)
517 {
518 	int rc;
519 	struct buffer_head *tmp = *bh;
520 
521 	rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
522 			      ocfs2_validate_xattr_block);
523 
524 	/* If ocfs2_read_block() got us a new bh, pass it up. */
525 	if (!rc && !*bh)
526 		*bh = tmp;
527 
528 	return rc;
529 }
530 
531 static inline const char *ocfs2_xattr_prefix(int name_index)
532 {
533 	const struct xattr_handler *handler = NULL;
534 
535 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
536 		handler = ocfs2_xattr_handler_map[name_index];
537 	return handler ? xattr_prefix(handler) : NULL;
538 }
539 
540 static u32 ocfs2_xattr_name_hash(struct inode *inode,
541 				 const char *name,
542 				 int name_len)
543 {
544 	/* Get hash value of uuid from super block */
545 	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
546 	int i;
547 
548 	/* hash extended attribute name */
549 	for (i = 0; i < name_len; i++) {
550 		hash = (hash << OCFS2_HASH_SHIFT) ^
551 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
552 		       *name++;
553 	}
554 
555 	return hash;
556 }
557 
558 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
559 {
560 	return namevalue_size(name_len, value_len) +
561 		sizeof(struct ocfs2_xattr_entry);
562 }
563 
564 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
565 {
566 	return namevalue_size_xi(xi) +
567 		sizeof(struct ocfs2_xattr_entry);
568 }
569 
570 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
571 {
572 	return namevalue_size_xe(xe) +
573 		sizeof(struct ocfs2_xattr_entry);
574 }
575 
/*
 * ocfs2_calc_security_init() - reserve resources needed to attach the
 * security xattr @si to a brand-new inode created in @dir.
 *
 * Adds journal credits to @xattr_credits and, for a value too big to
 * live inline, clusters to @want_clusters.  When a separate xattr
 * block will be needed, reserves one metadata block via @xattr_ac.
 * Returns 0 on success or a negative error from the reservation.
 */
int ocfs2_calc_security_init(struct inode *dir,
			     struct ocfs2_security_xattr_info *si,
			     int *want_clusters,
			     int *xattr_credits,
			     struct ocfs2_alloc_context **xattr_ac)
{
	int ret = 0;
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
						 si->value_len);

	/*
	 * The max space of security xattr taken inline is
	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
	 * So reserve one metadata block for it is ok.
	 */
	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
	    s_size > OCFS2_XATTR_FREE_IN_IBODY) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}
		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
	}

	/* reserve clusters for xattr value which will be set in B tree*/
	if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
		int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
							    si->value_len);

		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
							   new_clusters);
		*want_clusters += new_clusters;
	}
	return ret;
}
613 
/*
 * ocfs2_calc_xattr_init() - reserve resources for all xattrs a new
 * inode will be created with: the security xattr @si (if enabled) and
 * any default POSIX ACLs inherited from @dir.
 *
 * Accumulates metadata blocks into @want_meta, clusters into
 * @want_clusters and journal credits into @xattr_credits.  Returns 0
 * on success, or the negative error from reading @dir's default ACL.
 */
int ocfs2_calc_xattr_init(struct inode *dir,
			  struct buffer_head *dir_bh,
			  umode_t mode,
			  struct ocfs2_security_xattr_info *si,
			  int *want_clusters,
			  int *xattr_credits,
			  int *want_meta)
{
	int ret = 0;
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	int s_size = 0, a_size = 0, acl_len = 0, new_clusters;

	if (si->enable)
		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
						     si->value_len);

	/* Probe the size of @dir's default ACL; -ENODATA means none. */
	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
		down_read(&OCFS2_I(dir)->ip_xattr_sem);
		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
					"", NULL, 0);
		up_read(&OCFS2_I(dir)->ip_xattr_sem);
		if (acl_len > 0) {
			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
			/* Directories get both ACCESS and DEFAULT ACLs. */
			if (S_ISDIR(mode))
				a_size <<= 1;
		} else if (acl_len != 0 && acl_len != -ENODATA) {
			ret = acl_len;
			mlog_errno(ret);
			return ret;
		}
	}

	/* Nothing to store, nothing to reserve. */
	if (!(s_size + a_size))
		return ret;

	/*
	 * The max space of security xattr taken inline is
	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
	 * The max space of acl xattr taken inline is
	 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
	 * when blocksize = 512, may reserve one more cluster for
	 * xattr bucket, otherwise reserve one metadata block
	 * for them is ok.
	 * If this is a new directory with inline data,
	 * we choose to reserve the entire inline area for
	 * directory contents and force an external xattr block.
	 */
	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
		*want_meta = *want_meta + 1;
		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
	}

	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
		*want_clusters += 1;
		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
	}

	/*
	 * reserve credits and clusters for xattrs which has large value
	 * and have to be set outside
	 */
	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
							si->value_len);
		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
							   new_clusters);
		*want_clusters += new_clusters;
	}
	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
		/* for directory, it has DEFAULT and ACCESS two types of acls */
		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
							   new_clusters);
		*want_clusters += new_clusters;
	}

	return ret;
}
698 
/*
 * Grow an externally stored xattr value by @clusters_to_add clusters.
 *
 * The b-tree code may only add part of the request per pass (it
 * signals this via @why), so we loop: each iteration journals the
 * value buffer, adds what it can, and, when a restart is required,
 * extends the transaction with fresh credits before continuing.
 * Runs inside the transaction held in @ctxt; returns 0 or a negative
 * error (allocation errors other than -ENOSPC are logged).
 */
static int ocfs2_xattr_extend_allocation(struct inode *inode,
					 u32 clusters_to_add,
					 struct ocfs2_xattr_value_buf *vb,
					 struct ocfs2_xattr_set_ctxt *ctxt)
{
	int status = 0, credits;
	handle_t *handle = ctxt->handle;
	enum ocfs2_alloc_restarted why;
	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
	struct ocfs2_extent_tree et;

	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

	while (clusters_to_add) {
		trace_ocfs2_xattr_extend_allocation(clusters_to_add);

		status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
				       OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			break;
		}

		prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
		status = ocfs2_add_clusters_in_btree(handle,
						     &et,
						     &logical_start,
						     clusters_to_add,
						     0,
						     ctxt->data_ac,
						     ctxt->meta_ac,
						     &why);
		if ((status < 0) && (status != -EAGAIN)) {
			if (status != -ENOSPC)
				mlog_errno(status);
			break;
		}

		ocfs2_journal_dirty(handle, vb->vb_bh);

		/* Only count what this pass actually added. */
		clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
					 prev_clusters;

		if (why != RESTART_NONE && clusters_to_add) {
			/*
			 * We can only fail in case the alloc file doesn't give
			 * up enough clusters.
			 */
			BUG_ON(why == RESTART_META);

			credits = ocfs2_calc_extend_credits(inode->i_sb,
							    &vb->vb_xv->xr_list);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				status = -ENOMEM;
				mlog_errno(status);
				break;
			}
		}
	}

	return status;
}
762 
/*
 * Remove @len clusters at logical offset @cpos (physical @phys_cpos)
 * from an xattr value's extent tree: journal the value buffer, pull
 * the extent out of the tree, drop the cluster count, and then either
 * decrease the refcount (for refcounted/shared extents) or queue the
 * clusters for deferred free in @ctxt->dealloc.  Returns 0 or a
 * negative error.
 */
static int __ocfs2_remove_xattr_range(struct inode *inode,
				      struct ocfs2_xattr_value_buf *vb,
				      u32 cpos, u32 phys_cpos, u32 len,
				      unsigned int ext_flags,
				      struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	handle_t *handle = ctxt->handle;
	struct ocfs2_extent_tree et;

	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
			    OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
				  &ctxt->dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
	ocfs2_journal_dirty(handle, vb->vb_bh);

	/* Shared extents lose a reference; exclusive ones are freed. */
	if (ext_flags & OCFS2_EXT_REFCOUNTED)
		ret = ocfs2_decrease_refcount(inode, handle,
					ocfs2_blocks_to_clusters(inode->i_sb,
								 phys_blkno),
					len, ctxt->meta_ac, &ctxt->dealloc, 1);
	else
		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
						  phys_blkno, len);
	if (ret)
		mlog_errno(ret);

out:
	return ret;
}
807 
/*
 * Shrink an xattr value's allocation from @old_clusters down to
 * @new_clusters.  Walks the tail region one physically-contiguous
 * extent at a time, removing each via __ocfs2_remove_xattr_range()
 * and dropping the removed blocks from the metadata cache.  A no-op
 * when nothing needs truncating.  Returns 0 or a negative error.
 */
static int ocfs2_xattr_shrink_size(struct inode *inode,
				   u32 old_clusters,
				   u32 new_clusters,
				   struct ocfs2_xattr_value_buf *vb,
				   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0;
	unsigned int ext_flags;
	u32 trunc_len, cpos, phys_cpos, alloc_size;
	u64 block;

	if (old_clusters <= new_clusters)
		return 0;

	cpos = new_clusters;
	trunc_len = old_clusters - new_clusters;
	while (trunc_len) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
					       &alloc_size,
					       &vb->vb_xv->xr_list, &ext_flags);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Don't remove more than remains to be truncated. */
		if (alloc_size > trunc_len)
			alloc_size = trunc_len;

		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
						 phys_cpos, alloc_size,
						 ext_flags, ctxt);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
						       block, alloc_size);
		cpos += alloc_size;
		trunc_len -= alloc_size;
	}

out:
	return ret;
}
854 
855 static int ocfs2_xattr_value_truncate(struct inode *inode,
856 				      struct ocfs2_xattr_value_buf *vb,
857 				      int len,
858 				      struct ocfs2_xattr_set_ctxt *ctxt)
859 {
860 	int ret;
861 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
862 	u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
863 
864 	if (new_clusters == old_clusters)
865 		return 0;
866 
867 	if (new_clusters > old_clusters)
868 		ret = ocfs2_xattr_extend_allocation(inode,
869 						    new_clusters - old_clusters,
870 						    vb, ctxt);
871 	else
872 		ret = ocfs2_xattr_shrink_size(inode,
873 					      old_clusters, new_clusters,
874 					      vb, ctxt);
875 
876 	return ret;
877 }
878 
879 static int ocfs2_xattr_list_entry(struct super_block *sb,
880 				  char *buffer, size_t size,
881 				  size_t *result, int type,
882 				  const char *name, int name_len)
883 {
884 	char *p = buffer + *result;
885 	const char *prefix;
886 	int prefix_len;
887 	int total_len;
888 
889 	switch(type) {
890 	case OCFS2_XATTR_INDEX_USER:
891 		if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
892 			return 0;
893 		break;
894 
895 	case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
896 	case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
897 		if (!(sb->s_flags & SB_POSIXACL))
898 			return 0;
899 		break;
900 
901 	case OCFS2_XATTR_INDEX_TRUSTED:
902 		if (!capable(CAP_SYS_ADMIN))
903 			return 0;
904 		break;
905 	}
906 
907 	prefix = ocfs2_xattr_prefix(type);
908 	if (!prefix)
909 		return 0;
910 	prefix_len = strlen(prefix);
911 	total_len = prefix_len + name_len + 1;
912 	*result += total_len;
913 
914 	/* we are just looking for how big our buffer needs to be */
915 	if (!size)
916 		return 0;
917 
918 	if (*result > size)
919 		return -ERANGE;
920 
921 	memcpy(p, prefix, prefix_len);
922 	memcpy(p + prefix_len, name, name_len);
923 	p[prefix_len + name_len] = '\0';
924 
925 	return 0;
926 }
927 
928 static int ocfs2_xattr_list_entries(struct inode *inode,
929 				    struct ocfs2_xattr_header *header,
930 				    char *buffer, size_t buffer_size)
931 {
932 	size_t result = 0;
933 	int i, type, ret;
934 	const char *name;
935 
936 	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
937 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
938 		type = ocfs2_xattr_get_type(entry);
939 		name = (const char *)header +
940 			le16_to_cpu(entry->xe_name_offset);
941 
942 		ret = ocfs2_xattr_list_entry(inode->i_sb,
943 					     buffer, buffer_size,
944 					     &result, type, name,
945 					     entry->xe_name_len);
946 		if (ret)
947 			return ret;
948 	}
949 
950 	return result;
951 }
952 
953 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
954 					 struct ocfs2_dinode *di)
955 {
956 	struct ocfs2_xattr_header *xh;
957 	int i;
958 
959 	xh = (struct ocfs2_xattr_header *)
960 		 ((void *)di + inode->i_sb->s_blocksize -
961 		 le16_to_cpu(di->i_xattr_inline_size));
962 
963 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
964 		if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
965 			return 1;
966 
967 	return 0;
968 }
969 
/*
 * List the xattrs stored inside the inode body.  The on-disk inline
 * size and entry count are validated before use; a bad value is
 * reported via ocfs2_error() and returned as -EFSCORRUPTED.  Returns
 * the bytes used in @buffer (or needed, when @buffer_size is 0), 0
 * when the inode has no inline xattrs, or a negative error.
 */
static int ocfs2_xattr_ibody_list(struct inode *inode,
				  struct ocfs2_dinode *di,
				  char *buffer,
				  size_t buffer_size)
{
	struct ocfs2_xattr_header *header = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	int ret = 0;
	u16 xattr_count;
	size_t max_entries;
	u16 inline_size;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return ret;

	inline_size = le16_to_cpu(di->i_xattr_inline_size);

	/* Validate inline size is reasonable */
	if (inline_size > inode->i_sb->s_blocksize ||
	    inline_size < sizeof(struct ocfs2_xattr_header)) {
		ocfs2_error(inode->i_sb,
			    "Invalid xattr inline size %u in inode %llu\n",
			    inline_size,
			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
		return -EFSCORRUPTED;
	}

	/* The inline xattr area occupies the tail of the inode block. */
	header = (struct ocfs2_xattr_header *)
		 ((void *)di + inode->i_sb->s_blocksize - inline_size);

	xattr_count = le16_to_cpu(header->xh_count);
	max_entries = (inline_size - sizeof(struct ocfs2_xattr_header)) /
		       sizeof(struct ocfs2_xattr_entry);

	/* Reject an on-disk count that can't fit in the inline area. */
	if (xattr_count > max_entries) {
		ocfs2_error(inode->i_sb,
			    "xattr entry count %u exceeds maximum %zu in inode %llu\n",
			    xattr_count, max_entries,
			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
		return -EFSCORRUPTED;
	}

	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);

	return ret;
}
1016 
/*
 * List the xattrs stored in the inode's external xattr block, if any.
 *
 * Reads the block at di->i_xattr_loc.  An unindexed block has its
 * entries listed straight from the in-block header; an indexed block
 * is walked through its bucket tree instead.  Returns bytes consumed
 * in @buffer or a negative error code.
 */
static int ocfs2_xattr_block_list(struct inode *inode,
				  struct ocfs2_dinode *di,
				  char *buffer,
				  size_t buffer_size)
{
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	/* No external xattr block has been allocated for this inode. */
	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
		ret = ocfs2_xattr_list_entries(inode, header,
					       buffer, buffer_size);
	} else
		ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
						   buffer, buffer_size);

	brelse(blk_bh);

	return ret;
}
1049 
/*
 * VFS listxattr entry point.
 *
 * Lists the inline (in-inode) xattrs first, then those in the external
 * xattr block, with the inode cluster lock held shared and
 * ip_xattr_sem held for read.  @buffer may be NULL, in which case only
 * the required size is computed.
 *
 * Returns the total bytes used (or needed) in @buffer, or a negative
 * error code.
 */
ssize_t ocfs2_listxattr(struct dentry *dentry,
			char *buffer,
			size_t size)
{
	int ret = 0, i_ret = 0, b_ret = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));

	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
		return -EOPNOTSUPP;

	/* Fast path: the inode has no xattrs at all. */
	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return ret;

	ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_read(&oi->ip_xattr_sem);
	i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
	if (i_ret < 0)
		b_ret = 0;
	else {
		/* Continue filling after what the inline listing used. */
		if (buffer) {
			buffer += i_ret;
			size -= i_ret;
		}
		b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
					       buffer, size);
		if (b_ret < 0)
			i_ret = 0;
	}
	up_read(&oi->ip_xattr_sem);
	ocfs2_inode_unlock(d_inode(dentry), 0);

	brelse(di_bh);

	/* On error, exactly one of i_ret/b_ret carries the errno. */
	return i_ret + b_ret;
}
1094 
/*
 * Linear-search the entry array at xs->header for an attribute with the
 * given @name_index and @name, starting from xs->here.
 *
 * On success xs->here points at the matching entry and 0 is returned;
 * -ENODATA means no match (xs->here then points one past the last
 * entry looked at).  Both the entry array and the referenced name
 * bytes are bounds-checked against xs->end so that corrupted on-disk
 * counts/offsets yield -EFSCORRUPTED instead of an overrun.
 */
static int ocfs2_xattr_find_entry(struct inode *inode, int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *entry;
	size_t name_len;
	int i, name_offset, cmp = 1;

	if (name == NULL)
		return -EINVAL;

	name_len = strlen(name);
	entry = xs->here;
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		/* xh_count may lie on a corrupted fs; stay inside bounds. */
		if ((void *)entry >= xs->end) {
			ocfs2_error(inode->i_sb, "corrupted xattr entries");
			return -EFSCORRUPTED;
		}
		/* Cheap compares first: type, then length, then bytes. */
		cmp = name_index - ocfs2_xattr_get_type(entry);
		if (!cmp)
			cmp = name_len - entry->xe_name_len;
		if (!cmp) {
			name_offset = le16_to_cpu(entry->xe_name_offset);
			if ((xs->base + name_offset + name_len) > xs->end) {
				ocfs2_error(inode->i_sb,
					    "corrupted xattr entries");
				return -EFSCORRUPTED;
			}
			cmp = memcmp(name, (xs->base + name_offset), name_len);
		}
		if (cmp == 0)
			break;
		entry += 1;
	}
	xs->here = entry;

	return cmp ? -ENODATA : 0;
}
1133 
/*
 * Copy an xattr value that is stored outside the inode/block (in its
 * own extent tree rooted at @xv) into @buffer.
 *
 * Walks the value's extent list cluster range by cluster range, reads
 * each block and copies up to @len bytes total.  The final partial
 * block is copied only up to the remaining length.  Returns 0 on
 * success or a negative error code.
 */
static int ocfs2_xattr_get_value_outside(struct inode *inode,
					 struct ocfs2_xattr_value_root *xv,
					 void *buffer,
					 size_t len)
{
	u32 cpos, p_cluster, num_clusters, bpc, clusters;
	u64 blkno;
	int i, ret = 0;
	size_t cplen, blocksize;
	struct buffer_head *bh = NULL;
	struct ocfs2_extent_list *el;

	el = &xv->xr_list;
	clusters = le32_to_cpu(xv->xr_clusters);
	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	blocksize = inode->i_sb->s_blocksize;

	cpos = 0;
	while (cpos < clusters) {
		/* Map the logical cluster range to physical clusters. */
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, el, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		/* Copy ocfs2_xattr_value block by block */
		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
					       &bh, NULL);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			/* Last block may be partially used by the value. */
			cplen = len >= blocksize ? blocksize : len;
			memcpy(buffer, bh->b_data, cplen);
			len -= cplen;
			buffer += cplen;

			brelse(bh);
			bh = NULL;
			if (len == 0)
				break;
		}
		cpos += num_clusters;
	}
out:
	return ret;
}
1185 
/*
 * Look up an xattr stored inline in the inode body and, if @buffer is
 * non-NULL, copy its value out.
 *
 * The inline xattr region occupies the last i_xattr_inline_size bytes
 * of the inode block; xs is pointed at it before searching.  Returns
 * the value size on success, -ENODATA if absent, -ERANGE if @buffer
 * is too small, or another negative error code.
 */
static int ocfs2_xattr_ibody_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return -ENODATA;

	/* Inline xattrs live at the tail end of the inode block. */
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
	if (ret)
		return ret;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		if (size > buffer_size)
			return -ERANGE;
		if (ocfs2_xattr_is_local(xs->here)) {
			/* Value stored right after the padded name. */
			memcpy(buffer, (void *)xs->base +
			       le16_to_cpu(xs->here->xe_name_offset) +
			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
		} else {
			/* Value lives in an external extent tree. */
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + le16_to_cpu(
				 xs->here->xe_name_offset) +
				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				return ret;
			}
		}
	}

	return size;
}
1235 
/*
 * Look up an xattr stored in the inode's external xattr block (either
 * the unindexed in-block header or an indexed bucket) and, if @buffer
 * is non-NULL, copy its value out.
 *
 * Returns the value size on success, -ENODATA if absent, -ERANGE if
 * @buffer is too small, or another negative error code.  The search
 * bucket and block buffer_head are always released before returning.
 */
static int ocfs2_xattr_block_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = -ENODATA, name_offset, name_len, i;
	int block_off;

	xs->bucket = ocfs2_xattr_bucket_new(inode);
	if (!xs->bucket) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto cleanup;
	}

	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
	if (ret) {
		mlog_errno(ret);
		goto cleanup;
	}

	if (xs->not_found) {
		ret = -ENODATA;
		goto cleanup;
	}

	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		ret = -ERANGE;
		if (size > buffer_size)
			goto cleanup;

		name_offset = le16_to_cpu(xs->here->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
		/* Index of the entry within the header's entry array. */
		i = xs->here - xs->header->xh_entries;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * Indexed storage: translate the entry index into
			 * a (block, offset) pair within the bucket.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
								bucket_xh(xs->bucket),
								i,
								&block_off,
								&name_offset);
			if (ret) {
				mlog_errno(ret);
				goto cleanup;
			}
			xs->base = bucket_block(xs->bucket, block_off);
		}
		if (ocfs2_xattr_is_local(xs->here)) {
			/* Value stored right after the padded name. */
			memcpy(buffer, (void *)xs->base +
			       name_offset + name_len, size);
		} else {
			/* Value lives in an external extent tree. */
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + name_offset + name_len);
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				goto cleanup;
			}
		}
	}
	ret = size;
cleanup:
	ocfs2_xattr_bucket_free(xs->bucket);

	brelse(xs->xattr_bh);
	xs->xattr_bh = NULL;
	return ret;
}
1312 
/*
 * Look up an xattr without taking any cluster locks; the caller must
 * already hold the inode lock that protects @di_bh (and, per the
 * callers in this file, ip_xattr_sem for read).
 *
 * Tries the inline inode body first and falls back to the external
 * xattr block only when the inline search reports -ENODATA and a
 * block exists.  Returns the value size or a negative error code.
 */
int ocfs2_xattr_get_nolock(struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	int ret;
	struct ocfs2_dinode *di = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};
	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return -ENODATA;

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
				    buffer_size, &xis);
	if (ret == -ENODATA && di->i_xattr_loc)
		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
					    buffer_size, &xbs);

	return ret;
}
1347 
/* ocfs2_xattr_get()
 *
 * Copy an extended attribute into the buffer provided.
 * Buffer is NULL to compute the size of buffer required.
 *
 * Takes the inode cluster lock (via the lock tracker, so a lock
 * already held by this process is reused rather than re-acquired) and
 * ip_xattr_sem for read, then delegates to ocfs2_xattr_get_nolock().
 * Returns the value size or a negative error code.
 */
static int ocfs2_xattr_get(struct inode *inode,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	int ret, had_lock;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_lock_holder oh;

	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
	if (had_lock < 0) {
		mlog_errno(had_lock);
		return had_lock;
	}
	down_read(&OCFS2_I(inode)->ip_xattr_sem);
	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
				     name, buffer, buffer_size);
	up_read(&OCFS2_I(inode)->ip_xattr_sem);

	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);

	brelse(di_bh);

	return ret;
}
1379 
/*
 * Write @value into the externally-stored value tree described by
 * @vb->vb_xv, under the given journal @handle.
 *
 * The value tree must already be large enough (BUG_ON otherwise) and
 * must not share refcounted extents.  Each block is journaled before
 * being written, and the tail of the last block touched is zeroed so
 * no stale data remains past the value.  Returns 0 or a negative
 * error code.
 */
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   handle_t *handle,
					   struct ocfs2_xattr_value_buf *vb,
					   const void *value,
					   int value_len)
{
	int ret = 0, i, cp_len;
	u16 blocksize = inode->i_sb->s_blocksize;
	u32 p_cluster, num_clusters;
	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
	u64 blkno;
	struct buffer_head *bh = NULL;
	unsigned int ext_flags;
	struct ocfs2_xattr_value_root *xv = vb->vb_xv;

	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));

	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, &xv->xr_list,
					       &ext_flags);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Writers must have CoW'ed any shared extents by now. */
		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
					       &bh, NULL);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_journal_access(handle,
						   INODE_CACHE(inode),
						   bh,
						   OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret < 0) {
				mlog_errno(ret);
				goto out;
			}

			cp_len = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, cp_len);
			value_len -= cp_len;
			value += cp_len;
			/* Zero the remainder of a partially-used block. */
			if (cp_len < blocksize)
				memset(bh->b_data + cp_len, 0,
				       blocksize - cp_len);

			ocfs2_journal_dirty(handle, bh);
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: do we need to empty all the following
			 * blocks in this cluster?
			 */
			if (!value_len)
				break;
		}
		cpos += num_clusters;
	}
out:
	brelse(bh);

	return ret;
}
1454 
1455 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1456 				       int num_entries)
1457 {
1458 	int free_space;
1459 
1460 	if (!needed_space)
1461 		return 0;
1462 
1463 	free_space = free_start -
1464 		sizeof(struct ocfs2_xattr_header) -
1465 		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
1466 		OCFS2_XATTR_HEADER_GAP;
1467 	if (free_space < 0)
1468 		return -EIO;
1469 	if (free_space < needed_space)
1470 		return -ENOSPC;
1471 
1472 	return 0;
1473 }
1474 
/* Dispatch journal access declaration to the loc's storage backend. */
static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
				   int type)
{
	return loc->xl_ops->xlo_journal_access(handle, loc, type);
}
1480 
/* Dispatch journal dirtying to the loc's storage backend. */
static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
{
	loc->xl_ops->xlo_journal_dirty(handle, loc);
}
1485 
/*
 * Give a pointer into the storage for the given offset.  The offset
 * must lie within the storage (BUG_ON otherwise); the backend maps it
 * to the right block/buffer.
 */
static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
{
	BUG_ON(offset >= loc->xl_size);
	return loc->xl_ops->xlo_offset_pointer(loc, offset);
}
1492 
1493 /*
1494  * Wipe the name+value pair and allow the storage to reclaim it.  This
1495  * must be followed by either removal of the entry or a call to
1496  * ocfs2_xa_add_namevalue().
1497  */
1498 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1499 {
1500 	loc->xl_ops->xlo_wipe_namevalue(loc);
1501 }
1502 
1503 /*
1504  * Find lowest offset to a name+value pair.  This is the start of our
1505  * downward-growing free space.
1506  */
1507 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1508 {
1509 	return loc->xl_ops->xlo_get_free_start(loc);
1510 }
1511 
/* Can we reuse loc->xl_entry's name+value storage for xi?  Backend-
 * specific: block storage requires exact size, buckets allow >=. */
static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_info *xi)
{
	return loc->xl_ops->xlo_can_reuse(loc, xi);
}
1518 
/* Check whether there is enough free space to set the new value;
 * returns 0, -ENOSPC, or -EIO via the backend. */
static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi)
{
	return loc->xl_ops->xlo_check_space(loc, xi);
}
1525 
/*
 * Insert a fresh entry slot (backend decides where) and initialize its
 * hash.  The name+value pair is added later via
 * ocfs2_xa_add_namevalue().
 */
static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	loc->xl_ops->xlo_add_entry(loc, name_hash);
	loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
	/*
	 * We can't leave the new entry's xe_name_offset at zero or
	 * add_namevalue() will go nuts.  We set it to the size of our
	 * storage so that it can never be less than any other entry.
	 */
	loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
}
1537 
/*
 * Allocate storage for the name+value pair of @xi, fill in the entry's
 * size/type/local flags, then zero the pair region and copy in the
 * name.  The value itself is written by the caller afterwards.
 */
static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
				   struct ocfs2_xattr_info *xi)
{
	int size = namevalue_size_xi(xi);
	int nameval_offset;
	char *nameval_buf;

	loc->xl_ops->xlo_add_namevalue(loc, size);
	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	loc->xl_entry->xe_name_len = xi->xi_name_len;
	ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
	/* Small values are stored inline next to the name. */
	ocfs2_xattr_set_local(loc->xl_entry,
			      xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);

	nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
	memset(nameval_buf, 0, size);
	memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
}
1557 
/*
 * Fill @vb so it describes the external value tree of loc->xl_entry:
 * the backend supplies the buffer_head/journal-access hook, and vb_xv
 * is pointed at the ocfs2_xattr_value_root that follows the padded
 * name.  Only valid for non-local (tree-stored) values.
 */
static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_value_buf *vb)
{
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);

	/* Value bufs are for value trees */
	BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
	BUG_ON(namevalue_size_xe(loc->xl_entry) !=
	       (name_size + OCFS2_XATTR_ROOT_SIZE));

	loc->xl_ops->xlo_fill_value_buf(loc, vb);
	vb->vb_xv =
		(struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
							nameval_offset +
							name_size);
}
1575 
/*
 * Journal-access for block storage.  The storage may be either a
 * separate xattr block or the tail of the inode block; the xl_size
 * comparison distinguishes the two so the right metadata-check
 * journal helper is used.
 */
static int ocfs2_xa_block_journal_access(handle_t *handle,
					 struct ocfs2_xa_loc *loc, int type)
{
	struct buffer_head *bh = loc->xl_storage;
	ocfs2_journal_access_func access;

	if (loc->xl_size == (bh->b_size -
			     offsetof(struct ocfs2_xattr_block,
				      xb_attrs.xb_header)))
		access = ocfs2_journal_access_xb;
	else
		access = ocfs2_journal_access_di;
	return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
}
1590 
/* Journal-dirty for block storage: dirty the single backing bh. */
static void ocfs2_xa_block_journal_dirty(handle_t *handle,
					 struct ocfs2_xa_loc *loc)
{
	struct buffer_head *bh = loc->xl_storage;

	ocfs2_journal_dirty(handle, bh);
}
1598 
/* Block storage is contiguous, so an offset is just header + offset. */
static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
					   int offset)
{
	return (char *)loc->xl_header + offset;
}
1604 
static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_info *xi)
{
	/*
	 * Block storage is strict.  If the sizes aren't exact, we will
	 * remove the old one and reinsert the new.
	 */
	return namevalue_size_xe(loc->xl_entry) ==
		namevalue_size_xi(xi);
}
1615 
1616 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1617 {
1618 	struct ocfs2_xattr_header *xh = loc->xl_header;
1619 	int i, count = le16_to_cpu(xh->xh_count);
1620 	int offset, free_start = loc->xl_size;
1621 
1622 	for (i = 0; i < count; i++) {
1623 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1624 		if (offset < free_start)
1625 			free_start = offset;
1626 	}
1627 
1628 	return free_start;
1629 }
1630 
/*
 * Space check for block storage.  Because block storage compacts
 * name+value pairs, replacing an existing entry reclaims its old
 * usage first, so only the size difference must be free.
 */
static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
				      struct ocfs2_xattr_info *xi)
{
	int count = le16_to_cpu(loc->xl_header->xh_count);
	int free_start = ocfs2_xa_get_free_start(loc);
	int needed_space = ocfs2_xi_entry_usage(xi);

	/*
	 * Block storage will reclaim the original entry before inserting
	 * the new value, so we only need the difference.  If the new
	 * entry is smaller than the old one, we don't need anything.
	 */
	if (loc->xl_entry) {
		/* Don't need space if we're reusing! */
		if (ocfs2_xa_can_reuse_entry(loc, xi))
			needed_space = 0;
		else
			needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
	}
	if (needed_space < 0)
		needed_space = 0;
	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
}
1654 
1655 /*
1656  * Block storage for xattrs keeps the name+value pairs compacted.  When
1657  * we remove one, we have to shift any that preceded it towards the end.
1658  */
1659 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1660 {
1661 	int i, offset;
1662 	int namevalue_offset, first_namevalue_offset, namevalue_size;
1663 	struct ocfs2_xattr_entry *entry = loc->xl_entry;
1664 	struct ocfs2_xattr_header *xh = loc->xl_header;
1665 	int count = le16_to_cpu(xh->xh_count);
1666 
1667 	namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1668 	namevalue_size = namevalue_size_xe(entry);
1669 	first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1670 
1671 	/* Shift the name+value pairs */
1672 	memmove((char *)xh + first_namevalue_offset + namevalue_size,
1673 		(char *)xh + first_namevalue_offset,
1674 		namevalue_offset - first_namevalue_offset);
1675 	memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1676 
1677 	/* Now tell xh->xh_entries about it */
1678 	for (i = 0; i < count; i++) {
1679 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1680 		if (offset <= namevalue_offset)
1681 			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1682 				     namevalue_size);
1683 	}
1684 
1685 	/*
1686 	 * Note that we don't update xh_free_start or xh_name_value_len
1687 	 * because they're not used in block-stored xattrs.
1688 	 */
1689 }
1690 
/* Block storage is unsorted: append a zeroed entry slot at the end. */
static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	int count = le16_to_cpu(loc->xl_header->xh_count);
	loc->xl_entry = &(loc->xl_header->xh_entries[count]);
	le16_add_cpu(&loc->xl_header->xh_count, 1);
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}
1698 
/* Carve @size bytes off the top of the free region (which grows
 * downward) for the new name+value pair. */
static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
{
	int free_start = ocfs2_xa_get_free_start(loc);

	loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
}
1705 
/*
 * Fill the value buf for block storage: the backing bh is the single
 * storage buffer, and the journal-access hook depends on whether this
 * is a separate xattr block or the inode block's inline region (same
 * size test as ocfs2_xa_block_journal_access()).
 */
static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
					  struct ocfs2_xattr_value_buf *vb)
{
	struct buffer_head *bh = loc->xl_storage;

	if (loc->xl_size == (bh->b_size -
			     offsetof(struct ocfs2_xattr_block,
				      xb_attrs.xb_header)))
		vb->vb_access = ocfs2_journal_access_xb;
	else
		vb->vb_access = ocfs2_journal_access_di;
	vb->vb_bh = bh;
}
1719 
1720 /*
1721  * Operations for xattrs stored in blocks.  This includes inline inode
1722  * storage and unindexed ocfs2_xattr_blocks.
1723  */
1724 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1725 	.xlo_journal_access	= ocfs2_xa_block_journal_access,
1726 	.xlo_journal_dirty	= ocfs2_xa_block_journal_dirty,
1727 	.xlo_offset_pointer	= ocfs2_xa_block_offset_pointer,
1728 	.xlo_check_space	= ocfs2_xa_block_check_space,
1729 	.xlo_can_reuse		= ocfs2_xa_block_can_reuse,
1730 	.xlo_get_free_start	= ocfs2_xa_block_get_free_start,
1731 	.xlo_wipe_namevalue	= ocfs2_xa_block_wipe_namevalue,
1732 	.xlo_add_entry		= ocfs2_xa_block_add_entry,
1733 	.xlo_add_namevalue	= ocfs2_xa_block_add_namevalue,
1734 	.xlo_fill_value_buf	= ocfs2_xa_block_fill_value_buf,
1735 };
1736 
/* Journal-access for bucket storage: declare the whole bucket. */
static int ocfs2_xa_bucket_journal_access(handle_t *handle,
					  struct ocfs2_xa_loc *loc, int type)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;

	return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
}
1744 
/* Journal-dirty for bucket storage: dirty the whole bucket. */
static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
					  struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;

	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
}
1752 
/*
 * A bucket spans several blocks, so a storage offset must be split
 * into a block index and an offset within that block.
 */
static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
					    int offset)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	int block, block_offset;

	/* The header is at the front of the bucket */
	block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
	block_offset = offset % loc->xl_inode->i_sb->s_blocksize;

	return bucket_block(bucket, block) + block_offset;
}
1765 
/* Buckets can reuse a pair's storage as long as the new pair fits
 * (holes are tolerated until the bucket is defragmented). */
static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
				     struct ocfs2_xattr_info *xi)
{
	return namevalue_size_xe(loc->xl_entry) >=
		namevalue_size_xi(xi);
}
1772 
/* Buckets track their free-space boundary in xh_free_start. */
static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
}
1778 
1779 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1780 					 int free_start, int size)
1781 {
1782 	/*
1783 	 * We need to make sure that the name+value pair fits within
1784 	 * one block.
1785 	 */
1786 	if (((free_start - size) >> sb->s_blocksize_bits) !=
1787 	    ((free_start - 1) >> sb->s_blocksize_bits))
1788 		free_start -= free_start % sb->s_blocksize;
1789 
1790 	return free_start;
1791 }
1792 
/*
 * Space check for bucket storage.  Unlike blocks, buckets only
 * reclaim the entry slot on replace (old name+value pairs become
 * holes), and the new pair must also fit within one block after
 * aligning the free start.
 */
static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
				       struct ocfs2_xattr_info *xi)
{
	int rc;
	int count = le16_to_cpu(loc->xl_header->xh_count);
	int free_start = ocfs2_xa_get_free_start(loc);
	int needed_space = ocfs2_xi_entry_usage(xi);
	int size = namevalue_size_xi(xi);
	struct super_block *sb = loc->xl_inode->i_sb;

	/*
	 * Bucket storage does not reclaim name+value pairs it cannot
	 * reuse.  They live as holes until the bucket fills, and then
	 * the bucket is defragmented.  However, the bucket can reclaim
	 * the ocfs2_xattr_entry.
	 */
	if (loc->xl_entry) {
		/* Don't need space if we're reusing! */
		if (ocfs2_xa_can_reuse_entry(loc, xi))
			needed_space = 0;
		else
			needed_space -= sizeof(struct ocfs2_xattr_entry);
	}
	BUG_ON(needed_space < 0);

	if (free_start < size) {
		if (needed_space)
			return -ENOSPC;
	} else {
		/*
		 * First we check if it would fit in the first place.
		 * Below, we align the free start to a block.  This may
		 * slide us below the minimum gap.  By checking unaligned
		 * first, we avoid that error.
		 */
		rc = ocfs2_xa_check_space_helper(needed_space, free_start,
						 count);
		if (rc)
			return rc;
		free_start = ocfs2_bucket_align_free_start(sb, free_start,
							   size);
	}
	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
}
1837 
/* Buckets just drop the pair from the byte count; the bytes become a
 * hole reclaimed later by defragmentation. */
static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	le16_add_cpu(&loc->xl_header->xh_name_value_len,
		     -namevalue_size_xe(loc->xl_entry));
}
1843 
/*
 * Insert a zeroed entry slot at the position determined by
 * @name_hash, keeping the entry array sorted by hash.  Existing
 * entries at or after the insert point are shifted up by one.
 */
static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int count = le16_to_cpu(xh->xh_count);
	int low = 0, high = count - 1, tmp;
	struct ocfs2_xattr_entry *tmp_xe;

	/*
	 * We keep buckets sorted by name_hash, so we need to find
	 * our insert place.
	 */
	while (low <= high && count) {
		tmp = (low + high) / 2;
		tmp_xe = &xh->xh_entries[tmp];

		if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
			low = tmp + 1;
		else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
			high = tmp - 1;
		else {
			/* Equal hashes insert right here. */
			low = tmp;
			break;
		}
	}

	/* Make room at index 'low' unless we're appending at the end. */
	if (low != count)
		memmove(&xh->xh_entries[low + 1],
			&xh->xh_entries[low],
			((count - low) * sizeof(struct ocfs2_xattr_entry)));

	le16_add_cpu(&xh->xh_count, 1);
	loc->xl_entry = &xh->xh_entries[low];
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}
1878 
1879 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1880 {
1881 	int free_start = ocfs2_xa_get_free_start(loc);
1882 	struct ocfs2_xattr_header *xh = loc->xl_header;
1883 	struct super_block *sb = loc->xl_inode->i_sb;
1884 	int nameval_offset;
1885 
1886 	free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1887 	nameval_offset = free_start - size;
1888 	loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1889 	xh->xh_free_start = cpu_to_le16(nameval_offset);
1890 	le16_add_cpu(&xh->xh_name_value_len, size);
1891 
1892 }
1893 
/*
 * Fill the value buf for bucket storage: pick the single block of the
 * bucket that contains the name+value pair (pairs never straddle
 * blocks, enforced by the BUG_ON) and journal via the generic access
 * helper.
 */
static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
					   struct ocfs2_xattr_value_buf *vb)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	struct super_block *sb = loc->xl_inode->i_sb;
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int size = namevalue_size_xe(loc->xl_entry);
	int block_offset = nameval_offset >> sb->s_blocksize_bits;

	/* Values are not allowed to straddle block boundaries */
	BUG_ON(block_offset !=
	       ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
	/* We expect the bucket to be filled in */
	BUG_ON(!bucket->bu_bhs[block_offset]);

	vb->vb_access = ocfs2_journal_access;
	vb->vb_bh = bucket->bu_bhs[block_offset];
}
1912 
/* Operations for xattrs stored in buckets (indexed xattr trees). */
static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
	.xlo_journal_access	= ocfs2_xa_bucket_journal_access,
	.xlo_journal_dirty	= ocfs2_xa_bucket_journal_dirty,
	.xlo_offset_pointer	= ocfs2_xa_bucket_offset_pointer,
	.xlo_check_space	= ocfs2_xa_bucket_check_space,
	.xlo_can_reuse		= ocfs2_xa_bucket_can_reuse,
	.xlo_get_free_start	= ocfs2_xa_bucket_get_free_start,
	.xlo_wipe_namevalue	= ocfs2_xa_bucket_wipe_namevalue,
	.xlo_add_entry		= ocfs2_xa_bucket_add_entry,
	.xlo_add_namevalue	= ocfs2_xa_bucket_add_namevalue,
	.xlo_fill_value_buf	= ocfs2_xa_bucket_fill_value_buf,
};
1926 
/* Number of clusters held by the entry's external value tree; a local
 * (inline) value holds none. */
static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_value_buf vb;

	if (ocfs2_xattr_is_local(loc->xl_entry))
		return 0;

	ocfs2_xa_fill_value_buf(loc, &vb);
	return le32_to_cpu(vb.vb_xv->xr_clusters);
}
1937 
/*
 * Resize the entry's external value tree to @bytes, then re-declare
 * journal access on the loc (the truncate may have committed and
 * reopened the transaction underneath us).
 */
static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
				   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int trunc_rc, access_rc;
	struct ocfs2_xattr_value_buf vb;

	ocfs2_xa_fill_value_buf(loc, &vb);
	trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
					      ctxt);

	/*
	 * The caller of ocfs2_xa_value_truncate() has already called
	 * ocfs2_xa_journal_access on the loc.  However, The truncate code
	 * calls ocfs2_extend_trans().  This may commit the previous
	 * transaction and open a new one.  If this is a bucket, truncate
	 * could leave only vb->vb_bh set up for journaling.  Meanwhile,
	 * the caller is expecting to dirty the entire bucket.  So we must
	 * reset the journal work.  We do this even if truncate has failed,
	 * as it could have failed after committing the extend.
	 */
	access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
					    OCFS2_JOURNAL_ACCESS_WRITE);

	/* Errors in truncate take precedence */
	return trunc_rc ? trunc_rc : access_rc;
}
1964 
/*
 * Remove loc->xl_entry entirely: wipe its name+value storage, close
 * the gap in the entry array, and decrement xh_count.  loc->xl_entry
 * is NULL on return.
 */
static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
{
	int index, count;
	struct ocfs2_xattr_header *xh = loc->xl_header;
	struct ocfs2_xattr_entry *entry = loc->xl_entry;

	ocfs2_xa_wipe_namevalue(loc);
	loc->xl_entry = NULL;

	count = le16_to_cpu(xh->xh_count) - 1;

	/*
	 * Only zero out the entry if there are more remaining.  This is
	 * important for an empty bucket, as it keeps track of the
	 * bucket's hash value.  It doesn't hurt empty block storage.
	 */
	if (count) {
		index = ((char *)entry - (char *)&xh->xh_entries) /
			sizeof(struct ocfs2_xattr_entry);
		memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
			(count - index) * sizeof(struct ocfs2_xattr_entry));
		memset(&xh->xh_entries[count], 0,
		       sizeof(struct ocfs2_xattr_entry));
	}

	xh->xh_count = cpu_to_le16(count);
}
1992 
1993 /*
1994  * If we have a problem adjusting the size of an external value during
1995  * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1996  * in an intermediate state.  For example, the value may be partially
1997  * truncated.
1998  *
1999  * If the value tree hasn't changed, the extend/truncate went nowhere.
2000  * We have nothing to do.  The caller can treat it as a straight error.
2001  *
2002  * If the value tree got partially truncated, we now have a corrupted
2003  * extended attribute.  We're going to wipe its entry and leak the
2004  * clusters.  Better to leak some storage than leave a corrupt entry.
2005  *
2006  * If the value tree grew, it obviously didn't grow enough for the
2007  * new entry.  We're not going to try and reclaim those clusters either.
2008  * If there was already an external value there (orig_clusters != 0),
2009  * the new clusters are attached safely and we can just leave the old
2010  * value in place.  If there was no external value there, we remove
2011  * the entry.
2012  *
2013  * This way, the xattr block we store in the journal will be consistent.
2014  * If the size change broke because of the journal, no changes will hit
2015  * disk anyway.
2016  */
static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
					    const char *what,
					    unsigned int orig_clusters)
{
	unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
	/* Points at the entry's name bytes, for the log messages. */
	char *nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));

	/* See the comment above for the rationale of each branch. */
	if (new_clusters < orig_clusters) {
		/* Partial truncate: wipe the entry, leak the clusters. */
		mlog(ML_ERROR,
		     "Partial truncate while %s xattr %.*s.  Leaking "
		     "%u clusters and removing the entry\n",
		     what, loc->xl_entry->xe_name_len, nameval_buf,
		     orig_clusters - new_clusters);
		ocfs2_xa_remove_entry(loc);
	} else if (!orig_clusters) {
		/* Failed allocation of a brand-new value: remove entry. */
		mlog(ML_ERROR,
		     "Unable to allocate an external value for xattr "
		     "%.*s safely.  Leaking %u clusters and removing the "
		     "entry\n",
		     loc->xl_entry->xe_name_len, nameval_buf,
		     new_clusters - orig_clusters);
		ocfs2_xa_remove_entry(loc);
	} else if (new_clusters > orig_clusters)
		/* Partial grow of an existing value: keep the old value. */
		mlog(ML_ERROR,
		     "Unable to grow xattr %.*s safely.  %u new clusters "
		     "have been added, but the value will not be "
		     "modified\n",
		     loc->xl_entry->xe_name_len, nameval_buf,
		     new_clusters - orig_clusters);
}
2048 
/*
 * Remove the xattr at loc.  An externally stored value is truncated
 * away first; if that truncation fails partway, the cleanup helper may
 * wipe the entry anyway, in which case this still returns 0 (the xattr
 * is gone, we merely leaked clusters).
 */
static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
			   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;

	if (!ocfs2_xattr_is_local(loc->xl_entry)) {
		/* Value lives outside; free its clusters first. */
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
		if (rc) {
			mlog_errno(rc);
			/*
			 * Since this is remove, we can return 0 if
			 * ocfs2_xa_cleanup_value_truncate() is going to
			 * wipe the entry anyway.  So we check the
			 * cluster count as well.
			 */
			if (orig_clusters != ocfs2_xa_value_clusters(loc))
				rc = 0;
			ocfs2_xa_cleanup_value_truncate(loc, "removing",
							orig_clusters);
			goto out;
		}
	}

	ocfs2_xa_remove_entry(loc);

out:
	return rc;
}
2079 
2080 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2081 {
2082 	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2083 	char *nameval_buf;
2084 
2085 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2086 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2087 	memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2088 }
2089 
2090 /*
2091  * Take an existing entry and make it ready for the new value.  This
2092  * won't allocate space, but it may free space.  It should be ready for
2093  * ocfs2_xa_prepare_entry() to finish the work.
2094  */
2095 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2096 				struct ocfs2_xattr_info *xi,
2097 				struct ocfs2_xattr_set_ctxt *ctxt)
2098 {
2099 	int rc = 0;
2100 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2101 	unsigned int orig_clusters;
2102 	char *nameval_buf;
2103 	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2104 	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2105 
2106 	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2107 	       name_size);
2108 
2109 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2110 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2111 	if (xe_local) {
2112 		memset(nameval_buf + name_size, 0,
2113 		       namevalue_size_xe(loc->xl_entry) - name_size);
2114 		if (!xi_local)
2115 			ocfs2_xa_install_value_root(loc);
2116 	} else {
2117 		orig_clusters = ocfs2_xa_value_clusters(loc);
2118 		if (xi_local) {
2119 			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2120 			if (rc < 0)
2121 				mlog_errno(rc);
2122 			else
2123 				memset(nameval_buf + name_size, 0,
2124 				       namevalue_size_xe(loc->xl_entry) -
2125 				       name_size);
2126 		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2127 			   xi->xi_value_len) {
2128 			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2129 						     ctxt);
2130 			if (rc < 0)
2131 				mlog_errno(rc);
2132 		}
2133 
2134 		if (rc) {
2135 			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2136 							orig_clusters);
2137 			goto out;
2138 		}
2139 	}
2140 
2141 	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2142 	ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2143 
2144 out:
2145 	return rc;
2146 }
2147 
2148 /*
2149  * Prepares loc->xl_entry to receive the new xattr.  This includes
2150  * properly setting up the name+value pair region.  If loc->xl_entry
2151  * already exists, it will take care of modifying it appropriately.
2152  *
2153  * Note that this modifies the data.  You did journal_access already,
2154  * right?
2155  */
static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
				  struct ocfs2_xattr_info *xi,
				  u32 name_hash,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;
	__le64 orig_value_size = 0;

	rc = ocfs2_xa_check_space(loc, xi);
	if (rc)
		goto out;

	if (loc->xl_entry) {
		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
			/* Same padded name size: reuse the entry in
			 * place.  Remember the old value size so it can
			 * be restored if growing the value fails. */
			orig_value_size = loc->xl_entry->xe_value_size;
			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
			if (rc)
				goto out;
			goto alloc_value;
		}

		/* Can't reuse: tear down any external value, then blank
		 * the old name+value region. */
		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
			orig_clusters = ocfs2_xa_value_clusters(loc);
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc) {
				mlog_errno(rc);
				ocfs2_xa_cleanup_value_truncate(loc,
								"overwriting",
								orig_clusters);
				goto out;
			}
		}
		ocfs2_xa_wipe_namevalue(loc);
	} else
		ocfs2_xa_add_entry(loc, name_hash);

	/*
	 * If we get here, we have a blank entry.  Fill it.  We grow our
	 * name+value pair back from the end.
	 */
	ocfs2_xa_add_namevalue(loc, xi);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xa_install_value_root(loc);

alloc_value:
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* Extend the external value tree to its new length. */
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
		if (rc < 0) {
			ctxt->set_abort = 1;
			ocfs2_xa_cleanup_value_truncate(loc, "growing",
							orig_clusters);
			/*
			 * If we were growing an existing value,
			 * ocfs2_xa_cleanup_value_truncate() won't remove
			 * the entry. We need to restore the original value
			 * size.
			 */
			if (loc->xl_entry) {
				BUG_ON(!orig_value_size);
				loc->xl_entry->xe_value_size = orig_value_size;
			}
			mlog_errno(rc);
		}
	}

out:
	return rc;
}
2226 
2227 /*
2228  * Store the value portion of the name+value pair.  This will skip
2229  * values that are stored externally.  Their tree roots were set up
2230  * by ocfs2_xa_prepare_entry().
2231  */
2232 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2233 				struct ocfs2_xattr_info *xi,
2234 				struct ocfs2_xattr_set_ctxt *ctxt)
2235 {
2236 	int rc = 0;
2237 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2238 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2239 	char *nameval_buf;
2240 	struct ocfs2_xattr_value_buf vb;
2241 
2242 	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2243 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2244 		ocfs2_xa_fill_value_buf(loc, &vb);
2245 		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2246 						     ctxt->handle, &vb,
2247 						     xi->xi_value,
2248 						     xi->xi_value_len);
2249 	} else
2250 		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2251 
2252 	return rc;
2253 }
2254 
/*
 * Set, replace, or (when xi->xi_value is NULL) remove the xattr
 * described by xi at loc.  Takes journal write access on the backing
 * storage and dirties it on the way out, even on error, because the
 * helpers may have modified the buffer while keeping the header sane.
 */
static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
			struct ocfs2_xattr_info *xi,
			struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
					      xi->xi_name_len);

	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * From here on out, everything is going to modify the buffer a
	 * little.  Errors are going to leave the xattr header in a
	 * sane state.  Thus, even with errors we dirty the sucker.
	 */

	/* Don't worry, we are never called with !xi_value and !xl_entry */
	if (!xi->xi_value) {
		ret = ocfs2_xa_remove(loc, ctxt);
		goto out_dirty;
	}

	ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
	if (ret) {
		/* -ENOSPC is an expected outcome, not an error. */
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out_dirty;
	}

	ret = ocfs2_xa_store_value(loc, xi, ctxt);
	if (ret)
		mlog_errno(ret);

out_dirty:
	ocfs2_xa_journal_dirty(ctxt->handle, loc);

out:
	return ret;
}
2299 
2300 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2301 				     struct inode *inode,
2302 				     struct buffer_head *bh,
2303 				     struct ocfs2_xattr_entry *entry)
2304 {
2305 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2306 
2307 	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2308 
2309 	loc->xl_inode = inode;
2310 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2311 	loc->xl_storage = bh;
2312 	loc->xl_entry = entry;
2313 	loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2314 	loc->xl_header =
2315 		(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2316 					      loc->xl_size);
2317 }
2318 
2319 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2320 					  struct inode *inode,
2321 					  struct buffer_head *bh,
2322 					  struct ocfs2_xattr_entry *entry)
2323 {
2324 	struct ocfs2_xattr_block *xb =
2325 		(struct ocfs2_xattr_block *)bh->b_data;
2326 
2327 	BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2328 
2329 	loc->xl_inode = inode;
2330 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2331 	loc->xl_storage = bh;
2332 	loc->xl_header = &(xb->xb_attrs.xb_header);
2333 	loc->xl_entry = entry;
2334 	loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2335 					     xb_attrs.xb_header);
2336 }
2337 
2338 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2339 					   struct ocfs2_xattr_bucket *bucket,
2340 					   struct ocfs2_xattr_entry *entry)
2341 {
2342 	loc->xl_inode = bucket->bu_inode;
2343 	loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2344 	loc->xl_storage = bucket;
2345 	loc->xl_header = bucket_xh(bucket);
2346 	loc->xl_entry = entry;
2347 	loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2348 }
2349 
2350 /*
2351  * In xattr remove, if it is stored outside and refcounted, we may have
2352  * the chance to split the refcount tree. So need the allocators.
2353  */
2354 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2355 					struct ocfs2_xattr_value_root *xv,
2356 					struct ocfs2_caching_info *ref_ci,
2357 					struct buffer_head *ref_root_bh,
2358 					struct ocfs2_alloc_context **meta_ac,
2359 					int *ref_credits)
2360 {
2361 	int ret, meta_add = 0;
2362 	u32 p_cluster, num_clusters;
2363 	unsigned int ext_flags;
2364 
2365 	*ref_credits = 0;
2366 	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2367 				       &num_clusters,
2368 				       &xv->xr_list,
2369 				       &ext_flags);
2370 	if (ret) {
2371 		mlog_errno(ret);
2372 		goto out;
2373 	}
2374 
2375 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2376 		goto out;
2377 
2378 	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2379 						 ref_root_bh, xv,
2380 						 &meta_add, ref_credits);
2381 	if (ret) {
2382 		mlog_errno(ret);
2383 		goto out;
2384 	}
2385 
2386 	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2387 						meta_add, meta_ac);
2388 	if (ret)
2389 		mlog_errno(ret);
2390 
2391 out:
2392 	return ret;
2393 }
2394 
2395 static int ocfs2_remove_value_outside(struct inode*inode,
2396 				      struct ocfs2_xattr_value_buf *vb,
2397 				      struct ocfs2_xattr_header *header,
2398 				      struct ocfs2_caching_info *ref_ci,
2399 				      struct buffer_head *ref_root_bh)
2400 {
2401 	int ret = 0, i, ref_credits;
2402 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2403 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2404 	void *val;
2405 
2406 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2407 
2408 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2409 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2410 
2411 		if (ocfs2_xattr_is_local(entry))
2412 			continue;
2413 
2414 		val = (void *)header +
2415 			le16_to_cpu(entry->xe_name_offset);
2416 		vb->vb_xv = (struct ocfs2_xattr_value_root *)
2417 			(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2418 
2419 		ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2420 							 ref_ci, ref_root_bh,
2421 							 &ctxt.meta_ac,
2422 							 &ref_credits);
2423 
2424 		ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2425 					ocfs2_remove_extent_credits(osb->sb));
2426 		if (IS_ERR(ctxt.handle)) {
2427 			ret = PTR_ERR(ctxt.handle);
2428 			mlog_errno(ret);
2429 			break;
2430 		}
2431 
2432 		ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2433 
2434 		ocfs2_commit_trans(osb, ctxt.handle);
2435 		if (ctxt.meta_ac) {
2436 			ocfs2_free_alloc_context(ctxt.meta_ac);
2437 			ctxt.meta_ac = NULL;
2438 		}
2439 
2440 		if (ret < 0) {
2441 			mlog_errno(ret);
2442 			break;
2443 		}
2444 
2445 	}
2446 
2447 	if (ctxt.meta_ac)
2448 		ocfs2_free_alloc_context(ctxt.meta_ac);
2449 	ocfs2_schedule_truncate_log_flush(osb, 1);
2450 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
2451 	return ret;
2452 }
2453 
2454 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2455 				    struct buffer_head *di_bh,
2456 				    struct ocfs2_caching_info *ref_ci,
2457 				    struct buffer_head *ref_root_bh)
2458 {
2459 
2460 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2461 	struct ocfs2_xattr_header *header;
2462 	int ret;
2463 	struct ocfs2_xattr_value_buf vb = {
2464 		.vb_bh = di_bh,
2465 		.vb_access = ocfs2_journal_access_di,
2466 	};
2467 
2468 	header = (struct ocfs2_xattr_header *)
2469 		 ((void *)di + inode->i_sb->s_blocksize -
2470 		 le16_to_cpu(di->i_xattr_inline_size));
2471 
2472 	ret = ocfs2_remove_value_outside(inode, &vb, header,
2473 					 ref_ci, ref_root_bh);
2474 
2475 	return ret;
2476 }
2477 
/*
 * Arguments threaded through ocfs2_iterate_xattr_index_block() down to
 * ocfs2_rm_xattr_cluster() when tearing down an indexed xattr block.
 */
struct ocfs2_rm_xattr_bucket_para {
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
};
2482 
2483 static int ocfs2_xattr_block_remove(struct inode *inode,
2484 				    struct buffer_head *blk_bh,
2485 				    struct ocfs2_caching_info *ref_ci,
2486 				    struct buffer_head *ref_root_bh)
2487 {
2488 	struct ocfs2_xattr_block *xb;
2489 	int ret = 0;
2490 	struct ocfs2_xattr_value_buf vb = {
2491 		.vb_bh = blk_bh,
2492 		.vb_access = ocfs2_journal_access_xb,
2493 	};
2494 	struct ocfs2_rm_xattr_bucket_para args = {
2495 		.ref_ci = ref_ci,
2496 		.ref_root_bh = ref_root_bh,
2497 	};
2498 
2499 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2500 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2501 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2502 		ret = ocfs2_remove_value_outside(inode, &vb, header,
2503 						 ref_ci, ref_root_bh);
2504 	} else
2505 		ret = ocfs2_iterate_xattr_index_block(inode,
2506 						blk_bh,
2507 						ocfs2_rm_xattr_cluster,
2508 						&args);
2509 
2510 	return ret;
2511 }
2512 
/*
 * Free the external xattr block at @block: strip all outside-stored
 * values, then return the block itself to the extent allocator it was
 * claimed from.
 */
static int ocfs2_xattr_free_block(struct inode *inode,
				  u64 block,
				  struct ocfs2_caching_info *ref_ci,
				  struct buffer_head *ref_root_bh)
{
	struct inode *xb_alloc_inode;
	struct buffer_head *xb_alloc_bh = NULL;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	int ret = 0;
	u64 blk, bg_blkno;
	u16 bit;

	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Release every externally stored value first. */
	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Locate the suballocator group this block was allocated from;
	 * older blocks without xb_suballoc_loc fall back to computing
	 * the group from the block number and bit. */
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	blk = le64_to_cpu(xb->xb_blkno);
	bit = le16_to_cpu(xb->xb_suballoc_bit);
	if (xb->xb_suballoc_loc)
		bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
	else
		bg_blkno = ocfs2_which_suballoc_group(blk, bit);

	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
				EXTENT_ALLOC_SYSTEM_INODE,
				le16_to_cpu(xb->xb_suballoc_slot));
	if (!xb_alloc_inode) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}
	/* Lock order: the allocator inode's i_rwsem before its cluster
	 * lock. */
	inode_lock(xb_alloc_inode);

	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	/* Hand the single block back to its suballocator. */
	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
				       bit, bg_blkno, 1);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);
out_unlock:
	ocfs2_inode_unlock(xb_alloc_inode, 1);
	brelse(xb_alloc_bh);
out_mutex:
	inode_unlock(xb_alloc_inode);
	iput(xb_alloc_inode);
out:
	brelse(blk_bh);
	return ret;
}
2587 
2588 /*
2589  * ocfs2_xattr_remove()
2590  *
2591  * Free extended attribute resources associated with this inode.
2592  */
2593 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2594 {
2595 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2596 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2597 	struct ocfs2_refcount_tree *ref_tree = NULL;
2598 	struct buffer_head *ref_root_bh = NULL;
2599 	struct ocfs2_caching_info *ref_ci = NULL;
2600 	handle_t *handle;
2601 	int ret;
2602 
2603 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2604 		return 0;
2605 
2606 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2607 		return 0;
2608 
2609 	if (ocfs2_is_refcount_inode(inode)) {
2610 		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2611 					       le64_to_cpu(di->i_refcount_loc),
2612 					       1, &ref_tree, &ref_root_bh);
2613 		if (ret) {
2614 			mlog_errno(ret);
2615 			goto out;
2616 		}
2617 		ref_ci = &ref_tree->rf_ci;
2618 
2619 	}
2620 
2621 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2622 		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2623 					       ref_ci, ref_root_bh);
2624 		if (ret < 0) {
2625 			mlog_errno(ret);
2626 			goto out;
2627 		}
2628 	}
2629 
2630 	if (di->i_xattr_loc) {
2631 		ret = ocfs2_xattr_free_block(inode,
2632 					     le64_to_cpu(di->i_xattr_loc),
2633 					     ref_ci, ref_root_bh);
2634 		if (ret < 0) {
2635 			mlog_errno(ret);
2636 			goto out;
2637 		}
2638 	}
2639 
2640 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2641 				   OCFS2_INODE_UPDATE_CREDITS);
2642 	if (IS_ERR(handle)) {
2643 		ret = PTR_ERR(handle);
2644 		mlog_errno(ret);
2645 		goto out;
2646 	}
2647 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2648 				      OCFS2_JOURNAL_ACCESS_WRITE);
2649 	if (ret) {
2650 		mlog_errno(ret);
2651 		goto out_commit;
2652 	}
2653 
2654 	di->i_xattr_loc = 0;
2655 
2656 	spin_lock(&oi->ip_lock);
2657 	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2658 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2659 	spin_unlock(&oi->ip_lock);
2660 	ocfs2_update_inode_fsync_trans(handle, inode, 0);
2661 
2662 	ocfs2_journal_dirty(handle, di_bh);
2663 out_commit:
2664 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2665 out:
2666 	if (ref_tree)
2667 		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2668 	brelse(ref_root_bh);
2669 	return ret;
2670 }
2671 
2672 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2673 					struct ocfs2_dinode *di)
2674 {
2675 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2676 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2677 	int free;
2678 
2679 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2680 		return 0;
2681 
2682 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2683 		struct ocfs2_inline_data *idata = &di->id2.i_data;
2684 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2685 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
2686 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
2687 			le64_to_cpu(di->i_size);
2688 	} else {
2689 		struct ocfs2_extent_list *el = &di->id2.i_list;
2690 		free = (le16_to_cpu(el->l_count) -
2691 			le16_to_cpu(el->l_next_free_rec)) *
2692 			sizeof(struct ocfs2_extent_rec);
2693 	}
2694 	if (free >= xattrsize)
2695 		return 1;
2696 
2697 	return 0;
2698 }
2699 
2700 /*
2701  * ocfs2_xattr_ibody_find()
2702  *
2703  * Find extended attribute in inode block and
2704  * fill search info into struct ocfs2_xattr_search.
2705  */
2706 static int ocfs2_xattr_ibody_find(struct inode *inode,
2707 				  int name_index,
2708 				  const char *name,
2709 				  struct ocfs2_xattr_search *xs)
2710 {
2711 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2712 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2713 	int ret;
2714 	int has_space = 0;
2715 
2716 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2717 		return 0;
2718 
2719 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2720 		down_read(&oi->ip_alloc_sem);
2721 		has_space = ocfs2_xattr_has_space_inline(inode, di);
2722 		up_read(&oi->ip_alloc_sem);
2723 		if (!has_space)
2724 			return 0;
2725 	}
2726 
2727 	xs->xattr_bh = xs->inode_bh;
2728 	xs->end = (void *)di + inode->i_sb->s_blocksize;
2729 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2730 		xs->header = (struct ocfs2_xattr_header *)
2731 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
2732 	else
2733 		xs->header = (struct ocfs2_xattr_header *)
2734 			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2735 	xs->base = (void *)xs->header;
2736 	xs->here = xs->header->xh_entries;
2737 
2738 	/* Find the named attribute. */
2739 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2740 		ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
2741 		if (ret && ret != -ENODATA)
2742 			return ret;
2743 		xs->not_found = ret;
2744 	}
2745 
2746 	return 0;
2747 }
2748 
/*
 * Carve out the inline xattr region at the tail of the inode block by
 * shrinking the room available to inline data or the extent list, and
 * flag the inode as carrying inline xattrs.  Runs inside the caller's
 * transaction (ctxt->handle).
 */
static int ocfs2_xattr_ibody_init(struct inode *inode,
				  struct buffer_head *di_bh,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	unsigned int xattrsize = osb->s_xattr_inline_size;

	if (!ocfs2_xattr_has_space_inline(inode, di)) {
		ret = -ENOSPC;
		goto out;
	}

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Adjust extent record count or inline data size
	 * to reserve space for extended attribute.
	 */
	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		struct ocfs2_inline_data *idata = &di->id2.i_data;
		le16_add_cpu(&idata->id_count, -xattrsize);
	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
		/* Fast symlinks keep their target in the body; their
		 * space is accounted by i_size, not the extent list. */
		struct ocfs2_extent_list *el = &di->id2.i_list;
		le16_add_cpu(&el->l_count, -(xattrsize /
					     sizeof(struct ocfs2_extent_rec)));
	}
	di->i_xattr_inline_size = cpu_to_le16(xattrsize);

	/* ip_lock guards the in-memory dynamic feature flags. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, di_bh);

out:
	return ret;
}
2795 
2796 /*
2797  * ocfs2_xattr_ibody_set()
2798  *
2799  * Set, replace or remove an extended attribute into inode block.
2800  *
2801  */
2802 static int ocfs2_xattr_ibody_set(struct inode *inode,
2803 				 struct ocfs2_xattr_info *xi,
2804 				 struct ocfs2_xattr_search *xs,
2805 				 struct ocfs2_xattr_set_ctxt *ctxt)
2806 {
2807 	int ret;
2808 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2809 	struct ocfs2_xa_loc loc;
2810 
2811 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2812 		return -ENOSPC;
2813 
2814 	down_write(&oi->ip_alloc_sem);
2815 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2816 		ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2817 		if (ret) {
2818 			if (ret != -ENOSPC)
2819 				mlog_errno(ret);
2820 			goto out;
2821 		}
2822 	}
2823 
2824 	ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2825 				 xs->not_found ? NULL : xs->here);
2826 	ret = ocfs2_xa_set(&loc, xi, ctxt);
2827 	if (ret) {
2828 		if (ret != -ENOSPC)
2829 			mlog_errno(ret);
2830 		goto out;
2831 	}
2832 	xs->here = loc.xl_entry;
2833 
2834 out:
2835 	up_write(&oi->ip_alloc_sem);
2836 
2837 	return ret;
2838 }
2839 
2840 /*
2841  * ocfs2_xattr_block_find()
2842  *
2843  * Find extended attribute in external block and
2844  * fill search info into struct ocfs2_xattr_search.
2845  */
2846 static int ocfs2_xattr_block_find(struct inode *inode,
2847 				  int name_index,
2848 				  const char *name,
2849 				  struct ocfs2_xattr_search *xs)
2850 {
2851 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2852 	struct buffer_head *blk_bh = NULL;
2853 	struct ocfs2_xattr_block *xb;
2854 	int ret = 0;
2855 
2856 	if (!di->i_xattr_loc)
2857 		return ret;
2858 
2859 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2860 				     &blk_bh);
2861 	if (ret < 0) {
2862 		mlog_errno(ret);
2863 		return ret;
2864 	}
2865 
2866 	xs->xattr_bh = blk_bh;
2867 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2868 
2869 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2870 		xs->header = &xb->xb_attrs.xb_header;
2871 		xs->base = (void *)xs->header;
2872 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2873 		xs->here = xs->header->xh_entries;
2874 
2875 		ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
2876 	} else
2877 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2878 						   name_index,
2879 						   name, xs);
2880 
2881 	if (ret && ret != -ENODATA) {
2882 		xs->xattr_bh = NULL;
2883 		goto cleanup;
2884 	}
2885 	xs->not_found = ret;
2886 	return 0;
2887 cleanup:
2888 	brelse(blk_bh);
2889 
2890 	return ret;
2891 }
2892 
/*
 * Allocate, initialize, and journal a new external xattr block, then
 * link it into the inode via di->i_xattr_loc.  When @indexed is set,
 * the block is set up as the root of an xattr bucket tree instead of
 * a flat entry list.  On success *ret_bh holds a reference to the new
 * buffer; the caller must brelse() it.
 */
static int ocfs2_create_xattr_block(struct inode *inode,
				    struct buffer_head *inode_bh,
				    struct ocfs2_xattr_set_ctxt *ctxt,
				    int indexed,
				    struct buffer_head **ret_bh)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 suballoc_loc, first_blkno;
	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
	struct buffer_head *new_bh = NULL;
	struct ocfs2_xattr_block *xblk;

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	/* Claim one metadata block from the reserved allocator. */
	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
				   &suballoc_loc, &suballoc_bit_start,
				   &num_got, &first_blkno);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	new_bh = sb_getblk(inode->i_sb, first_blkno);
	if (!new_bh) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto end;
	}

	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);

	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
				      new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	/* Initialize ocfs2_xattr_block */
	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
	memset(xblk, 0, inode->i_sb->s_blocksize);
	strscpy(xblk->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE);
	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	xblk->xb_fs_generation =
		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
	xblk->xb_blkno = cpu_to_le64(first_blkno);
	if (indexed) {
		/* Root of a fresh, one-cluster bucket tree. */
		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
		xr->xt_clusters = cpu_to_le32(1);
		xr->xt_last_eb_blk = 0;
		xr->xt_list.l_tree_depth = 0;
		xr->xt_list.l_count = cpu_to_le16(
					ocfs2_xattr_recs_per_xb(inode->i_sb));
		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
	}
	ocfs2_journal_dirty(ctxt->handle, new_bh);

	/* Add it to the inode */
	di->i_xattr_loc = cpu_to_le64(first_blkno);

	spin_lock(&OCFS2_I(inode)->ip_lock);
	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, inode_bh);

	/* Transfer our buffer reference to the caller. */
	*ret_bh = new_bh;
	new_bh = NULL;

end:
	brelse(new_bh);
	return ret;
}
2978 
2979 /*
2980  * ocfs2_xattr_block_set()
2981  *
2982  * Set, replace or remove an extended attribute into external block.
2983  *
2984  */
2985 static int ocfs2_xattr_block_set(struct inode *inode,
2986 				 struct ocfs2_xattr_info *xi,
2987 				 struct ocfs2_xattr_search *xs,
2988 				 struct ocfs2_xattr_set_ctxt *ctxt)
2989 {
2990 	struct buffer_head *new_bh = NULL;
2991 	struct ocfs2_xattr_block *xblk = NULL;
2992 	int ret;
2993 	struct ocfs2_xa_loc loc;
2994 
2995 	if (!xs->xattr_bh) {
2996 		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2997 					       0, &new_bh);
2998 		if (ret) {
2999 			mlog_errno(ret);
3000 			goto end;
3001 		}
3002 
3003 		xs->xattr_bh = new_bh;
3004 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
3005 		xs->header = &xblk->xb_attrs.xb_header;
3006 		xs->base = (void *)xs->header;
3007 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
3008 		xs->here = xs->header->xh_entries;
3009 	} else
3010 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
3011 
3012 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
3013 		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
3014 					      xs->not_found ? NULL : xs->here);
3015 
3016 		ret = ocfs2_xa_set(&loc, xi, ctxt);
3017 		if (!ret)
3018 			xs->here = loc.xl_entry;
3019 		else if ((ret != -ENOSPC) || ctxt->set_abort)
3020 			goto end;
3021 		else {
3022 			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
3023 			if (ret)
3024 				goto end;
3025 		}
3026 	}
3027 
3028 	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
3029 		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
3030 
3031 end:
3032 	return ret;
3033 }
3034 
3035 /* Check whether the new xattr can be inserted into the inode. */
3036 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
3037 				       struct ocfs2_xattr_info *xi,
3038 				       struct ocfs2_xattr_search *xs)
3039 {
3040 	struct ocfs2_xattr_entry *last;
3041 	int free, i;
3042 	size_t min_offs = xs->end - xs->base;
3043 
3044 	if (!xs->header)
3045 		return 0;
3046 
3047 	last = xs->header->xh_entries;
3048 
3049 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
3050 		size_t offs = le16_to_cpu(last->xe_name_offset);
3051 		if (offs < min_offs)
3052 			min_offs = offs;
3053 		last += 1;
3054 	}
3055 
3056 	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
3057 	if (free < 0)
3058 		return 0;
3059 
3060 	BUG_ON(!xs->not_found);
3061 
3062 	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3063 		return 1;
3064 
3065 	return 0;
3066 }
3067 
3068 static int ocfs2_calc_xattr_set_need(struct inode *inode,
3069 				     struct ocfs2_dinode *di,
3070 				     struct ocfs2_xattr_info *xi,
3071 				     struct ocfs2_xattr_search *xis,
3072 				     struct ocfs2_xattr_search *xbs,
3073 				     int *clusters_need,
3074 				     int *meta_need,
3075 				     int *credits_need)
3076 {
3077 	int ret = 0, old_in_xb = 0;
3078 	int clusters_add = 0, meta_add = 0, credits = 0;
3079 	struct buffer_head *bh = NULL;
3080 	struct ocfs2_xattr_block *xb = NULL;
3081 	struct ocfs2_xattr_entry *xe = NULL;
3082 	struct ocfs2_xattr_value_root *xv = NULL;
3083 	char *base = NULL;
3084 	int name_offset, name_len = 0;
3085 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3086 						    xi->xi_value_len);
3087 	u64 value_size;
3088 
3089 	/*
3090 	 * Calculate the clusters we need to write.
3091 	 * No matter whether we replace an old one or add a new one,
3092 	 * we need this for writing.
3093 	 */
3094 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3095 		credits += new_clusters *
3096 			   ocfs2_clusters_to_blocks(inode->i_sb, 1);
3097 
3098 	if (xis->not_found && xbs->not_found) {
3099 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3100 
3101 		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3102 			clusters_add += new_clusters;
3103 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3104 							&def_xv.xv.xr_list);
3105 		}
3106 
3107 		goto meta_guess;
3108 	}
3109 
3110 	if (!xis->not_found) {
3111 		xe = xis->here;
3112 		name_offset = le16_to_cpu(xe->xe_name_offset);
3113 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3114 		base = xis->base;
3115 		credits += OCFS2_INODE_UPDATE_CREDITS;
3116 	} else {
3117 		int i, block_off = 0;
3118 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3119 		xe = xbs->here;
3120 		name_offset = le16_to_cpu(xe->xe_name_offset);
3121 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3122 		i = xbs->here - xbs->header->xh_entries;
3123 		old_in_xb = 1;
3124 
3125 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3126 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3127 							bucket_xh(xbs->bucket),
3128 							i, &block_off,
3129 							&name_offset);
3130 			base = bucket_block(xbs->bucket, block_off);
3131 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3132 		} else {
3133 			base = xbs->base;
3134 			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3135 		}
3136 	}
3137 
3138 	/*
3139 	 * delete a xattr doesn't need metadata and cluster allocation.
3140 	 * so just calculate the credits and return.
3141 	 *
3142 	 * The credits for removing the value tree will be extended
3143 	 * by ocfs2_remove_extent itself.
3144 	 */
3145 	if (!xi->xi_value) {
3146 		if (!ocfs2_xattr_is_local(xe))
3147 			credits += ocfs2_remove_extent_credits(inode->i_sb);
3148 
3149 		goto out;
3150 	}
3151 
3152 	/* do cluster allocation guess first. */
3153 	value_size = le64_to_cpu(xe->xe_value_size);
3154 
3155 	if (old_in_xb) {
3156 		/*
3157 		 * In xattr set, we always try to set the xe in inode first,
3158 		 * so if it can be inserted into inode successfully, the old
3159 		 * one will be removed from the xattr block, and this xattr
3160 		 * will be inserted into inode as a new xattr in inode.
3161 		 */
3162 		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3163 			clusters_add += new_clusters;
3164 			credits += ocfs2_remove_extent_credits(inode->i_sb) +
3165 				    OCFS2_INODE_UPDATE_CREDITS;
3166 			if (!ocfs2_xattr_is_local(xe))
3167 				credits += ocfs2_calc_extend_credits(
3168 							inode->i_sb,
3169 							&def_xv.xv.xr_list);
3170 			goto out;
3171 		}
3172 	}
3173 
3174 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3175 		/* the new values will be stored outside. */
3176 		u32 old_clusters = 0;
3177 
3178 		if (!ocfs2_xattr_is_local(xe)) {
3179 			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
3180 								 value_size);
3181 			xv = (struct ocfs2_xattr_value_root *)
3182 			     (base + name_offset + name_len);
3183 			value_size = OCFS2_XATTR_ROOT_SIZE;
3184 		} else
3185 			xv = &def_xv.xv;
3186 
3187 		if (old_clusters >= new_clusters) {
3188 			credits += ocfs2_remove_extent_credits(inode->i_sb);
3189 			goto out;
3190 		} else {
3191 			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3192 			clusters_add += new_clusters - old_clusters;
3193 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3194 							     &xv->xr_list);
3195 			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3196 				goto out;
3197 		}
3198 	} else {
3199 		/*
3200 		 * Now the new value will be stored inside. So if the new
3201 		 * value is smaller than the size of value root or the old
3202 		 * value, we don't need any allocation, otherwise we have
3203 		 * to guess metadata allocation.
3204 		 */
3205 		if ((ocfs2_xattr_is_local(xe) &&
3206 		     (value_size >= xi->xi_value_len)) ||
3207 		    (!ocfs2_xattr_is_local(xe) &&
3208 		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3209 			goto out;
3210 	}
3211 
3212 meta_guess:
3213 	/* calculate metadata allocation. */
3214 	if (di->i_xattr_loc) {
3215 		if (!xbs->xattr_bh) {
3216 			ret = ocfs2_read_xattr_block(inode,
3217 						     le64_to_cpu(di->i_xattr_loc),
3218 						     &bh);
3219 			if (ret) {
3220 				mlog_errno(ret);
3221 				goto out;
3222 			}
3223 
3224 			xb = (struct ocfs2_xattr_block *)bh->b_data;
3225 		} else
3226 			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3227 
3228 		/*
3229 		 * If there is already an xattr tree, good, we can calculate
3230 		 * like other b-trees. Otherwise we may have the chance of
3231 		 * create a tree, the credit calculation is borrowed from
3232 		 * ocfs2_calc_extend_credits with root_el = NULL. And the
3233 		 * new tree will be cluster based, so no meta is needed.
3234 		 */
3235 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3236 			struct ocfs2_extent_list *el =
3237 				 &xb->xb_attrs.xb_root.xt_list;
3238 			meta_add += ocfs2_extend_meta_needed(el);
3239 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3240 							     el);
3241 		} else
3242 			credits += OCFS2_SUBALLOC_ALLOC + 1;
3243 
3244 		/*
3245 		 * This cluster will be used either for new bucket or for
3246 		 * new xattr block.
3247 		 * If the cluster size is the same as the bucket size, one
3248 		 * more is needed since we may need to extend the bucket
3249 		 * also.
3250 		 */
3251 		clusters_add += 1;
3252 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3253 		if (OCFS2_XATTR_BUCKET_SIZE ==
3254 			OCFS2_SB(inode->i_sb)->s_clustersize) {
3255 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3256 			clusters_add += 1;
3257 		}
3258 	} else {
3259 		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3260 		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3261 			struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
3262 			meta_add += ocfs2_extend_meta_needed(el);
3263 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3264 							     el);
3265 		} else {
3266 			meta_add += 1;
3267 		}
3268 	}
3269 out:
3270 	if (clusters_need)
3271 		*clusters_need = clusters_add;
3272 	if (meta_need)
3273 		*meta_need = meta_add;
3274 	if (credits_need)
3275 		*credits_need = credits;
3276 	brelse(bh);
3277 	return ret;
3278 }
3279 
3280 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3281 				     struct ocfs2_dinode *di,
3282 				     struct ocfs2_xattr_info *xi,
3283 				     struct ocfs2_xattr_search *xis,
3284 				     struct ocfs2_xattr_search *xbs,
3285 				     struct ocfs2_xattr_set_ctxt *ctxt,
3286 				     int extra_meta,
3287 				     int *credits)
3288 {
3289 	int clusters_add, meta_add, ret;
3290 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3291 
3292 	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3293 
3294 	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3295 
3296 	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3297 					&clusters_add, &meta_add, credits);
3298 	if (ret) {
3299 		mlog_errno(ret);
3300 		return ret;
3301 	}
3302 
3303 	meta_add += extra_meta;
3304 	trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
3305 					clusters_add, *credits);
3306 
3307 	if (meta_add) {
3308 		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3309 							&ctxt->meta_ac);
3310 		if (ret) {
3311 			mlog_errno(ret);
3312 			goto out;
3313 		}
3314 	}
3315 
3316 	if (clusters_add) {
3317 		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3318 		if (ret)
3319 			mlog_errno(ret);
3320 	}
3321 out:
3322 	if (ret) {
3323 		if (ctxt->meta_ac) {
3324 			ocfs2_free_alloc_context(ctxt->meta_ac);
3325 			ctxt->meta_ac = NULL;
3326 		}
3327 
3328 		/*
3329 		 * We cannot have an error and a non null ctxt->data_ac.
3330 		 */
3331 	}
3332 
3333 	return ret;
3334 }
3335 
/*
 * Core of the xattr set path.  The journal handle, allocators and
 * cluster locks are already held via @ctxt; this routine decides where
 * the xattr lives (inode body vs. external block), performs the
 * set/remove, and stamps the inode ctime on success.
 *
 * Strategy, as implemented below:
 *  - xi->xi_value == NULL removes the attribute from wherever the
 *    earlier search found it (inode body first, else external block).
 *  - Otherwise the inode body is always tried first.  If that succeeds
 *    and a stale copy exists in the external block, the stale copy is
 *    deleted.  If the body returns -ENOSPC, the set falls back to the
 *    external block and then deletes any stale copy from the body.
 *  - Each "delete the other copy" step re-runs
 *    ocfs2_calc_xattr_set_need() with the relevant not_found forced to
 *    -ENODATA so the estimate matches a pure delete, then extends the
 *    running transaction by that many credits.
 */
static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/* Remove existing extended attribute */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to set extended attribute into inode first*/
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * If succeed and that extended attribute existing in
			 * external block, then we will remove it.
			 */
			/* Turn xi into a delete request for the block copy. */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * Pretend the inode copy doesn't exist so the
			 * credit estimate covers only the block delete.
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			/* No room in the inode body: fall back to a block. */
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				/*
				 * An external block exists but was not
				 * searched yet (the name was found in the
				 * inode body); find it now.
				 */
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				/* Estimate credits for the block set alone. */
				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * If succeed and that extended attribute
				 * existing in inode, we will remove it.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode_set_ctime_current(inode);
		di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
		di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}
3471 
3472 /*
3473  * This function only called duing creating inode
3474  * for init security/acl xattrs of the new inode.
3475  * All transanction credits have been reserved in mknod.
3476  */
3477 int ocfs2_xattr_set_handle(handle_t *handle,
3478 			   struct inode *inode,
3479 			   struct buffer_head *di_bh,
3480 			   int name_index,
3481 			   const char *name,
3482 			   const void *value,
3483 			   size_t value_len,
3484 			   int flags,
3485 			   struct ocfs2_alloc_context *meta_ac,
3486 			   struct ocfs2_alloc_context *data_ac)
3487 {
3488 	struct ocfs2_dinode *di;
3489 	int ret;
3490 
3491 	struct ocfs2_xattr_info xi = {
3492 		.xi_name_index = name_index,
3493 		.xi_name = name,
3494 		.xi_name_len = strlen(name),
3495 		.xi_value = value,
3496 		.xi_value_len = value_len,
3497 	};
3498 
3499 	struct ocfs2_xattr_search xis = {
3500 		.not_found = -ENODATA,
3501 	};
3502 
3503 	struct ocfs2_xattr_search xbs = {
3504 		.not_found = -ENODATA,
3505 	};
3506 
3507 	struct ocfs2_xattr_set_ctxt ctxt = {
3508 		.handle = handle,
3509 		.meta_ac = meta_ac,
3510 		.data_ac = data_ac,
3511 	};
3512 
3513 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3514 		return -EOPNOTSUPP;
3515 
3516 	/*
3517 	 * In extreme situation, may need xattr bucket when
3518 	 * block size is too small. And we have already reserved
3519 	 * the credits for bucket in mknod.
3520 	 */
3521 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3522 		xbs.bucket = ocfs2_xattr_bucket_new(inode);
3523 		if (!xbs.bucket) {
3524 			mlog_errno(-ENOMEM);
3525 			return -ENOMEM;
3526 		}
3527 	}
3528 
3529 	xis.inode_bh = xbs.inode_bh = di_bh;
3530 	di = (struct ocfs2_dinode *)di_bh->b_data;
3531 
3532 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
3533 
3534 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3535 	if (ret)
3536 		goto cleanup;
3537 	if (xis.not_found) {
3538 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3539 		if (ret)
3540 			goto cleanup;
3541 	}
3542 
3543 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3544 
3545 cleanup:
3546 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
3547 	brelse(xbs.xattr_bh);
3548 	ocfs2_xattr_bucket_free(xbs.bucket);
3549 
3550 	return ret;
3551 }
3552 
3553 /*
3554  * ocfs2_xattr_set()
3555  *
3556  * Set, replace or remove an extended attribute for this inode.
3557  * value is NULL to remove an existing extended attribute, else either
3558  * create or replace an extended attribute.
3559  */
3560 int ocfs2_xattr_set(struct inode *inode,
3561 		    int name_index,
3562 		    const char *name,
3563 		    const void *value,
3564 		    size_t value_len,
3565 		    int flags)
3566 {
3567 	struct buffer_head *di_bh = NULL;
3568 	struct ocfs2_dinode *di;
3569 	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
3570 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3571 	struct inode *tl_inode = osb->osb_tl_inode;
3572 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
3573 	struct ocfs2_refcount_tree *ref_tree = NULL;
3574 	struct ocfs2_lock_holder oh;
3575 
3576 	struct ocfs2_xattr_info xi = {
3577 		.xi_name_index = name_index,
3578 		.xi_name = name,
3579 		.xi_name_len = strlen(name),
3580 		.xi_value = value,
3581 		.xi_value_len = value_len,
3582 	};
3583 
3584 	struct ocfs2_xattr_search xis = {
3585 		.not_found = -ENODATA,
3586 	};
3587 
3588 	struct ocfs2_xattr_search xbs = {
3589 		.not_found = -ENODATA,
3590 	};
3591 
3592 	if (!ocfs2_supports_xattr(osb))
3593 		return -EOPNOTSUPP;
3594 
3595 	/*
3596 	 * Only xbs will be used on indexed trees.  xis doesn't need a
3597 	 * bucket.
3598 	 */
3599 	xbs.bucket = ocfs2_xattr_bucket_new(inode);
3600 	if (!xbs.bucket) {
3601 		mlog_errno(-ENOMEM);
3602 		return -ENOMEM;
3603 	}
3604 
3605 	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
3606 	if (had_lock < 0) {
3607 		ret = had_lock;
3608 		mlog_errno(ret);
3609 		goto cleanup_nolock;
3610 	}
3611 	xis.inode_bh = xbs.inode_bh = di_bh;
3612 	di = (struct ocfs2_dinode *)di_bh->b_data;
3613 
3614 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
3615 	/*
3616 	 * Scan inode and external block to find the same name
3617 	 * extended attribute and collect search information.
3618 	 */
3619 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3620 	if (ret)
3621 		goto cleanup;
3622 	if (xis.not_found) {
3623 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3624 		if (ret)
3625 			goto cleanup;
3626 	}
3627 
3628 	if (xis.not_found && xbs.not_found) {
3629 		ret = -ENODATA;
3630 		if (flags & XATTR_REPLACE)
3631 			goto cleanup;
3632 		ret = 0;
3633 		if (!value)
3634 			goto cleanup;
3635 	} else {
3636 		ret = -EEXIST;
3637 		if (flags & XATTR_CREATE)
3638 			goto cleanup;
3639 	}
3640 
3641 	/* Check whether the value is refcounted and do some preparation. */
3642 	if (ocfs2_is_refcount_inode(inode) &&
3643 	    (!xis.not_found || !xbs.not_found)) {
3644 		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3645 						   &xis, &xbs, &ref_tree,
3646 						   &ref_meta, &ref_credits);
3647 		if (ret) {
3648 			mlog_errno(ret);
3649 			goto cleanup;
3650 		}
3651 	}
3652 
3653 	inode_lock(tl_inode);
3654 
3655 	if (ocfs2_truncate_log_needs_flush(osb)) {
3656 		ret = __ocfs2_flush_truncate_log(osb);
3657 		if (ret < 0) {
3658 			inode_unlock(tl_inode);
3659 			mlog_errno(ret);
3660 			goto cleanup;
3661 		}
3662 	}
3663 	inode_unlock(tl_inode);
3664 
3665 	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3666 					&xbs, &ctxt, ref_meta, &credits);
3667 	if (ret) {
3668 		mlog_errno(ret);
3669 		goto cleanup;
3670 	}
3671 
3672 	/* we need to update inode's ctime field, so add credit for it. */
3673 	credits += OCFS2_INODE_UPDATE_CREDITS;
3674 	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3675 	if (IS_ERR(ctxt.handle)) {
3676 		ret = PTR_ERR(ctxt.handle);
3677 		mlog_errno(ret);
3678 		goto out_free_ac;
3679 	}
3680 
3681 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3682 	ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
3683 
3684 	ocfs2_commit_trans(osb, ctxt.handle);
3685 
3686 out_free_ac:
3687 	if (ctxt.data_ac)
3688 		ocfs2_free_alloc_context(ctxt.data_ac);
3689 	if (ctxt.meta_ac)
3690 		ocfs2_free_alloc_context(ctxt.meta_ac);
3691 	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3692 		ocfs2_schedule_truncate_log_flush(osb, 1);
3693 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
3694 
3695 cleanup:
3696 	if (ref_tree)
3697 		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3698 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
3699 	if (!value && !ret) {
3700 		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3701 		if (ret)
3702 			mlog_errno(ret);
3703 	}
3704 	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
3705 cleanup_nolock:
3706 	brelse(di_bh);
3707 	brelse(xbs.xattr_bh);
3708 	ocfs2_xattr_bucket_free(xbs.bucket);
3709 
3710 	return ret;
3711 }
3712 
3713 /*
3714  * Find the xattr extent rec which may contains name_hash.
3715  * e_cpos will be the first name hash of the xattr rec.
3716  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3717  */
3718 static int ocfs2_xattr_get_rec(struct inode *inode,
3719 			       u32 name_hash,
3720 			       u64 *p_blkno,
3721 			       u32 *e_cpos,
3722 			       u32 *num_clusters,
3723 			       struct ocfs2_extent_list *el)
3724 {
3725 	int ret = 0, i;
3726 	struct buffer_head *eb_bh = NULL;
3727 	struct ocfs2_extent_block *eb;
3728 	struct ocfs2_extent_rec *rec = NULL;
3729 	u64 e_blkno = 0;
3730 
3731 	if (el->l_tree_depth) {
3732 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3733 				      &eb_bh);
3734 		if (ret) {
3735 			mlog_errno(ret);
3736 			goto out;
3737 		}
3738 
3739 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3740 		el = &eb->h_list;
3741 
3742 		if (el->l_tree_depth) {
3743 			ret = ocfs2_error(inode->i_sb,
3744 					  "Inode %lu has non zero tree depth in xattr tree block %llu\n",
3745 					  inode->i_ino,
3746 					  (unsigned long long)eb_bh->b_blocknr);
3747 			goto out;
3748 		}
3749 	}
3750 
3751 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3752 		rec = &el->l_recs[i];
3753 
3754 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3755 			e_blkno = le64_to_cpu(rec->e_blkno);
3756 			break;
3757 		}
3758 	}
3759 
3760 	if (!e_blkno) {
3761 		ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
3762 				  inode->i_ino,
3763 				  le32_to_cpu(rec->e_cpos),
3764 				  ocfs2_rec_clusters(el, rec));
3765 		goto out;
3766 	}
3767 
3768 	*p_blkno = le64_to_cpu(rec->e_blkno);
3769 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3770 	if (e_cpos)
3771 		*e_cpos = le32_to_cpu(rec->e_cpos);
3772 out:
3773 	brelse(eb_bh);
3774 	return ret;
3775 }
3776 
3777 typedef int (xattr_bucket_func)(struct inode *inode,
3778 				struct ocfs2_xattr_bucket *bucket,
3779 				void *para);
3780 
3781 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3782 				   struct ocfs2_xattr_bucket *bucket,
3783 				   int name_index,
3784 				   const char *name,
3785 				   u32 name_hash,
3786 				   u16 *xe_index,
3787 				   int *found)
3788 {
3789 	int i, ret = 0, cmp = 1, block_off, new_offset;
3790 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3791 	size_t name_len = strlen(name);
3792 	struct ocfs2_xattr_entry *xe = NULL;
3793 	char *xe_name;
3794 
3795 	/*
3796 	 * We don't use binary search in the bucket because there
3797 	 * may be multiple entries with the same name hash.
3798 	 */
3799 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3800 		xe = &xh->xh_entries[i];
3801 
3802 		if (name_hash > le32_to_cpu(xe->xe_name_hash))
3803 			continue;
3804 		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3805 			break;
3806 
3807 		cmp = name_index - ocfs2_xattr_get_type(xe);
3808 		if (!cmp)
3809 			cmp = name_len - xe->xe_name_len;
3810 		if (cmp)
3811 			continue;
3812 
3813 		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3814 							xh,
3815 							i,
3816 							&block_off,
3817 							&new_offset);
3818 		if (ret) {
3819 			mlog_errno(ret);
3820 			break;
3821 		}
3822 
3823 
3824 		xe_name = bucket_block(bucket, block_off) + new_offset;
3825 		if (!memcmp(name, xe_name, name_len)) {
3826 			*xe_index = i;
3827 			*found = 1;
3828 			ret = 0;
3829 			break;
3830 		}
3831 	}
3832 
3833 	return ret;
3834 }
3835 
3836 /*
3837  * Find the specified xattr entry in a series of buckets.
3838  * This series start from p_blkno and last for num_clusters.
3839  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3840  * the num of the valid buckets.
3841  *
3842  * Return the buffer_head this xattr should reside in. And if the xattr's
3843  * hash is in the gap of 2 buckets, return the lower bucket.
3844  */
3845 static int ocfs2_xattr_bucket_find(struct inode *inode,
3846 				   int name_index,
3847 				   const char *name,
3848 				   u32 name_hash,
3849 				   u64 p_blkno,
3850 				   u32 first_hash,
3851 				   u32 num_clusters,
3852 				   struct ocfs2_xattr_search *xs)
3853 {
3854 	int ret, found = 0;
3855 	struct ocfs2_xattr_header *xh = NULL;
3856 	struct ocfs2_xattr_entry *xe = NULL;
3857 	u16 index = 0;
3858 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3859 	int low_bucket = 0, bucket, high_bucket;
3860 	struct ocfs2_xattr_bucket *search;
3861 	u64 blkno, lower_blkno = 0;
3862 
3863 	search = ocfs2_xattr_bucket_new(inode);
3864 	if (!search) {
3865 		ret = -ENOMEM;
3866 		mlog_errno(ret);
3867 		goto out;
3868 	}
3869 
3870 	ret = ocfs2_read_xattr_bucket(search, p_blkno);
3871 	if (ret) {
3872 		mlog_errno(ret);
3873 		goto out;
3874 	}
3875 
3876 	xh = bucket_xh(search);
3877 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3878 	while (low_bucket <= high_bucket) {
3879 		ocfs2_xattr_bucket_relse(search);
3880 
3881 		bucket = (low_bucket + high_bucket) / 2;
3882 		blkno = p_blkno + bucket * blk_per_bucket;
3883 		ret = ocfs2_read_xattr_bucket(search, blkno);
3884 		if (ret) {
3885 			mlog_errno(ret);
3886 			goto out;
3887 		}
3888 
3889 		xh = bucket_xh(search);
3890 		xe = &xh->xh_entries[0];
3891 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3892 			high_bucket = bucket - 1;
3893 			continue;
3894 		}
3895 
3896 		/*
3897 		 * Check whether the hash of the last entry in our
3898 		 * bucket is larger than the search one. for an empty
3899 		 * bucket, the last one is also the first one.
3900 		 */
3901 		if (xh->xh_count)
3902 			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3903 
3904 		/* record lower_blkno which may be the insert place. */
3905 		lower_blkno = blkno;
3906 
3907 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3908 			low_bucket = bucket + 1;
3909 			continue;
3910 		}
3911 
3912 		/* the searched xattr should reside in this bucket if exists. */
3913 		ret = ocfs2_find_xe_in_bucket(inode, search,
3914 					      name_index, name, name_hash,
3915 					      &index, &found);
3916 		if (ret) {
3917 			mlog_errno(ret);
3918 			goto out;
3919 		}
3920 		break;
3921 	}
3922 
3923 	/*
3924 	 * Record the bucket we have found.
3925 	 * When the xattr's hash value is in the gap of 2 buckets, we will
3926 	 * always set it to the previous bucket.
3927 	 */
3928 	if (!lower_blkno)
3929 		lower_blkno = p_blkno;
3930 
3931 	/* This should be in cache - we just read it during the search */
3932 	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3933 	if (ret) {
3934 		mlog_errno(ret);
3935 		goto out;
3936 	}
3937 
3938 	xs->header = bucket_xh(xs->bucket);
3939 	xs->base = bucket_block(xs->bucket, 0);
3940 	xs->end = xs->base + inode->i_sb->s_blocksize;
3941 
3942 	if (found) {
3943 		xs->here = &xs->header->xh_entries[index];
3944 		trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
3945 			name, name_index, name_hash,
3946 			(unsigned long long)bucket_blkno(xs->bucket),
3947 			index);
3948 	} else
3949 		ret = -ENODATA;
3950 
3951 out:
3952 	ocfs2_xattr_bucket_free(search);
3953 	return ret;
3954 }
3955 
3956 static int ocfs2_xattr_index_block_find(struct inode *inode,
3957 					struct buffer_head *root_bh,
3958 					int name_index,
3959 					const char *name,
3960 					struct ocfs2_xattr_search *xs)
3961 {
3962 	int ret;
3963 	struct ocfs2_xattr_block *xb =
3964 			(struct ocfs2_xattr_block *)root_bh->b_data;
3965 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3966 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3967 	u64 p_blkno = 0;
3968 	u32 first_hash, num_clusters = 0;
3969 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3970 
3971 	if (le16_to_cpu(el->l_next_free_rec) == 0)
3972 		return -ENODATA;
3973 
3974 	trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
3975 					name, name_index, name_hash,
3976 					(unsigned long long)root_bh->b_blocknr,
3977 					-1);
3978 
3979 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3980 				  &num_clusters, el);
3981 	if (ret) {
3982 		mlog_errno(ret);
3983 		goto out;
3984 	}
3985 
3986 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3987 
3988 	trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
3989 					name, name_index, first_hash,
3990 					(unsigned long long)p_blkno,
3991 					num_clusters);
3992 
3993 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3994 				      p_blkno, first_hash, num_clusters, xs);
3995 
3996 out:
3997 	return ret;
3998 }
3999 
/*
 * Walk every valid bucket in the extent starting at @blkno and spanning
 * @clusters clusters, calling @func on each.  The walk stops at the
 * first error from a read or from @func (the -ERANGE special case only
 * suppresses logging; iteration still stops).  @func may be NULL, in
 * which case the buckets are merely read and released.
 */
static int ocfs2_iterate_xattr_buckets(struct inode *inode,
				       u64 blkno,
				       u32 clusters,
				       xattr_bucket_func *func,
				       void *para)
{
	int i, ret = 0;
	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
	/* Upper bound; refined from the first bucket's header below. */
	u32 num_buckets = clusters * bpc;
	struct ocfs2_xattr_bucket *bucket;

	bucket = ocfs2_xattr_bucket_new(inode);
	if (!bucket) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	trace_ocfs2_iterate_xattr_buckets(
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
		(unsigned long long)blkno, clusters);

	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
		ret = ocfs2_read_xattr_bucket(bucket, blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * The real bucket num in this series of blocks is stored
		 * in the 1st bucket.
		 */
		if (i == 0)
			num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);

		trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
		if (func) {
			ret = func(inode, bucket, para);
			if (ret && ret != -ERANGE)
				mlog_errno(ret);
			/* Fall through to bucket_relse() */
		}

		ocfs2_xattr_bucket_relse(bucket);
		if (ret)
			break;
	}

	ocfs2_xattr_bucket_free(bucket);
	return ret;
}
4052 
/* Accumulator threaded through the bucket walk when listing xattrs. */
struct ocfs2_xattr_tree_list {
	char *buffer;		/* destination for the name list */
	size_t buffer_size;	/* capacity of buffer */
	size_t result;		/* bytes produced so far */
};
4058 
4059 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
4060 					     struct ocfs2_xattr_header *xh,
4061 					     int index,
4062 					     int *block_off,
4063 					     int *new_offset)
4064 {
4065 	u16 name_offset;
4066 
4067 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4068 		return -EINVAL;
4069 
4070 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4071 
4072 	*block_off = name_offset >> sb->s_blocksize_bits;
4073 	*new_offset = name_offset % sb->s_blocksize;
4074 
4075 	return 0;
4076 }
4077 
4078 static int ocfs2_list_xattr_bucket(struct inode *inode,
4079 				   struct ocfs2_xattr_bucket *bucket,
4080 				   void *para)
4081 {
4082 	int ret = 0, type;
4083 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4084 	int i, block_off, new_offset;
4085 	const char *name;
4086 
4087 	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4088 		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4089 		type = ocfs2_xattr_get_type(entry);
4090 
4091 		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4092 							bucket_xh(bucket),
4093 							i,
4094 							&block_off,
4095 							&new_offset);
4096 		if (ret)
4097 			break;
4098 
4099 		name = (const char *)bucket_block(bucket, block_off) +
4100 			new_offset;
4101 		ret = ocfs2_xattr_list_entry(inode->i_sb,
4102 					     xl->buffer,
4103 					     xl->buffer_size,
4104 					     &xl->result,
4105 					     type, name,
4106 					     entry->xe_name_len);
4107 		if (ret)
4108 			break;
4109 	}
4110 
4111 	return ret;
4112 }
4113 
/*
 * Visit every extent record of an indexed xattr tree, highest name
 * hash first, calling @rec_func on each.  Walking starts from
 * UINT_MAX and steps to (extent's first hash - 1) after each record,
 * so the whole hash range is covered exactly once; an extent whose
 * first hash is 0 terminates the walk.  An -ERANGE from @rec_func
 * stops iteration without logging (used for buffer-too-small).
 */
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
					   struct buffer_head *blk_bh,
					   xattr_tree_rec_func *rec_func,
					   void *para)
{
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
	int ret = 0;
	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
	u64 p_blkno = 0;

	if (!el->l_next_free_rec || !rec_func)
		return 0;

	while (name_hash > 0) {
		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
					  &e_cpos, &num_clusters, el);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
			       num_clusters, para);
		if (ret) {
			if (ret != -ERANGE)
				mlog_errno(ret);
			break;
		}

		/* The record starting at hash 0 is the last one to visit. */
		if (e_cpos == 0)
			break;

		name_hash = e_cpos - 1;
	}

	return ret;

}
4154 
4155 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4156 				     struct buffer_head *root_bh,
4157 				     u64 blkno, u32 cpos, u32 len, void *para)
4158 {
4159 	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4160 					   ocfs2_list_xattr_bucket, para);
4161 }
4162 
4163 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4164 					     struct buffer_head *blk_bh,
4165 					     char *buffer,
4166 					     size_t buffer_size)
4167 {
4168 	int ret;
4169 	struct ocfs2_xattr_tree_list xl = {
4170 		.buffer = buffer,
4171 		.buffer_size = buffer_size,
4172 		.result = 0,
4173 	};
4174 
4175 	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4176 					      ocfs2_list_xattr_tree_rec, &xl);
4177 	if (ret) {
4178 		mlog_errno(ret);
4179 		goto out;
4180 	}
4181 
4182 	ret = xl.result;
4183 out:
4184 	return ret;
4185 }
4186 
4187 static int cmp_xe(const void *a, const void *b)
4188 {
4189 	const struct ocfs2_xattr_entry *l = a, *r = b;
4190 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
4191 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
4192 
4193 	if (l_hash > r_hash)
4194 		return 1;
4195 	if (l_hash < r_hash)
4196 		return -1;
4197 	return 0;
4198 }
4199 
4200 /*
4201  * When the ocfs2_xattr_block is filled up, new bucket will be created
4202  * and all the xattr entries will be moved to the new bucket.
4203  * The header goes at the start of the bucket, and the names+values are
4204  * filled from the end.  This is why *target starts as the last buffer.
4205  * Note: we need to sort the entries since they are not saved in order
4206  * in the ocfs2_xattr_block.
4207  */
4208 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4209 					   struct buffer_head *xb_bh,
4210 					   struct ocfs2_xattr_bucket *bucket)
4211 {
4212 	int i, blocksize = inode->i_sb->s_blocksize;
4213 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4214 	u16 offset, size, off_change;
4215 	struct ocfs2_xattr_entry *xe;
4216 	struct ocfs2_xattr_block *xb =
4217 				(struct ocfs2_xattr_block *)xb_bh->b_data;
4218 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4219 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4220 	u16 count = le16_to_cpu(xb_xh->xh_count);
4221 	char *src = xb_bh->b_data;
4222 	char *target = bucket_block(bucket, blks - 1);
4223 
4224 	trace_ocfs2_cp_xattr_block_to_bucket_begin(
4225 				(unsigned long long)xb_bh->b_blocknr,
4226 				(unsigned long long)bucket_blkno(bucket));
4227 
4228 	for (i = 0; i < blks; i++)
4229 		memset(bucket_block(bucket, i), 0, blocksize);
4230 
4231 	/*
4232 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
4233 	 * there is a offset change corresponding to the change of
4234 	 * ocfs2_xattr_header's position.
4235 	 */
4236 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4237 	xe = &xb_xh->xh_entries[count - 1];
4238 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4239 	size = blocksize - offset;
4240 
4241 	/* copy all the names and values. */
4242 	memcpy(target + offset, src + offset, size);
4243 
4244 	/* Init new header now. */
4245 	xh->xh_count = xb_xh->xh_count;
4246 	xh->xh_num_buckets = cpu_to_le16(1);
4247 	xh->xh_name_value_len = cpu_to_le16(size);
4248 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4249 
4250 	/* copy all the entries. */
4251 	target = bucket_block(bucket, 0);
4252 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4253 	size = count * sizeof(struct ocfs2_xattr_entry);
4254 	memcpy(target + offset, (char *)xb_xh + offset, size);
4255 
4256 	/* Change the xe offset for all the xe because of the move. */
4257 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4258 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4259 	for (i = 0; i < count; i++)
4260 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4261 
4262 	trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
4263 
4264 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4265 	     cmp_xe, NULL);
4266 }
4267 
4268 /*
4269  * After we move xattr from block to index btree, we have to
4270  * update ocfs2_xattr_search to the new xe and base.
4271  *
4272  * When the entry is in xattr block, xattr_bh indicates the storage place.
4273  * While if the entry is in index b-tree, "bucket" indicates the
4274  * real place of the xattr.
4275  */
4276 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4277 					    struct ocfs2_xattr_search *xs,
4278 					    struct buffer_head *old_bh)
4279 {
4280 	char *buf = old_bh->b_data;
4281 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4282 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4283 	int i;
4284 
4285 	xs->header = bucket_xh(xs->bucket);
4286 	xs->base = bucket_block(xs->bucket, 0);
4287 	xs->end = xs->base + inode->i_sb->s_blocksize;
4288 
4289 	if (xs->not_found)
4290 		return;
4291 
4292 	i = xs->here - old_xh->xh_entries;
4293 	xs->here = &xs->header->xh_entries[i];
4294 }
4295 
/*
 * Convert a plain (non-indexed) xattr block into an indexed one:
 * allocate one cluster, copy every entry in the block into the first
 * bucket of that cluster, then reinitialize the block's payload as an
 * extent tree root with a single record pointing at the new cluster.
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs,
					  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 bit_off, len;
	u64 blkno;
	handle_t *handle = ctxt->handle;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct buffer_head *xb_bh = xs->xattr_bh;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_tree_root *xr;
	u16 xb_flags = le16_to_cpu(xb->xb_flags);

	trace_ocfs2_xattr_create_index_block_begin(
				(unsigned long long)xb_bh->b_blocknr);

	/* The block must not be indexed yet, and a bucket must be staged. */
	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
	BUG_ON(!xs->bucket);

	/*
	 * XXX:
	 * We can use this lock for now, and maybe move to a dedicated mutex
	 * if performance becomes a problem later.
	 */
	down_write(&oi->ip_alloc_sem);

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Allocate exactly one cluster for the first bucket extent. */
	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
				     1, 1, &bit_off, &len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * The bucket may spread in many blocks, and
	 * we will only touch the 1st block and the last block
	 * in the whole bucket(one for entry and one for data).
	 */
	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

	trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);

	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
						OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Move the block's entries into the new bucket and log it. */
	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);

	/* Re-point the search at the bucket that now holds the entries. */
	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);

	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
	       offsetof(struct ocfs2_xattr_block, xb_attrs));

	/* Single-record, depth-0 tree covering the new cluster. */
	xr = &xb->xb_attrs.xb_root;
	xr->xt_clusters = cpu_to_le32(1);
	xr->xt_last_eb_blk = 0;
	xr->xt_list.l_tree_depth = 0;
	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
	xr->xt_list.l_next_free_rec = cpu_to_le16(1);

	xr->xt_list.l_recs[0].e_cpos = 0;
	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);

	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);

	ocfs2_journal_dirty(handle, xb_bh);

out:
	up_write(&oi->ip_alloc_sem);

	return ret;
}
4389 
4390 static int cmp_xe_offset(const void *a, const void *b)
4391 {
4392 	const struct ocfs2_xattr_entry *l = a, *r = b;
4393 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4394 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4395 
4396 	if (l_name_offset < r_name_offset)
4397 		return 1;
4398 	if (l_name_offset > r_name_offset)
4399 		return -1;
4400 	return 0;
4401 }
4402 
4403 /*
4404  * defrag a xattr bucket if we find that the bucket has some
4405  * holes between name/value pairs.
4406  * We will move all the name/value pairs to the end of the bucket
4407  * so that we can spare some space for insertion.
4408  */
4409 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4410 				     handle_t *handle,
4411 				     struct ocfs2_xattr_bucket *bucket)
4412 {
4413 	int ret, i;
4414 	size_t end, offset, len;
4415 	struct ocfs2_xattr_header *xh;
4416 	char *entries, *buf, *bucket_buf = NULL;
4417 	u64 blkno = bucket_blkno(bucket);
4418 	u16 xh_free_start;
4419 	size_t blocksize = inode->i_sb->s_blocksize;
4420 	struct ocfs2_xattr_entry *xe;
4421 
4422 	/*
4423 	 * In order to make the operation more efficient and generic,
4424 	 * we copy all the blocks into a contiguous memory and do the
4425 	 * defragment there, so if anything is error, we will not touch
4426 	 * the real block.
4427 	 */
4428 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4429 	if (!bucket_buf) {
4430 		ret = -EIO;
4431 		goto out;
4432 	}
4433 
4434 	buf = bucket_buf;
4435 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4436 		memcpy(buf, bucket_block(bucket, i), blocksize);
4437 
4438 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4439 						OCFS2_JOURNAL_ACCESS_WRITE);
4440 	if (ret < 0) {
4441 		mlog_errno(ret);
4442 		goto out;
4443 	}
4444 
4445 	xh = (struct ocfs2_xattr_header *)bucket_buf;
4446 	entries = (char *)xh->xh_entries;
4447 	xh_free_start = le16_to_cpu(xh->xh_free_start);
4448 
4449 	trace_ocfs2_defrag_xattr_bucket(
4450 	     (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4451 	     xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4452 
4453 	/*
4454 	 * sort all the entries by their offset.
4455 	 * the largest will be the first, so that we can
4456 	 * move them to the end one by one.
4457 	 */
4458 	sort(entries, le16_to_cpu(xh->xh_count),
4459 	     sizeof(struct ocfs2_xattr_entry),
4460 	     cmp_xe_offset, NULL);
4461 
4462 	/* Move all name/values to the end of the bucket. */
4463 	xe = xh->xh_entries;
4464 	end = OCFS2_XATTR_BUCKET_SIZE;
4465 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4466 		offset = le16_to_cpu(xe->xe_name_offset);
4467 		len = namevalue_size_xe(xe);
4468 
4469 		/*
4470 		 * We must make sure that the name/value pair
4471 		 * exist in the same block. So adjust end to
4472 		 * the previous block end if needed.
4473 		 */
4474 		if (((end - len) / blocksize !=
4475 			(end - 1) / blocksize))
4476 			end = end - end % blocksize;
4477 
4478 		if (end > offset + len) {
4479 			memmove(bucket_buf + end - len,
4480 				bucket_buf + offset, len);
4481 			xe->xe_name_offset = cpu_to_le16(end - len);
4482 		}
4483 
4484 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4485 				"bucket %llu\n", (unsigned long long)blkno);
4486 
4487 		end -= len;
4488 	}
4489 
4490 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4491 			"bucket %llu\n", (unsigned long long)blkno);
4492 
4493 	if (xh_free_start == end)
4494 		goto out;
4495 
4496 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4497 	xh->xh_free_start = cpu_to_le16(end);
4498 
4499 	/* sort the entries by their name_hash. */
4500 	sort(entries, le16_to_cpu(xh->xh_count),
4501 	     sizeof(struct ocfs2_xattr_entry),
4502 	     cmp_xe, NULL);
4503 
4504 	buf = bucket_buf;
4505 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4506 		memcpy(bucket_block(bucket, i), buf, blocksize);
4507 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4508 
4509 out:
4510 	kfree(bucket_buf);
4511 	return ret;
4512 }
4513 
4514 /*
4515  * prev_blkno points to the start of an existing extent.  new_blkno
4516  * points to a newly allocated extent.  Because we know each of our
4517  * clusters contains more than bucket, we can easily split one cluster
4518  * at a bucket boundary.  So we take the last cluster of the existing
4519  * extent and split it down the middle.  We move the last half of the
4520  * buckets in the last cluster of the existing extent over to the new
4521  * extent.
4522  *
4523  * first_bh is the buffer at prev_blkno so we can update the existing
4524  * extent's bucket count.  header_bh is the bucket were we were hoping
4525  * to insert our xattr.  If the bucket move places the target in the new
4526  * extent, we'll update first_bh and header_bh after modifying the old
4527  * extent.
4528  *
4529  * first_hash will be set as the 1st xe's name_hash in the new extent.
4530  */
4531 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4532 					       handle_t *handle,
4533 					       struct ocfs2_xattr_bucket *first,
4534 					       struct ocfs2_xattr_bucket *target,
4535 					       u64 new_blkno,
4536 					       u32 num_clusters,
4537 					       u32 *first_hash)
4538 {
4539 	int ret;
4540 	struct super_block *sb = inode->i_sb;
4541 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4542 	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4543 	int to_move = num_buckets / 2;
4544 	u64 src_blkno;
4545 	u64 last_cluster_blkno = bucket_blkno(first) +
4546 		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4547 
4548 	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4549 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4550 
4551 	trace_ocfs2_mv_xattr_bucket_cross_cluster(
4552 				(unsigned long long)last_cluster_blkno,
4553 				(unsigned long long)new_blkno);
4554 
4555 	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4556 				     last_cluster_blkno, new_blkno,
4557 				     to_move, first_hash);
4558 	if (ret) {
4559 		mlog_errno(ret);
4560 		goto out;
4561 	}
4562 
4563 	/* This is the first bucket that got moved */
4564 	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4565 
4566 	/*
4567 	 * If the target bucket was part of the moved buckets, we need to
4568 	 * update first and target.
4569 	 */
4570 	if (bucket_blkno(target) >= src_blkno) {
4571 		/* Find the block for the new target bucket */
4572 		src_blkno = new_blkno +
4573 			(bucket_blkno(target) - src_blkno);
4574 
4575 		ocfs2_xattr_bucket_relse(first);
4576 		ocfs2_xattr_bucket_relse(target);
4577 
4578 		/*
4579 		 * These shouldn't fail - the buffers are in the
4580 		 * journal from ocfs2_cp_xattr_bucket().
4581 		 */
4582 		ret = ocfs2_read_xattr_bucket(first, new_blkno);
4583 		if (ret) {
4584 			mlog_errno(ret);
4585 			goto out;
4586 		}
4587 		ret = ocfs2_read_xattr_bucket(target, src_blkno);
4588 		if (ret)
4589 			mlog_errno(ret);
4590 
4591 	}
4592 
4593 out:
4594 	return ret;
4595 }
4596 
4597 /*
4598  * Find the suitable pos when we divide a bucket into 2.
4599  * We have to make sure the xattrs with the same hash value exist
4600  * in the same bucket.
4601  *
4602  * If this ocfs2_xattr_header covers more than one hash value, find a
4603  * place where the hash value changes.  Try to find the most even split.
4604  * The most common case is that all entries have different hash values,
4605  * and the first check we make will find a place to split.
4606  */
4607 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4608 {
4609 	struct ocfs2_xattr_entry *entries = xh->xh_entries;
4610 	int count = le16_to_cpu(xh->xh_count);
4611 	int delta, middle = count / 2;
4612 
4613 	/*
4614 	 * We start at the middle.  Each step gets farther away in both
4615 	 * directions.  We therefore hit the change in hash value
4616 	 * nearest to the middle.  Note that this loop does not execute for
4617 	 * count < 2.
4618 	 */
4619 	for (delta = 0; delta < middle; delta++) {
4620 		/* Let's check delta earlier than middle */
4621 		if (cmp_xe(&entries[middle - delta - 1],
4622 			   &entries[middle - delta]))
4623 			return middle - delta;
4624 
4625 		/* For even counts, don't walk off the end */
4626 		if ((middle + delta + 1) == count)
4627 			continue;
4628 
4629 		/* Now try delta past middle */
4630 		if (cmp_xe(&entries[middle + delta],
4631 			   &entries[middle + delta + 1]))
4632 			return middle + delta + 1;
4633 	}
4634 
4635 	/* Every entry had the same hash */
4636 	return count;
4637 }
4638 
4639 /*
4640  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4641  * first_hash will record the 1st hash of the new bucket.
4642  *
4643  * Normally half of the xattrs will be moved.  But we have to make
4644  * sure that the xattrs with the same hash value are stored in the
4645  * same bucket. If all the xattrs in this bucket have the same hash
4646  * value, the new bucket will be initialized as an empty one and the
4647  * first_hash will be initialized as (hash_value+1).
4648  */
4649 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4650 				    handle_t *handle,
4651 				    u64 blk,
4652 				    u64 new_blk,
4653 				    u32 *first_hash,
4654 				    int new_bucket_head)
4655 {
4656 	int ret, i;
4657 	int count, start, len, name_value_len = 0, name_offset = 0;
4658 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4659 	struct ocfs2_xattr_header *xh;
4660 	struct ocfs2_xattr_entry *xe;
4661 	int blocksize = inode->i_sb->s_blocksize;
4662 
4663 	trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
4664 					      (unsigned long long)new_blk);
4665 
4666 	s_bucket = ocfs2_xattr_bucket_new(inode);
4667 	t_bucket = ocfs2_xattr_bucket_new(inode);
4668 	if (!s_bucket || !t_bucket) {
4669 		ret = -ENOMEM;
4670 		mlog_errno(ret);
4671 		goto out;
4672 	}
4673 
4674 	ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4675 	if (ret) {
4676 		mlog_errno(ret);
4677 		goto out;
4678 	}
4679 
4680 	ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4681 						OCFS2_JOURNAL_ACCESS_WRITE);
4682 	if (ret) {
4683 		mlog_errno(ret);
4684 		goto out;
4685 	}
4686 
4687 	/*
4688 	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4689 	 * there's no need to read it.
4690 	 */
4691 	ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
4692 	if (ret) {
4693 		mlog_errno(ret);
4694 		goto out;
4695 	}
4696 
4697 	/*
4698 	 * Hey, if we're overwriting t_bucket, what difference does
4699 	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4700 	 * same part of ocfs2_cp_xattr_bucket().
4701 	 */
4702 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4703 						new_bucket_head ?
4704 						OCFS2_JOURNAL_ACCESS_CREATE :
4705 						OCFS2_JOURNAL_ACCESS_WRITE);
4706 	if (ret) {
4707 		mlog_errno(ret);
4708 		goto out;
4709 	}
4710 
4711 	xh = bucket_xh(s_bucket);
4712 	count = le16_to_cpu(xh->xh_count);
4713 	start = ocfs2_xattr_find_divide_pos(xh);
4714 
4715 	if (start == count) {
4716 		xe = &xh->xh_entries[start-1];
4717 
4718 		/*
4719 		 * initialized a new empty bucket here.
4720 		 * The hash value is set as one larger than
4721 		 * that of the last entry in the previous bucket.
4722 		 */
4723 		for (i = 0; i < t_bucket->bu_blocks; i++)
4724 			memset(bucket_block(t_bucket, i), 0, blocksize);
4725 
4726 		xh = bucket_xh(t_bucket);
4727 		xh->xh_free_start = cpu_to_le16(blocksize);
4728 		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4729 		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4730 
4731 		goto set_num_buckets;
4732 	}
4733 
4734 	/* copy the whole bucket to the new first. */
4735 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4736 
4737 	/* update the new bucket. */
4738 	xh = bucket_xh(t_bucket);
4739 
4740 	/*
4741 	 * Calculate the total name/value len and xh_free_start for
4742 	 * the old bucket first.
4743 	 */
4744 	name_offset = OCFS2_XATTR_BUCKET_SIZE;
4745 	name_value_len = 0;
4746 	for (i = 0; i < start; i++) {
4747 		xe = &xh->xh_entries[i];
4748 		name_value_len += namevalue_size_xe(xe);
4749 		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4750 			name_offset = le16_to_cpu(xe->xe_name_offset);
4751 	}
4752 
4753 	/*
4754 	 * Now begin the modification to the new bucket.
4755 	 *
4756 	 * In the new bucket, We just move the xattr entry to the beginning
4757 	 * and don't touch the name/value. So there will be some holes in the
4758 	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4759 	 * called.
4760 	 */
4761 	xe = &xh->xh_entries[start];
4762 	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4763 	trace_ocfs2_divide_xattr_bucket_move(len,
4764 			(int)((char *)xe - (char *)xh),
4765 			(int)((char *)xh->xh_entries - (char *)xh));
4766 	memmove((char *)xh->xh_entries, (char *)xe, len);
4767 	xe = &xh->xh_entries[count - start];
4768 	len = sizeof(struct ocfs2_xattr_entry) * start;
4769 	memset((char *)xe, 0, len);
4770 
4771 	le16_add_cpu(&xh->xh_count, -start);
4772 	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4773 
4774 	/* Calculate xh_free_start for the new bucket. */
4775 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4776 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4777 		xe = &xh->xh_entries[i];
4778 		if (le16_to_cpu(xe->xe_name_offset) <
4779 		    le16_to_cpu(xh->xh_free_start))
4780 			xh->xh_free_start = xe->xe_name_offset;
4781 	}
4782 
4783 set_num_buckets:
4784 	/* set xh->xh_num_buckets for the new xh. */
4785 	if (new_bucket_head)
4786 		xh->xh_num_buckets = cpu_to_le16(1);
4787 	else
4788 		xh->xh_num_buckets = 0;
4789 
4790 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4791 
4792 	/* store the first_hash of the new bucket. */
4793 	if (first_hash)
4794 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4795 
4796 	/*
4797 	 * Now only update the 1st block of the old bucket.  If we
4798 	 * just added a new empty bucket, there is no need to modify
4799 	 * it.
4800 	 */
4801 	if (start == count)
4802 		goto out;
4803 
4804 	xh = bucket_xh(s_bucket);
4805 	memset(&xh->xh_entries[start], 0,
4806 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
4807 	xh->xh_count = cpu_to_le16(start);
4808 	xh->xh_free_start = cpu_to_le16(name_offset);
4809 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
4810 
4811 	ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4812 
4813 out:
4814 	ocfs2_xattr_bucket_free(s_bucket);
4815 	ocfs2_xattr_bucket_free(t_bucket);
4816 
4817 	return ret;
4818 }
4819 
4820 /*
4821  * Copy xattr from one bucket to another bucket.
4822  *
4823  * The caller must make sure that the journal transaction
4824  * has enough space for journaling.
4825  */
4826 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4827 				 handle_t *handle,
4828 				 u64 s_blkno,
4829 				 u64 t_blkno,
4830 				 int t_is_new)
4831 {
4832 	int ret;
4833 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4834 
4835 	BUG_ON(s_blkno == t_blkno);
4836 
4837 	trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
4838 				    (unsigned long long)t_blkno,
4839 				    t_is_new);
4840 
4841 	s_bucket = ocfs2_xattr_bucket_new(inode);
4842 	t_bucket = ocfs2_xattr_bucket_new(inode);
4843 	if (!s_bucket || !t_bucket) {
4844 		ret = -ENOMEM;
4845 		mlog_errno(ret);
4846 		goto out;
4847 	}
4848 
4849 	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4850 	if (ret)
4851 		goto out;
4852 
4853 	/*
4854 	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4855 	 * there's no need to read it.
4856 	 */
4857 	ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
4858 	if (ret)
4859 		goto out;
4860 
4861 	/*
4862 	 * Hey, if we're overwriting t_bucket, what difference does
4863 	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4864 	 * cluster to fill, we came here from
4865 	 * ocfs2_mv_xattr_buckets(), and it is really new -
4866 	 * ACCESS_CREATE is required.  But we also might have moved data
4867 	 * out of t_bucket before extending back into it.
4868 	 * ocfs2_add_new_xattr_bucket() can do this - its call to
4869 	 * ocfs2_add_new_xattr_cluster() may have created a new extent
4870 	 * and copied out the end of the old extent.  Then it re-extends
4871 	 * the old extent back to create space for new xattrs.  That's
4872 	 * how we get here, and the bucket isn't really new.
4873 	 */
4874 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4875 						t_is_new ?
4876 						OCFS2_JOURNAL_ACCESS_CREATE :
4877 						OCFS2_JOURNAL_ACCESS_WRITE);
4878 	if (ret)
4879 		goto out;
4880 
4881 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4882 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4883 
4884 out:
4885 	ocfs2_xattr_bucket_free(t_bucket);
4886 	ocfs2_xattr_bucket_free(s_bucket);
4887 
4888 	return ret;
4889 }
4890 
4891 /*
4892  * src_blk points to the start of an existing extent.  last_blk points to
4893  * last cluster in that extent.  to_blk points to a newly allocated
4894  * extent.  We copy the buckets from the cluster at last_blk to the new
4895  * extent.  If start_bucket is non-zero, we skip that many buckets before
4896  * we start copying.  The new extent's xh_num_buckets gets set to the
4897  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4898  * by the same amount.
4899  */
4900 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4901 				  u64 src_blk, u64 last_blk, u64 to_blk,
4902 				  unsigned int start_bucket,
4903 				  u32 *first_hash)
4904 {
4905 	int i, ret, credits;
4906 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4907 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4908 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4909 	struct ocfs2_xattr_bucket *old_first, *new_first;
4910 
4911 	trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
4912 				     (unsigned long long)to_blk);
4913 
4914 	BUG_ON(start_bucket >= num_buckets);
4915 	if (start_bucket) {
4916 		num_buckets -= start_bucket;
4917 		last_blk += (start_bucket * blks_per_bucket);
4918 	}
4919 
4920 	/* The first bucket of the original extent */
4921 	old_first = ocfs2_xattr_bucket_new(inode);
4922 	/* The first bucket of the new extent */
4923 	new_first = ocfs2_xattr_bucket_new(inode);
4924 	if (!old_first || !new_first) {
4925 		ret = -ENOMEM;
4926 		mlog_errno(ret);
4927 		goto out;
4928 	}
4929 
4930 	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4931 	if (ret) {
4932 		mlog_errno(ret);
4933 		goto out;
4934 	}
4935 
4936 	/*
4937 	 * We need to update the first bucket of the old extent and all
4938 	 * the buckets going to the new extent.
4939 	 */
4940 	credits = ((num_buckets + 1) * blks_per_bucket);
4941 	ret = ocfs2_extend_trans(handle, credits);
4942 	if (ret) {
4943 		mlog_errno(ret);
4944 		goto out;
4945 	}
4946 
4947 	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4948 						OCFS2_JOURNAL_ACCESS_WRITE);
4949 	if (ret) {
4950 		mlog_errno(ret);
4951 		goto out;
4952 	}
4953 
4954 	for (i = 0; i < num_buckets; i++) {
4955 		ret = ocfs2_cp_xattr_bucket(inode, handle,
4956 					    last_blk + (i * blks_per_bucket),
4957 					    to_blk + (i * blks_per_bucket),
4958 					    1);
4959 		if (ret) {
4960 			mlog_errno(ret);
4961 			goto out;
4962 		}
4963 	}
4964 
4965 	/*
4966 	 * Get the new bucket ready before we dirty anything
4967 	 * (This actually shouldn't fail, because we already dirtied
4968 	 * it once in ocfs2_cp_xattr_bucket()).
4969 	 */
4970 	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4971 	if (ret) {
4972 		mlog_errno(ret);
4973 		goto out;
4974 	}
4975 	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4976 						OCFS2_JOURNAL_ACCESS_WRITE);
4977 	if (ret) {
4978 		mlog_errno(ret);
4979 		goto out;
4980 	}
4981 
4982 	/* Now update the headers */
4983 	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4984 	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4985 
4986 	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4987 	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4988 
4989 	if (first_hash)
4990 		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4991 
4992 out:
4993 	ocfs2_xattr_bucket_free(new_first);
4994 	ocfs2_xattr_bucket_free(old_first);
4995 	return ret;
4996 }
4997 
4998 /*
4999  * Move some xattrs in this cluster to the new cluster.
5000  * This function should only be called when bucket size == cluster size.
5001  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
5002  */
5003 static int ocfs2_divide_xattr_cluster(struct inode *inode,
5004 				      handle_t *handle,
5005 				      u64 prev_blk,
5006 				      u64 new_blk,
5007 				      u32 *first_hash)
5008 {
5009 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5010 	int ret, credits = 2 * blk_per_bucket;
5011 
5012 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
5013 
5014 	ret = ocfs2_extend_trans(handle, credits);
5015 	if (ret) {
5016 		mlog_errno(ret);
5017 		return ret;
5018 	}
5019 
5020 	/* Move half of the xattr in start_blk to the next bucket. */
5021 	return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
5022 					  new_blk, first_hash, 1);
5023 }
5024 
5025 /*
5026  * Move some xattrs from the old cluster to the new one since they are not
5027  * contiguous in ocfs2 xattr tree.
5028  *
5029  * new_blk starts a new separate cluster, and we will move some xattrs from
5030  * prev_blk to it. v_start will be set as the first name hash value in this
5031  * new cluster so that it can be used as e_cpos during tree insertion and
5032  * don't collide with our original b-tree operations. first_bh and header_bh
5033  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
5034  * to extend the insert bucket.
5035  *
5036  * The problem is how much xattr should we move to the new one and when should
5037  * we update first_bh and header_bh?
5038  * 1. If cluster size > bucket size, that means the previous cluster has more
5039  *    than 1 bucket, so just move half nums of bucket into the new cluster and
5040  *    update the first_bh and header_bh if the insert bucket has been moved
5041  *    to the new cluster.
5042  * 2. If cluster_size == bucket_size:
5043  *    a) If the previous extent rec has more than one cluster and the insert
5044  *       place isn't in the last cluster, copy the entire last cluster to the
5045  *       new one. This time, we don't need to update the first_bh and header_bh
5046  *       since they will not be moved into the new cluster.
5047  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
5048  *       the new one. And we set the extend flag to zero if the insert place is
5049  *       moved into the new allocated cluster since no extend is needed.
5050  */
static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
					    handle_t *handle,
					    struct ocfs2_xattr_bucket *first,
					    struct ocfs2_xattr_bucket *target,
					    u64 new_blk,
					    u32 prev_clusters,
					    u32 *v_start,
					    int *extend)
{
	int ret;

	trace_ocfs2_adjust_xattr_cross_cluster(
			(unsigned long long)bucket_blkno(first),
			(unsigned long long)new_blk, prev_clusters);

	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
		/* Case 1 of the comment above: the cluster holds more
		 * than one bucket, so move half of them to new_blk. */
		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
							  handle,
							  first, target,
							  new_blk,
							  prev_clusters,
							  v_start);
		if (ret)
			mlog_errno(ret);
	} else {
		/* The start of the last cluster in the first extent */
		u64 last_blk = bucket_blkno(first) +
			((prev_clusters - 1) *
			 ocfs2_clusters_to_blocks(inode->i_sb, 1));

		if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
			/* Case 2a: the insert target is not in the last
			 * cluster, so copy the entire last cluster over. */
			ret = ocfs2_mv_xattr_buckets(inode, handle,
						     bucket_blkno(first),
						     last_blk, new_blk, 0,
						     v_start);
			if (ret)
				mlog_errno(ret);
		} else {
			/* Case 2b: split the last cluster's xattrs between
			 * the old cluster and the newly allocated one. */
			ret = ocfs2_divide_xattr_cluster(inode, handle,
							 last_blk, new_blk,
							 v_start);
			if (ret)
				mlog_errno(ret);

			/* The insert bucket itself moved to the new
			 * cluster, so no bucket extension is needed. */
			if ((bucket_blkno(target) == last_blk) && extend)
				*extend = 0;
		}
	}

	return ret;
}
5102 
5103 /*
5104  * Add a new cluster for xattr storage.
5105  *
5106  * If the new cluster is contiguous with the previous one, it will be
5107  * appended to the same extent record, and num_clusters will be updated.
5108  * If not, we will insert a new extent for it and move some xattrs in
5109  * the last cluster into the new allocated one.
5110  * We also need to limit the maximum size of a btree leaf, otherwise we'll
5111  * lose the benefits of hashing because we'll have to search large leaves.
5112  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5113  * if it's bigger).
5114  *
5115  * first_bh is the first block of the previous extent rec and header_bh
5116  * indicates the bucket we will insert the new xattrs. They will be updated
5117  * when the header_bh is moved into the new cluster.
5118  */
static int ocfs2_add_new_xattr_cluster(struct inode *inode,
				       struct buffer_head *root_bh,
				       struct ocfs2_xattr_bucket *first,
				       struct ocfs2_xattr_bucket *target,
				       u32 *num_clusters,
				       u32 prev_cpos,
				       int *extend,
				       struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	/* Blocks per cluster, used to compute block contiguity below. */
	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 prev_clusters = *num_clusters;
	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
	u64 block;
	handle_t *handle = ctxt->handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_tree et;

	trace_ocfs2_add_new_xattr_cluster_begin(
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
		(unsigned long long)bucket_blkno(first),
		prev_cpos, prev_clusters);

	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);

	/* The xattr tree root block will be modified below. */
	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	/* Allocate one new cluster from the data allocator in the ctxt. */
	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
				     clusters_to_add, &bit_off, &num_bits);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);

	if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
		/*
		 * If this cluster is contiguous with the old one and
		 * adding this new cluster, we don't surpass the limit of
		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
		 * initialized and used like other buckets in the previous
		 * cluster.
		 * So add it as a contiguous one. The caller will handle
		 * its init process.
		 */
		v_start = prev_cpos + prev_clusters;
		*num_clusters = prev_clusters + num_bits;
	} else {
		/* Not contiguous (or leaf would grow too large): start a
		 * new extent and move some xattrs into the new cluster;
		 * v_start becomes the new extent's e_cpos. */
		ret = ocfs2_adjust_xattr_cross_cluster(inode,
						       handle,
						       first,
						       target,
						       block,
						       prev_clusters,
						       &v_start,
						       extend);
		if (ret) {
			mlog_errno(ret);
			goto leave;
		}
	}

	trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
						 v_start, num_bits);
	/* Record the new cluster in the xattr btree. */
	ret = ocfs2_insert_extent(handle, &et, v_start, block,
				  num_bits, 0, ctxt->meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ocfs2_journal_dirty(handle, root_bh);

leave:
	return ret;
}
5207 
5208 /*
5209  * We are given an extent.  'first' is the bucket at the very front of
5210  * the extent.  The extent has space for an additional bucket past
5211  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5212  * of the target bucket.  We wish to shift every bucket past the target
5213  * down one, filling in that additional space.  When we get back to the
5214  * target, we split the target between itself and the now-empty bucket
5215  * at target+1 (aka, target_blkno + blks_per_bucket).
5216  */
5217 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5218 				     handle_t *handle,
5219 				     struct ocfs2_xattr_bucket *first,
5220 				     u64 target_blk,
5221 				     u32 num_clusters)
5222 {
5223 	int ret, credits;
5224 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5225 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5226 	u64 end_blk;
5227 	u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5228 
5229 	trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
5230 					(unsigned long long)bucket_blkno(first),
5231 					num_clusters, new_bucket);
5232 
5233 	/* The extent must have room for an additional bucket */
5234 	BUG_ON(new_bucket >=
5235 	       (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5236 
5237 	/* end_blk points to the last existing bucket */
5238 	end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5239 
5240 	/*
5241 	 * end_blk is the start of the last existing bucket.
5242 	 * Thus, (end_blk - target_blk) covers the target bucket and
5243 	 * every bucket after it up to, but not including, the last
5244 	 * existing bucket.  Then we add the last existing bucket, the
5245 	 * new bucket, and the first bucket (3 * blk_per_bucket).
5246 	 */
5247 	credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5248 	ret = ocfs2_extend_trans(handle, credits);
5249 	if (ret) {
5250 		mlog_errno(ret);
5251 		goto out;
5252 	}
5253 
5254 	ret = ocfs2_xattr_bucket_journal_access(handle, first,
5255 						OCFS2_JOURNAL_ACCESS_WRITE);
5256 	if (ret) {
5257 		mlog_errno(ret);
5258 		goto out;
5259 	}
5260 
5261 	while (end_blk != target_blk) {
5262 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5263 					    end_blk + blk_per_bucket, 0);
5264 		if (ret)
5265 			goto out;
5266 		end_blk -= blk_per_bucket;
5267 	}
5268 
5269 	/* Move half of the xattr in target_blkno to the next bucket. */
5270 	ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5271 					target_blk + blk_per_bucket, NULL, 0);
5272 
5273 	le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5274 	ocfs2_xattr_bucket_journal_dirty(handle, first);
5275 
5276 out:
5277 	return ret;
5278 }
5279 
5280 /*
5281  * Add new xattr bucket in an extent record and adjust the buckets
5282  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5283  * bucket we want to insert into.
5284  *
5285  * In the easy case, we will move all the buckets after target down by
5286  * one. Half of target's xattrs will be moved to the next bucket.
5287  *
5288  * If current cluster is full, we'll allocate a new one.  This may not
5289  * be contiguous.  The underlying calls will make sure that there is
5290  * space for the insert, shifting buckets around if necessary.
5291  * 'target' may be moved by those calls.
5292  */
static int ocfs2_add_new_xattr_bucket(struct inode *inode,
				      struct buffer_head *xb_bh,
				      struct ocfs2_xattr_bucket *target,
				      struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
	struct ocfs2_extent_list *el = &xb_root->xt_list;
	/* Hash of the first entry locates the extent rec for the target. */
	u32 name_hash =
		le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	int ret, num_buckets, extend = 1;
	u64 p_blkno;
	u32 e_cpos, num_clusters;
	/* The bucket at the front of the extent */
	struct ocfs2_xattr_bucket *first;

	trace_ocfs2_add_new_xattr_bucket(
				(unsigned long long)bucket_blkno(target));

	/* The first bucket of the original extent */
	first = ocfs2_xattr_bucket_new(inode);
	if (!first) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* Find the extent record covering name_hash. */
	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
				  &num_clusters, el);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(first, p_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* If the extent is already full of buckets, grow it by a cluster. */
	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
		/*
		 * This can move first+target if the target bucket moves
		 * to the new extent.
		 */
		ret = ocfs2_add_new_xattr_cluster(inode,
						  xb_bh,
						  first,
						  target,
						  &num_clusters,
						  e_cpos,
						  &extend,
						  ctxt);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* extend is cleared above when the target moved to the new
	 * cluster, in which case no bucket shift/split is needed. */
	if (extend) {
		ret = ocfs2_extend_xattr_bucket(inode,
						ctxt->handle,
						first,
						bucket_blkno(target),
						num_clusters);
		if (ret)
			mlog_errno(ret);
	}

out:
	ocfs2_xattr_bucket_free(first);

	return ret;
}
5370 
5371 /*
5372  * Truncate the specified xe_off entry in xattr bucket.
5373  * bucket is indicated by header_bh and len is the new length.
5374  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5375  *
5376  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5377  */
static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
					     struct ocfs2_xattr_bucket *bucket,
					     int xe_off,
					     int len,
					     struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret, offset;
	u64 value_blk;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
	size_t blocksize = inode->i_sb->s_blocksize;
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};

	xe = &xh->xh_entries[xe_off];

	/* Only externally-stored (non-local) values can be truncated. */
	BUG_ON(!xe || ocfs2_xattr_is_local(xe));

	/* The value root sits right after the name within the bucket. */
	offset = le16_to_cpu(xe->xe_name_offset) +
		 OCFS2_XATTR_SIZE(xe->xe_name_len);

	/* Which of the bucket's blocks holds the value root. */
	value_blk = offset / blocksize;

	/* We don't allow ocfs2_xattr_value to be stored in different block. */
	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);

	vb.vb_bh = bucket->bu_bhs[value_blk];
	BUG_ON(!vb.vb_bh);

	vb.vb_xv = (struct ocfs2_xattr_value_root *)
		(vb.vb_bh->b_data + offset % blocksize);

	/*
	 * From here on out we have to dirty the bucket.  The generic
	 * value calls only modify one of the bucket's bhs, but we need
	 * to send the bucket at once.  So if they error, they *could* have
	 * modified something.  We have to assume they did, and dirty
	 * the whole bucket.  This leaves us in a consistent state.
	 */
	trace_ocfs2_xattr_bucket_value_truncate(
			(unsigned long long)bucket_blkno(bucket), xe_off, len);
	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Record the new value length in the entry itself. */
	xe->xe_value_size = cpu_to_le64(len);

	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);

out:
	return ret;
}
5440 
5441 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5442 				  struct buffer_head *root_bh,
5443 				  u64 blkno,
5444 				  u32 cpos,
5445 				  u32 len,
5446 				  void *para)
5447 {
5448 	int ret;
5449 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5450 	struct inode *tl_inode = osb->osb_tl_inode;
5451 	handle_t *handle;
5452 	struct ocfs2_xattr_block *xb =
5453 			(struct ocfs2_xattr_block *)root_bh->b_data;
5454 	struct ocfs2_alloc_context *meta_ac = NULL;
5455 	struct ocfs2_cached_dealloc_ctxt dealloc;
5456 	struct ocfs2_extent_tree et;
5457 
5458 	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5459 					  ocfs2_delete_xattr_in_bucket, para);
5460 	if (ret) {
5461 		mlog_errno(ret);
5462 		return ret;
5463 	}
5464 
5465 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5466 
5467 	ocfs2_init_dealloc_ctxt(&dealloc);
5468 
5469 	trace_ocfs2_rm_xattr_cluster(
5470 			(unsigned long long)OCFS2_I(inode)->ip_blkno,
5471 			(unsigned long long)blkno, cpos, len);
5472 
5473 	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5474 					       len);
5475 
5476 	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5477 	if (ret) {
5478 		mlog_errno(ret);
5479 		return ret;
5480 	}
5481 
5482 	inode_lock(tl_inode);
5483 
5484 	if (ocfs2_truncate_log_needs_flush(osb)) {
5485 		ret = __ocfs2_flush_truncate_log(osb);
5486 		if (ret < 0) {
5487 			mlog_errno(ret);
5488 			goto out;
5489 		}
5490 	}
5491 
5492 	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5493 	if (IS_ERR(handle)) {
5494 		ret = -ENOMEM;
5495 		mlog_errno(ret);
5496 		goto out;
5497 	}
5498 
5499 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5500 				      OCFS2_JOURNAL_ACCESS_WRITE);
5501 	if (ret) {
5502 		mlog_errno(ret);
5503 		goto out_commit;
5504 	}
5505 
5506 	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5507 				  &dealloc);
5508 	if (ret) {
5509 		mlog_errno(ret);
5510 		goto out_commit;
5511 	}
5512 
5513 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5514 	ocfs2_journal_dirty(handle, root_bh);
5515 
5516 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5517 	if (ret)
5518 		mlog_errno(ret);
5519 	ocfs2_update_inode_fsync_trans(handle, inode, 0);
5520 
5521 out_commit:
5522 	ocfs2_commit_trans(osb, handle);
5523 out:
5524 	ocfs2_schedule_truncate_log_flush(osb, 1);
5525 
5526 	inode_unlock(tl_inode);
5527 
5528 	if (meta_ac)
5529 		ocfs2_free_alloc_context(meta_ac);
5530 
5531 	ocfs2_run_deallocs(osb, &dealloc);
5532 
5533 	return ret;
5534 }
5535 
5536 /*
5537  * check whether the xattr bucket is filled up with the same hash value.
5538  * If we want to insert the xattr with the same hash, return -ENOSPC.
5539  * If we want to insert a xattr with different hash value, go ahead
5540  * and ocfs2_divide_xattr_bucket will handle this.
5541  */
5542 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5543 					      struct ocfs2_xattr_bucket *bucket,
5544 					      const char *name)
5545 {
5546 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5547 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5548 
5549 	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5550 		return 0;
5551 
5552 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5553 	    xh->xh_entries[0].xe_name_hash) {
5554 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5555 		     "hash = %u\n",
5556 		     (unsigned long long)bucket_blkno(bucket),
5557 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5558 		return -ENOSPC;
5559 	}
5560 
5561 	return 0;
5562 }
5563 
5564 /*
5565  * Try to set the entry in the current bucket.  If we fail, the caller
5566  * will handle getting us another bucket.
5567  */
5568 static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5569 					struct ocfs2_xattr_info *xi,
5570 					struct ocfs2_xattr_search *xs,
5571 					struct ocfs2_xattr_set_ctxt *ctxt)
5572 {
5573 	int ret;
5574 	struct ocfs2_xa_loc loc;
5575 
5576 	trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
5577 
5578 	ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5579 				       xs->not_found ? NULL : xs->here);
5580 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5581 	if (!ret) {
5582 		xs->here = loc.xl_entry;
5583 		goto out;
5584 	}
5585 	if (ret != -ENOSPC) {
5586 		mlog_errno(ret);
5587 		goto out;
5588 	}
5589 
5590 	/* Ok, we need space.  Let's try defragmenting the bucket. */
5591 	ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5592 					xs->bucket);
5593 	if (ret) {
5594 		mlog_errno(ret);
5595 		goto out;
5596 	}
5597 
5598 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5599 	if (!ret) {
5600 		xs->here = loc.xl_entry;
5601 		goto out;
5602 	}
5603 	if (ret != -ENOSPC)
5604 		mlog_errno(ret);
5605 
5606 
5607 out:
5608 	return ret;
5609 }
5610 
static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs,
					     struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;

	trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);

	/* Try the bucket the search already found. */
	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
	if (!ret)
		goto out;
	if (ret != -ENOSPC) {
		mlog_errno(ret);
		goto out;
	}

	/* Ack, need more space.  Let's try to get another bucket! */

	/*
	 * We do not allow for overlapping ranges between buckets. And
	 * the maximum number of collisions we will allow for then is
	 * one bucket's worth, so check it here whether we need to
	 * add a new bucket for the insert.
	 */
	ret = ocfs2_check_xattr_bucket_collision(inode,
						 xs->bucket,
						 xi->xi_name);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_add_new_xattr_bucket(inode,
					 xs->xattr_bh,
					 xs->bucket,
					 ctxt);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * ocfs2_add_new_xattr_bucket() will have updated
	 * xs->bucket if it moved, but it will not have updated
	 * any of the other search fields.  Thus, we drop it and
	 * re-search.  Everything should be cached, so it'll be
	 * quick.
	 */
	ocfs2_xattr_bucket_relse(xs->bucket);
	ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
					   xi->xi_name_index,
					   xi->xi_name, xs);
	if (ret && ret != -ENODATA)
		goto out;
	/* -ENODATA simply means the name is still absent; record that. */
	xs->not_found = ret;

	/* Ok, we have a new bucket, let's try again */
	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
	if (ret && (ret != -ENOSPC))
		mlog_errno(ret);

out:
	return ret;
}
5676 
5677 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5678 					struct ocfs2_xattr_bucket *bucket,
5679 					void *para)
5680 {
5681 	int ret = 0, ref_credits;
5682 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5683 	u16 i;
5684 	struct ocfs2_xattr_entry *xe;
5685 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5686 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5687 	int credits = ocfs2_remove_extent_credits(osb->sb) +
5688 		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5689 	struct ocfs2_xattr_value_root *xv;
5690 	struct ocfs2_rm_xattr_bucket_para *args =
5691 			(struct ocfs2_rm_xattr_bucket_para *)para;
5692 
5693 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5694 
5695 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5696 		xe = &xh->xh_entries[i];
5697 		if (ocfs2_xattr_is_local(xe))
5698 			continue;
5699 
5700 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5701 						      i, &xv, NULL);
5702 		if (ret) {
5703 			mlog_errno(ret);
5704 			break;
5705 		}
5706 
5707 		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5708 							 args->ref_ci,
5709 							 args->ref_root_bh,
5710 							 &ctxt.meta_ac,
5711 							 &ref_credits);
5712 
5713 		ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5714 		if (IS_ERR(ctxt.handle)) {
5715 			ret = PTR_ERR(ctxt.handle);
5716 			mlog_errno(ret);
5717 			break;
5718 		}
5719 
5720 		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5721 							i, 0, &ctxt);
5722 
5723 		ocfs2_commit_trans(osb, ctxt.handle);
5724 		if (ctxt.meta_ac) {
5725 			ocfs2_free_alloc_context(ctxt.meta_ac);
5726 			ctxt.meta_ac = NULL;
5727 		}
5728 		if (ret) {
5729 			mlog_errno(ret);
5730 			break;
5731 		}
5732 	}
5733 
5734 	if (ctxt.meta_ac)
5735 		ocfs2_free_alloc_context(ctxt.meta_ac);
5736 	ocfs2_schedule_truncate_log_flush(osb, 1);
5737 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
5738 	return ret;
5739 }
5740 
5741 /*
5742  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5743  * or change the extent record flag), we need to recalculate
5744  * the metaecc for the whole bucket. So it is done here.
5745  *
5746  * Note:
5747  * We have to give the extra credits for the caller.
5748  */
5749 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5750 					    handle_t *handle,
5751 					    void *para)
5752 {
5753 	int ret;
5754 	struct ocfs2_xattr_bucket *bucket =
5755 			(struct ocfs2_xattr_bucket *)para;
5756 
5757 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5758 						OCFS2_JOURNAL_ACCESS_WRITE);
5759 	if (ret) {
5760 		mlog_errno(ret);
5761 		return ret;
5762 	}
5763 
5764 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5765 
5766 	return 0;
5767 }
5768 
5769 /*
5770  * Special action we need if the xattr value is refcounted.
5771  *
5772  * 1. If the xattr is refcounted, lock the tree.
5773  * 2. CoW the xattr if we are setting the new value and the value
5774  *    will be stored outside.
5775  * 3. In other case, decrease_refcount will work for us, so just
5776  *    lock the refcount tree, calculate the meta and credits is OK.
5777  *
5778  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5779  * currently CoW is a completed transaction, while this function
5780  * will also lock the allocators and let us deadlock. So we will
5781  * CoW the whole xattr value.
5782  */
static int ocfs2_prepare_refcount_xattr(struct inode *inode,
					struct ocfs2_dinode *di,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xis,
					struct ocfs2_xattr_search *xbs,
					struct ocfs2_refcount_tree **ref_tree,
					int *meta_add,
					int *credits)
{
	int ret = 0;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xattr_entry *xe;
	char *base;
	u32 p_cluster, num_clusters;
	unsigned int ext_flags;
	int name_offset, name_len;
	struct ocfs2_xattr_value_buf vb;
	struct ocfs2_xattr_bucket *bucket = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_post_refcount refcount;
	struct ocfs2_post_refcount *p = NULL;
	struct buffer_head *ref_root_bh = NULL;

	/* Locate the existing entry: inode-resident first, else block. */
	if (!xis->not_found) {
		xe = xis->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		base = xis->base;
		vb.vb_bh = xis->inode_bh;
		vb.vb_access = ocfs2_journal_access_di;
	} else {
		int i, block_off = 0;
		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
		xe = xbs->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		i = xbs->here - xbs->header->xh_entries;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/* Indexed block: the entry lives in a bucket. */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xbs->bucket),
							i, &block_off,
							&name_offset);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			base = bucket_block(xbs->bucket, block_off);
			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
			vb.vb_access = ocfs2_journal_access;

			if (ocfs2_meta_ecc(osb)) {
				/*create parameters for ocfs2_post_refcount. */
				bucket = xbs->bucket;
				refcount.credits = bucket->bu_blocks;
				refcount.para = bucket;
				refcount.func =
					ocfs2_xattr_bucket_post_refcount;
				p = &refcount;
			}
		} else {
			base = xbs->base;
			vb.vb_bh = xbs->xattr_bh;
			vb.vb_access = ocfs2_journal_access_xb;
		}
	}

	/* Inline (local) values own no extents, nothing to CoW. */
	if (ocfs2_xattr_is_local(xe))
		goto out;

	/* The value root immediately follows the padded name. */
	vb.vb_xv = (struct ocfs2_xattr_value_root *)
				(base + name_offset + name_len);

	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
				       &num_clusters, &vb.vb_xv->xr_list,
				       &ext_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We just need to check the 1st extent record, since we always
	 * CoW the whole xattr. So there shouldn't be a xattr with
	 * some REFCOUNT extent recs after the 1st one.
	 */
	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
		goto out;

	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
				       1, ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * If we are deleting the xattr or the new size will be stored inside,
	 * cool, leave it there, the xattr truncate process will remove them
	 * for us(it still needs the refcount tree lock and the meta, credits).
	 * And the worse case is that every cluster truncate will split the
	 * refcount tree, and make the original extent become 3. So we will need
	 * 2 * cluster more extent recs at most.
	 */
	if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {

		ret = ocfs2_refcounted_xattr_delete_need(inode,
							 &(*ref_tree)->rf_ci,
							 ref_root_bh, vb.vb_xv,
							 meta_add, credits);
		if (ret)
			mlog_errno(ret);
		goto out;
	}

	/* Setting a new external value: CoW the whole refcounted value. */
	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
				       *ref_tree, ref_root_bh, 0,
				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
	if (ret)
		mlog_errno(ret);

out:
	brelse(ref_root_bh);
	return ret;
}
5908 
5909 /*
5910  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5911  * The physical clusters will be added to refcount tree.
5912  */
static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
				struct ocfs2_xattr_value_root *xv,
				struct ocfs2_extent_tree *value_et,
				struct ocfs2_caching_info *ref_ci,
				struct buffer_head *ref_root_bh,
				struct ocfs2_cached_dealloc_ctxt *dealloc,
				struct ocfs2_post_refcount *refcount)
{
	int ret = 0;
	u32 clusters = le32_to_cpu(xv->xr_clusters);
	u32 cpos, p_cluster, num_clusters;
	struct ocfs2_extent_list *el = &xv->xr_list;
	unsigned int ext_flags;

	/* Walk the value's extents one record at a time. */
	cpos = 0;
	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, el, &ext_flags);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		cpos += num_clusters;
		/* Already refcounted extents need no further work. */
		if ((ext_flags & OCFS2_EXT_REFCOUNTED))
			continue;

		/* A value extent must be allocated; a hole here is a bug. */
		BUG_ON(!p_cluster);

		ret = ocfs2_add_refcount_flag(inode, value_et,
					      ref_ci, ref_root_bh,
					      cpos - num_clusters,
					      p_cluster, num_clusters,
					      dealloc, refcount);
		if (ret) {
			mlog_errno(ret);
			break;
		}
	}

	return ret;
}
5955 
5956 /*
5957  * Given a normal ocfs2_xattr_header, refcount all the entries which
5958  * have value stored outside.
5959  * Used for xattrs stored in inode and ocfs2_xattr_block.
5960  */
static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
				struct ocfs2_xattr_value_buf *vb,
				struct ocfs2_xattr_header *header,
				struct ocfs2_caching_info *ref_ci,
				struct buffer_head *ref_root_bh,
				struct ocfs2_cached_dealloc_ctxt *dealloc)
{

	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_value_root *xv;
	struct ocfs2_extent_tree et;
	int i, ret = 0;

	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
		xe = &header->xh_entries[i];

		/* Inline values have no extents to refcount. */
		if (ocfs2_xattr_is_local(xe))
			continue;

		/* The value root follows the padded name in the header area. */
		xv = (struct ocfs2_xattr_value_root *)((void *)header +
			le16_to_cpu(xe->xe_name_offset) +
			OCFS2_XATTR_SIZE(xe->xe_name_len));

		vb->vb_xv = xv;
		ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

		ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
							ref_ci, ref_root_bh,
							dealloc, NULL);
		if (ret) {
			mlog_errno(ret);
			break;
		}
	}

	return ret;
}
5998 
5999 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
6000 				struct buffer_head *fe_bh,
6001 				struct ocfs2_caching_info *ref_ci,
6002 				struct buffer_head *ref_root_bh,
6003 				struct ocfs2_cached_dealloc_ctxt *dealloc)
6004 {
6005 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6006 	struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
6007 				(fe_bh->b_data + inode->i_sb->s_blocksize -
6008 				le16_to_cpu(di->i_xattr_inline_size));
6009 	struct ocfs2_xattr_value_buf vb = {
6010 		.vb_bh = fe_bh,
6011 		.vb_access = ocfs2_journal_access_di,
6012 	};
6013 
6014 	return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6015 						  ref_ci, ref_root_bh, dealloc);
6016 }
6017 
/*
 * Context threaded through ocfs2_iterate_xattr_buckets() when attaching
 * refcount flags to every externally stored xattr value in a tree.
 */
struct ocfs2_xattr_tree_value_refcount_para {
	struct ocfs2_caching_info *ref_ci;	/* refcount tree cache */
	struct buffer_head *ref_root_bh;	/* refcount tree root block */
	struct ocfs2_cached_dealloc_ctxt *dealloc; /* deferred frees */
};
6023 
/*
 * Locate the ocfs2_xattr_value_root for the entry at index @offset in
 * @bucket.  On success *xv points into the bucket's block data and, when
 * @bh is non-NULL, *bh is set to the buffer head containing the root.
 */
static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
					   struct ocfs2_xattr_bucket *bucket,
					   int offset,
					   struct ocfs2_xattr_value_root **xv,
					   struct buffer_head **bh)
{
	int ret, block_off, name_offset;
	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
	void *base;

	ret = ocfs2_xattr_bucket_get_name_value(sb,
						bucket_xh(bucket),
						offset,
						&block_off,
						&name_offset);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	base = bucket_block(bucket, block_off);

	/* The value root follows the padded name within the block. */
	*xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
			 OCFS2_XATTR_SIZE(xe->xe_name_len));

	if (bh)
		*bh = bucket->bu_bhs[block_off];
out:
	return ret;
}
6055 
6056 /*
6057  * For a given xattr bucket, refcount all the entries which
6058  * have value stored outside.
6059  */
static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
					     struct ocfs2_xattr_bucket *bucket,
					     void *para)
{
	int i, ret = 0;
	struct ocfs2_extent_tree et;
	struct ocfs2_xattr_tree_value_refcount_para *ref =
			(struct ocfs2_xattr_tree_value_refcount_para *)para;
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};
	/* Recompute the bucket's metaecc after any value-root change. */
	struct ocfs2_post_refcount refcount = {
		.credits = bucket->bu_blocks,
		.para = bucket,
		.func = ocfs2_xattr_bucket_post_refcount,
	};
	struct ocfs2_post_refcount *p = NULL;

	/* We only need post_refcount if we support metaecc. */
	if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
		p = &refcount;

	trace_ocfs2_xattr_bucket_value_refcount(
				(unsigned long long)bucket_blkno(bucket),
				le16_to_cpu(xh->xh_count));
	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
		xe = &xh->xh_entries[i];

		/* Inline values hold no extents to refcount. */
		if (ocfs2_xattr_is_local(xe))
			continue;

		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
						      &vb.vb_xv, &vb.vb_bh);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ocfs2_init_xattr_value_extent_tree(&et,
						   INODE_CACHE(inode), &vb);

		ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
							&et, ref->ref_ci,
							ref->ref_root_bh,
							ref->dealloc, p);
		if (ret) {
			mlog_errno(ret);
			break;
		}
	}

	return ret;

}
6117 
6118 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6119 				     struct buffer_head *root_bh,
6120 				     u64 blkno, u32 cpos, u32 len, void *para)
6121 {
6122 	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6123 					   ocfs2_xattr_bucket_value_refcount,
6124 					   para);
6125 }
6126 
6127 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6128 				struct buffer_head *blk_bh,
6129 				struct ocfs2_caching_info *ref_ci,
6130 				struct buffer_head *ref_root_bh,
6131 				struct ocfs2_cached_dealloc_ctxt *dealloc)
6132 {
6133 	int ret = 0;
6134 	struct ocfs2_xattr_block *xb =
6135 				(struct ocfs2_xattr_block *)blk_bh->b_data;
6136 
6137 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6138 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6139 		struct ocfs2_xattr_value_buf vb = {
6140 			.vb_bh = blk_bh,
6141 			.vb_access = ocfs2_journal_access_xb,
6142 		};
6143 
6144 		ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6145 							 ref_ci, ref_root_bh,
6146 							 dealloc);
6147 	} else {
6148 		struct ocfs2_xattr_tree_value_refcount_para para = {
6149 			.ref_ci = ref_ci,
6150 			.ref_root_bh = ref_root_bh,
6151 			.dealloc = dealloc,
6152 		};
6153 
6154 		ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6155 						ocfs2_refcount_xattr_tree_rec,
6156 						&para);
6157 	}
6158 
6159 	return ret;
6160 }
6161 
6162 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6163 				     struct buffer_head *fe_bh,
6164 				     struct ocfs2_caching_info *ref_ci,
6165 				     struct buffer_head *ref_root_bh,
6166 				     struct ocfs2_cached_dealloc_ctxt *dealloc)
6167 {
6168 	int ret = 0;
6169 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
6170 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6171 	struct buffer_head *blk_bh = NULL;
6172 
6173 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6174 		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6175 							 ref_ci, ref_root_bh,
6176 							 dealloc);
6177 		if (ret) {
6178 			mlog_errno(ret);
6179 			goto out;
6180 		}
6181 	}
6182 
6183 	if (!di->i_xattr_loc)
6184 		goto out;
6185 
6186 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6187 				     &blk_bh);
6188 	if (ret < 0) {
6189 		mlog_errno(ret);
6190 		goto out;
6191 	}
6192 
6193 	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6194 						ref_root_bh, dealloc);
6195 	if (ret)
6196 		mlog_errno(ret);
6197 
6198 	brelse(blk_bh);
6199 out:
6200 
6201 	return ret;
6202 }
6203 
/* Predicate deciding whether a given xattr entry should be reflinked. */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
/*
 * Store the information we need in xattr reflink.
 * old_bh and new_bh are inode bh for the old and new inode.
 */
struct ocfs2_xattr_reflink {
	struct inode *old_inode;
	struct inode *new_inode;
	struct buffer_head *old_bh;
	struct buffer_head *new_bh;
	struct ocfs2_caching_info *ref_ci;	/* refcount tree cache */
	struct buffer_head *ref_root_bh;	/* refcount tree root block */
	struct ocfs2_cached_dealloc_ctxt *dealloc;
	/* Optional filter; NULL means reflink every entry. */
	should_xattr_reflinked *xattr_reflinked;
};
6219 
6220 /*
6221  * Given a xattr header and xe offset,
6222  * return the proper xv and the corresponding bh.
6223  * xattr in inode, block and xattr tree have different implementations.
6224  */
6225 typedef int (get_xattr_value_root)(struct super_block *sb,
6226 				   struct buffer_head *bh,
6227 				   struct ocfs2_xattr_header *xh,
6228 				   int offset,
6229 				   struct ocfs2_xattr_value_root **xv,
6230 				   struct buffer_head **ret_bh,
6231 				   void *para);
6232 
6233 /*
6234  * Calculate all the xattr value root metadata stored in this xattr header and
6235  * credits we need if we create them from the scratch.
6236  * We use get_xattr_value_root so that all types of xattr container can use it.
6237  */
6238 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6239 					     struct buffer_head *bh,
6240 					     struct ocfs2_xattr_header *xh,
6241 					     int *metas, int *credits,
6242 					     int *num_recs,
6243 					     get_xattr_value_root *func,
6244 					     void *para)
6245 {
6246 	int i, ret = 0;
6247 	struct ocfs2_xattr_value_root *xv;
6248 	struct ocfs2_xattr_entry *xe;
6249 
6250 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6251 		xe = &xh->xh_entries[i];
6252 		if (ocfs2_xattr_is_local(xe))
6253 			continue;
6254 
6255 		ret = func(sb, bh, xh, i, &xv, NULL, para);
6256 		if (ret) {
6257 			mlog_errno(ret);
6258 			break;
6259 		}
6260 
6261 		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6262 			  le16_to_cpu(xv->xr_list.l_next_free_rec);
6263 
6264 		*credits += ocfs2_calc_extend_credits(sb,
6265 						&def_xv.xv.xr_list);
6266 
6267 		/*
6268 		 * If the value is a tree with depth > 1, We don't go deep
6269 		 * to the extent block, so just calculate a maximum record num.
6270 		 */
6271 		if (!xv->xr_list.l_tree_depth)
6272 			*num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6273 		else
6274 			*num_recs += ocfs2_clusters_for_bytes(sb,
6275 							      XATTR_SIZE_MAX);
6276 	}
6277 
6278 	return ret;
6279 }
6280 
6281 /* Used by xattr inode and block to return the right xv and buffer_head. */
6282 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6283 				      struct buffer_head *bh,
6284 				      struct ocfs2_xattr_header *xh,
6285 				      int offset,
6286 				      struct ocfs2_xattr_value_root **xv,
6287 				      struct buffer_head **ret_bh,
6288 				      void *para)
6289 {
6290 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6291 
6292 	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6293 		le16_to_cpu(xe->xe_name_offset) +
6294 		OCFS2_XATTR_SIZE(xe->xe_name_len));
6295 
6296 	if (ret_bh)
6297 		*ret_bh = bh;
6298 
6299 	return 0;
6300 }
6301 
6302 /*
6303  * Lock the meta_ac and calculate how much credits we need for reflink xattrs.
6304  * It is only used for inline xattr and xattr block.
6305  */
6306 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6307 					struct ocfs2_xattr_header *xh,
6308 					struct buffer_head *ref_root_bh,
6309 					int *credits,
6310 					struct ocfs2_alloc_context **meta_ac)
6311 {
6312 	int ret, meta_add = 0, num_recs = 0;
6313 	struct ocfs2_refcount_block *rb =
6314 			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
6315 
6316 	*credits = 0;
6317 
6318 	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6319 						&meta_add, credits, &num_recs,
6320 						ocfs2_get_xattr_value_root,
6321 						NULL);
6322 	if (ret) {
6323 		mlog_errno(ret);
6324 		goto out;
6325 	}
6326 
6327 	/*
6328 	 * We need to add/modify num_recs in refcount tree, so just calculate
6329 	 * an approximate number we need for refcount tree change.
6330 	 * Sometimes we need to split the tree, and after split,  half recs
6331 	 * will be moved to the new block, and a new block can only provide
6332 	 * half number of recs. So we multiple new blocks by 2.
6333 	 */
6334 	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6335 	meta_add += num_recs;
6336 	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6337 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6338 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6339 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6340 	else
6341 		*credits += 1;
6342 
6343 	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6344 	if (ret)
6345 		mlog_errno(ret);
6346 
6347 out:
6348 	return ret;
6349 }
6350 
6351 /*
6352  * Given a xattr header, reflink all the xattrs in this container.
6353  * It can be used for inode, block and bucket.
6354  *
6355  * NOTE:
6356  * Before we call this function, the caller has memcpy the xattr in
6357  * old_xh to the new_xh.
6358  *
6359  * If args.xattr_reflinked is set, call it to decide whether the xe should
6360  * be reflinked or not. If not, remove it from the new xattr header.
6361  */
6362 static int ocfs2_reflink_xattr_header(handle_t *handle,
6363 				      struct ocfs2_xattr_reflink *args,
6364 				      struct buffer_head *old_bh,
6365 				      struct ocfs2_xattr_header *xh,
6366 				      struct buffer_head *new_bh,
6367 				      struct ocfs2_xattr_header *new_xh,
6368 				      struct ocfs2_xattr_value_buf *vb,
6369 				      struct ocfs2_alloc_context *meta_ac,
6370 				      get_xattr_value_root *func,
6371 				      void *para)
6372 {
6373 	int ret = 0, i, j;
6374 	struct super_block *sb = args->old_inode->i_sb;
6375 	struct buffer_head *value_bh;
6376 	struct ocfs2_xattr_entry *xe, *last;
6377 	struct ocfs2_xattr_value_root *xv, *new_xv;
6378 	struct ocfs2_extent_tree data_et;
6379 	u32 clusters, cpos, p_cluster, num_clusters;
6380 	unsigned int ext_flags = 0;
6381 
6382 	trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
6383 					 le16_to_cpu(xh->xh_count));
6384 
6385 	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1;
6386 	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6387 		xe = &xh->xh_entries[i];
6388 
6389 		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6390 			xe = &new_xh->xh_entries[j];
6391 
6392 			le16_add_cpu(&new_xh->xh_count, -1);
6393 			if (new_xh->xh_count) {
6394 				memmove(xe, xe + 1,
6395 					(void *)last - (void *)xe);
6396 				memset(last, 0,
6397 				       sizeof(struct ocfs2_xattr_entry));
6398 				last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1;
6399 			} else {
6400 				memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
6401 				last = NULL;
6402 			}
6403 
6404 			/*
6405 			 * We don't want j to increase in the next round since
6406 			 * it is already moved ahead.
6407 			 */
6408 			j--;
6409 			continue;
6410 		}
6411 
6412 		if (ocfs2_xattr_is_local(xe))
6413 			continue;
6414 
6415 		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6416 		if (ret) {
6417 			mlog_errno(ret);
6418 			break;
6419 		}
6420 
6421 		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6422 		if (ret) {
6423 			mlog_errno(ret);
6424 			break;
6425 		}
6426 
6427 		/*
6428 		 * For the xattr which has l_tree_depth = 0, all the extent
6429 		 * recs have already be copied to the new xh with the
6430 		 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6431 		 * increase the refount count int the refcount tree.
6432 		 *
6433 		 * For the xattr which has l_tree_depth > 0, we need
6434 		 * to initialize it to the empty default value root,
6435 		 * and then insert the extents one by one.
6436 		 */
6437 		if (xv->xr_list.l_tree_depth) {
6438 			memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE);
6439 			vb->vb_xv = new_xv;
6440 			vb->vb_bh = value_bh;
6441 			ocfs2_init_xattr_value_extent_tree(&data_et,
6442 					INODE_CACHE(args->new_inode), vb);
6443 		}
6444 
6445 		clusters = le32_to_cpu(xv->xr_clusters);
6446 		cpos = 0;
6447 		while (cpos < clusters) {
6448 			ret = ocfs2_xattr_get_clusters(args->old_inode,
6449 						       cpos,
6450 						       &p_cluster,
6451 						       &num_clusters,
6452 						       &xv->xr_list,
6453 						       &ext_flags);
6454 			if (ret) {
6455 				mlog_errno(ret);
6456 				goto out;
6457 			}
6458 
6459 			BUG_ON(!p_cluster);
6460 
6461 			if (xv->xr_list.l_tree_depth) {
6462 				ret = ocfs2_insert_extent(handle,
6463 						&data_et, cpos,
6464 						ocfs2_clusters_to_blocks(
6465 							args->old_inode->i_sb,
6466 							p_cluster),
6467 						num_clusters, ext_flags,
6468 						meta_ac);
6469 				if (ret) {
6470 					mlog_errno(ret);
6471 					goto out;
6472 				}
6473 			}
6474 
6475 			ret = ocfs2_increase_refcount(handle, args->ref_ci,
6476 						      args->ref_root_bh,
6477 						      p_cluster, num_clusters,
6478 						      meta_ac, args->dealloc);
6479 			if (ret) {
6480 				mlog_errno(ret);
6481 				goto out;
6482 			}
6483 
6484 			cpos += num_clusters;
6485 		}
6486 	}
6487 
6488 out:
6489 	return ret;
6490 }
6491 
/*
 * Reflink the inline xattr area from the old inode block into the new one.
 * The raw area is memcpy'd wholesale, then ocfs2_reflink_xattr_header()
 * filters entries and refcounts any outside-stored values.
 */
static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
{
	int ret = 0, credits = 0;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
	/* Inline xattrs occupy the tail of the inode block. */
	int header_off = osb->sb->s_blocksize - inline_size;
	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
					(args->old_bh->b_data + header_off);
	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
					(args->new_bh->b_data + header_off);
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_inode_info *new_oi;
	struct ocfs2_dinode *new_di;
	struct ocfs2_xattr_value_buf vb = {
		.vb_bh = args->new_bh,
		.vb_access = ocfs2_journal_access_di,
	};

	/* Reserve metadata and compute journal credits up front. */
	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
						  &credits, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Clone the whole inline area - entries and inline values alike. */
	memcpy(args->new_bh->b_data + header_off,
	       args->old_bh->b_data + header_off, inline_size);

	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);

	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
					 args->new_bh, new_xh, &vb, meta_ac,
					 ocfs2_get_xattr_value_root, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	new_oi = OCFS2_I(args->new_inode);

	/* Publish the inline-xattr flags on the new inode. */
	spin_lock(&new_oi->ip_lock);
	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
	spin_unlock(&new_oi->ip_lock);

	ocfs2_journal_dirty(handle, args->new_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);
	return ret;
}
6564 
6565 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6566 					  struct buffer_head *fe_bh,
6567 					  struct buffer_head **ret_bh,
6568 					  int indexed)
6569 {
6570 	int ret;
6571 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6572 	struct ocfs2_xattr_set_ctxt ctxt;
6573 
6574 	memset(&ctxt, 0, sizeof(ctxt));
6575 	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6576 	if (ret < 0) {
6577 		mlog_errno(ret);
6578 		return ret;
6579 	}
6580 
6581 	ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6582 	if (IS_ERR(ctxt.handle)) {
6583 		ret = PTR_ERR(ctxt.handle);
6584 		mlog_errno(ret);
6585 		goto out;
6586 	}
6587 
6588 	trace_ocfs2_create_empty_xattr_block(
6589 				(unsigned long long)fe_bh->b_blocknr, indexed);
6590 	ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6591 				       ret_bh);
6592 	if (ret)
6593 		mlog_errno(ret);
6594 
6595 	ocfs2_commit_trans(osb, ctxt.handle);
6596 out:
6597 	ocfs2_free_alloc_context(ctxt.meta_ac);
6598 	return ret;
6599 }
6600 
/*
 * Reflink a non-indexed xattr block: copy its header/entries into the
 * already-created new block, then filter entries and refcount any
 * outside-stored values via ocfs2_reflink_xattr_header().
 */
static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
				     struct buffer_head *blk_bh,
				     struct buffer_head *new_blk_bh)
{
	int ret = 0, credits = 0;
	handle_t *handle;
	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
	struct ocfs2_dinode *new_di;
	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
	struct ocfs2_xattr_block *new_xb =
			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
	struct ocfs2_alloc_context *meta_ac;
	struct ocfs2_xattr_value_buf vb = {
		.vb_bh = new_blk_bh,
		.vb_access = ocfs2_journal_access_xb,
	};

	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
						  &credits, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* One more credits in case we need to add xattr flags in new inode. */
	handle = ocfs2_start_trans(osb, credits + 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* Only need journal access to the inode if we will flip its flags. */
	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
		ret = ocfs2_journal_access_di(handle,
					      INODE_CACHE(args->new_inode),
					      args->new_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Copy everything after the block header verbatim. */
	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
	       osb->sb->s_blocksize - header_off);

	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
					 new_blk_bh, new_xh, &vb, meta_ac,
					 ocfs2_get_xattr_value_root, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_journal_dirty(handle, new_blk_bh);

	/* Record that the new inode now carries xattrs. */
	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
		spin_lock(&new_oi->ip_lock);
		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
		spin_unlock(&new_oi->ip_lock);

		ocfs2_journal_dirty(handle, args->new_bh);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	ocfs2_free_alloc_context(meta_ac);
	return ret;
}
6686 
/*
 * Context passed through the xattr-tree iterators while reflinking an
 * indexed xattr block; the two buckets are scratch buffers reused for
 * every old/new bucket pair.
 */
struct ocfs2_reflink_xattr_tree_args {
	struct ocfs2_xattr_reflink *reflink;
	struct buffer_head *old_blk_bh;
	struct buffer_head *new_blk_bh;
	struct ocfs2_xattr_bucket *old_bucket;
	struct ocfs2_xattr_bucket *new_bucket;
};
6694 
6695 /*
6696  * NOTE:
6697  * We have to handle the case that both old bucket and new bucket
6698  * will call this function to get the right ret_bh.
6699  * So The caller must give us the right bh.
6700  */
6701 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6702 					struct buffer_head *bh,
6703 					struct ocfs2_xattr_header *xh,
6704 					int offset,
6705 					struct ocfs2_xattr_value_root **xv,
6706 					struct buffer_head **ret_bh,
6707 					void *para)
6708 {
6709 	struct ocfs2_reflink_xattr_tree_args *args =
6710 			(struct ocfs2_reflink_xattr_tree_args *)para;
6711 	struct ocfs2_xattr_bucket *bucket;
6712 
6713 	if (bh == args->old_bucket->bu_bhs[0])
6714 		bucket = args->old_bucket;
6715 	else
6716 		bucket = args->new_bucket;
6717 
6718 	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6719 					       xv, ret_bh);
6720 }
6721 
/* Accumulators for metadata blocks, journal credits, and refcount recs. */
struct ocfs2_value_tree_metas {
	int num_metas;
	int credits;
	int num_recs;
};
6727 
/*
 * get_xattr_value_root flavour for buckets: para carries the bucket
 * being scanned; bh and xh are unused here.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *bucket = para;

	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, xv, ret_bh);
}
6742 
6743 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6744 				      struct ocfs2_xattr_bucket *bucket,
6745 				      void *para)
6746 {
6747 	struct ocfs2_value_tree_metas *metas =
6748 			(struct ocfs2_value_tree_metas *)para;
6749 	struct ocfs2_xattr_header *xh =
6750 			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6751 
6752 	/* Add the credits for this bucket first. */
6753 	metas->credits += bucket->bu_blocks;
6754 	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6755 					xh, &metas->num_metas,
6756 					&metas->credits, &metas->num_recs,
6757 					ocfs2_value_tree_metas_in_bucket,
6758 					bucket);
6759 }
6760 
6761 /*
6762  * Given a xattr extent rec starting from blkno and having len clusters,
6763  * iterate all the buckets calculate how much metadata we need for reflinking
6764  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6765  */
6766 static int ocfs2_lock_reflink_xattr_rec_allocators(
6767 				struct ocfs2_reflink_xattr_tree_args *args,
6768 				struct ocfs2_extent_tree *xt_et,
6769 				u64 blkno, u32 len, int *credits,
6770 				struct ocfs2_alloc_context **meta_ac,
6771 				struct ocfs2_alloc_context **data_ac)
6772 {
6773 	int ret, num_free_extents;
6774 	struct ocfs2_value_tree_metas metas;
6775 	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6776 	struct ocfs2_refcount_block *rb;
6777 
6778 	memset(&metas, 0, sizeof(metas));
6779 
6780 	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6781 					  ocfs2_calc_value_tree_metas, &metas);
6782 	if (ret) {
6783 		mlog_errno(ret);
6784 		goto out;
6785 	}
6786 
6787 	*credits = metas.credits;
6788 
6789 	/*
6790 	 * Calculate we need for refcount tree change.
6791 	 *
6792 	 * We need to add/modify num_recs in refcount tree, so just calculate
6793 	 * an approximate number we need for refcount tree change.
6794 	 * Sometimes we need to split the tree, and after split,  half recs
6795 	 * will be moved to the new block, and a new block can only provide
6796 	 * half number of recs. So we multiple new blocks by 2.
6797 	 * In the end, we have to add credits for modifying the already
6798 	 * existed refcount block.
6799 	 */
6800 	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6801 	metas.num_recs =
6802 		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6803 		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6804 	metas.num_metas += metas.num_recs;
6805 	*credits += metas.num_recs +
6806 		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6807 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6808 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6809 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6810 	else
6811 		*credits += 1;
6812 
6813 	/* count in the xattr tree change. */
6814 	num_free_extents = ocfs2_num_free_extents(xt_et);
6815 	if (num_free_extents < 0) {
6816 		ret = num_free_extents;
6817 		mlog_errno(ret);
6818 		goto out;
6819 	}
6820 
6821 	if (num_free_extents < len)
6822 		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6823 
6824 	*credits += ocfs2_calc_extend_credits(osb->sb,
6825 					      xt_et->et_root_el);
6826 
6827 	if (metas.num_metas) {
6828 		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6829 							meta_ac);
6830 		if (ret) {
6831 			mlog_errno(ret);
6832 			goto out;
6833 		}
6834 	}
6835 
6836 	if (len) {
6837 		ret = ocfs2_reserve_clusters(osb, len, data_ac);
6838 		if (ret)
6839 			mlog_errno(ret);
6840 	}
6841 out:
6842 	if (ret) {
6843 		if (*meta_ac) {
6844 			ocfs2_free_alloc_context(*meta_ac);
6845 			*meta_ac = NULL;
6846 		}
6847 	}
6848 
6849 	return ret;
6850 }
6851 
/*
 * Copy num_buckets xattr buckets from blkno to new_blkno and reflink
 * the values they contain.  *cpos is set (from the first new bucket's
 * first entry hash) so the caller can insert the new extent record.
 */
static int ocfs2_reflink_xattr_bucket(handle_t *handle,
				u64 blkno, u64 new_blkno, u32 clusters,
				u32 *cpos, int num_buckets,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				struct ocfs2_reflink_xattr_tree_args *args)
{
	int i, j, ret = 0;
	struct super_block *sb = args->reflink->old_inode->i_sb;
	int bpb = args->old_bucket->bu_blocks;
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};

	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_xattr_bucket_journal_access(handle,
						args->new_bucket,
						OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/* Raw block-by-block copy of the whole bucket. */
		for (j = 0; j < bpb; j++)
			memcpy(bucket_block(args->new_bucket, j),
			       bucket_block(args->old_bucket, j),
			       sb->s_blocksize);

		/*
		 * Record the start cpos so that we can use it to initialize
		 * our xattr tree we also set the xh_num_bucket for the new
		 * bucket.
		 */
		if (i == 0) {
			*cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
					    xh_entries[0].xe_name_hash);
			bucket_xh(args->new_bucket)->xh_num_buckets =
				cpu_to_le16(num_buckets);
		}

		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);

		/* Filter entries and refcount outside-stored values. */
		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
					args->old_bucket->bu_bhs[0],
					bucket_xh(args->old_bucket),
					args->new_bucket->bu_bhs[0],
					bucket_xh(args->new_bucket),
					&vb, meta_ac,
					ocfs2_get_reflink_xattr_value_root,
					args);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * Re-access and dirty the bucket to calculate metaecc.
		 * Because we may extend the transaction in reflink_xattr_header
		 * which will let the already accessed block gone.
		 */
		ret = ocfs2_xattr_bucket_journal_access(handle,
						args->new_bucket,
						OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);

		/* Drop the cached blocks before reusing the scratch buckets. */
		ocfs2_xattr_bucket_relse(args->old_bucket);
		ocfs2_xattr_bucket_relse(args->new_bucket);
	}

	ocfs2_xattr_bucket_relse(args->old_bucket);
	ocfs2_xattr_bucket_relse(args->new_bucket);
	return ret;
}
6942 
/*
 * Reflink all xattr buckets covered by one extent rec (blkno/cpos/len):
 * claim new clusters, copy buckets into them, and insert the matching
 * extent records into the new inode's xattr tree.
 */
static int ocfs2_reflink_xattr_buckets(handle_t *handle,
				struct inode *inode,
				struct ocfs2_reflink_xattr_tree_args *args,
				struct ocfs2_extent_tree *et,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				u64 blkno, u32 cpos, u32 len)
{
	int ret, first_inserted = 0;
	u32 p_cluster, num_clusters, reflink_cpos = 0;
	u64 new_blkno;
	unsigned int num_buckets, reflink_buckets;
	unsigned int bpc =
		ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));

	/* Read the first bucket just to learn how many buckets there are. */
	ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
	ocfs2_xattr_bucket_relse(args->old_bucket);

	while (len && num_buckets) {
		/* Clusters may be claimed piecemeal; loop until done. */
		ret = ocfs2_claim_clusters(handle, data_ac,
					   1, &p_cluster, &num_clusters);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		reflink_buckets = min(num_buckets, bpc * num_clusters);

		ret = ocfs2_reflink_xattr_bucket(handle, blkno,
						 new_blkno, num_clusters,
						 &reflink_cpos, reflink_buckets,
						 meta_ac, data_ac, args);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * For the 1st allocated cluster, we make it use the same cpos
		 * so that the xattr tree looks the same as the original one
		 * in the most case.
		 */
		if (!first_inserted) {
			reflink_cpos = cpos;
			first_inserted = 1;
		}
		ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
					  num_clusters, 0, meta_ac);
		if (ret)
			mlog_errno(ret);

		trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
						  num_clusters, reflink_cpos);

		len -= num_clusters;
		blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		num_buckets -= reflink_buckets;
	}
out:
	return ret;
}
7010 
7011 /*
7012  * Create the same xattr extent record in the new inode's xattr tree.
7013  */
7014 static int ocfs2_reflink_xattr_rec(struct inode *inode,
7015 				   struct buffer_head *root_bh,
7016 				   u64 blkno,
7017 				   u32 cpos,
7018 				   u32 len,
7019 				   void *para)
7020 {
7021 	int ret, credits = 0;
7022 	handle_t *handle;
7023 	struct ocfs2_reflink_xattr_tree_args *args =
7024 			(struct ocfs2_reflink_xattr_tree_args *)para;
7025 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7026 	struct ocfs2_alloc_context *meta_ac = NULL;
7027 	struct ocfs2_alloc_context *data_ac = NULL;
7028 	struct ocfs2_extent_tree et;
7029 
7030 	trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
7031 
7032 	ocfs2_init_xattr_tree_extent_tree(&et,
7033 					  INODE_CACHE(args->reflink->new_inode),
7034 					  args->new_blk_bh);
7035 
7036 	ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
7037 						      len, &credits,
7038 						      &meta_ac, &data_ac);
7039 	if (ret) {
7040 		mlog_errno(ret);
7041 		goto out;
7042 	}
7043 
7044 	handle = ocfs2_start_trans(osb, credits);
7045 	if (IS_ERR(handle)) {
7046 		ret = PTR_ERR(handle);
7047 		mlog_errno(ret);
7048 		goto out;
7049 	}
7050 
7051 	ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
7052 					  meta_ac, data_ac,
7053 					  blkno, cpos, len);
7054 	if (ret)
7055 		mlog_errno(ret);
7056 
7057 	ocfs2_commit_trans(osb, handle);
7058 
7059 out:
7060 	if (meta_ac)
7061 		ocfs2_free_alloc_context(meta_ac);
7062 	if (data_ac)
7063 		ocfs2_free_alloc_context(data_ac);
7064 	return ret;
7065 }
7066 
7067 /*
7068  * Create reflinked xattr buckets.
7069  * We will add bucket one by one, and refcount all the xattrs in the bucket
7070  * if they are stored outside.
7071  */
7072 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7073 				    struct buffer_head *blk_bh,
7074 				    struct buffer_head *new_blk_bh)
7075 {
7076 	int ret;
7077 	struct ocfs2_reflink_xattr_tree_args para;
7078 
7079 	memset(&para, 0, sizeof(para));
7080 	para.reflink = args;
7081 	para.old_blk_bh = blk_bh;
7082 	para.new_blk_bh = new_blk_bh;
7083 
7084 	para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7085 	if (!para.old_bucket) {
7086 		mlog_errno(-ENOMEM);
7087 		return -ENOMEM;
7088 	}
7089 
7090 	para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7091 	if (!para.new_bucket) {
7092 		ret = -ENOMEM;
7093 		mlog_errno(ret);
7094 		goto out;
7095 	}
7096 
7097 	ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7098 					      ocfs2_reflink_xattr_rec,
7099 					      &para);
7100 	if (ret)
7101 		mlog_errno(ret);
7102 
7103 out:
7104 	ocfs2_xattr_bucket_free(para.old_bucket);
7105 	ocfs2_xattr_bucket_free(para.new_bucket);
7106 	return ret;
7107 }
7108 
7109 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7110 					struct buffer_head *blk_bh)
7111 {
7112 	int ret, indexed = 0;
7113 	struct buffer_head *new_blk_bh = NULL;
7114 	struct ocfs2_xattr_block *xb =
7115 			(struct ocfs2_xattr_block *)blk_bh->b_data;
7116 
7117 
7118 	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7119 		indexed = 1;
7120 
7121 	ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7122 					     &new_blk_bh, indexed);
7123 	if (ret) {
7124 		mlog_errno(ret);
7125 		goto out;
7126 	}
7127 
7128 	if (!indexed)
7129 		ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7130 	else
7131 		ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7132 	if (ret)
7133 		mlog_errno(ret);
7134 
7135 out:
7136 	brelse(new_blk_bh);
7137 	return ret;
7138 }
7139 
7140 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7141 {
7142 	int type = ocfs2_xattr_get_type(xe);
7143 
7144 	return type != OCFS2_XATTR_INDEX_SECURITY &&
7145 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7146 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7147 }
7148 
/*
 * Reflink all xattrs from @old_inode to @new_inode under the refcount
 * tree lock, so xattr value extents can be CoW-shared rather than
 * copied.  Inline xattrs are handled first, then the external xattr
 * block (plain or indexed).  When @preserve_security is false, the
 * xattr_reflinked filter skips security and POSIX ACL entries; the
 * caller is expected to re-initialize those on the new inode.
 */
int ocfs2_reflink_xattrs(struct inode *old_inode,
			 struct buffer_head *old_bh,
			 struct inode *new_inode,
			 struct buffer_head *new_bh,
			 bool preserve_security)
{
	int ret;
	struct ocfs2_xattr_reflink args;
	struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct ocfs2_refcount_tree *ref_tree;
	struct buffer_head *ref_root_bh = NULL;

	/* Take the refcount tree write-locked for the whole operation. */
	ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
				       le64_to_cpu(di->i_refcount_loc),
				       1, &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_init_dealloc_ctxt(&dealloc);

	args.old_inode = old_inode;
	args.new_inode = new_inode;
	args.old_bh = old_bh;
	args.new_bh = new_bh;
	args.ref_ci = &ref_tree->rf_ci;
	args.ref_root_bh = ref_root_bh;
	args.dealloc = &dealloc;
	/* NULL filter means "reflink everything". */
	if (preserve_security)
		args.xattr_reflinked = NULL;
	else
		args.xattr_reflinked = ocfs2_reflink_xattr_no_security;

	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_reflink_xattr_inline(&args);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}
	}

	/* No external xattr block: inline copy (if any) was everything. */
	if (!di->i_xattr_loc)
		goto out_unlock;

	ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
	if (ret)
		mlog_errno(ret);

	brelse(blk_bh);

out_unlock:
	ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
				   ref_tree, 1);
	brelse(ref_root_bh);

	/* Release any clusters queued for deallocation during the copy. */
	if (ocfs2_dealloc_has_cluster(&dealloc)) {
		ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
		ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
	}

out:
	return ret;
}
7223 
7224 /*
7225  * Initialize security and acl for a already created inode.
7226  * Used for reflink a non-preserve-security file.
7227  *
7228  * It uses common api like ocfs2_xattr_set, so the caller
7229  * must not hold any lock expect i_rwsem.
7230  */
7231 int ocfs2_init_security_and_acl(struct inode *dir,
7232 				struct inode *inode,
7233 				const struct qstr *qstr)
7234 {
7235 	int ret = 0;
7236 	struct buffer_head *dir_bh = NULL;
7237 
7238 	ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7239 	if (ret) {
7240 		mlog_errno(ret);
7241 		goto leave;
7242 	}
7243 
7244 	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7245 	if (ret) {
7246 		mlog_errno(ret);
7247 		goto leave;
7248 	}
7249 	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7250 	if (ret)
7251 		mlog_errno(ret);
7252 
7253 	ocfs2_inode_unlock(dir, 0);
7254 	brelse(dir_bh);
7255 leave:
7256 	return ret;
7257 }
7258 
7259 /*
7260  * 'security' attributes support
7261  */
7262 static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
7263 				    struct dentry *unused, struct inode *inode,
7264 				    const char *name, void *buffer, size_t size)
7265 {
7266 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY,
7267 			       name, buffer, size);
7268 }
7269 
7270 static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
7271 				    struct mnt_idmap *idmap,
7272 				    struct dentry *unused, struct inode *inode,
7273 				    const char *name, const void *value,
7274 				    size_t size, int flags)
7275 {
7276 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7277 			       name, value, size, flags);
7278 }
7279 
7280 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
7281 		     void *fs_info)
7282 {
7283 	struct ocfs2_security_xattr_info *si = fs_info;
7284 	const struct xattr *xattr;
7285 	int err = 0;
7286 
7287 	if (si) {
7288 		si->value = kmemdup(xattr_array->value, xattr_array->value_len,
7289 				    GFP_KERNEL);
7290 		if (!si->value)
7291 			return -ENOMEM;
7292 
7293 		si->name = xattr_array->name;
7294 		si->value_len = xattr_array->value_len;
7295 		return 0;
7296 	}
7297 
7298 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
7299 		err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7300 				      xattr->name, xattr->value,
7301 				      xattr->value_len, XATTR_CREATE);
7302 		if (err)
7303 			break;
7304 	}
7305 	return err;
7306 }
7307 
7308 int ocfs2_init_security_get(struct inode *inode,
7309 			    struct inode *dir,
7310 			    const struct qstr *qstr,
7311 			    struct ocfs2_security_xattr_info *si)
7312 {
7313 	int ret;
7314 
7315 	/* check whether ocfs2 support feature xattr */
7316 	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7317 		return -EOPNOTSUPP;
7318 	if (si) {
7319 		ret = security_inode_init_security(inode, dir, qstr,
7320 						   &ocfs2_initxattrs, si);
7321 		/*
7322 		 * security_inode_init_security() does not return -EOPNOTSUPP,
7323 		 * we have to check the xattr ourselves.
7324 		 */
7325 		if (!ret && !si->name)
7326 			si->enable = 0;
7327 
7328 		return ret;
7329 	}
7330 
7331 	return security_inode_init_security(inode, dir, qstr,
7332 					    &ocfs2_initxattrs, NULL);
7333 }
7334 
7335 int ocfs2_init_security_set(handle_t *handle,
7336 			    struct inode *inode,
7337 			    struct buffer_head *di_bh,
7338 			    struct ocfs2_security_xattr_info *si,
7339 			    struct ocfs2_alloc_context *xattr_ac,
7340 			    struct ocfs2_alloc_context *data_ac)
7341 {
7342 	return ocfs2_xattr_set_handle(handle, inode, di_bh,
7343 				     OCFS2_XATTR_INDEX_SECURITY,
7344 				     si->name, si->value, si->value_len, 0,
7345 				     xattr_ac, data_ac);
7346 }
7347 
/* VFS handler for "security."-prefixed xattrs. */
const struct xattr_handler ocfs2_xattr_security_handler = {
	.prefix	= XATTR_SECURITY_PREFIX,
	.get	= ocfs2_xattr_security_get,
	.set	= ocfs2_xattr_security_set,
};
7353 
7354 /*
7355  * 'trusted' attributes support
7356  */
7357 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
7358 				   struct dentry *unused, struct inode *inode,
7359 				   const char *name, void *buffer, size_t size)
7360 {
7361 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED,
7362 			       name, buffer, size);
7363 }
7364 
7365 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
7366 				   struct mnt_idmap *idmap,
7367 				   struct dentry *unused, struct inode *inode,
7368 				   const char *name, const void *value,
7369 				   size_t size, int flags)
7370 {
7371 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED,
7372 			       name, value, size, flags);
7373 }
7374 
/* VFS handler for "trusted."-prefixed xattrs. */
const struct xattr_handler ocfs2_xattr_trusted_handler = {
	.prefix	= XATTR_TRUSTED_PREFIX,
	.get	= ocfs2_xattr_trusted_get,
	.set	= ocfs2_xattr_trusted_set,
};
7380 
7381 /*
7382  * 'user' attributes support
7383  */
7384 static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
7385 				struct dentry *unused, struct inode *inode,
7386 				const char *name, void *buffer, size_t size)
7387 {
7388 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7389 
7390 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7391 		return -EOPNOTSUPP;
7392 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
7393 			       buffer, size);
7394 }
7395 
7396 static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
7397 				struct mnt_idmap *idmap,
7398 				struct dentry *unused, struct inode *inode,
7399 				const char *name, const void *value,
7400 				size_t size, int flags)
7401 {
7402 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7403 
7404 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7405 		return -EOPNOTSUPP;
7406 
7407 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER,
7408 			       name, value, size, flags);
7409 }
7410 
/* VFS handler for "user."-prefixed xattrs (subject to nouser_xattr). */
const struct xattr_handler ocfs2_xattr_user_handler = {
	.prefix	= XATTR_USER_PREFIX,
	.get	= ocfs2_xattr_user_get,
	.set	= ocfs2_xattr_user_set,
};
7416