xref: /linux/fs/ocfs2/xattr.c (revision 2993c9b04e616df0848b655d7202a707a70fc876)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* -*- mode: c; c-basic-offset: 8; -*-
3  * vim: noexpandtab sw=8 ts=8 sts=0:
4  *
5  * xattr.c
6  *
7  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
8  *
9  * CREDITS:
10  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
11  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
12  */
13 
14 #include <linux/capability.h>
15 #include <linux/fs.h>
16 #include <linux/types.h>
17 #include <linux/slab.h>
18 #include <linux/highmem.h>
19 #include <linux/pagemap.h>
20 #include <linux/uio.h>
21 #include <linux/sched.h>
22 #include <linux/splice.h>
23 #include <linux/mount.h>
24 #include <linux/writeback.h>
25 #include <linux/falloc.h>
26 #include <linux/sort.h>
27 #include <linux/init.h>
28 #include <linux/module.h>
29 #include <linux/string.h>
30 #include <linux/security.h>
31 
32 #include <cluster/masklog.h>
33 
34 #include "ocfs2.h"
35 #include "alloc.h"
36 #include "blockcheck.h"
37 #include "dlmglue.h"
38 #include "file.h"
39 #include "symlink.h"
40 #include "sysfile.h"
41 #include "inode.h"
42 #include "journal.h"
43 #include "ocfs2_fs.h"
44 #include "suballoc.h"
45 #include "uptodate.h"
46 #include "buffer_head_io.h"
47 #include "super.h"
48 #include "xattr.h"
49 #include "refcounttree.h"
50 #include "acl.h"
51 #include "ocfs2_trace.h"
52 
53 struct ocfs2_xattr_def_value_root {
54 	struct ocfs2_xattr_value_root	xv;
55 	struct ocfs2_extent_rec		er;
56 };
57 
58 struct ocfs2_xattr_bucket {
59 	/* The inode these xattrs are associated with */
60 	struct inode *bu_inode;
61 
62 	/* The actual buffers that make up the bucket */
63 	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
64 
65 	/* How many blocks make up one bucket for this filesystem */
66 	int bu_blocks;
67 };
68 
69 struct ocfs2_xattr_set_ctxt {
70 	handle_t *handle;
71 	struct ocfs2_alloc_context *meta_ac;
72 	struct ocfs2_alloc_context *data_ac;
73 	struct ocfs2_cached_dealloc_ctxt dealloc;
74 	int set_abort;
75 };
76 
/* Bytes taken by a value root plus its one trailing extent record */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/* Values longer than this are represented by a value root, not stored inline */
#define OCFS2_XATTR_INLINE_SIZE	80
#define OCFS2_XATTR_HEADER_GAP	4
/* Minimum inline xattr area minus the xattr header and the header gap */
#define OCFS2_XATTR_FREE_IN_IBODY \
	(OCFS2_MIN_XATTR_INLINE_SIZE - sizeof(struct ocfs2_xattr_header) - \
	 OCFS2_XATTR_HEADER_GAP)
/* One block minus the xattr block, the xattr header and the header gap */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) \
	((ptr)->i_sb->s_blocksize - sizeof(struct ocfs2_xattr_block) - \
	 sizeof(struct ocfs2_xattr_header) - OCFS2_XATTR_HEADER_GAP)
87 
88 static struct ocfs2_xattr_def_value_root def_xv = {
89 	.xv.xr_list.l_count = cpu_to_le16(1),
90 };
91 
92 const struct xattr_handler *ocfs2_xattr_handlers[] = {
93 	&ocfs2_xattr_user_handler,
94 	&posix_acl_access_xattr_handler,
95 	&posix_acl_default_xattr_handler,
96 	&ocfs2_xattr_trusted_handler,
97 	&ocfs2_xattr_security_handler,
98 	NULL
99 };
100 
101 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
102 	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
103 	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
104 					= &posix_acl_access_xattr_handler,
105 	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
106 					= &posix_acl_default_xattr_handler,
107 	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
108 	[OCFS2_XATTR_INDEX_SECURITY]	= &ocfs2_xattr_security_handler,
109 };
110 
111 struct ocfs2_xattr_info {
112 	int		xi_name_index;
113 	const char	*xi_name;
114 	int		xi_name_len;
115 	const void	*xi_value;
116 	size_t		xi_value_len;
117 };
118 
/* Cursor describing where an xattr lookup is looking and what it found. */
struct ocfs2_xattr_search {
	struct buffer_head *inode_bh;
	/*
	 * Buffer head of the block that holds the extended attribute.
	 * When the attribute is stored in the inode, xattr_bh is equal
	 * to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	struct ocfs2_xattr_header *header;
	struct ocfs2_xattr_bucket *bucket;
	/* Base address that entry name offsets are added to */
	void *base;
	void *end;
	/* Entry the search is currently positioned on */
	struct ocfs2_xattr_entry *here;
	int not_found;
};
133 
134 /* Operations on struct ocfs2_xa_entry */
135 struct ocfs2_xa_loc;
136 struct ocfs2_xa_loc_operations {
137 	/*
138 	 * Journal functions
139 	 */
140 	int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
141 				  int type);
142 	void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);
143 
144 	/*
145 	 * Return a pointer to the appropriate buffer in loc->xl_storage
146 	 * at the given offset from loc->xl_header.
147 	 */
148 	void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
149 
150 	/* Can we reuse the existing entry for the new value? */
151 	int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
152 			     struct ocfs2_xattr_info *xi);
153 
154 	/* How much space is needed for the new value? */
155 	int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
156 			       struct ocfs2_xattr_info *xi);
157 
158 	/*
159 	 * Return the offset of the first name+value pair.  This is
160 	 * the start of our downward-filling free space.
161 	 */
162 	int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
163 
164 	/*
165 	 * Remove the name+value at this location.  Do whatever is
166 	 * appropriate with the remaining name+value pairs.
167 	 */
168 	void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
169 
170 	/* Fill xl_entry with a new entry */
171 	void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
172 
173 	/* Add name+value storage to an entry */
174 	void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
175 
176 	/*
177 	 * Initialize the value buf's access and bh fields for this entry.
178 	 * ocfs2_xa_fill_value_buf() will handle the xv pointer.
179 	 */
180 	void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
181 				   struct ocfs2_xattr_value_buf *vb);
182 };
183 
/*
 * In-memory description of where an xattr entry lives.  It tracks the
 * corresponding on-disk structure.
 */
struct ocfs2_xa_loc {
	/* Inode that owns the xattr */
	struct inode *xl_inode;

	/* ocfs2_xattr_header within the on-disk storage; never NULL */
	struct ocfs2_xattr_header *xl_header;

	/* Number of bytes between xl_header and the end of the storage */
	int xl_size;

	/*
	 * The ocfs2_xattr_entry being described.  When NULL, the
	 * location describes the on-disk structure where the entry
	 * would have been.
	 */
	struct ocfs2_xattr_entry *xl_entry;

	/*
	 * Internal housekeeping
	 */

	/* Buffer(s) that contain this entry */
	void *xl_storage;

	/* Operations for the storage that backs this location */
	const struct ocfs2_xa_loc_operations *xl_ops;
};
215 
216 /*
217  * Convenience functions to calculate how much space is needed for a
218  * given name+value pair
219  */
220 static int namevalue_size(int name_len, uint64_t value_len)
221 {
222 	if (value_len > OCFS2_XATTR_INLINE_SIZE)
223 		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
224 	else
225 		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
226 }
227 
228 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
229 {
230 	return namevalue_size(xi->xi_name_len, xi->xi_value_len);
231 }
232 
233 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
234 {
235 	u64 value_len = le64_to_cpu(xe->xe_value_size);
236 
237 	BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
238 	       ocfs2_xattr_is_local(xe));
239 	return namevalue_size(xe->xe_name_len, value_len);
240 }
241 
242 
243 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
244 					     struct ocfs2_xattr_header *xh,
245 					     int index,
246 					     int *block_off,
247 					     int *new_offset);
248 
249 static int ocfs2_xattr_block_find(struct inode *inode,
250 				  int name_index,
251 				  const char *name,
252 				  struct ocfs2_xattr_search *xs);
253 static int ocfs2_xattr_index_block_find(struct inode *inode,
254 					struct buffer_head *root_bh,
255 					int name_index,
256 					const char *name,
257 					struct ocfs2_xattr_search *xs);
258 
259 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
260 					struct buffer_head *blk_bh,
261 					char *buffer,
262 					size_t buffer_size);
263 
264 static int ocfs2_xattr_create_index_block(struct inode *inode,
265 					  struct ocfs2_xattr_search *xs,
266 					  struct ocfs2_xattr_set_ctxt *ctxt);
267 
268 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
269 					     struct ocfs2_xattr_info *xi,
270 					     struct ocfs2_xattr_search *xs,
271 					     struct ocfs2_xattr_set_ctxt *ctxt);
272 
273 typedef int (xattr_tree_rec_func)(struct inode *inode,
274 				  struct buffer_head *root_bh,
275 				  u64 blkno, u32 cpos, u32 len, void *para);
276 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
277 					   struct buffer_head *root_bh,
278 					   xattr_tree_rec_func *rec_func,
279 					   void *para);
280 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
281 					struct ocfs2_xattr_bucket *bucket,
282 					void *para);
283 static int ocfs2_rm_xattr_cluster(struct inode *inode,
284 				  struct buffer_head *root_bh,
285 				  u64 blkno,
286 				  u32 cpos,
287 				  u32 len,
288 				  void *para);
289 
290 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
291 				  u64 src_blk, u64 last_blk, u64 to_blk,
292 				  unsigned int start_bucket,
293 				  u32 *first_hash);
294 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
295 					struct ocfs2_dinode *di,
296 					struct ocfs2_xattr_info *xi,
297 					struct ocfs2_xattr_search *xis,
298 					struct ocfs2_xattr_search *xbs,
299 					struct ocfs2_refcount_tree **ref_tree,
300 					int *meta_need,
301 					int *credits);
302 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
303 					   struct ocfs2_xattr_bucket *bucket,
304 					   int offset,
305 					   struct ocfs2_xattr_value_root **xv,
306 					   struct buffer_head **bh);
307 
308 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
309 {
310 	return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
311 }
312 
313 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
314 {
315 	return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
316 }
317 
/* Block number of the bucket's first block */
#define bucket_blkno(_b)	((_b)->bu_bhs[0]->b_blocknr)
/* Data of the bucket's _n'th block */
#define bucket_block(_b, _n)	((_b)->bu_bhs[(_n)]->b_data)
/* The xattr header, which sits at the start of the bucket's first block */
#define bucket_xh(_b) \
	((struct ocfs2_xattr_header *)bucket_block((_b), 0))
321 
322 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
323 {
324 	struct ocfs2_xattr_bucket *bucket;
325 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
326 
327 	BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
328 
329 	bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
330 	if (bucket) {
331 		bucket->bu_inode = inode;
332 		bucket->bu_blocks = blks;
333 	}
334 
335 	return bucket;
336 }
337 
338 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
339 {
340 	int i;
341 
342 	for (i = 0; i < bucket->bu_blocks; i++) {
343 		brelse(bucket->bu_bhs[i]);
344 		bucket->bu_bhs[i] = NULL;
345 	}
346 }
347 
348 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
349 {
350 	if (bucket) {
351 		ocfs2_xattr_bucket_relse(bucket);
352 		bucket->bu_inode = NULL;
353 		kfree(bucket);
354 	}
355 }
356 
357 /*
358  * A bucket that has never been written to disk doesn't need to be
359  * read.  We just need the buffer_heads.  Don't call this for
360  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
361  * them fully.
362  */
363 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
364 				   u64 xb_blkno, int new)
365 {
366 	int i, rc = 0;
367 
368 	for (i = 0; i < bucket->bu_blocks; i++) {
369 		bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
370 					      xb_blkno + i);
371 		if (!bucket->bu_bhs[i]) {
372 			rc = -ENOMEM;
373 			mlog_errno(rc);
374 			break;
375 		}
376 
377 		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
378 					   bucket->bu_bhs[i])) {
379 			if (new)
380 				ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
381 							      bucket->bu_bhs[i]);
382 			else {
383 				set_buffer_uptodate(bucket->bu_bhs[i]);
384 				ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
385 							  bucket->bu_bhs[i]);
386 			}
387 		}
388 	}
389 
390 	if (rc)
391 		ocfs2_xattr_bucket_relse(bucket);
392 	return rc;
393 }
394 
395 /* Read the xattr bucket at xb_blkno */
396 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
397 				   u64 xb_blkno)
398 {
399 	int rc;
400 
401 	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
402 			       bucket->bu_blocks, bucket->bu_bhs, 0,
403 			       NULL);
404 	if (!rc) {
405 		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
406 		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
407 						 bucket->bu_bhs,
408 						 bucket->bu_blocks,
409 						 &bucket_xh(bucket)->xh_check);
410 		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
411 		if (rc)
412 			mlog_errno(rc);
413 	}
414 
415 	if (rc)
416 		ocfs2_xattr_bucket_relse(bucket);
417 	return rc;
418 }
419 
420 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
421 					     struct ocfs2_xattr_bucket *bucket,
422 					     int type)
423 {
424 	int i, rc = 0;
425 
426 	for (i = 0; i < bucket->bu_blocks; i++) {
427 		rc = ocfs2_journal_access(handle,
428 					  INODE_CACHE(bucket->bu_inode),
429 					  bucket->bu_bhs[i], type);
430 		if (rc) {
431 			mlog_errno(rc);
432 			break;
433 		}
434 	}
435 
436 	return rc;
437 }
438 
439 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
440 					     struct ocfs2_xattr_bucket *bucket)
441 {
442 	int i;
443 
444 	spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
445 	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
446 				   bucket->bu_bhs, bucket->bu_blocks,
447 				   &bucket_xh(bucket)->xh_check);
448 	spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
449 
450 	for (i = 0; i < bucket->bu_blocks; i++)
451 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
452 }
453 
454 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
455 					 struct ocfs2_xattr_bucket *src)
456 {
457 	int i;
458 	int blocksize = src->bu_inode->i_sb->s_blocksize;
459 
460 	BUG_ON(dest->bu_blocks != src->bu_blocks);
461 	BUG_ON(dest->bu_inode != src->bu_inode);
462 
463 	for (i = 0; i < src->bu_blocks; i++) {
464 		memcpy(bucket_block(dest, i), bucket_block(src, i),
465 		       blocksize);
466 	}
467 }
468 
469 static int ocfs2_validate_xattr_block(struct super_block *sb,
470 				      struct buffer_head *bh)
471 {
472 	int rc;
473 	struct ocfs2_xattr_block *xb =
474 		(struct ocfs2_xattr_block *)bh->b_data;
475 
476 	trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
477 
478 	BUG_ON(!buffer_uptodate(bh));
479 
480 	/*
481 	 * If the ecc fails, we return the error but otherwise
482 	 * leave the filesystem running.  We know any error is
483 	 * local to this block.
484 	 */
485 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
486 	if (rc)
487 		return rc;
488 
489 	/*
490 	 * Errors after here are fatal
491 	 */
492 
493 	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
494 		return ocfs2_error(sb,
495 				   "Extended attribute block #%llu has bad signature %.*s\n",
496 				   (unsigned long long)bh->b_blocknr, 7,
497 				   xb->xb_signature);
498 	}
499 
500 	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
501 		return ocfs2_error(sb,
502 				   "Extended attribute block #%llu has an invalid xb_blkno of %llu\n",
503 				   (unsigned long long)bh->b_blocknr,
504 				   (unsigned long long)le64_to_cpu(xb->xb_blkno));
505 	}
506 
507 	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
508 		return ocfs2_error(sb,
509 				   "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n",
510 				   (unsigned long long)bh->b_blocknr,
511 				   le32_to_cpu(xb->xb_fs_generation));
512 	}
513 
514 	return 0;
515 }
516 
517 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
518 				  struct buffer_head **bh)
519 {
520 	int rc;
521 	struct buffer_head *tmp = *bh;
522 
523 	rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
524 			      ocfs2_validate_xattr_block);
525 
526 	/* If ocfs2_read_block() got us a new bh, pass it up. */
527 	if (!rc && !*bh)
528 		*bh = tmp;
529 
530 	return rc;
531 }
532 
533 static inline const char *ocfs2_xattr_prefix(int name_index)
534 {
535 	const struct xattr_handler *handler = NULL;
536 
537 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
538 		handler = ocfs2_xattr_handler_map[name_index];
539 	return handler ? xattr_prefix(handler) : NULL;
540 }
541 
542 static u32 ocfs2_xattr_name_hash(struct inode *inode,
543 				 const char *name,
544 				 int name_len)
545 {
546 	/* Get hash value of uuid from super block */
547 	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
548 	int i;
549 
550 	/* hash extended attribute name */
551 	for (i = 0; i < name_len; i++) {
552 		hash = (hash << OCFS2_HASH_SHIFT) ^
553 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
554 		       *name++;
555 	}
556 
557 	return hash;
558 }
559 
560 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
561 {
562 	return namevalue_size(name_len, value_len) +
563 		sizeof(struct ocfs2_xattr_entry);
564 }
565 
566 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
567 {
568 	return namevalue_size_xi(xi) +
569 		sizeof(struct ocfs2_xattr_entry);
570 }
571 
572 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
573 {
574 	return namevalue_size_xe(xe) +
575 		sizeof(struct ocfs2_xattr_entry);
576 }
577 
578 int ocfs2_calc_security_init(struct inode *dir,
579 			     struct ocfs2_security_xattr_info *si,
580 			     int *want_clusters,
581 			     int *xattr_credits,
582 			     struct ocfs2_alloc_context **xattr_ac)
583 {
584 	int ret = 0;
585 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
586 	int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
587 						 si->value_len);
588 
589 	/*
590 	 * The max space of security xattr taken inline is
591 	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
592 	 * So reserve one metadata block for it is ok.
593 	 */
594 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
595 	    s_size > OCFS2_XATTR_FREE_IN_IBODY) {
596 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
597 		if (ret) {
598 			mlog_errno(ret);
599 			return ret;
600 		}
601 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
602 	}
603 
604 	/* reserve clusters for xattr value which will be set in B tree*/
605 	if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
606 		int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
607 							    si->value_len);
608 
609 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
610 							   new_clusters);
611 		*want_clusters += new_clusters;
612 	}
613 	return ret;
614 }
615 
616 int ocfs2_calc_xattr_init(struct inode *dir,
617 			  struct buffer_head *dir_bh,
618 			  umode_t mode,
619 			  struct ocfs2_security_xattr_info *si,
620 			  int *want_clusters,
621 			  int *xattr_credits,
622 			  int *want_meta)
623 {
624 	int ret = 0;
625 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
626 	int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
627 
628 	if (si->enable)
629 		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
630 						     si->value_len);
631 
632 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
633 		down_read(&OCFS2_I(dir)->ip_xattr_sem);
634 		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
635 					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
636 					"", NULL, 0);
637 		up_read(&OCFS2_I(dir)->ip_xattr_sem);
638 		if (acl_len > 0) {
639 			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
640 			if (S_ISDIR(mode))
641 				a_size <<= 1;
642 		} else if (acl_len != 0 && acl_len != -ENODATA) {
643 			ret = acl_len;
644 			mlog_errno(ret);
645 			return ret;
646 		}
647 	}
648 
649 	if (!(s_size + a_size))
650 		return ret;
651 
652 	/*
653 	 * The max space of security xattr taken inline is
654 	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
655 	 * The max space of acl xattr taken inline is
656 	 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
657 	 * when blocksize = 512, may reserve one more cluser for
658 	 * xattr bucket, otherwise reserve one metadata block
659 	 * for them is ok.
660 	 * If this is a new directory with inline data,
661 	 * we choose to reserve the entire inline area for
662 	 * directory contents and force an external xattr block.
663 	 */
664 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
665 	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
666 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
667 		*want_meta = *want_meta + 1;
668 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
669 	}
670 
671 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
672 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
673 		*want_clusters += 1;
674 		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
675 	}
676 
677 	/*
678 	 * reserve credits and clusters for xattrs which has large value
679 	 * and have to be set outside
680 	 */
681 	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
682 		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
683 							si->value_len);
684 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
685 							   new_clusters);
686 		*want_clusters += new_clusters;
687 	}
688 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
689 	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
690 		/* for directory, it has DEFAULT and ACCESS two types of acls */
691 		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
692 				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
693 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
694 							   new_clusters);
695 		*want_clusters += new_clusters;
696 	}
697 
698 	return ret;
699 }
700 
701 static int ocfs2_xattr_extend_allocation(struct inode *inode,
702 					 u32 clusters_to_add,
703 					 struct ocfs2_xattr_value_buf *vb,
704 					 struct ocfs2_xattr_set_ctxt *ctxt)
705 {
706 	int status = 0, credits;
707 	handle_t *handle = ctxt->handle;
708 	enum ocfs2_alloc_restarted why;
709 	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
710 	struct ocfs2_extent_tree et;
711 
712 	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
713 
714 	while (clusters_to_add) {
715 		trace_ocfs2_xattr_extend_allocation(clusters_to_add);
716 
717 		status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
718 				       OCFS2_JOURNAL_ACCESS_WRITE);
719 		if (status < 0) {
720 			mlog_errno(status);
721 			break;
722 		}
723 
724 		prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
725 		status = ocfs2_add_clusters_in_btree(handle,
726 						     &et,
727 						     &logical_start,
728 						     clusters_to_add,
729 						     0,
730 						     ctxt->data_ac,
731 						     ctxt->meta_ac,
732 						     &why);
733 		if ((status < 0) && (status != -EAGAIN)) {
734 			if (status != -ENOSPC)
735 				mlog_errno(status);
736 			break;
737 		}
738 
739 		ocfs2_journal_dirty(handle, vb->vb_bh);
740 
741 		clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
742 					 prev_clusters;
743 
744 		if (why != RESTART_NONE && clusters_to_add) {
745 			/*
746 			 * We can only fail in case the alloc file doesn't give
747 			 * up enough clusters.
748 			 */
749 			BUG_ON(why == RESTART_META);
750 
751 			credits = ocfs2_calc_extend_credits(inode->i_sb,
752 							    &vb->vb_xv->xr_list);
753 			status = ocfs2_extend_trans(handle, credits);
754 			if (status < 0) {
755 				status = -ENOMEM;
756 				mlog_errno(status);
757 				break;
758 			}
759 		}
760 	}
761 
762 	return status;
763 }
764 
765 static int __ocfs2_remove_xattr_range(struct inode *inode,
766 				      struct ocfs2_xattr_value_buf *vb,
767 				      u32 cpos, u32 phys_cpos, u32 len,
768 				      unsigned int ext_flags,
769 				      struct ocfs2_xattr_set_ctxt *ctxt)
770 {
771 	int ret;
772 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
773 	handle_t *handle = ctxt->handle;
774 	struct ocfs2_extent_tree et;
775 
776 	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
777 
778 	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
779 			    OCFS2_JOURNAL_ACCESS_WRITE);
780 	if (ret) {
781 		mlog_errno(ret);
782 		goto out;
783 	}
784 
785 	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
786 				  &ctxt->dealloc);
787 	if (ret) {
788 		mlog_errno(ret);
789 		goto out;
790 	}
791 
792 	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
793 	ocfs2_journal_dirty(handle, vb->vb_bh);
794 
795 	if (ext_flags & OCFS2_EXT_REFCOUNTED)
796 		ret = ocfs2_decrease_refcount(inode, handle,
797 					ocfs2_blocks_to_clusters(inode->i_sb,
798 								 phys_blkno),
799 					len, ctxt->meta_ac, &ctxt->dealloc, 1);
800 	else
801 		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
802 						  phys_blkno, len);
803 	if (ret)
804 		mlog_errno(ret);
805 
806 out:
807 	return ret;
808 }
809 
810 static int ocfs2_xattr_shrink_size(struct inode *inode,
811 				   u32 old_clusters,
812 				   u32 new_clusters,
813 				   struct ocfs2_xattr_value_buf *vb,
814 				   struct ocfs2_xattr_set_ctxt *ctxt)
815 {
816 	int ret = 0;
817 	unsigned int ext_flags;
818 	u32 trunc_len, cpos, phys_cpos, alloc_size;
819 	u64 block;
820 
821 	if (old_clusters <= new_clusters)
822 		return 0;
823 
824 	cpos = new_clusters;
825 	trunc_len = old_clusters - new_clusters;
826 	while (trunc_len) {
827 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
828 					       &alloc_size,
829 					       &vb->vb_xv->xr_list, &ext_flags);
830 		if (ret) {
831 			mlog_errno(ret);
832 			goto out;
833 		}
834 
835 		if (alloc_size > trunc_len)
836 			alloc_size = trunc_len;
837 
838 		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
839 						 phys_cpos, alloc_size,
840 						 ext_flags, ctxt);
841 		if (ret) {
842 			mlog_errno(ret);
843 			goto out;
844 		}
845 
846 		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
847 		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
848 						       block, alloc_size);
849 		cpos += alloc_size;
850 		trunc_len -= alloc_size;
851 	}
852 
853 out:
854 	return ret;
855 }
856 
857 static int ocfs2_xattr_value_truncate(struct inode *inode,
858 				      struct ocfs2_xattr_value_buf *vb,
859 				      int len,
860 				      struct ocfs2_xattr_set_ctxt *ctxt)
861 {
862 	int ret;
863 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
864 	u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
865 
866 	if (new_clusters == old_clusters)
867 		return 0;
868 
869 	if (new_clusters > old_clusters)
870 		ret = ocfs2_xattr_extend_allocation(inode,
871 						    new_clusters - old_clusters,
872 						    vb, ctxt);
873 	else
874 		ret = ocfs2_xattr_shrink_size(inode,
875 					      old_clusters, new_clusters,
876 					      vb, ctxt);
877 
878 	return ret;
879 }
880 
881 static int ocfs2_xattr_list_entry(struct super_block *sb,
882 				  char *buffer, size_t size,
883 				  size_t *result, int type,
884 				  const char *name, int name_len)
885 {
886 	char *p = buffer + *result;
887 	const char *prefix;
888 	int prefix_len;
889 	int total_len;
890 
891 	switch(type) {
892 	case OCFS2_XATTR_INDEX_USER:
893 		if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
894 			return 0;
895 		break;
896 
897 	case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
898 	case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
899 		if (!(sb->s_flags & SB_POSIXACL))
900 			return 0;
901 		break;
902 
903 	case OCFS2_XATTR_INDEX_TRUSTED:
904 		if (!capable(CAP_SYS_ADMIN))
905 			return 0;
906 		break;
907 	}
908 
909 	prefix = ocfs2_xattr_prefix(type);
910 	if (!prefix)
911 		return 0;
912 	prefix_len = strlen(prefix);
913 	total_len = prefix_len + name_len + 1;
914 	*result += total_len;
915 
916 	/* we are just looking for how big our buffer needs to be */
917 	if (!size)
918 		return 0;
919 
920 	if (*result > size)
921 		return -ERANGE;
922 
923 	memcpy(p, prefix, prefix_len);
924 	memcpy(p + prefix_len, name, name_len);
925 	p[prefix_len + name_len] = '\0';
926 
927 	return 0;
928 }
929 
930 static int ocfs2_xattr_list_entries(struct inode *inode,
931 				    struct ocfs2_xattr_header *header,
932 				    char *buffer, size_t buffer_size)
933 {
934 	size_t result = 0;
935 	int i, type, ret;
936 	const char *name;
937 
938 	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
939 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
940 		type = ocfs2_xattr_get_type(entry);
941 		name = (const char *)header +
942 			le16_to_cpu(entry->xe_name_offset);
943 
944 		ret = ocfs2_xattr_list_entry(inode->i_sb,
945 					     buffer, buffer_size,
946 					     &result, type, name,
947 					     entry->xe_name_len);
948 		if (ret)
949 			return ret;
950 	}
951 
952 	return result;
953 }
954 
955 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
956 					 struct ocfs2_dinode *di)
957 {
958 	struct ocfs2_xattr_header *xh;
959 	int i;
960 
961 	xh = (struct ocfs2_xattr_header *)
962 		 ((void *)di + inode->i_sb->s_blocksize -
963 		 le16_to_cpu(di->i_xattr_inline_size));
964 
965 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
966 		if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
967 			return 1;
968 
969 	return 0;
970 }
971 
972 static int ocfs2_xattr_ibody_list(struct inode *inode,
973 				  struct ocfs2_dinode *di,
974 				  char *buffer,
975 				  size_t buffer_size)
976 {
977 	struct ocfs2_xattr_header *header = NULL;
978 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
979 	int ret = 0;
980 
981 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
982 		return ret;
983 
984 	header = (struct ocfs2_xattr_header *)
985 		 ((void *)di + inode->i_sb->s_blocksize -
986 		 le16_to_cpu(di->i_xattr_inline_size));
987 
988 	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
989 
990 	return ret;
991 }
992 
993 static int ocfs2_xattr_block_list(struct inode *inode,
994 				  struct ocfs2_dinode *di,
995 				  char *buffer,
996 				  size_t buffer_size)
997 {
998 	struct buffer_head *blk_bh = NULL;
999 	struct ocfs2_xattr_block *xb;
1000 	int ret = 0;
1001 
1002 	if (!di->i_xattr_loc)
1003 		return ret;
1004 
1005 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
1006 				     &blk_bh);
1007 	if (ret < 0) {
1008 		mlog_errno(ret);
1009 		return ret;
1010 	}
1011 
1012 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1013 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1014 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
1015 		ret = ocfs2_xattr_list_entries(inode, header,
1016 					       buffer, buffer_size);
1017 	} else
1018 		ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
1019 						   buffer, buffer_size);
1020 
1021 	brelse(blk_bh);
1022 
1023 	return ret;
1024 }
1025 
1026 ssize_t ocfs2_listxattr(struct dentry *dentry,
1027 			char *buffer,
1028 			size_t size)
1029 {
1030 	int ret = 0, i_ret = 0, b_ret = 0;
1031 	struct buffer_head *di_bh = NULL;
1032 	struct ocfs2_dinode *di = NULL;
1033 	struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));
1034 
1035 	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1036 		return -EOPNOTSUPP;
1037 
1038 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1039 		return ret;
1040 
1041 	ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
1042 	if (ret < 0) {
1043 		mlog_errno(ret);
1044 		return ret;
1045 	}
1046 
1047 	di = (struct ocfs2_dinode *)di_bh->b_data;
1048 
1049 	down_read(&oi->ip_xattr_sem);
1050 	i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
1051 	if (i_ret < 0)
1052 		b_ret = 0;
1053 	else {
1054 		if (buffer) {
1055 			buffer += i_ret;
1056 			size -= i_ret;
1057 		}
1058 		b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
1059 					       buffer, size);
1060 		if (b_ret < 0)
1061 			i_ret = 0;
1062 	}
1063 	up_read(&oi->ip_xattr_sem);
1064 	ocfs2_inode_unlock(d_inode(dentry), 0);
1065 
1066 	brelse(di_bh);
1067 
1068 	return i_ret + b_ret;
1069 }
1070 
1071 static int ocfs2_xattr_find_entry(int name_index,
1072 				  const char *name,
1073 				  struct ocfs2_xattr_search *xs)
1074 {
1075 	struct ocfs2_xattr_entry *entry;
1076 	size_t name_len;
1077 	int i, cmp = 1;
1078 
1079 	if (name == NULL)
1080 		return -EINVAL;
1081 
1082 	name_len = strlen(name);
1083 	entry = xs->here;
1084 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1085 		cmp = name_index - ocfs2_xattr_get_type(entry);
1086 		if (!cmp)
1087 			cmp = name_len - entry->xe_name_len;
1088 		if (!cmp)
1089 			cmp = memcmp(name, (xs->base +
1090 				     le16_to_cpu(entry->xe_name_offset)),
1091 				     name_len);
1092 		if (cmp == 0)
1093 			break;
1094 		entry += 1;
1095 	}
1096 	xs->here = entry;
1097 
1098 	return cmp ? -ENODATA : 0;
1099 }
1100 
1101 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1102 					 struct ocfs2_xattr_value_root *xv,
1103 					 void *buffer,
1104 					 size_t len)
1105 {
1106 	u32 cpos, p_cluster, num_clusters, bpc, clusters;
1107 	u64 blkno;
1108 	int i, ret = 0;
1109 	size_t cplen, blocksize;
1110 	struct buffer_head *bh = NULL;
1111 	struct ocfs2_extent_list *el;
1112 
1113 	el = &xv->xr_list;
1114 	clusters = le32_to_cpu(xv->xr_clusters);
1115 	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1116 	blocksize = inode->i_sb->s_blocksize;
1117 
1118 	cpos = 0;
1119 	while (cpos < clusters) {
1120 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1121 					       &num_clusters, el, NULL);
1122 		if (ret) {
1123 			mlog_errno(ret);
1124 			goto out;
1125 		}
1126 
1127 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1128 		/* Copy ocfs2_xattr_value */
1129 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1130 			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1131 					       &bh, NULL);
1132 			if (ret) {
1133 				mlog_errno(ret);
1134 				goto out;
1135 			}
1136 
1137 			cplen = len >= blocksize ? blocksize : len;
1138 			memcpy(buffer, bh->b_data, cplen);
1139 			len -= cplen;
1140 			buffer += cplen;
1141 
1142 			brelse(bh);
1143 			bh = NULL;
1144 			if (len == 0)
1145 				break;
1146 		}
1147 		cpos += num_clusters;
1148 	}
1149 out:
1150 	return ret;
1151 }
1152 
1153 static int ocfs2_xattr_ibody_get(struct inode *inode,
1154 				 int name_index,
1155 				 const char *name,
1156 				 void *buffer,
1157 				 size_t buffer_size,
1158 				 struct ocfs2_xattr_search *xs)
1159 {
1160 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1161 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1162 	struct ocfs2_xattr_value_root *xv;
1163 	size_t size;
1164 	int ret = 0;
1165 
1166 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1167 		return -ENODATA;
1168 
1169 	xs->end = (void *)di + inode->i_sb->s_blocksize;
1170 	xs->header = (struct ocfs2_xattr_header *)
1171 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
1172 	xs->base = (void *)xs->header;
1173 	xs->here = xs->header->xh_entries;
1174 
1175 	ret = ocfs2_xattr_find_entry(name_index, name, xs);
1176 	if (ret)
1177 		return ret;
1178 	size = le64_to_cpu(xs->here->xe_value_size);
1179 	if (buffer) {
1180 		if (size > buffer_size)
1181 			return -ERANGE;
1182 		if (ocfs2_xattr_is_local(xs->here)) {
1183 			memcpy(buffer, (void *)xs->base +
1184 			       le16_to_cpu(xs->here->xe_name_offset) +
1185 			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1186 		} else {
1187 			xv = (struct ocfs2_xattr_value_root *)
1188 				(xs->base + le16_to_cpu(
1189 				 xs->here->xe_name_offset) +
1190 				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1191 			ret = ocfs2_xattr_get_value_outside(inode, xv,
1192 							    buffer, size);
1193 			if (ret < 0) {
1194 				mlog_errno(ret);
1195 				return ret;
1196 			}
1197 		}
1198 	}
1199 
1200 	return size;
1201 }
1202 
1203 static int ocfs2_xattr_block_get(struct inode *inode,
1204 				 int name_index,
1205 				 const char *name,
1206 				 void *buffer,
1207 				 size_t buffer_size,
1208 				 struct ocfs2_xattr_search *xs)
1209 {
1210 	struct ocfs2_xattr_block *xb;
1211 	struct ocfs2_xattr_value_root *xv;
1212 	size_t size;
1213 	int ret = -ENODATA, name_offset, name_len, i;
1214 	int uninitialized_var(block_off);
1215 
1216 	xs->bucket = ocfs2_xattr_bucket_new(inode);
1217 	if (!xs->bucket) {
1218 		ret = -ENOMEM;
1219 		mlog_errno(ret);
1220 		goto cleanup;
1221 	}
1222 
1223 	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1224 	if (ret) {
1225 		mlog_errno(ret);
1226 		goto cleanup;
1227 	}
1228 
1229 	if (xs->not_found) {
1230 		ret = -ENODATA;
1231 		goto cleanup;
1232 	}
1233 
1234 	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1235 	size = le64_to_cpu(xs->here->xe_value_size);
1236 	if (buffer) {
1237 		ret = -ERANGE;
1238 		if (size > buffer_size)
1239 			goto cleanup;
1240 
1241 		name_offset = le16_to_cpu(xs->here->xe_name_offset);
1242 		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1243 		i = xs->here - xs->header->xh_entries;
1244 
1245 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1246 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1247 								bucket_xh(xs->bucket),
1248 								i,
1249 								&block_off,
1250 								&name_offset);
1251 			if (ret) {
1252 				mlog_errno(ret);
1253 				goto cleanup;
1254 			}
1255 			xs->base = bucket_block(xs->bucket, block_off);
1256 		}
1257 		if (ocfs2_xattr_is_local(xs->here)) {
1258 			memcpy(buffer, (void *)xs->base +
1259 			       name_offset + name_len, size);
1260 		} else {
1261 			xv = (struct ocfs2_xattr_value_root *)
1262 				(xs->base + name_offset + name_len);
1263 			ret = ocfs2_xattr_get_value_outside(inode, xv,
1264 							    buffer, size);
1265 			if (ret < 0) {
1266 				mlog_errno(ret);
1267 				goto cleanup;
1268 			}
1269 		}
1270 	}
1271 	ret = size;
1272 cleanup:
1273 	ocfs2_xattr_bucket_free(xs->bucket);
1274 
1275 	brelse(xs->xattr_bh);
1276 	xs->xattr_bh = NULL;
1277 	return ret;
1278 }
1279 
1280 int ocfs2_xattr_get_nolock(struct inode *inode,
1281 			   struct buffer_head *di_bh,
1282 			   int name_index,
1283 			   const char *name,
1284 			   void *buffer,
1285 			   size_t buffer_size)
1286 {
1287 	int ret;
1288 	struct ocfs2_dinode *di = NULL;
1289 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1290 	struct ocfs2_xattr_search xis = {
1291 		.not_found = -ENODATA,
1292 	};
1293 	struct ocfs2_xattr_search xbs = {
1294 		.not_found = -ENODATA,
1295 	};
1296 
1297 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1298 		return -EOPNOTSUPP;
1299 
1300 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1301 		return -ENODATA;
1302 
1303 	xis.inode_bh = xbs.inode_bh = di_bh;
1304 	di = (struct ocfs2_dinode *)di_bh->b_data;
1305 
1306 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1307 				    buffer_size, &xis);
1308 	if (ret == -ENODATA && di->i_xattr_loc)
1309 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1310 					    buffer_size, &xbs);
1311 
1312 	return ret;
1313 }
1314 
1315 /* ocfs2_xattr_get()
1316  *
1317  * Copy an extended attribute into the buffer provided.
1318  * Buffer is NULL to compute the size of buffer required.
1319  */
1320 static int ocfs2_xattr_get(struct inode *inode,
1321 			   int name_index,
1322 			   const char *name,
1323 			   void *buffer,
1324 			   size_t buffer_size)
1325 {
1326 	int ret, had_lock;
1327 	struct buffer_head *di_bh = NULL;
1328 	struct ocfs2_lock_holder oh;
1329 
1330 	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
1331 	if (had_lock < 0) {
1332 		mlog_errno(had_lock);
1333 		return had_lock;
1334 	}
1335 	down_read(&OCFS2_I(inode)->ip_xattr_sem);
1336 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1337 				     name, buffer, buffer_size);
1338 	up_read(&OCFS2_I(inode)->ip_xattr_sem);
1339 
1340 	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
1341 
1342 	brelse(di_bh);
1343 
1344 	return ret;
1345 }
1346 
1347 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1348 					   handle_t *handle,
1349 					   struct ocfs2_xattr_value_buf *vb,
1350 					   const void *value,
1351 					   int value_len)
1352 {
1353 	int ret = 0, i, cp_len;
1354 	u16 blocksize = inode->i_sb->s_blocksize;
1355 	u32 p_cluster, num_clusters;
1356 	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1357 	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1358 	u64 blkno;
1359 	struct buffer_head *bh = NULL;
1360 	unsigned int ext_flags;
1361 	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1362 
1363 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1364 
1365 	while (cpos < clusters) {
1366 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1367 					       &num_clusters, &xv->xr_list,
1368 					       &ext_flags);
1369 		if (ret) {
1370 			mlog_errno(ret);
1371 			goto out;
1372 		}
1373 
1374 		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1375 
1376 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1377 
1378 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1379 			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1380 					       &bh, NULL);
1381 			if (ret) {
1382 				mlog_errno(ret);
1383 				goto out;
1384 			}
1385 
1386 			ret = ocfs2_journal_access(handle,
1387 						   INODE_CACHE(inode),
1388 						   bh,
1389 						   OCFS2_JOURNAL_ACCESS_WRITE);
1390 			if (ret < 0) {
1391 				mlog_errno(ret);
1392 				goto out;
1393 			}
1394 
1395 			cp_len = value_len > blocksize ? blocksize : value_len;
1396 			memcpy(bh->b_data, value, cp_len);
1397 			value_len -= cp_len;
1398 			value += cp_len;
1399 			if (cp_len < blocksize)
1400 				memset(bh->b_data + cp_len, 0,
1401 				       blocksize - cp_len);
1402 
1403 			ocfs2_journal_dirty(handle, bh);
1404 			brelse(bh);
1405 			bh = NULL;
1406 
1407 			/*
1408 			 * XXX: do we need to empty all the following
1409 			 * blocks in this cluster?
1410 			 */
1411 			if (!value_len)
1412 				break;
1413 		}
1414 		cpos += num_clusters;
1415 	}
1416 out:
1417 	brelse(bh);
1418 
1419 	return ret;
1420 }
1421 
1422 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1423 				       int num_entries)
1424 {
1425 	int free_space;
1426 
1427 	if (!needed_space)
1428 		return 0;
1429 
1430 	free_space = free_start -
1431 		sizeof(struct ocfs2_xattr_header) -
1432 		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
1433 		OCFS2_XATTR_HEADER_GAP;
1434 	if (free_space < 0)
1435 		return -EIO;
1436 	if (free_space < needed_space)
1437 		return -ENOSPC;
1438 
1439 	return 0;
1440 }
1441 
1442 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1443 				   int type)
1444 {
1445 	return loc->xl_ops->xlo_journal_access(handle, loc, type);
1446 }
1447 
1448 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1449 {
1450 	loc->xl_ops->xlo_journal_dirty(handle, loc);
1451 }
1452 
1453 /* Give a pointer into the storage for the given offset */
1454 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1455 {
1456 	BUG_ON(offset >= loc->xl_size);
1457 	return loc->xl_ops->xlo_offset_pointer(loc, offset);
1458 }
1459 
1460 /*
1461  * Wipe the name+value pair and allow the storage to reclaim it.  This
1462  * must be followed by either removal of the entry or a call to
1463  * ocfs2_xa_add_namevalue().
1464  */
1465 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1466 {
1467 	loc->xl_ops->xlo_wipe_namevalue(loc);
1468 }
1469 
1470 /*
1471  * Find lowest offset to a name+value pair.  This is the start of our
1472  * downward-growing free space.
1473  */
1474 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1475 {
1476 	return loc->xl_ops->xlo_get_free_start(loc);
1477 }
1478 
1479 /* Can we reuse loc->xl_entry for xi? */
1480 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1481 				    struct ocfs2_xattr_info *xi)
1482 {
1483 	return loc->xl_ops->xlo_can_reuse(loc, xi);
1484 }
1485 
1486 /* How much free space is needed to set the new value */
1487 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1488 				struct ocfs2_xattr_info *xi)
1489 {
1490 	return loc->xl_ops->xlo_check_space(loc, xi);
1491 }
1492 
1493 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1494 				   struct ocfs2_xattr_info *xi)
1495 {
1496 	int size = namevalue_size_xi(xi);
1497 	int nameval_offset;
1498 	char *nameval_buf;
1499 
1500 	loc->xl_ops->xlo_add_namevalue(loc, size);
1501 	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1502 	loc->xl_entry->xe_name_len = xi->xi_name_len;
1503 	ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1504 	ocfs2_xattr_set_local(loc->xl_entry,
1505 			      xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1506 
1507 	nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1508 	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1509 	memset(nameval_buf, 0, size);
1510 	memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1511 }
1512 
1513 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1514 				    struct ocfs2_xattr_value_buf *vb)
1515 {
1516 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1517 	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1518 
1519 	/* Value bufs are for value trees */
1520 	BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1521 	BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1522 	       (name_size + OCFS2_XATTR_ROOT_SIZE));
1523 
1524 	loc->xl_ops->xlo_fill_value_buf(loc, vb);
1525 	vb->vb_xv =
1526 		(struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1527 							nameval_offset +
1528 							name_size);
1529 }
1530 
1531 static int ocfs2_xa_block_journal_access(handle_t *handle,
1532 					 struct ocfs2_xa_loc *loc, int type)
1533 {
1534 	struct buffer_head *bh = loc->xl_storage;
1535 	ocfs2_journal_access_func access;
1536 
1537 	if (loc->xl_size == (bh->b_size -
1538 			     offsetof(struct ocfs2_xattr_block,
1539 				      xb_attrs.xb_header)))
1540 		access = ocfs2_journal_access_xb;
1541 	else
1542 		access = ocfs2_journal_access_di;
1543 	return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1544 }
1545 
1546 static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1547 					 struct ocfs2_xa_loc *loc)
1548 {
1549 	struct buffer_head *bh = loc->xl_storage;
1550 
1551 	ocfs2_journal_dirty(handle, bh);
1552 }
1553 
1554 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1555 					   int offset)
1556 {
1557 	return (char *)loc->xl_header + offset;
1558 }
1559 
1560 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1561 				    struct ocfs2_xattr_info *xi)
1562 {
1563 	/*
1564 	 * Block storage is strict.  If the sizes aren't exact, we will
1565 	 * remove the old one and reinsert the new.
1566 	 */
1567 	return namevalue_size_xe(loc->xl_entry) ==
1568 		namevalue_size_xi(xi);
1569 }
1570 
1571 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1572 {
1573 	struct ocfs2_xattr_header *xh = loc->xl_header;
1574 	int i, count = le16_to_cpu(xh->xh_count);
1575 	int offset, free_start = loc->xl_size;
1576 
1577 	for (i = 0; i < count; i++) {
1578 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1579 		if (offset < free_start)
1580 			free_start = offset;
1581 	}
1582 
1583 	return free_start;
1584 }
1585 
1586 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1587 				      struct ocfs2_xattr_info *xi)
1588 {
1589 	int count = le16_to_cpu(loc->xl_header->xh_count);
1590 	int free_start = ocfs2_xa_get_free_start(loc);
1591 	int needed_space = ocfs2_xi_entry_usage(xi);
1592 
1593 	/*
1594 	 * Block storage will reclaim the original entry before inserting
1595 	 * the new value, so we only need the difference.  If the new
1596 	 * entry is smaller than the old one, we don't need anything.
1597 	 */
1598 	if (loc->xl_entry) {
1599 		/* Don't need space if we're reusing! */
1600 		if (ocfs2_xa_can_reuse_entry(loc, xi))
1601 			needed_space = 0;
1602 		else
1603 			needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1604 	}
1605 	if (needed_space < 0)
1606 		needed_space = 0;
1607 	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1608 }
1609 
1610 /*
1611  * Block storage for xattrs keeps the name+value pairs compacted.  When
1612  * we remove one, we have to shift any that preceded it towards the end.
1613  */
1614 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1615 {
1616 	int i, offset;
1617 	int namevalue_offset, first_namevalue_offset, namevalue_size;
1618 	struct ocfs2_xattr_entry *entry = loc->xl_entry;
1619 	struct ocfs2_xattr_header *xh = loc->xl_header;
1620 	int count = le16_to_cpu(xh->xh_count);
1621 
1622 	namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1623 	namevalue_size = namevalue_size_xe(entry);
1624 	first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1625 
1626 	/* Shift the name+value pairs */
1627 	memmove((char *)xh + first_namevalue_offset + namevalue_size,
1628 		(char *)xh + first_namevalue_offset,
1629 		namevalue_offset - first_namevalue_offset);
1630 	memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1631 
1632 	/* Now tell xh->xh_entries about it */
1633 	for (i = 0; i < count; i++) {
1634 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1635 		if (offset <= namevalue_offset)
1636 			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1637 				     namevalue_size);
1638 	}
1639 
1640 	/*
1641 	 * Note that we don't update xh_free_start or xh_name_value_len
1642 	 * because they're not used in block-stored xattrs.
1643 	 */
1644 }
1645 
1646 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1647 {
1648 	int count = le16_to_cpu(loc->xl_header->xh_count);
1649 	loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1650 	le16_add_cpu(&loc->xl_header->xh_count, 1);
1651 	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1652 }
1653 
1654 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1655 {
1656 	int free_start = ocfs2_xa_get_free_start(loc);
1657 
1658 	loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1659 }
1660 
1661 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1662 					  struct ocfs2_xattr_value_buf *vb)
1663 {
1664 	struct buffer_head *bh = loc->xl_storage;
1665 
1666 	if (loc->xl_size == (bh->b_size -
1667 			     offsetof(struct ocfs2_xattr_block,
1668 				      xb_attrs.xb_header)))
1669 		vb->vb_access = ocfs2_journal_access_xb;
1670 	else
1671 		vb->vb_access = ocfs2_journal_access_di;
1672 	vb->vb_bh = bh;
1673 }
1674 
1675 /*
1676  * Operations for xattrs stored in blocks.  This includes inline inode
1677  * storage and unindexed ocfs2_xattr_blocks.
1678  */
1679 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1680 	.xlo_journal_access	= ocfs2_xa_block_journal_access,
1681 	.xlo_journal_dirty	= ocfs2_xa_block_journal_dirty,
1682 	.xlo_offset_pointer	= ocfs2_xa_block_offset_pointer,
1683 	.xlo_check_space	= ocfs2_xa_block_check_space,
1684 	.xlo_can_reuse		= ocfs2_xa_block_can_reuse,
1685 	.xlo_get_free_start	= ocfs2_xa_block_get_free_start,
1686 	.xlo_wipe_namevalue	= ocfs2_xa_block_wipe_namevalue,
1687 	.xlo_add_entry		= ocfs2_xa_block_add_entry,
1688 	.xlo_add_namevalue	= ocfs2_xa_block_add_namevalue,
1689 	.xlo_fill_value_buf	= ocfs2_xa_block_fill_value_buf,
1690 };
1691 
1692 static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1693 					  struct ocfs2_xa_loc *loc, int type)
1694 {
1695 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1696 
1697 	return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1698 }
1699 
1700 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1701 					  struct ocfs2_xa_loc *loc)
1702 {
1703 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1704 
1705 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1706 }
1707 
1708 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1709 					    int offset)
1710 {
1711 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1712 	int block, block_offset;
1713 
1714 	/* The header is at the front of the bucket */
1715 	block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1716 	block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1717 
1718 	return bucket_block(bucket, block) + block_offset;
1719 }
1720 
1721 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1722 				     struct ocfs2_xattr_info *xi)
1723 {
1724 	return namevalue_size_xe(loc->xl_entry) >=
1725 		namevalue_size_xi(xi);
1726 }
1727 
1728 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1729 {
1730 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1731 	return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1732 }
1733 
1734 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1735 					 int free_start, int size)
1736 {
1737 	/*
1738 	 * We need to make sure that the name+value pair fits within
1739 	 * one block.
1740 	 */
1741 	if (((free_start - size) >> sb->s_blocksize_bits) !=
1742 	    ((free_start - 1) >> sb->s_blocksize_bits))
1743 		free_start -= free_start % sb->s_blocksize;
1744 
1745 	return free_start;
1746 }
1747 
1748 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1749 				       struct ocfs2_xattr_info *xi)
1750 {
1751 	int rc;
1752 	int count = le16_to_cpu(loc->xl_header->xh_count);
1753 	int free_start = ocfs2_xa_get_free_start(loc);
1754 	int needed_space = ocfs2_xi_entry_usage(xi);
1755 	int size = namevalue_size_xi(xi);
1756 	struct super_block *sb = loc->xl_inode->i_sb;
1757 
1758 	/*
1759 	 * Bucket storage does not reclaim name+value pairs it cannot
1760 	 * reuse.  They live as holes until the bucket fills, and then
1761 	 * the bucket is defragmented.  However, the bucket can reclaim
1762 	 * the ocfs2_xattr_entry.
1763 	 */
1764 	if (loc->xl_entry) {
1765 		/* Don't need space if we're reusing! */
1766 		if (ocfs2_xa_can_reuse_entry(loc, xi))
1767 			needed_space = 0;
1768 		else
1769 			needed_space -= sizeof(struct ocfs2_xattr_entry);
1770 	}
1771 	BUG_ON(needed_space < 0);
1772 
1773 	if (free_start < size) {
1774 		if (needed_space)
1775 			return -ENOSPC;
1776 	} else {
1777 		/*
1778 		 * First we check if it would fit in the first place.
1779 		 * Below, we align the free start to a block.  This may
1780 		 * slide us below the minimum gap.  By checking unaligned
1781 		 * first, we avoid that error.
1782 		 */
1783 		rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1784 						 count);
1785 		if (rc)
1786 			return rc;
1787 		free_start = ocfs2_bucket_align_free_start(sb, free_start,
1788 							   size);
1789 	}
1790 	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1791 }
1792 
1793 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1794 {
1795 	le16_add_cpu(&loc->xl_header->xh_name_value_len,
1796 		     -namevalue_size_xe(loc->xl_entry));
1797 }
1798 
1799 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1800 {
1801 	struct ocfs2_xattr_header *xh = loc->xl_header;
1802 	int count = le16_to_cpu(xh->xh_count);
1803 	int low = 0, high = count - 1, tmp;
1804 	struct ocfs2_xattr_entry *tmp_xe;
1805 
1806 	/*
1807 	 * We keep buckets sorted by name_hash, so we need to find
1808 	 * our insert place.
1809 	 */
1810 	while (low <= high && count) {
1811 		tmp = (low + high) / 2;
1812 		tmp_xe = &xh->xh_entries[tmp];
1813 
1814 		if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1815 			low = tmp + 1;
1816 		else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1817 			high = tmp - 1;
1818 		else {
1819 			low = tmp;
1820 			break;
1821 		}
1822 	}
1823 
1824 	if (low != count)
1825 		memmove(&xh->xh_entries[low + 1],
1826 			&xh->xh_entries[low],
1827 			((count - low) * sizeof(struct ocfs2_xattr_entry)));
1828 
1829 	le16_add_cpu(&xh->xh_count, 1);
1830 	loc->xl_entry = &xh->xh_entries[low];
1831 	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1832 }
1833 
1834 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1835 {
1836 	int free_start = ocfs2_xa_get_free_start(loc);
1837 	struct ocfs2_xattr_header *xh = loc->xl_header;
1838 	struct super_block *sb = loc->xl_inode->i_sb;
1839 	int nameval_offset;
1840 
1841 	free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1842 	nameval_offset = free_start - size;
1843 	loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1844 	xh->xh_free_start = cpu_to_le16(nameval_offset);
1845 	le16_add_cpu(&xh->xh_name_value_len, size);
1846 
1847 }
1848 
1849 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1850 					   struct ocfs2_xattr_value_buf *vb)
1851 {
1852 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1853 	struct super_block *sb = loc->xl_inode->i_sb;
1854 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1855 	int size = namevalue_size_xe(loc->xl_entry);
1856 	int block_offset = nameval_offset >> sb->s_blocksize_bits;
1857 
1858 	/* Values are not allowed to straddle block boundaries */
1859 	BUG_ON(block_offset !=
1860 	       ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1861 	/* We expect the bucket to be filled in */
1862 	BUG_ON(!bucket->bu_bhs[block_offset]);
1863 
1864 	vb->vb_access = ocfs2_journal_access;
1865 	vb->vb_bh = bucket->bu_bhs[block_offset];
1866 }
1867 
1868 /* Operations for xattrs stored in buckets. */
1869 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1870 	.xlo_journal_access	= ocfs2_xa_bucket_journal_access,
1871 	.xlo_journal_dirty	= ocfs2_xa_bucket_journal_dirty,
1872 	.xlo_offset_pointer	= ocfs2_xa_bucket_offset_pointer,
1873 	.xlo_check_space	= ocfs2_xa_bucket_check_space,
1874 	.xlo_can_reuse		= ocfs2_xa_bucket_can_reuse,
1875 	.xlo_get_free_start	= ocfs2_xa_bucket_get_free_start,
1876 	.xlo_wipe_namevalue	= ocfs2_xa_bucket_wipe_namevalue,
1877 	.xlo_add_entry		= ocfs2_xa_bucket_add_entry,
1878 	.xlo_add_namevalue	= ocfs2_xa_bucket_add_namevalue,
1879 	.xlo_fill_value_buf	= ocfs2_xa_bucket_fill_value_buf,
1880 };
1881 
1882 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
1883 {
1884 	struct ocfs2_xattr_value_buf vb;
1885 
1886 	if (ocfs2_xattr_is_local(loc->xl_entry))
1887 		return 0;
1888 
1889 	ocfs2_xa_fill_value_buf(loc, &vb);
1890 	return le32_to_cpu(vb.vb_xv->xr_clusters);
1891 }
1892 
1893 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1894 				   struct ocfs2_xattr_set_ctxt *ctxt)
1895 {
1896 	int trunc_rc, access_rc;
1897 	struct ocfs2_xattr_value_buf vb;
1898 
1899 	ocfs2_xa_fill_value_buf(loc, &vb);
1900 	trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1901 					      ctxt);
1902 
1903 	/*
1904 	 * The caller of ocfs2_xa_value_truncate() has already called
1905 	 * ocfs2_xa_journal_access on the loc.  However, The truncate code
1906 	 * calls ocfs2_extend_trans().  This may commit the previous
1907 	 * transaction and open a new one.  If this is a bucket, truncate
1908 	 * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1909 	 * the caller is expecting to dirty the entire bucket.  So we must
1910 	 * reset the journal work.  We do this even if truncate has failed,
1911 	 * as it could have failed after committing the extend.
1912 	 */
1913 	access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1914 					    OCFS2_JOURNAL_ACCESS_WRITE);
1915 
1916 	/* Errors in truncate take precedence */
1917 	return trunc_rc ? trunc_rc : access_rc;
1918 }
1919 
1920 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1921 {
1922 	int index, count;
1923 	struct ocfs2_xattr_header *xh = loc->xl_header;
1924 	struct ocfs2_xattr_entry *entry = loc->xl_entry;
1925 
1926 	ocfs2_xa_wipe_namevalue(loc);
1927 	loc->xl_entry = NULL;
1928 
1929 	le16_add_cpu(&xh->xh_count, -1);
1930 	count = le16_to_cpu(xh->xh_count);
1931 
1932 	/*
1933 	 * Only zero out the entry if there are more remaining.  This is
1934 	 * important for an empty bucket, as it keeps track of the
1935 	 * bucket's hash value.  It doesn't hurt empty block storage.
1936 	 */
1937 	if (count) {
1938 		index = ((char *)entry - (char *)&xh->xh_entries) /
1939 			sizeof(struct ocfs2_xattr_entry);
1940 		memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1941 			(count - index) * sizeof(struct ocfs2_xattr_entry));
1942 		memset(&xh->xh_entries[count], 0,
1943 		       sizeof(struct ocfs2_xattr_entry));
1944 	}
1945 }
1946 
1947 /*
1948  * If we have a problem adjusting the size of an external value during
1949  * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1950  * in an intermediate state.  For example, the value may be partially
1951  * truncated.
1952  *
1953  * If the value tree hasn't changed, the extend/truncate went nowhere.
1954  * We have nothing to do.  The caller can treat it as a straight error.
1955  *
1956  * If the value tree got partially truncated, we now have a corrupted
1957  * extended attribute.  We're going to wipe its entry and leak the
1958  * clusters.  Better to leak some storage than leave a corrupt entry.
1959  *
1960  * If the value tree grew, it obviously didn't grow enough for the
1961  * new entry.  We're not going to try and reclaim those clusters either.
1962  * If there was already an external value there (orig_clusters != 0),
1963  * the new clusters are attached safely and we can just leave the old
1964  * value in place.  If there was no external value there, we remove
1965  * the entry.
1966  *
1967  * This way, the xattr block we store in the journal will be consistent.
1968  * If the size change broke because of the journal, no changes will hit
1969  * disk anyway.
1970  */
1971 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
1972 					    const char *what,
1973 					    unsigned int orig_clusters)
1974 {
1975 	unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
1976 	char *nameval_buf = ocfs2_xa_offset_pointer(loc,
1977 				le16_to_cpu(loc->xl_entry->xe_name_offset));
1978 
1979 	if (new_clusters < orig_clusters) {
1980 		mlog(ML_ERROR,
1981 		     "Partial truncate while %s xattr %.*s.  Leaking "
1982 		     "%u clusters and removing the entry\n",
1983 		     what, loc->xl_entry->xe_name_len, nameval_buf,
1984 		     orig_clusters - new_clusters);
1985 		ocfs2_xa_remove_entry(loc);
1986 	} else if (!orig_clusters) {
1987 		mlog(ML_ERROR,
1988 		     "Unable to allocate an external value for xattr "
1989 		     "%.*s safely.  Leaking %u clusters and removing the "
1990 		     "entry\n",
1991 		     loc->xl_entry->xe_name_len, nameval_buf,
1992 		     new_clusters - orig_clusters);
1993 		ocfs2_xa_remove_entry(loc);
1994 	} else if (new_clusters > orig_clusters)
1995 		mlog(ML_ERROR,
1996 		     "Unable to grow xattr %.*s safely.  %u new clusters "
1997 		     "have been added, but the value will not be "
1998 		     "modified\n",
1999 		     loc->xl_entry->xe_name_len, nameval_buf,
2000 		     new_clusters - orig_clusters);
2001 }
2002 
2003 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
2004 			   struct ocfs2_xattr_set_ctxt *ctxt)
2005 {
2006 	int rc = 0;
2007 	unsigned int orig_clusters;
2008 
2009 	if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2010 		orig_clusters = ocfs2_xa_value_clusters(loc);
2011 		rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2012 		if (rc) {
2013 			mlog_errno(rc);
2014 			/*
2015 			 * Since this is remove, we can return 0 if
2016 			 * ocfs2_xa_cleanup_value_truncate() is going to
2017 			 * wipe the entry anyway.  So we check the
2018 			 * cluster count as well.
2019 			 */
2020 			if (orig_clusters != ocfs2_xa_value_clusters(loc))
2021 				rc = 0;
2022 			ocfs2_xa_cleanup_value_truncate(loc, "removing",
2023 							orig_clusters);
2024 			if (rc)
2025 				goto out;
2026 		}
2027 	}
2028 
2029 	ocfs2_xa_remove_entry(loc);
2030 
2031 out:
2032 	return rc;
2033 }
2034 
2035 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2036 {
2037 	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2038 	char *nameval_buf;
2039 
2040 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2041 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2042 	memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2043 }
2044 
2045 /*
2046  * Take an existing entry and make it ready for the new value.  This
2047  * won't allocate space, but it may free space.  It should be ready for
2048  * ocfs2_xa_prepare_entry() to finish the work.
2049  */
2050 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2051 				struct ocfs2_xattr_info *xi,
2052 				struct ocfs2_xattr_set_ctxt *ctxt)
2053 {
2054 	int rc = 0;
2055 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2056 	unsigned int orig_clusters;
2057 	char *nameval_buf;
2058 	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2059 	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2060 
2061 	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2062 	       name_size);
2063 
2064 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2065 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2066 	if (xe_local) {
2067 		memset(nameval_buf + name_size, 0,
2068 		       namevalue_size_xe(loc->xl_entry) - name_size);
2069 		if (!xi_local)
2070 			ocfs2_xa_install_value_root(loc);
2071 	} else {
2072 		orig_clusters = ocfs2_xa_value_clusters(loc);
2073 		if (xi_local) {
2074 			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2075 			if (rc < 0)
2076 				mlog_errno(rc);
2077 			else
2078 				memset(nameval_buf + name_size, 0,
2079 				       namevalue_size_xe(loc->xl_entry) -
2080 				       name_size);
2081 		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2082 			   xi->xi_value_len) {
2083 			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2084 						     ctxt);
2085 			if (rc < 0)
2086 				mlog_errno(rc);
2087 		}
2088 
2089 		if (rc) {
2090 			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2091 							orig_clusters);
2092 			goto out;
2093 		}
2094 	}
2095 
2096 	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2097 	ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2098 
2099 out:
2100 	return rc;
2101 }
2102 
2103 /*
2104  * Prepares loc->xl_entry to receive the new xattr.  This includes
2105  * properly setting up the name+value pair region.  If loc->xl_entry
2106  * already exists, it will take care of modifying it appropriately.
2107  *
2108  * Note that this modifies the data.  You did journal_access already,
2109  * right?
2110  */
2111 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2112 				  struct ocfs2_xattr_info *xi,
2113 				  u32 name_hash,
2114 				  struct ocfs2_xattr_set_ctxt *ctxt)
2115 {
2116 	int rc = 0;
2117 	unsigned int orig_clusters;
2118 	__le64 orig_value_size = 0;
2119 
2120 	rc = ocfs2_xa_check_space(loc, xi);
2121 	if (rc)
2122 		goto out;
2123 
2124 	if (!loc->xl_entry) {
2125 		rc = -EINVAL;
2126 		goto out;
2127 	}
2128 
2129 	if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2130 		orig_value_size = loc->xl_entry->xe_value_size;
2131 		rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2132 		if (rc)
2133 			goto out;
2134 		goto alloc_value;
2135 	}
2136 
2137 	if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2138 		orig_clusters = ocfs2_xa_value_clusters(loc);
2139 		rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2140 		if (rc) {
2141 			mlog_errno(rc);
2142 			ocfs2_xa_cleanup_value_truncate(loc,
2143 							"overwriting",
2144 							orig_clusters);
2145 			goto out;
2146 		}
2147 	}
2148 	ocfs2_xa_wipe_namevalue(loc);
2149 
2150 	/*
2151 	 * If we get here, we have a blank entry.  Fill it.  We grow our
2152 	 * name+value pair back from the end.
2153 	 */
2154 	ocfs2_xa_add_namevalue(loc, xi);
2155 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2156 		ocfs2_xa_install_value_root(loc);
2157 
2158 alloc_value:
2159 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2160 		orig_clusters = ocfs2_xa_value_clusters(loc);
2161 		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2162 		if (rc < 0) {
2163 			ctxt->set_abort = 1;
2164 			ocfs2_xa_cleanup_value_truncate(loc, "growing",
2165 							orig_clusters);
2166 			/*
2167 			 * If we were growing an existing value,
2168 			 * ocfs2_xa_cleanup_value_truncate() won't remove
2169 			 * the entry. We need to restore the original value
2170 			 * size.
2171 			 */
2172 			if (loc->xl_entry) {
2173 				BUG_ON(!orig_value_size);
2174 				loc->xl_entry->xe_value_size = orig_value_size;
2175 			}
2176 			mlog_errno(rc);
2177 		}
2178 	}
2179 
2180 out:
2181 	return rc;
2182 }
2183 
2184 /*
2185  * Store the value portion of the name+value pair.  This will skip
2186  * values that are stored externally.  Their tree roots were set up
2187  * by ocfs2_xa_prepare_entry().
2188  */
2189 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2190 				struct ocfs2_xattr_info *xi,
2191 				struct ocfs2_xattr_set_ctxt *ctxt)
2192 {
2193 	int rc = 0;
2194 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2195 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2196 	char *nameval_buf;
2197 	struct ocfs2_xattr_value_buf vb;
2198 
2199 	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2200 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2201 		ocfs2_xa_fill_value_buf(loc, &vb);
2202 		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2203 						     ctxt->handle, &vb,
2204 						     xi->xi_value,
2205 						     xi->xi_value_len);
2206 	} else
2207 		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2208 
2209 	return rc;
2210 }
2211 
2212 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2213 			struct ocfs2_xattr_info *xi,
2214 			struct ocfs2_xattr_set_ctxt *ctxt)
2215 {
2216 	int ret;
2217 	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2218 					      xi->xi_name_len);
2219 
2220 	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2221 				      OCFS2_JOURNAL_ACCESS_WRITE);
2222 	if (ret) {
2223 		mlog_errno(ret);
2224 		goto out;
2225 	}
2226 
2227 	/*
2228 	 * From here on out, everything is going to modify the buffer a
2229 	 * little.  Errors are going to leave the xattr header in a
2230 	 * sane state.  Thus, even with errors we dirty the sucker.
2231 	 */
2232 
2233 	/* Don't worry, we are never called with !xi_value and !xl_entry */
2234 	if (!xi->xi_value) {
2235 		ret = ocfs2_xa_remove(loc, ctxt);
2236 		goto out_dirty;
2237 	}
2238 
2239 	ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2240 	if (ret) {
2241 		if (ret != -ENOSPC)
2242 			mlog_errno(ret);
2243 		goto out_dirty;
2244 	}
2245 
2246 	ret = ocfs2_xa_store_value(loc, xi, ctxt);
2247 	if (ret)
2248 		mlog_errno(ret);
2249 
2250 out_dirty:
2251 	ocfs2_xa_journal_dirty(ctxt->handle, loc);
2252 
2253 out:
2254 	return ret;
2255 }
2256 
2257 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2258 				     struct inode *inode,
2259 				     struct buffer_head *bh,
2260 				     struct ocfs2_xattr_entry *entry)
2261 {
2262 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2263 
2264 	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2265 
2266 	loc->xl_inode = inode;
2267 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2268 	loc->xl_storage = bh;
2269 	loc->xl_entry = entry;
2270 	loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2271 	loc->xl_header =
2272 		(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2273 					      loc->xl_size);
2274 }
2275 
2276 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2277 					  struct inode *inode,
2278 					  struct buffer_head *bh,
2279 					  struct ocfs2_xattr_entry *entry)
2280 {
2281 	struct ocfs2_xattr_block *xb =
2282 		(struct ocfs2_xattr_block *)bh->b_data;
2283 
2284 	BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2285 
2286 	loc->xl_inode = inode;
2287 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2288 	loc->xl_storage = bh;
2289 	loc->xl_header = &(xb->xb_attrs.xb_header);
2290 	loc->xl_entry = entry;
2291 	loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2292 					     xb_attrs.xb_header);
2293 }
2294 
2295 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2296 					   struct ocfs2_xattr_bucket *bucket,
2297 					   struct ocfs2_xattr_entry *entry)
2298 {
2299 	loc->xl_inode = bucket->bu_inode;
2300 	loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2301 	loc->xl_storage = bucket;
2302 	loc->xl_header = bucket_xh(bucket);
2303 	loc->xl_entry = entry;
2304 	loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2305 }
2306 
2307 /*
2308  * In xattr remove, if it is stored outside and refcounted, we may have
2309  * the chance to split the refcount tree. So need the allocators.
2310  */
2311 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2312 					struct ocfs2_xattr_value_root *xv,
2313 					struct ocfs2_caching_info *ref_ci,
2314 					struct buffer_head *ref_root_bh,
2315 					struct ocfs2_alloc_context **meta_ac,
2316 					int *ref_credits)
2317 {
2318 	int ret, meta_add = 0;
2319 	u32 p_cluster, num_clusters;
2320 	unsigned int ext_flags;
2321 
2322 	*ref_credits = 0;
2323 	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2324 				       &num_clusters,
2325 				       &xv->xr_list,
2326 				       &ext_flags);
2327 	if (ret) {
2328 		mlog_errno(ret);
2329 		goto out;
2330 	}
2331 
2332 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2333 		goto out;
2334 
2335 	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2336 						 ref_root_bh, xv,
2337 						 &meta_add, ref_credits);
2338 	if (ret) {
2339 		mlog_errno(ret);
2340 		goto out;
2341 	}
2342 
2343 	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2344 						meta_add, meta_ac);
2345 	if (ret)
2346 		mlog_errno(ret);
2347 
2348 out:
2349 	return ret;
2350 }
2351 
2352 static int ocfs2_remove_value_outside(struct inode*inode,
2353 				      struct ocfs2_xattr_value_buf *vb,
2354 				      struct ocfs2_xattr_header *header,
2355 				      struct ocfs2_caching_info *ref_ci,
2356 				      struct buffer_head *ref_root_bh)
2357 {
2358 	int ret = 0, i, ref_credits;
2359 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2360 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2361 	void *val;
2362 
2363 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2364 
2365 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2366 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2367 
2368 		if (ocfs2_xattr_is_local(entry))
2369 			continue;
2370 
2371 		val = (void *)header +
2372 			le16_to_cpu(entry->xe_name_offset);
2373 		vb->vb_xv = (struct ocfs2_xattr_value_root *)
2374 			(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2375 
2376 		ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2377 							 ref_ci, ref_root_bh,
2378 							 &ctxt.meta_ac,
2379 							 &ref_credits);
2380 
2381 		ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2382 					ocfs2_remove_extent_credits(osb->sb));
2383 		if (IS_ERR(ctxt.handle)) {
2384 			ret = PTR_ERR(ctxt.handle);
2385 			mlog_errno(ret);
2386 			break;
2387 		}
2388 
2389 		ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2390 
2391 		ocfs2_commit_trans(osb, ctxt.handle);
2392 		if (ctxt.meta_ac) {
2393 			ocfs2_free_alloc_context(ctxt.meta_ac);
2394 			ctxt.meta_ac = NULL;
2395 		}
2396 
2397 		if (ret < 0) {
2398 			mlog_errno(ret);
2399 			break;
2400 		}
2401 
2402 	}
2403 
2404 	if (ctxt.meta_ac)
2405 		ocfs2_free_alloc_context(ctxt.meta_ac);
2406 	ocfs2_schedule_truncate_log_flush(osb, 1);
2407 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
2408 	return ret;
2409 }
2410 
2411 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2412 				    struct buffer_head *di_bh,
2413 				    struct ocfs2_caching_info *ref_ci,
2414 				    struct buffer_head *ref_root_bh)
2415 {
2416 
2417 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2418 	struct ocfs2_xattr_header *header;
2419 	int ret;
2420 	struct ocfs2_xattr_value_buf vb = {
2421 		.vb_bh = di_bh,
2422 		.vb_access = ocfs2_journal_access_di,
2423 	};
2424 
2425 	header = (struct ocfs2_xattr_header *)
2426 		 ((void *)di + inode->i_sb->s_blocksize -
2427 		 le16_to_cpu(di->i_xattr_inline_size));
2428 
2429 	ret = ocfs2_remove_value_outside(inode, &vb, header,
2430 					 ref_ci, ref_root_bh);
2431 
2432 	return ret;
2433 }
2434 
2435 struct ocfs2_rm_xattr_bucket_para {
2436 	struct ocfs2_caching_info *ref_ci;
2437 	struct buffer_head *ref_root_bh;
2438 };
2439 
2440 static int ocfs2_xattr_block_remove(struct inode *inode,
2441 				    struct buffer_head *blk_bh,
2442 				    struct ocfs2_caching_info *ref_ci,
2443 				    struct buffer_head *ref_root_bh)
2444 {
2445 	struct ocfs2_xattr_block *xb;
2446 	int ret = 0;
2447 	struct ocfs2_xattr_value_buf vb = {
2448 		.vb_bh = blk_bh,
2449 		.vb_access = ocfs2_journal_access_xb,
2450 	};
2451 	struct ocfs2_rm_xattr_bucket_para args = {
2452 		.ref_ci = ref_ci,
2453 		.ref_root_bh = ref_root_bh,
2454 	};
2455 
2456 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2457 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2458 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2459 		ret = ocfs2_remove_value_outside(inode, &vb, header,
2460 						 ref_ci, ref_root_bh);
2461 	} else
2462 		ret = ocfs2_iterate_xattr_index_block(inode,
2463 						blk_bh,
2464 						ocfs2_rm_xattr_cluster,
2465 						&args);
2466 
2467 	return ret;
2468 }
2469 
2470 static int ocfs2_xattr_free_block(struct inode *inode,
2471 				  u64 block,
2472 				  struct ocfs2_caching_info *ref_ci,
2473 				  struct buffer_head *ref_root_bh)
2474 {
2475 	struct inode *xb_alloc_inode;
2476 	struct buffer_head *xb_alloc_bh = NULL;
2477 	struct buffer_head *blk_bh = NULL;
2478 	struct ocfs2_xattr_block *xb;
2479 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2480 	handle_t *handle;
2481 	int ret = 0;
2482 	u64 blk, bg_blkno;
2483 	u16 bit;
2484 
2485 	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2486 	if (ret < 0) {
2487 		mlog_errno(ret);
2488 		goto out;
2489 	}
2490 
2491 	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2492 	if (ret < 0) {
2493 		mlog_errno(ret);
2494 		goto out;
2495 	}
2496 
2497 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2498 	blk = le64_to_cpu(xb->xb_blkno);
2499 	bit = le16_to_cpu(xb->xb_suballoc_bit);
2500 	if (xb->xb_suballoc_loc)
2501 		bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
2502 	else
2503 		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2504 
2505 	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2506 				EXTENT_ALLOC_SYSTEM_INODE,
2507 				le16_to_cpu(xb->xb_suballoc_slot));
2508 	if (!xb_alloc_inode) {
2509 		ret = -ENOMEM;
2510 		mlog_errno(ret);
2511 		goto out;
2512 	}
2513 	inode_lock(xb_alloc_inode);
2514 
2515 	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2516 	if (ret < 0) {
2517 		mlog_errno(ret);
2518 		goto out_mutex;
2519 	}
2520 
2521 	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2522 	if (IS_ERR(handle)) {
2523 		ret = PTR_ERR(handle);
2524 		mlog_errno(ret);
2525 		goto out_unlock;
2526 	}
2527 
2528 	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2529 				       bit, bg_blkno, 1);
2530 	if (ret < 0)
2531 		mlog_errno(ret);
2532 
2533 	ocfs2_commit_trans(osb, handle);
2534 out_unlock:
2535 	ocfs2_inode_unlock(xb_alloc_inode, 1);
2536 	brelse(xb_alloc_bh);
2537 out_mutex:
2538 	inode_unlock(xb_alloc_inode);
2539 	iput(xb_alloc_inode);
2540 out:
2541 	brelse(blk_bh);
2542 	return ret;
2543 }
2544 
2545 /*
2546  * ocfs2_xattr_remove()
2547  *
2548  * Free extended attribute resources associated with this inode.
2549  */
2550 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2551 {
2552 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2553 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2554 	struct ocfs2_refcount_tree *ref_tree = NULL;
2555 	struct buffer_head *ref_root_bh = NULL;
2556 	struct ocfs2_caching_info *ref_ci = NULL;
2557 	handle_t *handle;
2558 	int ret;
2559 
2560 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2561 		return 0;
2562 
2563 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2564 		return 0;
2565 
2566 	if (ocfs2_is_refcount_inode(inode)) {
2567 		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2568 					       le64_to_cpu(di->i_refcount_loc),
2569 					       1, &ref_tree, &ref_root_bh);
2570 		if (ret) {
2571 			mlog_errno(ret);
2572 			goto out;
2573 		}
2574 		ref_ci = &ref_tree->rf_ci;
2575 
2576 	}
2577 
2578 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2579 		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2580 					       ref_ci, ref_root_bh);
2581 		if (ret < 0) {
2582 			mlog_errno(ret);
2583 			goto out;
2584 		}
2585 	}
2586 
2587 	if (di->i_xattr_loc) {
2588 		ret = ocfs2_xattr_free_block(inode,
2589 					     le64_to_cpu(di->i_xattr_loc),
2590 					     ref_ci, ref_root_bh);
2591 		if (ret < 0) {
2592 			mlog_errno(ret);
2593 			goto out;
2594 		}
2595 	}
2596 
2597 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2598 				   OCFS2_INODE_UPDATE_CREDITS);
2599 	if (IS_ERR(handle)) {
2600 		ret = PTR_ERR(handle);
2601 		mlog_errno(ret);
2602 		goto out;
2603 	}
2604 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2605 				      OCFS2_JOURNAL_ACCESS_WRITE);
2606 	if (ret) {
2607 		mlog_errno(ret);
2608 		goto out_commit;
2609 	}
2610 
2611 	di->i_xattr_loc = 0;
2612 
2613 	spin_lock(&oi->ip_lock);
2614 	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2615 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2616 	spin_unlock(&oi->ip_lock);
2617 	ocfs2_update_inode_fsync_trans(handle, inode, 0);
2618 
2619 	ocfs2_journal_dirty(handle, di_bh);
2620 out_commit:
2621 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2622 out:
2623 	if (ref_tree)
2624 		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2625 	brelse(ref_root_bh);
2626 	return ret;
2627 }
2628 
2629 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2630 					struct ocfs2_dinode *di)
2631 {
2632 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2633 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2634 	int free;
2635 
2636 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2637 		return 0;
2638 
2639 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2640 		struct ocfs2_inline_data *idata = &di->id2.i_data;
2641 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2642 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
2643 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
2644 			le64_to_cpu(di->i_size);
2645 	} else {
2646 		struct ocfs2_extent_list *el = &di->id2.i_list;
2647 		free = (le16_to_cpu(el->l_count) -
2648 			le16_to_cpu(el->l_next_free_rec)) *
2649 			sizeof(struct ocfs2_extent_rec);
2650 	}
2651 	if (free >= xattrsize)
2652 		return 1;
2653 
2654 	return 0;
2655 }
2656 
2657 /*
2658  * ocfs2_xattr_ibody_find()
2659  *
2660  * Find extended attribute in inode block and
2661  * fill search info into struct ocfs2_xattr_search.
2662  */
2663 static int ocfs2_xattr_ibody_find(struct inode *inode,
2664 				  int name_index,
2665 				  const char *name,
2666 				  struct ocfs2_xattr_search *xs)
2667 {
2668 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2669 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2670 	int ret;
2671 	int has_space = 0;
2672 
2673 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2674 		return 0;
2675 
2676 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2677 		down_read(&oi->ip_alloc_sem);
2678 		has_space = ocfs2_xattr_has_space_inline(inode, di);
2679 		up_read(&oi->ip_alloc_sem);
2680 		if (!has_space)
2681 			return 0;
2682 	}
2683 
2684 	xs->xattr_bh = xs->inode_bh;
2685 	xs->end = (void *)di + inode->i_sb->s_blocksize;
2686 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2687 		xs->header = (struct ocfs2_xattr_header *)
2688 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
2689 	else
2690 		xs->header = (struct ocfs2_xattr_header *)
2691 			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2692 	xs->base = (void *)xs->header;
2693 	xs->here = xs->header->xh_entries;
2694 
2695 	/* Find the named attribute. */
2696 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2697 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
2698 		if (ret && ret != -ENODATA)
2699 			return ret;
2700 		xs->not_found = ret;
2701 	}
2702 
2703 	return 0;
2704 }
2705 
2706 static int ocfs2_xattr_ibody_init(struct inode *inode,
2707 				  struct buffer_head *di_bh,
2708 				  struct ocfs2_xattr_set_ctxt *ctxt)
2709 {
2710 	int ret;
2711 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2712 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2713 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2714 	unsigned int xattrsize = osb->s_xattr_inline_size;
2715 
2716 	if (!ocfs2_xattr_has_space_inline(inode, di)) {
2717 		ret = -ENOSPC;
2718 		goto out;
2719 	}
2720 
2721 	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
2722 				      OCFS2_JOURNAL_ACCESS_WRITE);
2723 	if (ret) {
2724 		mlog_errno(ret);
2725 		goto out;
2726 	}
2727 
2728 	/*
2729 	 * Adjust extent record count or inline data size
2730 	 * to reserve space for extended attribute.
2731 	 */
2732 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2733 		struct ocfs2_inline_data *idata = &di->id2.i_data;
2734 		le16_add_cpu(&idata->id_count, -xattrsize);
2735 	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2736 		struct ocfs2_extent_list *el = &di->id2.i_list;
2737 		le16_add_cpu(&el->l_count, -(xattrsize /
2738 					     sizeof(struct ocfs2_extent_rec)));
2739 	}
2740 	di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2741 
2742 	spin_lock(&oi->ip_lock);
2743 	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
2744 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2745 	spin_unlock(&oi->ip_lock);
2746 
2747 	ocfs2_journal_dirty(ctxt->handle, di_bh);
2748 
2749 out:
2750 	return ret;
2751 }
2752 
2753 /*
2754  * ocfs2_xattr_ibody_set()
2755  *
2756  * Set, replace or remove an extended attribute into inode block.
2757  *
2758  */
2759 static int ocfs2_xattr_ibody_set(struct inode *inode,
2760 				 struct ocfs2_xattr_info *xi,
2761 				 struct ocfs2_xattr_search *xs,
2762 				 struct ocfs2_xattr_set_ctxt *ctxt)
2763 {
2764 	int ret;
2765 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2766 	struct ocfs2_xa_loc loc;
2767 
2768 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2769 		return -ENOSPC;
2770 
2771 	down_write(&oi->ip_alloc_sem);
2772 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2773 		ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2774 		if (ret) {
2775 			if (ret != -ENOSPC)
2776 				mlog_errno(ret);
2777 			goto out;
2778 		}
2779 	}
2780 
2781 	ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2782 				 xs->not_found ? NULL : xs->here);
2783 	ret = ocfs2_xa_set(&loc, xi, ctxt);
2784 	if (ret) {
2785 		if (ret != -ENOSPC)
2786 			mlog_errno(ret);
2787 		goto out;
2788 	}
2789 	xs->here = loc.xl_entry;
2790 
2791 out:
2792 	up_write(&oi->ip_alloc_sem);
2793 
2794 	return ret;
2795 }
2796 
2797 /*
2798  * ocfs2_xattr_block_find()
2799  *
2800  * Find extended attribute in external block and
2801  * fill search info into struct ocfs2_xattr_search.
2802  */
2803 static int ocfs2_xattr_block_find(struct inode *inode,
2804 				  int name_index,
2805 				  const char *name,
2806 				  struct ocfs2_xattr_search *xs)
2807 {
2808 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2809 	struct buffer_head *blk_bh = NULL;
2810 	struct ocfs2_xattr_block *xb;
2811 	int ret = 0;
2812 
2813 	if (!di->i_xattr_loc)
2814 		return ret;
2815 
2816 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2817 				     &blk_bh);
2818 	if (ret < 0) {
2819 		mlog_errno(ret);
2820 		return ret;
2821 	}
2822 
2823 	xs->xattr_bh = blk_bh;
2824 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2825 
2826 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2827 		xs->header = &xb->xb_attrs.xb_header;
2828 		xs->base = (void *)xs->header;
2829 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2830 		xs->here = xs->header->xh_entries;
2831 
2832 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
2833 	} else
2834 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2835 						   name_index,
2836 						   name, xs);
2837 
2838 	if (ret && ret != -ENODATA) {
2839 		xs->xattr_bh = NULL;
2840 		goto cleanup;
2841 	}
2842 	xs->not_found = ret;
2843 	return 0;
2844 cleanup:
2845 	brelse(blk_bh);
2846 
2847 	return ret;
2848 }
2849 
2850 static int ocfs2_create_xattr_block(struct inode *inode,
2851 				    struct buffer_head *inode_bh,
2852 				    struct ocfs2_xattr_set_ctxt *ctxt,
2853 				    int indexed,
2854 				    struct buffer_head **ret_bh)
2855 {
2856 	int ret;
2857 	u16 suballoc_bit_start;
2858 	u32 num_got;
2859 	u64 suballoc_loc, first_blkno;
2860 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2861 	struct buffer_head *new_bh = NULL;
2862 	struct ocfs2_xattr_block *xblk;
2863 
2864 	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2865 				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
2866 	if (ret < 0) {
2867 		mlog_errno(ret);
2868 		goto end;
2869 	}
2870 
2871 	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2872 				   &suballoc_loc, &suballoc_bit_start,
2873 				   &num_got, &first_blkno);
2874 	if (ret < 0) {
2875 		mlog_errno(ret);
2876 		goto end;
2877 	}
2878 
2879 	new_bh = sb_getblk(inode->i_sb, first_blkno);
2880 	if (!new_bh) {
2881 		ret = -ENOMEM;
2882 		mlog_errno(ret);
2883 		goto end;
2884 	}
2885 
2886 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2887 
2888 	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
2889 				      new_bh,
2890 				      OCFS2_JOURNAL_ACCESS_CREATE);
2891 	if (ret < 0) {
2892 		mlog_errno(ret);
2893 		goto end;
2894 	}
2895 
2896 	/* Initialize ocfs2_xattr_block */
2897 	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2898 	memset(xblk, 0, inode->i_sb->s_blocksize);
2899 	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2900 	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2901 	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2902 	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2903 	xblk->xb_fs_generation =
2904 		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2905 	xblk->xb_blkno = cpu_to_le64(first_blkno);
2906 	if (indexed) {
2907 		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2908 		xr->xt_clusters = cpu_to_le32(1);
2909 		xr->xt_last_eb_blk = 0;
2910 		xr->xt_list.l_tree_depth = 0;
2911 		xr->xt_list.l_count = cpu_to_le16(
2912 					ocfs2_xattr_recs_per_xb(inode->i_sb));
2913 		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2914 		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2915 	}
2916 	ocfs2_journal_dirty(ctxt->handle, new_bh);
2917 
2918 	/* Add it to the inode */
2919 	di->i_xattr_loc = cpu_to_le64(first_blkno);
2920 
2921 	spin_lock(&OCFS2_I(inode)->ip_lock);
2922 	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
2923 	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
2924 	spin_unlock(&OCFS2_I(inode)->ip_lock);
2925 
2926 	ocfs2_journal_dirty(ctxt->handle, inode_bh);
2927 
2928 	*ret_bh = new_bh;
2929 	new_bh = NULL;
2930 
2931 end:
2932 	brelse(new_bh);
2933 	return ret;
2934 }
2935 
2936 /*
2937  * ocfs2_xattr_block_set()
2938  *
2939  * Set, replace or remove an extended attribute into external block.
2940  *
2941  */
2942 static int ocfs2_xattr_block_set(struct inode *inode,
2943 				 struct ocfs2_xattr_info *xi,
2944 				 struct ocfs2_xattr_search *xs,
2945 				 struct ocfs2_xattr_set_ctxt *ctxt)
2946 {
2947 	struct buffer_head *new_bh = NULL;
2948 	struct ocfs2_xattr_block *xblk = NULL;
2949 	int ret;
2950 	struct ocfs2_xa_loc loc;
2951 
2952 	if (!xs->xattr_bh) {
2953 		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2954 					       0, &new_bh);
2955 		if (ret) {
2956 			mlog_errno(ret);
2957 			goto end;
2958 		}
2959 
2960 		xs->xattr_bh = new_bh;
2961 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2962 		xs->header = &xblk->xb_attrs.xb_header;
2963 		xs->base = (void *)xs->header;
2964 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2965 		xs->here = xs->header->xh_entries;
2966 	} else
2967 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2968 
2969 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2970 		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2971 					      xs->not_found ? NULL : xs->here);
2972 
2973 		ret = ocfs2_xa_set(&loc, xi, ctxt);
2974 		if (!ret)
2975 			xs->here = loc.xl_entry;
2976 		else if ((ret != -ENOSPC) || ctxt->set_abort)
2977 			goto end;
2978 		else {
2979 			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2980 			if (ret)
2981 				goto end;
2982 		}
2983 	}
2984 
2985 	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
2986 		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2987 
2988 end:
2989 	return ret;
2990 }
2991 
2992 /* Check whether the new xattr can be inserted into the inode. */
2993 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2994 				       struct ocfs2_xattr_info *xi,
2995 				       struct ocfs2_xattr_search *xs)
2996 {
2997 	struct ocfs2_xattr_entry *last;
2998 	int free, i;
2999 	size_t min_offs = xs->end - xs->base;
3000 
3001 	if (!xs->header)
3002 		return 0;
3003 
3004 	last = xs->header->xh_entries;
3005 
3006 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
3007 		size_t offs = le16_to_cpu(last->xe_name_offset);
3008 		if (offs < min_offs)
3009 			min_offs = offs;
3010 		last += 1;
3011 	}
3012 
3013 	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
3014 	if (free < 0)
3015 		return 0;
3016 
3017 	BUG_ON(!xs->not_found);
3018 
3019 	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3020 		return 1;
3021 
3022 	return 0;
3023 }
3024 
3025 static int ocfs2_calc_xattr_set_need(struct inode *inode,
3026 				     struct ocfs2_dinode *di,
3027 				     struct ocfs2_xattr_info *xi,
3028 				     struct ocfs2_xattr_search *xis,
3029 				     struct ocfs2_xattr_search *xbs,
3030 				     int *clusters_need,
3031 				     int *meta_need,
3032 				     int *credits_need)
3033 {
3034 	int ret = 0, old_in_xb = 0;
3035 	int clusters_add = 0, meta_add = 0, credits = 0;
3036 	struct buffer_head *bh = NULL;
3037 	struct ocfs2_xattr_block *xb = NULL;
3038 	struct ocfs2_xattr_entry *xe = NULL;
3039 	struct ocfs2_xattr_value_root *xv = NULL;
3040 	char *base = NULL;
3041 	int name_offset, name_len = 0;
3042 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3043 						    xi->xi_value_len);
3044 	u64 value_size;
3045 
3046 	/*
3047 	 * Calculate the clusters we need to write.
3048 	 * No matter whether we replace an old one or add a new one,
3049 	 * we need this for writing.
3050 	 */
3051 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3052 		credits += new_clusters *
3053 			   ocfs2_clusters_to_blocks(inode->i_sb, 1);
3054 
3055 	if (xis->not_found && xbs->not_found) {
3056 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3057 
3058 		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3059 			clusters_add += new_clusters;
3060 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3061 							&def_xv.xv.xr_list);
3062 		}
3063 
3064 		goto meta_guess;
3065 	}
3066 
3067 	if (!xis->not_found) {
3068 		xe = xis->here;
3069 		name_offset = le16_to_cpu(xe->xe_name_offset);
3070 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3071 		base = xis->base;
3072 		credits += OCFS2_INODE_UPDATE_CREDITS;
3073 	} else {
3074 		int i, block_off = 0;
3075 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3076 		xe = xbs->here;
3077 		name_offset = le16_to_cpu(xe->xe_name_offset);
3078 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3079 		i = xbs->here - xbs->header->xh_entries;
3080 		old_in_xb = 1;
3081 
3082 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3083 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3084 							bucket_xh(xbs->bucket),
3085 							i, &block_off,
3086 							&name_offset);
3087 			base = bucket_block(xbs->bucket, block_off);
3088 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3089 		} else {
3090 			base = xbs->base;
3091 			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3092 		}
3093 	}
3094 
3095 	/*
3096 	 * delete a xattr doesn't need metadata and cluster allocation.
3097 	 * so just calculate the credits and return.
3098 	 *
3099 	 * The credits for removing the value tree will be extended
3100 	 * by ocfs2_remove_extent itself.
3101 	 */
3102 	if (!xi->xi_value) {
3103 		if (!ocfs2_xattr_is_local(xe))
3104 			credits += ocfs2_remove_extent_credits(inode->i_sb);
3105 
3106 		goto out;
3107 	}
3108 
3109 	/* do cluster allocation guess first. */
3110 	value_size = le64_to_cpu(xe->xe_value_size);
3111 
3112 	if (old_in_xb) {
3113 		/*
3114 		 * In xattr set, we always try to set the xe in inode first,
3115 		 * so if it can be inserted into inode successfully, the old
3116 		 * one will be removed from the xattr block, and this xattr
3117 		 * will be inserted into inode as a new xattr in inode.
3118 		 */
3119 		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3120 			clusters_add += new_clusters;
3121 			credits += ocfs2_remove_extent_credits(inode->i_sb) +
3122 				    OCFS2_INODE_UPDATE_CREDITS;
3123 			if (!ocfs2_xattr_is_local(xe))
3124 				credits += ocfs2_calc_extend_credits(
3125 							inode->i_sb,
3126 							&def_xv.xv.xr_list);
3127 			goto out;
3128 		}
3129 	}
3130 
3131 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3132 		/* the new values will be stored outside. */
3133 		u32 old_clusters = 0;
3134 
3135 		if (!ocfs2_xattr_is_local(xe)) {
3136 			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
3137 								 value_size);
3138 			xv = (struct ocfs2_xattr_value_root *)
3139 			     (base + name_offset + name_len);
3140 			value_size = OCFS2_XATTR_ROOT_SIZE;
3141 		} else
3142 			xv = &def_xv.xv;
3143 
3144 		if (old_clusters >= new_clusters) {
3145 			credits += ocfs2_remove_extent_credits(inode->i_sb);
3146 			goto out;
3147 		} else {
3148 			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3149 			clusters_add += new_clusters - old_clusters;
3150 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3151 							     &xv->xr_list);
3152 			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3153 				goto out;
3154 		}
3155 	} else {
3156 		/*
3157 		 * Now the new value will be stored inside. So if the new
3158 		 * value is smaller than the size of value root or the old
3159 		 * value, we don't need any allocation, otherwise we have
3160 		 * to guess metadata allocation.
3161 		 */
3162 		if ((ocfs2_xattr_is_local(xe) &&
3163 		     (value_size >= xi->xi_value_len)) ||
3164 		    (!ocfs2_xattr_is_local(xe) &&
3165 		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3166 			goto out;
3167 	}
3168 
3169 meta_guess:
3170 	/* calculate metadata allocation. */
3171 	if (di->i_xattr_loc) {
3172 		if (!xbs->xattr_bh) {
3173 			ret = ocfs2_read_xattr_block(inode,
3174 						     le64_to_cpu(di->i_xattr_loc),
3175 						     &bh);
3176 			if (ret) {
3177 				mlog_errno(ret);
3178 				goto out;
3179 			}
3180 
3181 			xb = (struct ocfs2_xattr_block *)bh->b_data;
3182 		} else
3183 			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3184 
3185 		/*
3186 		 * If there is already an xattr tree, good, we can calculate
3187 		 * like other b-trees. Otherwise we may have the chance of
3188 		 * create a tree, the credit calculation is borrowed from
3189 		 * ocfs2_calc_extend_credits with root_el = NULL. And the
3190 		 * new tree will be cluster based, so no meta is needed.
3191 		 */
3192 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3193 			struct ocfs2_extent_list *el =
3194 				 &xb->xb_attrs.xb_root.xt_list;
3195 			meta_add += ocfs2_extend_meta_needed(el);
3196 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3197 							     el);
3198 		} else
3199 			credits += OCFS2_SUBALLOC_ALLOC + 1;
3200 
3201 		/*
3202 		 * This cluster will be used either for new bucket or for
3203 		 * new xattr block.
3204 		 * If the cluster size is the same as the bucket size, one
3205 		 * more is needed since we may need to extend the bucket
3206 		 * also.
3207 		 */
3208 		clusters_add += 1;
3209 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3210 		if (OCFS2_XATTR_BUCKET_SIZE ==
3211 			OCFS2_SB(inode->i_sb)->s_clustersize) {
3212 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3213 			clusters_add += 1;
3214 		}
3215 	} else {
3216 		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3217 		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3218 			struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
3219 			meta_add += ocfs2_extend_meta_needed(el);
3220 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3221 							     el);
3222 		} else {
3223 			meta_add += 1;
3224 		}
3225 	}
3226 out:
3227 	if (clusters_need)
3228 		*clusters_need = clusters_add;
3229 	if (meta_need)
3230 		*meta_need = meta_add;
3231 	if (credits_need)
3232 		*credits_need = credits;
3233 	brelse(bh);
3234 	return ret;
3235 }
3236 
3237 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3238 				     struct ocfs2_dinode *di,
3239 				     struct ocfs2_xattr_info *xi,
3240 				     struct ocfs2_xattr_search *xis,
3241 				     struct ocfs2_xattr_search *xbs,
3242 				     struct ocfs2_xattr_set_ctxt *ctxt,
3243 				     int extra_meta,
3244 				     int *credits)
3245 {
3246 	int clusters_add, meta_add, ret;
3247 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3248 
3249 	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3250 
3251 	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3252 
3253 	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3254 					&clusters_add, &meta_add, credits);
3255 	if (ret) {
3256 		mlog_errno(ret);
3257 		return ret;
3258 	}
3259 
3260 	meta_add += extra_meta;
3261 	trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
3262 					clusters_add, *credits);
3263 
3264 	if (meta_add) {
3265 		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3266 							&ctxt->meta_ac);
3267 		if (ret) {
3268 			mlog_errno(ret);
3269 			goto out;
3270 		}
3271 	}
3272 
3273 	if (clusters_add) {
3274 		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3275 		if (ret)
3276 			mlog_errno(ret);
3277 	}
3278 out:
3279 	if (ret) {
3280 		if (ctxt->meta_ac) {
3281 			ocfs2_free_alloc_context(ctxt->meta_ac);
3282 			ctxt->meta_ac = NULL;
3283 		}
3284 
3285 		/*
3286 		 * We cannot have an error and a non null ctxt->data_ac.
3287 		 */
3288 	}
3289 
3290 	return ret;
3291 }
3292 
3293 static int __ocfs2_xattr_set_handle(struct inode *inode,
3294 				    struct ocfs2_dinode *di,
3295 				    struct ocfs2_xattr_info *xi,
3296 				    struct ocfs2_xattr_search *xis,
3297 				    struct ocfs2_xattr_search *xbs,
3298 				    struct ocfs2_xattr_set_ctxt *ctxt)
3299 {
3300 	int ret = 0, credits, old_found;
3301 
3302 	if (!xi->xi_value) {
3303 		/* Remove existing extended attribute */
3304 		if (!xis->not_found)
3305 			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3306 		else if (!xbs->not_found)
3307 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3308 	} else {
3309 		/* We always try to set extended attribute into inode first*/
3310 		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3311 		if (!ret && !xbs->not_found) {
3312 			/*
3313 			 * If succeed and that extended attribute existing in
3314 			 * external block, then we will remove it.
3315 			 */
3316 			xi->xi_value = NULL;
3317 			xi->xi_value_len = 0;
3318 
3319 			old_found = xis->not_found;
3320 			xis->not_found = -ENODATA;
3321 			ret = ocfs2_calc_xattr_set_need(inode,
3322 							di,
3323 							xi,
3324 							xis,
3325 							xbs,
3326 							NULL,
3327 							NULL,
3328 							&credits);
3329 			xis->not_found = old_found;
3330 			if (ret) {
3331 				mlog_errno(ret);
3332 				goto out;
3333 			}
3334 
3335 			ret = ocfs2_extend_trans(ctxt->handle, credits);
3336 			if (ret) {
3337 				mlog_errno(ret);
3338 				goto out;
3339 			}
3340 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3341 		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3342 			if (di->i_xattr_loc && !xbs->xattr_bh) {
3343 				ret = ocfs2_xattr_block_find(inode,
3344 							     xi->xi_name_index,
3345 							     xi->xi_name, xbs);
3346 				if (ret)
3347 					goto out;
3348 
3349 				old_found = xis->not_found;
3350 				xis->not_found = -ENODATA;
3351 				ret = ocfs2_calc_xattr_set_need(inode,
3352 								di,
3353 								xi,
3354 								xis,
3355 								xbs,
3356 								NULL,
3357 								NULL,
3358 								&credits);
3359 				xis->not_found = old_found;
3360 				if (ret) {
3361 					mlog_errno(ret);
3362 					goto out;
3363 				}
3364 
3365 				ret = ocfs2_extend_trans(ctxt->handle, credits);
3366 				if (ret) {
3367 					mlog_errno(ret);
3368 					goto out;
3369 				}
3370 			}
3371 			/*
3372 			 * If no space in inode, we will set extended attribute
3373 			 * into external block.
3374 			 */
3375 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3376 			if (ret)
3377 				goto out;
3378 			if (!xis->not_found) {
3379 				/*
3380 				 * If succeed and that extended attribute
3381 				 * existing in inode, we will remove it.
3382 				 */
3383 				xi->xi_value = NULL;
3384 				xi->xi_value_len = 0;
3385 				xbs->not_found = -ENODATA;
3386 				ret = ocfs2_calc_xattr_set_need(inode,
3387 								di,
3388 								xi,
3389 								xis,
3390 								xbs,
3391 								NULL,
3392 								NULL,
3393 								&credits);
3394 				if (ret) {
3395 					mlog_errno(ret);
3396 					goto out;
3397 				}
3398 
3399 				ret = ocfs2_extend_trans(ctxt->handle, credits);
3400 				if (ret) {
3401 					mlog_errno(ret);
3402 					goto out;
3403 				}
3404 				ret = ocfs2_xattr_ibody_set(inode, xi,
3405 							    xis, ctxt);
3406 			}
3407 		}
3408 	}
3409 
3410 	if (!ret) {
3411 		/* Update inode ctime. */
3412 		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3413 					      xis->inode_bh,
3414 					      OCFS2_JOURNAL_ACCESS_WRITE);
3415 		if (ret) {
3416 			mlog_errno(ret);
3417 			goto out;
3418 		}
3419 
3420 		inode->i_ctime = current_time(inode);
3421 		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3422 		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3423 		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3424 	}
3425 out:
3426 	return ret;
3427 }
3428 
3429 /*
3430  * This function only called duing creating inode
3431  * for init security/acl xattrs of the new inode.
3432  * All transanction credits have been reserved in mknod.
3433  */
3434 int ocfs2_xattr_set_handle(handle_t *handle,
3435 			   struct inode *inode,
3436 			   struct buffer_head *di_bh,
3437 			   int name_index,
3438 			   const char *name,
3439 			   const void *value,
3440 			   size_t value_len,
3441 			   int flags,
3442 			   struct ocfs2_alloc_context *meta_ac,
3443 			   struct ocfs2_alloc_context *data_ac)
3444 {
3445 	struct ocfs2_dinode *di;
3446 	int ret;
3447 
3448 	struct ocfs2_xattr_info xi = {
3449 		.xi_name_index = name_index,
3450 		.xi_name = name,
3451 		.xi_name_len = strlen(name),
3452 		.xi_value = value,
3453 		.xi_value_len = value_len,
3454 	};
3455 
3456 	struct ocfs2_xattr_search xis = {
3457 		.not_found = -ENODATA,
3458 	};
3459 
3460 	struct ocfs2_xattr_search xbs = {
3461 		.not_found = -ENODATA,
3462 	};
3463 
3464 	struct ocfs2_xattr_set_ctxt ctxt = {
3465 		.handle = handle,
3466 		.meta_ac = meta_ac,
3467 		.data_ac = data_ac,
3468 	};
3469 
3470 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3471 		return -EOPNOTSUPP;
3472 
3473 	/*
3474 	 * In extreme situation, may need xattr bucket when
3475 	 * block size is too small. And we have already reserved
3476 	 * the credits for bucket in mknod.
3477 	 */
3478 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3479 		xbs.bucket = ocfs2_xattr_bucket_new(inode);
3480 		if (!xbs.bucket) {
3481 			mlog_errno(-ENOMEM);
3482 			return -ENOMEM;
3483 		}
3484 	}
3485 
3486 	xis.inode_bh = xbs.inode_bh = di_bh;
3487 	di = (struct ocfs2_dinode *)di_bh->b_data;
3488 
3489 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
3490 
3491 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3492 	if (ret)
3493 		goto cleanup;
3494 	if (xis.not_found) {
3495 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3496 		if (ret)
3497 			goto cleanup;
3498 	}
3499 
3500 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3501 
3502 cleanup:
3503 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
3504 	brelse(xbs.xattr_bh);
3505 	ocfs2_xattr_bucket_free(xbs.bucket);
3506 
3507 	return ret;
3508 }
3509 
3510 /*
3511  * ocfs2_xattr_set()
3512  *
3513  * Set, replace or remove an extended attribute for this inode.
3514  * value is NULL to remove an existing extended attribute, else either
3515  * create or replace an extended attribute.
3516  */
3517 int ocfs2_xattr_set(struct inode *inode,
3518 		    int name_index,
3519 		    const char *name,
3520 		    const void *value,
3521 		    size_t value_len,
3522 		    int flags)
3523 {
3524 	struct buffer_head *di_bh = NULL;
3525 	struct ocfs2_dinode *di;
3526 	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
3527 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3528 	struct inode *tl_inode = osb->osb_tl_inode;
3529 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
3530 	struct ocfs2_refcount_tree *ref_tree = NULL;
3531 	struct ocfs2_lock_holder oh;
3532 
3533 	struct ocfs2_xattr_info xi = {
3534 		.xi_name_index = name_index,
3535 		.xi_name = name,
3536 		.xi_name_len = strlen(name),
3537 		.xi_value = value,
3538 		.xi_value_len = value_len,
3539 	};
3540 
3541 	struct ocfs2_xattr_search xis = {
3542 		.not_found = -ENODATA,
3543 	};
3544 
3545 	struct ocfs2_xattr_search xbs = {
3546 		.not_found = -ENODATA,
3547 	};
3548 
3549 	if (!ocfs2_supports_xattr(osb))
3550 		return -EOPNOTSUPP;
3551 
3552 	/*
3553 	 * Only xbs will be used on indexed trees.  xis doesn't need a
3554 	 * bucket.
3555 	 */
3556 	xbs.bucket = ocfs2_xattr_bucket_new(inode);
3557 	if (!xbs.bucket) {
3558 		mlog_errno(-ENOMEM);
3559 		return -ENOMEM;
3560 	}
3561 
3562 	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
3563 	if (had_lock < 0) {
3564 		ret = had_lock;
3565 		mlog_errno(ret);
3566 		goto cleanup_nolock;
3567 	}
3568 	xis.inode_bh = xbs.inode_bh = di_bh;
3569 	di = (struct ocfs2_dinode *)di_bh->b_data;
3570 
3571 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
3572 	/*
3573 	 * Scan inode and external block to find the same name
3574 	 * extended attribute and collect search information.
3575 	 */
3576 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3577 	if (ret)
3578 		goto cleanup;
3579 	if (xis.not_found) {
3580 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3581 		if (ret)
3582 			goto cleanup;
3583 	}
3584 
3585 	if (xis.not_found && xbs.not_found) {
3586 		ret = -ENODATA;
3587 		if (flags & XATTR_REPLACE)
3588 			goto cleanup;
3589 		ret = 0;
3590 		if (!value)
3591 			goto cleanup;
3592 	} else {
3593 		ret = -EEXIST;
3594 		if (flags & XATTR_CREATE)
3595 			goto cleanup;
3596 	}
3597 
3598 	/* Check whether the value is refcounted and do some preparation. */
3599 	if (ocfs2_is_refcount_inode(inode) &&
3600 	    (!xis.not_found || !xbs.not_found)) {
3601 		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3602 						   &xis, &xbs, &ref_tree,
3603 						   &ref_meta, &ref_credits);
3604 		if (ret) {
3605 			mlog_errno(ret);
3606 			goto cleanup;
3607 		}
3608 	}
3609 
3610 	inode_lock(tl_inode);
3611 
3612 	if (ocfs2_truncate_log_needs_flush(osb)) {
3613 		ret = __ocfs2_flush_truncate_log(osb);
3614 		if (ret < 0) {
3615 			inode_unlock(tl_inode);
3616 			mlog_errno(ret);
3617 			goto cleanup;
3618 		}
3619 	}
3620 	inode_unlock(tl_inode);
3621 
3622 	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3623 					&xbs, &ctxt, ref_meta, &credits);
3624 	if (ret) {
3625 		mlog_errno(ret);
3626 		goto cleanup;
3627 	}
3628 
3629 	/* we need to update inode's ctime field, so add credit for it. */
3630 	credits += OCFS2_INODE_UPDATE_CREDITS;
3631 	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3632 	if (IS_ERR(ctxt.handle)) {
3633 		ret = PTR_ERR(ctxt.handle);
3634 		mlog_errno(ret);
3635 		goto out_free_ac;
3636 	}
3637 
3638 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3639 	ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
3640 
3641 	ocfs2_commit_trans(osb, ctxt.handle);
3642 
3643 out_free_ac:
3644 	if (ctxt.data_ac)
3645 		ocfs2_free_alloc_context(ctxt.data_ac);
3646 	if (ctxt.meta_ac)
3647 		ocfs2_free_alloc_context(ctxt.meta_ac);
3648 	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3649 		ocfs2_schedule_truncate_log_flush(osb, 1);
3650 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
3651 
3652 cleanup:
3653 	if (ref_tree)
3654 		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3655 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
3656 	if (!value && !ret) {
3657 		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3658 		if (ret)
3659 			mlog_errno(ret);
3660 	}
3661 	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
3662 cleanup_nolock:
3663 	brelse(di_bh);
3664 	brelse(xbs.xattr_bh);
3665 	ocfs2_xattr_bucket_free(xbs.bucket);
3666 
3667 	return ret;
3668 }
3669 
3670 /*
3671  * Find the xattr extent rec which may contains name_hash.
3672  * e_cpos will be the first name hash of the xattr rec.
3673  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3674  */
3675 static int ocfs2_xattr_get_rec(struct inode *inode,
3676 			       u32 name_hash,
3677 			       u64 *p_blkno,
3678 			       u32 *e_cpos,
3679 			       u32 *num_clusters,
3680 			       struct ocfs2_extent_list *el)
3681 {
3682 	int ret = 0, i;
3683 	struct buffer_head *eb_bh = NULL;
3684 	struct ocfs2_extent_block *eb;
3685 	struct ocfs2_extent_rec *rec = NULL;
3686 	u64 e_blkno = 0;
3687 
3688 	if (el->l_tree_depth) {
3689 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3690 				      &eb_bh);
3691 		if (ret) {
3692 			mlog_errno(ret);
3693 			goto out;
3694 		}
3695 
3696 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3697 		el = &eb->h_list;
3698 
3699 		if (el->l_tree_depth) {
3700 			ret = ocfs2_error(inode->i_sb,
3701 					  "Inode %lu has non zero tree depth in xattr tree block %llu\n",
3702 					  inode->i_ino,
3703 					  (unsigned long long)eb_bh->b_blocknr);
3704 			goto out;
3705 		}
3706 	}
3707 
3708 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3709 		rec = &el->l_recs[i];
3710 
3711 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3712 			e_blkno = le64_to_cpu(rec->e_blkno);
3713 			break;
3714 		}
3715 	}
3716 
3717 	if (!e_blkno) {
3718 		ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
3719 				  inode->i_ino,
3720 				  le32_to_cpu(rec->e_cpos),
3721 				  ocfs2_rec_clusters(el, rec));
3722 		goto out;
3723 	}
3724 
3725 	*p_blkno = le64_to_cpu(rec->e_blkno);
3726 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3727 	if (e_cpos)
3728 		*e_cpos = le32_to_cpu(rec->e_cpos);
3729 out:
3730 	brelse(eb_bh);
3731 	return ret;
3732 }
3733 
/*
 * Callback invoked by ocfs2_iterate_xattr_buckets() for every bucket it
 * reads.  @para is the opaque pointer handed to the iterator; a non-zero
 * return value stops the iteration and is returned to the caller.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
3737 
3738 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3739 				   struct ocfs2_xattr_bucket *bucket,
3740 				   int name_index,
3741 				   const char *name,
3742 				   u32 name_hash,
3743 				   u16 *xe_index,
3744 				   int *found)
3745 {
3746 	int i, ret = 0, cmp = 1, block_off, new_offset;
3747 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3748 	size_t name_len = strlen(name);
3749 	struct ocfs2_xattr_entry *xe = NULL;
3750 	char *xe_name;
3751 
3752 	/*
3753 	 * We don't use binary search in the bucket because there
3754 	 * may be multiple entries with the same name hash.
3755 	 */
3756 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3757 		xe = &xh->xh_entries[i];
3758 
3759 		if (name_hash > le32_to_cpu(xe->xe_name_hash))
3760 			continue;
3761 		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3762 			break;
3763 
3764 		cmp = name_index - ocfs2_xattr_get_type(xe);
3765 		if (!cmp)
3766 			cmp = name_len - xe->xe_name_len;
3767 		if (cmp)
3768 			continue;
3769 
3770 		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3771 							xh,
3772 							i,
3773 							&block_off,
3774 							&new_offset);
3775 		if (ret) {
3776 			mlog_errno(ret);
3777 			break;
3778 		}
3779 
3780 
3781 		xe_name = bucket_block(bucket, block_off) + new_offset;
3782 		if (!memcmp(name, xe_name, name_len)) {
3783 			*xe_index = i;
3784 			*found = 1;
3785 			ret = 0;
3786 			break;
3787 		}
3788 	}
3789 
3790 	return ret;
3791 }
3792 
3793 /*
3794  * Find the specified xattr entry in a series of buckets.
3795  * This series start from p_blkno and last for num_clusters.
3796  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3797  * the num of the valid buckets.
3798  *
3799  * Return the buffer_head this xattr should reside in. And if the xattr's
3800  * hash is in the gap of 2 buckets, return the lower bucket.
3801  */
3802 static int ocfs2_xattr_bucket_find(struct inode *inode,
3803 				   int name_index,
3804 				   const char *name,
3805 				   u32 name_hash,
3806 				   u64 p_blkno,
3807 				   u32 first_hash,
3808 				   u32 num_clusters,
3809 				   struct ocfs2_xattr_search *xs)
3810 {
3811 	int ret, found = 0;
3812 	struct ocfs2_xattr_header *xh = NULL;
3813 	struct ocfs2_xattr_entry *xe = NULL;
3814 	u16 index = 0;
3815 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3816 	int low_bucket = 0, bucket, high_bucket;
3817 	struct ocfs2_xattr_bucket *search;
3818 	u64 blkno, lower_blkno = 0;
3819 
3820 	search = ocfs2_xattr_bucket_new(inode);
3821 	if (!search) {
3822 		ret = -ENOMEM;
3823 		mlog_errno(ret);
3824 		goto out;
3825 	}
3826 
3827 	ret = ocfs2_read_xattr_bucket(search, p_blkno);
3828 	if (ret) {
3829 		mlog_errno(ret);
3830 		goto out;
3831 	}
3832 
3833 	xh = bucket_xh(search);
3834 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3835 	while (low_bucket <= high_bucket) {
3836 		ocfs2_xattr_bucket_relse(search);
3837 
3838 		bucket = (low_bucket + high_bucket) / 2;
3839 		blkno = p_blkno + bucket * blk_per_bucket;
3840 		ret = ocfs2_read_xattr_bucket(search, blkno);
3841 		if (ret) {
3842 			mlog_errno(ret);
3843 			goto out;
3844 		}
3845 
3846 		xh = bucket_xh(search);
3847 		xe = &xh->xh_entries[0];
3848 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3849 			high_bucket = bucket - 1;
3850 			continue;
3851 		}
3852 
3853 		/*
3854 		 * Check whether the hash of the last entry in our
3855 		 * bucket is larger than the search one. for an empty
3856 		 * bucket, the last one is also the first one.
3857 		 */
3858 		if (xh->xh_count)
3859 			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3860 
3861 		/* record lower_blkno which may be the insert place. */
3862 		lower_blkno = blkno;
3863 
3864 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3865 			low_bucket = bucket + 1;
3866 			continue;
3867 		}
3868 
3869 		/* the searched xattr should reside in this bucket if exists. */
3870 		ret = ocfs2_find_xe_in_bucket(inode, search,
3871 					      name_index, name, name_hash,
3872 					      &index, &found);
3873 		if (ret) {
3874 			mlog_errno(ret);
3875 			goto out;
3876 		}
3877 		break;
3878 	}
3879 
3880 	/*
3881 	 * Record the bucket we have found.
3882 	 * When the xattr's hash value is in the gap of 2 buckets, we will
3883 	 * always set it to the previous bucket.
3884 	 */
3885 	if (!lower_blkno)
3886 		lower_blkno = p_blkno;
3887 
3888 	/* This should be in cache - we just read it during the search */
3889 	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3890 	if (ret) {
3891 		mlog_errno(ret);
3892 		goto out;
3893 	}
3894 
3895 	xs->header = bucket_xh(xs->bucket);
3896 	xs->base = bucket_block(xs->bucket, 0);
3897 	xs->end = xs->base + inode->i_sb->s_blocksize;
3898 
3899 	if (found) {
3900 		xs->here = &xs->header->xh_entries[index];
3901 		trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
3902 			name, name_index, name_hash,
3903 			(unsigned long long)bucket_blkno(xs->bucket),
3904 			index);
3905 	} else
3906 		ret = -ENODATA;
3907 
3908 out:
3909 	ocfs2_xattr_bucket_free(search);
3910 	return ret;
3911 }
3912 
3913 static int ocfs2_xattr_index_block_find(struct inode *inode,
3914 					struct buffer_head *root_bh,
3915 					int name_index,
3916 					const char *name,
3917 					struct ocfs2_xattr_search *xs)
3918 {
3919 	int ret;
3920 	struct ocfs2_xattr_block *xb =
3921 			(struct ocfs2_xattr_block *)root_bh->b_data;
3922 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3923 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3924 	u64 p_blkno = 0;
3925 	u32 first_hash, num_clusters = 0;
3926 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3927 
3928 	if (le16_to_cpu(el->l_next_free_rec) == 0)
3929 		return -ENODATA;
3930 
3931 	trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
3932 					name, name_index, name_hash,
3933 					(unsigned long long)root_bh->b_blocknr,
3934 					-1);
3935 
3936 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3937 				  &num_clusters, el);
3938 	if (ret) {
3939 		mlog_errno(ret);
3940 		goto out;
3941 	}
3942 
3943 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3944 
3945 	trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
3946 					name, name_index, first_hash,
3947 					(unsigned long long)p_blkno,
3948 					num_clusters);
3949 
3950 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3951 				      p_blkno, first_hash, num_clusters, xs);
3952 
3953 out:
3954 	return ret;
3955 }
3956 
3957 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3958 				       u64 blkno,
3959 				       u32 clusters,
3960 				       xattr_bucket_func *func,
3961 				       void *para)
3962 {
3963 	int i, ret = 0;
3964 	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3965 	u32 num_buckets = clusters * bpc;
3966 	struct ocfs2_xattr_bucket *bucket;
3967 
3968 	bucket = ocfs2_xattr_bucket_new(inode);
3969 	if (!bucket) {
3970 		mlog_errno(-ENOMEM);
3971 		return -ENOMEM;
3972 	}
3973 
3974 	trace_ocfs2_iterate_xattr_buckets(
3975 		(unsigned long long)OCFS2_I(inode)->ip_blkno,
3976 		(unsigned long long)blkno, clusters);
3977 
3978 	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3979 		ret = ocfs2_read_xattr_bucket(bucket, blkno);
3980 		if (ret) {
3981 			mlog_errno(ret);
3982 			break;
3983 		}
3984 
3985 		/*
3986 		 * The real bucket num in this series of blocks is stored
3987 		 * in the 1st bucket.
3988 		 */
3989 		if (i == 0)
3990 			num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3991 
3992 		trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
3993 		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3994 		if (func) {
3995 			ret = func(inode, bucket, para);
3996 			if (ret && ret != -ERANGE)
3997 				mlog_errno(ret);
3998 			/* Fall through to bucket_relse() */
3999 		}
4000 
4001 		ocfs2_xattr_bucket_relse(bucket);
4002 		if (ret)
4003 			break;
4004 	}
4005 
4006 	ocfs2_xattr_bucket_free(bucket);
4007 	return ret;
4008 }
4009 
/*
 * State threaded through the bucket iterators while listing the names
 * stored in an indexed xattr tree.
 */
struct ocfs2_xattr_tree_list {
	/* Output buffer handed to ocfs2_xattr_list_entry() */
	char *buffer;
	/* Size of @buffer as supplied by the caller */
	size_t buffer_size;
	/* Running total maintained by ocfs2_xattr_list_entry() */
	size_t result;
};
4015 
4016 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
4017 					     struct ocfs2_xattr_header *xh,
4018 					     int index,
4019 					     int *block_off,
4020 					     int *new_offset)
4021 {
4022 	u16 name_offset;
4023 
4024 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4025 		return -EINVAL;
4026 
4027 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4028 
4029 	*block_off = name_offset >> sb->s_blocksize_bits;
4030 	*new_offset = name_offset % sb->s_blocksize;
4031 
4032 	return 0;
4033 }
4034 
4035 static int ocfs2_list_xattr_bucket(struct inode *inode,
4036 				   struct ocfs2_xattr_bucket *bucket,
4037 				   void *para)
4038 {
4039 	int ret = 0, type;
4040 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4041 	int i, block_off, new_offset;
4042 	const char *name;
4043 
4044 	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4045 		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4046 		type = ocfs2_xattr_get_type(entry);
4047 
4048 		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4049 							bucket_xh(bucket),
4050 							i,
4051 							&block_off,
4052 							&new_offset);
4053 		if (ret)
4054 			break;
4055 
4056 		name = (const char *)bucket_block(bucket, block_off) +
4057 			new_offset;
4058 		ret = ocfs2_xattr_list_entry(inode->i_sb,
4059 					     xl->buffer,
4060 					     xl->buffer_size,
4061 					     &xl->result,
4062 					     type, name,
4063 					     entry->xe_name_len);
4064 		if (ret)
4065 			break;
4066 	}
4067 
4068 	return ret;
4069 }
4070 
4071 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
4072 					   struct buffer_head *blk_bh,
4073 					   xattr_tree_rec_func *rec_func,
4074 					   void *para)
4075 {
4076 	struct ocfs2_xattr_block *xb =
4077 			(struct ocfs2_xattr_block *)blk_bh->b_data;
4078 	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4079 	int ret = 0;
4080 	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
4081 	u64 p_blkno = 0;
4082 
4083 	if (!el->l_next_free_rec || !rec_func)
4084 		return 0;
4085 
4086 	while (name_hash > 0) {
4087 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4088 					  &e_cpos, &num_clusters, el);
4089 		if (ret) {
4090 			mlog_errno(ret);
4091 			break;
4092 		}
4093 
4094 		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
4095 			       num_clusters, para);
4096 		if (ret) {
4097 			if (ret != -ERANGE)
4098 				mlog_errno(ret);
4099 			break;
4100 		}
4101 
4102 		if (e_cpos == 0)
4103 			break;
4104 
4105 		name_hash = e_cpos - 1;
4106 	}
4107 
4108 	return ret;
4109 
4110 }
4111 
4112 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4113 				     struct buffer_head *root_bh,
4114 				     u64 blkno, u32 cpos, u32 len, void *para)
4115 {
4116 	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4117 					   ocfs2_list_xattr_bucket, para);
4118 }
4119 
4120 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4121 					     struct buffer_head *blk_bh,
4122 					     char *buffer,
4123 					     size_t buffer_size)
4124 {
4125 	int ret;
4126 	struct ocfs2_xattr_tree_list xl = {
4127 		.buffer = buffer,
4128 		.buffer_size = buffer_size,
4129 		.result = 0,
4130 	};
4131 
4132 	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4133 					      ocfs2_list_xattr_tree_rec, &xl);
4134 	if (ret) {
4135 		mlog_errno(ret);
4136 		goto out;
4137 	}
4138 
4139 	ret = xl.result;
4140 out:
4141 	return ret;
4142 }
4143 
4144 static int cmp_xe(const void *a, const void *b)
4145 {
4146 	const struct ocfs2_xattr_entry *l = a, *r = b;
4147 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
4148 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
4149 
4150 	if (l_hash > r_hash)
4151 		return 1;
4152 	if (l_hash < r_hash)
4153 		return -1;
4154 	return 0;
4155 }
4156 
4157 static void swap_xe(void *a, void *b, int size)
4158 {
4159 	struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4160 
4161 	tmp = *l;
4162 	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4163 	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4164 }
4165 
4166 /*
4167  * When the ocfs2_xattr_block is filled up, new bucket will be created
4168  * and all the xattr entries will be moved to the new bucket.
4169  * The header goes at the start of the bucket, and the names+values are
4170  * filled from the end.  This is why *target starts as the last buffer.
4171  * Note: we need to sort the entries since they are not saved in order
4172  * in the ocfs2_xattr_block.
4173  */
4174 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4175 					   struct buffer_head *xb_bh,
4176 					   struct ocfs2_xattr_bucket *bucket)
4177 {
4178 	int i, blocksize = inode->i_sb->s_blocksize;
4179 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4180 	u16 offset, size, off_change;
4181 	struct ocfs2_xattr_entry *xe;
4182 	struct ocfs2_xattr_block *xb =
4183 				(struct ocfs2_xattr_block *)xb_bh->b_data;
4184 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4185 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4186 	u16 count = le16_to_cpu(xb_xh->xh_count);
4187 	char *src = xb_bh->b_data;
4188 	char *target = bucket_block(bucket, blks - 1);
4189 
4190 	trace_ocfs2_cp_xattr_block_to_bucket_begin(
4191 				(unsigned long long)xb_bh->b_blocknr,
4192 				(unsigned long long)bucket_blkno(bucket));
4193 
4194 	for (i = 0; i < blks; i++)
4195 		memset(bucket_block(bucket, i), 0, blocksize);
4196 
4197 	/*
4198 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
4199 	 * there is a offset change corresponding to the change of
4200 	 * ocfs2_xattr_header's position.
4201 	 */
4202 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4203 	xe = &xb_xh->xh_entries[count - 1];
4204 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4205 	size = blocksize - offset;
4206 
4207 	/* copy all the names and values. */
4208 	memcpy(target + offset, src + offset, size);
4209 
4210 	/* Init new header now. */
4211 	xh->xh_count = xb_xh->xh_count;
4212 	xh->xh_num_buckets = cpu_to_le16(1);
4213 	xh->xh_name_value_len = cpu_to_le16(size);
4214 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4215 
4216 	/* copy all the entries. */
4217 	target = bucket_block(bucket, 0);
4218 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4219 	size = count * sizeof(struct ocfs2_xattr_entry);
4220 	memcpy(target + offset, (char *)xb_xh + offset, size);
4221 
4222 	/* Change the xe offset for all the xe because of the move. */
4223 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4224 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4225 	for (i = 0; i < count; i++)
4226 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4227 
4228 	trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
4229 
4230 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4231 	     cmp_xe, swap_xe);
4232 }
4233 
4234 /*
4235  * After we move xattr from block to index btree, we have to
4236  * update ocfs2_xattr_search to the new xe and base.
4237  *
4238  * When the entry is in xattr block, xattr_bh indicates the storage place.
4239  * While if the entry is in index b-tree, "bucket" indicates the
4240  * real place of the xattr.
4241  */
4242 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4243 					    struct ocfs2_xattr_search *xs,
4244 					    struct buffer_head *old_bh)
4245 {
4246 	char *buf = old_bh->b_data;
4247 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4248 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4249 	int i;
4250 
4251 	xs->header = bucket_xh(xs->bucket);
4252 	xs->base = bucket_block(xs->bucket, 0);
4253 	xs->end = xs->base + inode->i_sb->s_blocksize;
4254 
4255 	if (xs->not_found)
4256 		return;
4257 
4258 	i = xs->here - old_xh->xh_entries;
4259 	xs->here = &xs->header->xh_entries[i];
4260 }
4261 
4262 static int ocfs2_xattr_create_index_block(struct inode *inode,
4263 					  struct ocfs2_xattr_search *xs,
4264 					  struct ocfs2_xattr_set_ctxt *ctxt)
4265 {
4266 	int ret;
4267 	u32 bit_off, len;
4268 	u64 blkno;
4269 	handle_t *handle = ctxt->handle;
4270 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
4271 	struct buffer_head *xb_bh = xs->xattr_bh;
4272 	struct ocfs2_xattr_block *xb =
4273 			(struct ocfs2_xattr_block *)xb_bh->b_data;
4274 	struct ocfs2_xattr_tree_root *xr;
4275 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
4276 
4277 	trace_ocfs2_xattr_create_index_block_begin(
4278 				(unsigned long long)xb_bh->b_blocknr);
4279 
4280 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4281 	BUG_ON(!xs->bucket);
4282 
4283 	/*
4284 	 * XXX:
4285 	 * We can use this lock for now, and maybe move to a dedicated mutex
4286 	 * if performance becomes a problem later.
4287 	 */
4288 	down_write(&oi->ip_alloc_sem);
4289 
4290 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4291 				      OCFS2_JOURNAL_ACCESS_WRITE);
4292 	if (ret) {
4293 		mlog_errno(ret);
4294 		goto out;
4295 	}
4296 
4297 	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4298 				     1, 1, &bit_off, &len);
4299 	if (ret) {
4300 		mlog_errno(ret);
4301 		goto out;
4302 	}
4303 
4304 	/*
4305 	 * The bucket may spread in many blocks, and
4306 	 * we will only touch the 1st block and the last block
4307 	 * in the whole bucket(one for entry and one for data).
4308 	 */
4309 	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4310 
4311 	trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
4312 
4313 	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
4314 	if (ret) {
4315 		mlog_errno(ret);
4316 		goto out;
4317 	}
4318 
4319 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4320 						OCFS2_JOURNAL_ACCESS_CREATE);
4321 	if (ret) {
4322 		mlog_errno(ret);
4323 		goto out;
4324 	}
4325 
4326 	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4327 	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4328 
4329 	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4330 
4331 	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4332 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4333 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
4334 
4335 	xr = &xb->xb_attrs.xb_root;
4336 	xr->xt_clusters = cpu_to_le32(1);
4337 	xr->xt_last_eb_blk = 0;
4338 	xr->xt_list.l_tree_depth = 0;
4339 	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4340 	xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4341 
4342 	xr->xt_list.l_recs[0].e_cpos = 0;
4343 	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4344 	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4345 
4346 	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4347 
4348 	ocfs2_journal_dirty(handle, xb_bh);
4349 
4350 out:
4351 	up_write(&oi->ip_alloc_sem);
4352 
4353 	return ret;
4354 }
4355 
4356 static int cmp_xe_offset(const void *a, const void *b)
4357 {
4358 	const struct ocfs2_xattr_entry *l = a, *r = b;
4359 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4360 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4361 
4362 	if (l_name_offset < r_name_offset)
4363 		return 1;
4364 	if (l_name_offset > r_name_offset)
4365 		return -1;
4366 	return 0;
4367 }
4368 
4369 /*
4370  * defrag a xattr bucket if we find that the bucket has some
4371  * holes beteen name/value pairs.
4372  * We will move all the name/value pairs to the end of the bucket
4373  * so that we can spare some space for insertion.
4374  */
4375 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4376 				     handle_t *handle,
4377 				     struct ocfs2_xattr_bucket *bucket)
4378 {
4379 	int ret, i;
4380 	size_t end, offset, len;
4381 	struct ocfs2_xattr_header *xh;
4382 	char *entries, *buf, *bucket_buf = NULL;
4383 	u64 blkno = bucket_blkno(bucket);
4384 	u16 xh_free_start;
4385 	size_t blocksize = inode->i_sb->s_blocksize;
4386 	struct ocfs2_xattr_entry *xe;
4387 
4388 	/*
4389 	 * In order to make the operation more efficient and generic,
4390 	 * we copy all the blocks into a contiguous memory and do the
4391 	 * defragment there, so if anything is error, we will not touch
4392 	 * the real block.
4393 	 */
4394 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4395 	if (!bucket_buf) {
4396 		ret = -EIO;
4397 		goto out;
4398 	}
4399 
4400 	buf = bucket_buf;
4401 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4402 		memcpy(buf, bucket_block(bucket, i), blocksize);
4403 
4404 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4405 						OCFS2_JOURNAL_ACCESS_WRITE);
4406 	if (ret < 0) {
4407 		mlog_errno(ret);
4408 		goto out;
4409 	}
4410 
4411 	xh = (struct ocfs2_xattr_header *)bucket_buf;
4412 	entries = (char *)xh->xh_entries;
4413 	xh_free_start = le16_to_cpu(xh->xh_free_start);
4414 
4415 	trace_ocfs2_defrag_xattr_bucket(
4416 	     (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4417 	     xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4418 
4419 	/*
4420 	 * sort all the entries by their offset.
4421 	 * the largest will be the first, so that we can
4422 	 * move them to the end one by one.
4423 	 */
4424 	sort(entries, le16_to_cpu(xh->xh_count),
4425 	     sizeof(struct ocfs2_xattr_entry),
4426 	     cmp_xe_offset, swap_xe);
4427 
4428 	/* Move all name/values to the end of the bucket. */
4429 	xe = xh->xh_entries;
4430 	end = OCFS2_XATTR_BUCKET_SIZE;
4431 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4432 		offset = le16_to_cpu(xe->xe_name_offset);
4433 		len = namevalue_size_xe(xe);
4434 
4435 		/*
4436 		 * We must make sure that the name/value pair
4437 		 * exist in the same block. So adjust end to
4438 		 * the previous block end if needed.
4439 		 */
4440 		if (((end - len) / blocksize !=
4441 			(end - 1) / blocksize))
4442 			end = end - end % blocksize;
4443 
4444 		if (end > offset + len) {
4445 			memmove(bucket_buf + end - len,
4446 				bucket_buf + offset, len);
4447 			xe->xe_name_offset = cpu_to_le16(end - len);
4448 		}
4449 
4450 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4451 				"bucket %llu\n", (unsigned long long)blkno);
4452 
4453 		end -= len;
4454 	}
4455 
4456 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4457 			"bucket %llu\n", (unsigned long long)blkno);
4458 
4459 	if (xh_free_start == end)
4460 		goto out;
4461 
4462 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4463 	xh->xh_free_start = cpu_to_le16(end);
4464 
4465 	/* sort the entries by their name_hash. */
4466 	sort(entries, le16_to_cpu(xh->xh_count),
4467 	     sizeof(struct ocfs2_xattr_entry),
4468 	     cmp_xe, swap_xe);
4469 
4470 	buf = bucket_buf;
4471 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4472 		memcpy(bucket_block(bucket, i), buf, blocksize);
4473 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4474 
4475 out:
4476 	kfree(bucket_buf);
4477 	return ret;
4478 }
4479 
4480 /*
4481  * prev_blkno points to the start of an existing extent.  new_blkno
4482  * points to a newly allocated extent.  Because we know each of our
4483  * clusters contains more than bucket, we can easily split one cluster
4484  * at a bucket boundary.  So we take the last cluster of the existing
4485  * extent and split it down the middle.  We move the last half of the
4486  * buckets in the last cluster of the existing extent over to the new
4487  * extent.
4488  *
4489  * first_bh is the buffer at prev_blkno so we can update the existing
4490  * extent's bucket count.  header_bh is the bucket were we were hoping
4491  * to insert our xattr.  If the bucket move places the target in the new
4492  * extent, we'll update first_bh and header_bh after modifying the old
4493  * extent.
4494  *
4495  * first_hash will be set as the 1st xe's name_hash in the new extent.
4496  */
4497 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4498 					       handle_t *handle,
4499 					       struct ocfs2_xattr_bucket *first,
4500 					       struct ocfs2_xattr_bucket *target,
4501 					       u64 new_blkno,
4502 					       u32 num_clusters,
4503 					       u32 *first_hash)
4504 {
4505 	int ret;
4506 	struct super_block *sb = inode->i_sb;
4507 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4508 	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4509 	int to_move = num_buckets / 2;
4510 	u64 src_blkno;
4511 	u64 last_cluster_blkno = bucket_blkno(first) +
4512 		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4513 
4514 	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4515 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4516 
4517 	trace_ocfs2_mv_xattr_bucket_cross_cluster(
4518 				(unsigned long long)last_cluster_blkno,
4519 				(unsigned long long)new_blkno);
4520 
4521 	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4522 				     last_cluster_blkno, new_blkno,
4523 				     to_move, first_hash);
4524 	if (ret) {
4525 		mlog_errno(ret);
4526 		goto out;
4527 	}
4528 
4529 	/* This is the first bucket that got moved */
4530 	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4531 
4532 	/*
4533 	 * If the target bucket was part of the moved buckets, we need to
4534 	 * update first and target.
4535 	 */
4536 	if (bucket_blkno(target) >= src_blkno) {
4537 		/* Find the block for the new target bucket */
4538 		src_blkno = new_blkno +
4539 			(bucket_blkno(target) - src_blkno);
4540 
4541 		ocfs2_xattr_bucket_relse(first);
4542 		ocfs2_xattr_bucket_relse(target);
4543 
4544 		/*
4545 		 * These shouldn't fail - the buffers are in the
4546 		 * journal from ocfs2_cp_xattr_bucket().
4547 		 */
4548 		ret = ocfs2_read_xattr_bucket(first, new_blkno);
4549 		if (ret) {
4550 			mlog_errno(ret);
4551 			goto out;
4552 		}
4553 		ret = ocfs2_read_xattr_bucket(target, src_blkno);
4554 		if (ret)
4555 			mlog_errno(ret);
4556 
4557 	}
4558 
4559 out:
4560 	return ret;
4561 }
4562 
4563 /*
4564  * Find the suitable pos when we divide a bucket into 2.
4565  * We have to make sure the xattrs with the same hash value exist
4566  * in the same bucket.
4567  *
4568  * If this ocfs2_xattr_header covers more than one hash value, find a
4569  * place where the hash value changes.  Try to find the most even split.
4570  * The most common case is that all entries have different hash values,
4571  * and the first check we make will find a place to split.
4572  */
4573 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4574 {
4575 	struct ocfs2_xattr_entry *entries = xh->xh_entries;
4576 	int count = le16_to_cpu(xh->xh_count);
4577 	int delta, middle = count / 2;
4578 
4579 	/*
4580 	 * We start at the middle.  Each step gets farther away in both
4581 	 * directions.  We therefore hit the change in hash value
4582 	 * nearest to the middle.  Note that this loop does not execute for
4583 	 * count < 2.
4584 	 */
4585 	for (delta = 0; delta < middle; delta++) {
4586 		/* Let's check delta earlier than middle */
4587 		if (cmp_xe(&entries[middle - delta - 1],
4588 			   &entries[middle - delta]))
4589 			return middle - delta;
4590 
4591 		/* For even counts, don't walk off the end */
4592 		if ((middle + delta + 1) == count)
4593 			continue;
4594 
4595 		/* Now try delta past middle */
4596 		if (cmp_xe(&entries[middle + delta],
4597 			   &entries[middle + delta + 1]))
4598 			return middle + delta + 1;
4599 	}
4600 
4601 	/* Every entry had the same hash */
4602 	return count;
4603 }
4604 
4605 /*
4606  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4607  * first_hash will record the 1st hash of the new bucket.
4608  *
4609  * Normally half of the xattrs will be moved.  But we have to make
4610  * sure that the xattrs with the same hash value are stored in the
4611  * same bucket. If all the xattrs in this bucket have the same hash
4612  * value, the new bucket will be initialized as an empty one and the
4613  * first_hash will be initialized as (hash_value+1).
4614  */
4615 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4616 				    handle_t *handle,
4617 				    u64 blk,
4618 				    u64 new_blk,
4619 				    u32 *first_hash,
4620 				    int new_bucket_head)
4621 {
4622 	int ret, i;
4623 	int count, start, len, name_value_len = 0, name_offset = 0;
4624 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4625 	struct ocfs2_xattr_header *xh;
4626 	struct ocfs2_xattr_entry *xe;
4627 	int blocksize = inode->i_sb->s_blocksize;
4628 
4629 	trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
4630 					      (unsigned long long)new_blk);
4631 
4632 	s_bucket = ocfs2_xattr_bucket_new(inode);
4633 	t_bucket = ocfs2_xattr_bucket_new(inode);
4634 	if (!s_bucket || !t_bucket) {
4635 		ret = -ENOMEM;
4636 		mlog_errno(ret);
4637 		goto out;
4638 	}
4639 
4640 	ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4641 	if (ret) {
4642 		mlog_errno(ret);
4643 		goto out;
4644 	}
4645 
4646 	ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4647 						OCFS2_JOURNAL_ACCESS_WRITE);
4648 	if (ret) {
4649 		mlog_errno(ret);
4650 		goto out;
4651 	}
4652 
4653 	/*
4654 	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4655 	 * there's no need to read it.
4656 	 */
4657 	ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
4658 	if (ret) {
4659 		mlog_errno(ret);
4660 		goto out;
4661 	}
4662 
4663 	/*
4664 	 * Hey, if we're overwriting t_bucket, what difference does
4665 	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4666 	 * same part of ocfs2_cp_xattr_bucket().
4667 	 */
4668 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4669 						new_bucket_head ?
4670 						OCFS2_JOURNAL_ACCESS_CREATE :
4671 						OCFS2_JOURNAL_ACCESS_WRITE);
4672 	if (ret) {
4673 		mlog_errno(ret);
4674 		goto out;
4675 	}
4676 
4677 	xh = bucket_xh(s_bucket);
4678 	count = le16_to_cpu(xh->xh_count);
4679 	start = ocfs2_xattr_find_divide_pos(xh);
4680 
4681 	if (start == count) {
4682 		xe = &xh->xh_entries[start-1];
4683 
4684 		/*
4685 		 * initialized a new empty bucket here.
4686 		 * The hash value is set as one larger than
4687 		 * that of the last entry in the previous bucket.
4688 		 */
4689 		for (i = 0; i < t_bucket->bu_blocks; i++)
4690 			memset(bucket_block(t_bucket, i), 0, blocksize);
4691 
4692 		xh = bucket_xh(t_bucket);
4693 		xh->xh_free_start = cpu_to_le16(blocksize);
4694 		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4695 		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4696 
4697 		goto set_num_buckets;
4698 	}
4699 
4700 	/* copy the whole bucket to the new first. */
4701 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4702 
4703 	/* update the new bucket. */
4704 	xh = bucket_xh(t_bucket);
4705 
4706 	/*
4707 	 * Calculate the total name/value len and xh_free_start for
4708 	 * the old bucket first.
4709 	 */
4710 	name_offset = OCFS2_XATTR_BUCKET_SIZE;
4711 	name_value_len = 0;
4712 	for (i = 0; i < start; i++) {
4713 		xe = &xh->xh_entries[i];
4714 		name_value_len += namevalue_size_xe(xe);
4715 		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4716 			name_offset = le16_to_cpu(xe->xe_name_offset);
4717 	}
4718 
4719 	/*
4720 	 * Now begin the modification to the new bucket.
4721 	 *
4722 	 * In the new bucket, We just move the xattr entry to the beginning
4723 	 * and don't touch the name/value. So there will be some holes in the
4724 	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4725 	 * called.
4726 	 */
4727 	xe = &xh->xh_entries[start];
4728 	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4729 	trace_ocfs2_divide_xattr_bucket_move(len,
4730 			(int)((char *)xe - (char *)xh),
4731 			(int)((char *)xh->xh_entries - (char *)xh));
4732 	memmove((char *)xh->xh_entries, (char *)xe, len);
4733 	xe = &xh->xh_entries[count - start];
4734 	len = sizeof(struct ocfs2_xattr_entry) * start;
4735 	memset((char *)xe, 0, len);
4736 
4737 	le16_add_cpu(&xh->xh_count, -start);
4738 	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4739 
4740 	/* Calculate xh_free_start for the new bucket. */
4741 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4742 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4743 		xe = &xh->xh_entries[i];
4744 		if (le16_to_cpu(xe->xe_name_offset) <
4745 		    le16_to_cpu(xh->xh_free_start))
4746 			xh->xh_free_start = xe->xe_name_offset;
4747 	}
4748 
4749 set_num_buckets:
4750 	/* set xh->xh_num_buckets for the new xh. */
4751 	if (new_bucket_head)
4752 		xh->xh_num_buckets = cpu_to_le16(1);
4753 	else
4754 		xh->xh_num_buckets = 0;
4755 
4756 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4757 
4758 	/* store the first_hash of the new bucket. */
4759 	if (first_hash)
4760 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4761 
4762 	/*
4763 	 * Now only update the 1st block of the old bucket.  If we
4764 	 * just added a new empty bucket, there is no need to modify
4765 	 * it.
4766 	 */
4767 	if (start == count)
4768 		goto out;
4769 
4770 	xh = bucket_xh(s_bucket);
4771 	memset(&xh->xh_entries[start], 0,
4772 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
4773 	xh->xh_count = cpu_to_le16(start);
4774 	xh->xh_free_start = cpu_to_le16(name_offset);
4775 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
4776 
4777 	ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4778 
4779 out:
4780 	ocfs2_xattr_bucket_free(s_bucket);
4781 	ocfs2_xattr_bucket_free(t_bucket);
4782 
4783 	return ret;
4784 }
4785 
4786 /*
4787  * Copy xattr from one bucket to another bucket.
4788  *
4789  * The caller must make sure that the journal transaction
4790  * has enough space for journaling.
4791  */
4792 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4793 				 handle_t *handle,
4794 				 u64 s_blkno,
4795 				 u64 t_blkno,
4796 				 int t_is_new)
4797 {
4798 	int ret;
4799 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4800 
4801 	BUG_ON(s_blkno == t_blkno);
4802 
4803 	trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
4804 				    (unsigned long long)t_blkno,
4805 				    t_is_new);
4806 
4807 	s_bucket = ocfs2_xattr_bucket_new(inode);
4808 	t_bucket = ocfs2_xattr_bucket_new(inode);
4809 	if (!s_bucket || !t_bucket) {
4810 		ret = -ENOMEM;
4811 		mlog_errno(ret);
4812 		goto out;
4813 	}
4814 
4815 	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4816 	if (ret)
4817 		goto out;
4818 
4819 	/*
4820 	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4821 	 * there's no need to read it.
4822 	 */
4823 	ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
4824 	if (ret)
4825 		goto out;
4826 
4827 	/*
4828 	 * Hey, if we're overwriting t_bucket, what difference does
4829 	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4830 	 * cluster to fill, we came here from
4831 	 * ocfs2_mv_xattr_buckets(), and it is really new -
4832 	 * ACCESS_CREATE is required.  But we also might have moved data
4833 	 * out of t_bucket before extending back into it.
4834 	 * ocfs2_add_new_xattr_bucket() can do this - its call to
4835 	 * ocfs2_add_new_xattr_cluster() may have created a new extent
4836 	 * and copied out the end of the old extent.  Then it re-extends
4837 	 * the old extent back to create space for new xattrs.  That's
4838 	 * how we get here, and the bucket isn't really new.
4839 	 */
4840 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4841 						t_is_new ?
4842 						OCFS2_JOURNAL_ACCESS_CREATE :
4843 						OCFS2_JOURNAL_ACCESS_WRITE);
4844 	if (ret)
4845 		goto out;
4846 
4847 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4848 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4849 
4850 out:
4851 	ocfs2_xattr_bucket_free(t_bucket);
4852 	ocfs2_xattr_bucket_free(s_bucket);
4853 
4854 	return ret;
4855 }
4856 
4857 /*
4858  * src_blk points to the start of an existing extent.  last_blk points to
4859  * last cluster in that extent.  to_blk points to a newly allocated
4860  * extent.  We copy the buckets from the cluster at last_blk to the new
4861  * extent.  If start_bucket is non-zero, we skip that many buckets before
4862  * we start copying.  The new extent's xh_num_buckets gets set to the
4863  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4864  * by the same amount.
4865  */
4866 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4867 				  u64 src_blk, u64 last_blk, u64 to_blk,
4868 				  unsigned int start_bucket,
4869 				  u32 *first_hash)
4870 {
4871 	int i, ret, credits;
4872 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4873 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4874 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4875 	struct ocfs2_xattr_bucket *old_first, *new_first;
4876 
4877 	trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
4878 				     (unsigned long long)to_blk);
4879 
4880 	BUG_ON(start_bucket >= num_buckets);
4881 	if (start_bucket) {
4882 		num_buckets -= start_bucket;
4883 		last_blk += (start_bucket * blks_per_bucket);
4884 	}
4885 
4886 	/* The first bucket of the original extent */
4887 	old_first = ocfs2_xattr_bucket_new(inode);
4888 	/* The first bucket of the new extent */
4889 	new_first = ocfs2_xattr_bucket_new(inode);
4890 	if (!old_first || !new_first) {
4891 		ret = -ENOMEM;
4892 		mlog_errno(ret);
4893 		goto out;
4894 	}
4895 
4896 	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4897 	if (ret) {
4898 		mlog_errno(ret);
4899 		goto out;
4900 	}
4901 
4902 	/*
4903 	 * We need to update the first bucket of the old extent and all
4904 	 * the buckets going to the new extent.
4905 	 */
4906 	credits = ((num_buckets + 1) * blks_per_bucket);
4907 	ret = ocfs2_extend_trans(handle, credits);
4908 	if (ret) {
4909 		mlog_errno(ret);
4910 		goto out;
4911 	}
4912 
4913 	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4914 						OCFS2_JOURNAL_ACCESS_WRITE);
4915 	if (ret) {
4916 		mlog_errno(ret);
4917 		goto out;
4918 	}
4919 
4920 	for (i = 0; i < num_buckets; i++) {
4921 		ret = ocfs2_cp_xattr_bucket(inode, handle,
4922 					    last_blk + (i * blks_per_bucket),
4923 					    to_blk + (i * blks_per_bucket),
4924 					    1);
4925 		if (ret) {
4926 			mlog_errno(ret);
4927 			goto out;
4928 		}
4929 	}
4930 
4931 	/*
4932 	 * Get the new bucket ready before we dirty anything
4933 	 * (This actually shouldn't fail, because we already dirtied
4934 	 * it once in ocfs2_cp_xattr_bucket()).
4935 	 */
4936 	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4937 	if (ret) {
4938 		mlog_errno(ret);
4939 		goto out;
4940 	}
4941 	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4942 						OCFS2_JOURNAL_ACCESS_WRITE);
4943 	if (ret) {
4944 		mlog_errno(ret);
4945 		goto out;
4946 	}
4947 
4948 	/* Now update the headers */
4949 	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4950 	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4951 
4952 	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4953 	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4954 
4955 	if (first_hash)
4956 		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4957 
4958 out:
4959 	ocfs2_xattr_bucket_free(new_first);
4960 	ocfs2_xattr_bucket_free(old_first);
4961 	return ret;
4962 }
4963 
4964 /*
4965  * Move some xattrs in this cluster to the new cluster.
4966  * This function should only be called when bucket size == cluster size.
4967  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4968  */
4969 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4970 				      handle_t *handle,
4971 				      u64 prev_blk,
4972 				      u64 new_blk,
4973 				      u32 *first_hash)
4974 {
4975 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4976 	int ret, credits = 2 * blk_per_bucket;
4977 
4978 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4979 
4980 	ret = ocfs2_extend_trans(handle, credits);
4981 	if (ret) {
4982 		mlog_errno(ret);
4983 		return ret;
4984 	}
4985 
4986 	/* Move half of the xattr in start_blk to the next bucket. */
4987 	return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4988 					  new_blk, first_hash, 1);
4989 }
4990 
4991 /*
4992  * Move some xattrs from the old cluster to the new one since they are not
4993  * contiguous in ocfs2 xattr tree.
4994  *
4995  * new_blk starts a new separate cluster, and we will move some xattrs from
4996  * prev_blk to it. v_start will be set as the first name hash value in this
4997  * new cluster so that it can be used as e_cpos during tree insertion and
4998  * don't collide with our original b-tree operations. first_bh and header_bh
4999  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
5000  * to extend the insert bucket.
5001  *
5002  * The problem is how much xattr should we move to the new one and when should
5003  * we update first_bh and header_bh?
5004  * 1. If cluster size > bucket size, that means the previous cluster has more
5005  *    than 1 bucket, so just move half nums of bucket into the new cluster and
5006  *    update the first_bh and header_bh if the insert bucket has been moved
5007  *    to the new cluster.
5008  * 2. If cluster_size == bucket_size:
5009  *    a) If the previous extent rec has more than one cluster and the insert
5010  *       place isn't in the last cluster, copy the entire last cluster to the
5011  *       new one. This time, we don't need to upate the first_bh and header_bh
5012  *       since they will not be moved into the new cluster.
5013  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
5014  *       the new one. And we set the extend flag to zero if the insert place is
5015  *       moved into the new allocated cluster since no extend is needed.
5016  */
5017 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5018 					    handle_t *handle,
5019 					    struct ocfs2_xattr_bucket *first,
5020 					    struct ocfs2_xattr_bucket *target,
5021 					    u64 new_blk,
5022 					    u32 prev_clusters,
5023 					    u32 *v_start,
5024 					    int *extend)
5025 {
5026 	int ret;
5027 
5028 	trace_ocfs2_adjust_xattr_cross_cluster(
5029 			(unsigned long long)bucket_blkno(first),
5030 			(unsigned long long)new_blk, prev_clusters);
5031 
5032 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5033 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
5034 							  handle,
5035 							  first, target,
5036 							  new_blk,
5037 							  prev_clusters,
5038 							  v_start);
5039 		if (ret)
5040 			mlog_errno(ret);
5041 	} else {
5042 		/* The start of the last cluster in the first extent */
5043 		u64 last_blk = bucket_blkno(first) +
5044 			((prev_clusters - 1) *
5045 			 ocfs2_clusters_to_blocks(inode->i_sb, 1));
5046 
5047 		if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
5048 			ret = ocfs2_mv_xattr_buckets(inode, handle,
5049 						     bucket_blkno(first),
5050 						     last_blk, new_blk, 0,
5051 						     v_start);
5052 			if (ret)
5053 				mlog_errno(ret);
5054 		} else {
5055 			ret = ocfs2_divide_xattr_cluster(inode, handle,
5056 							 last_blk, new_blk,
5057 							 v_start);
5058 			if (ret)
5059 				mlog_errno(ret);
5060 
5061 			if ((bucket_blkno(target) == last_blk) && extend)
5062 				*extend = 0;
5063 		}
5064 	}
5065 
5066 	return ret;
5067 }
5068 
5069 /*
5070  * Add a new cluster for xattr storage.
5071  *
5072  * If the new cluster is contiguous with the previous one, it will be
5073  * appended to the same extent record, and num_clusters will be updated.
5074  * If not, we will insert a new extent for it and move some xattrs in
5075  * the last cluster into the new allocated one.
5076  * We also need to limit the maximum size of a btree leaf, otherwise we'll
5077  * lose the benefits of hashing because we'll have to search large leaves.
5078  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5079  * if it's bigger).
5080  *
5081  * first_bh is the first block of the previous extent rec and header_bh
5082  * indicates the bucket we will insert the new xattrs. They will be updated
5083  * when the header_bh is moved into the new cluster.
5084  */
5085 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5086 				       struct buffer_head *root_bh,
5087 				       struct ocfs2_xattr_bucket *first,
5088 				       struct ocfs2_xattr_bucket *target,
5089 				       u32 *num_clusters,
5090 				       u32 prev_cpos,
5091 				       int *extend,
5092 				       struct ocfs2_xattr_set_ctxt *ctxt)
5093 {
5094 	int ret;
5095 	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
5096 	u32 prev_clusters = *num_clusters;
5097 	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
5098 	u64 block;
5099 	handle_t *handle = ctxt->handle;
5100 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5101 	struct ocfs2_extent_tree et;
5102 
5103 	trace_ocfs2_add_new_xattr_cluster_begin(
5104 		(unsigned long long)OCFS2_I(inode)->ip_blkno,
5105 		(unsigned long long)bucket_blkno(first),
5106 		prev_cpos, prev_clusters);
5107 
5108 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5109 
5110 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5111 				      OCFS2_JOURNAL_ACCESS_WRITE);
5112 	if (ret < 0) {
5113 		mlog_errno(ret);
5114 		goto leave;
5115 	}
5116 
5117 	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5118 				     clusters_to_add, &bit_off, &num_bits);
5119 	if (ret < 0) {
5120 		if (ret != -ENOSPC)
5121 			mlog_errno(ret);
5122 		goto leave;
5123 	}
5124 
5125 	BUG_ON(num_bits > clusters_to_add);
5126 
5127 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5128 	trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);
5129 
5130 	if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5131 	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5132 	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5133 		/*
5134 		 * If this cluster is contiguous with the old one and
5135 		 * adding this new cluster, we don't surpass the limit of
5136 		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5137 		 * initialized and used like other buckets in the previous
5138 		 * cluster.
5139 		 * So add it as a contiguous one. The caller will handle
5140 		 * its init process.
5141 		 */
5142 		v_start = prev_cpos + prev_clusters;
5143 		*num_clusters = prev_clusters + num_bits;
5144 	} else {
5145 		ret = ocfs2_adjust_xattr_cross_cluster(inode,
5146 						       handle,
5147 						       first,
5148 						       target,
5149 						       block,
5150 						       prev_clusters,
5151 						       &v_start,
5152 						       extend);
5153 		if (ret) {
5154 			mlog_errno(ret);
5155 			goto leave;
5156 		}
5157 	}
5158 
5159 	trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
5160 						 v_start, num_bits);
5161 	ret = ocfs2_insert_extent(handle, &et, v_start, block,
5162 				  num_bits, 0, ctxt->meta_ac);
5163 	if (ret < 0) {
5164 		mlog_errno(ret);
5165 		goto leave;
5166 	}
5167 
5168 	ocfs2_journal_dirty(handle, root_bh);
5169 
5170 leave:
5171 	return ret;
5172 }
5173 
5174 /*
5175  * We are given an extent.  'first' is the bucket at the very front of
5176  * the extent.  The extent has space for an additional bucket past
5177  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5178  * of the target bucket.  We wish to shift every bucket past the target
5179  * down one, filling in that additional space.  When we get back to the
5180  * target, we split the target between itself and the now-empty bucket
5181  * at target+1 (aka, target_blkno + blks_per_bucket).
5182  */
5183 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5184 				     handle_t *handle,
5185 				     struct ocfs2_xattr_bucket *first,
5186 				     u64 target_blk,
5187 				     u32 num_clusters)
5188 {
5189 	int ret, credits;
5190 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5191 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5192 	u64 end_blk;
5193 	u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5194 
5195 	trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
5196 					(unsigned long long)bucket_blkno(first),
5197 					num_clusters, new_bucket);
5198 
5199 	/* The extent must have room for an additional bucket */
5200 	BUG_ON(new_bucket >=
5201 	       (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5202 
5203 	/* end_blk points to the last existing bucket */
5204 	end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5205 
5206 	/*
5207 	 * end_blk is the start of the last existing bucket.
5208 	 * Thus, (end_blk - target_blk) covers the target bucket and
5209 	 * every bucket after it up to, but not including, the last
5210 	 * existing bucket.  Then we add the last existing bucket, the
5211 	 * new bucket, and the first bucket (3 * blk_per_bucket).
5212 	 */
5213 	credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5214 	ret = ocfs2_extend_trans(handle, credits);
5215 	if (ret) {
5216 		mlog_errno(ret);
5217 		goto out;
5218 	}
5219 
5220 	ret = ocfs2_xattr_bucket_journal_access(handle, first,
5221 						OCFS2_JOURNAL_ACCESS_WRITE);
5222 	if (ret) {
5223 		mlog_errno(ret);
5224 		goto out;
5225 	}
5226 
5227 	while (end_blk != target_blk) {
5228 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5229 					    end_blk + blk_per_bucket, 0);
5230 		if (ret)
5231 			goto out;
5232 		end_blk -= blk_per_bucket;
5233 	}
5234 
5235 	/* Move half of the xattr in target_blkno to the next bucket. */
5236 	ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5237 					target_blk + blk_per_bucket, NULL, 0);
5238 
5239 	le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5240 	ocfs2_xattr_bucket_journal_dirty(handle, first);
5241 
5242 out:
5243 	return ret;
5244 }
5245 
5246 /*
5247  * Add new xattr bucket in an extent record and adjust the buckets
5248  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5249  * bucket we want to insert into.
5250  *
5251  * In the easy case, we will move all the buckets after target down by
5252  * one. Half of target's xattrs will be moved to the next bucket.
5253  *
5254  * If current cluster is full, we'll allocate a new one.  This may not
5255  * be contiguous.  The underlying calls will make sure that there is
5256  * space for the insert, shifting buckets around if necessary.
5257  * 'target' may be moved by those calls.
5258  */
5259 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5260 				      struct buffer_head *xb_bh,
5261 				      struct ocfs2_xattr_bucket *target,
5262 				      struct ocfs2_xattr_set_ctxt *ctxt)
5263 {
5264 	struct ocfs2_xattr_block *xb =
5265 			(struct ocfs2_xattr_block *)xb_bh->b_data;
5266 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5267 	struct ocfs2_extent_list *el = &xb_root->xt_list;
5268 	u32 name_hash =
5269 		le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5270 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5271 	int ret, num_buckets, extend = 1;
5272 	u64 p_blkno;
5273 	u32 e_cpos, num_clusters;
5274 	/* The bucket at the front of the extent */
5275 	struct ocfs2_xattr_bucket *first;
5276 
5277 	trace_ocfs2_add_new_xattr_bucket(
5278 				(unsigned long long)bucket_blkno(target));
5279 
5280 	/* The first bucket of the original extent */
5281 	first = ocfs2_xattr_bucket_new(inode);
5282 	if (!first) {
5283 		ret = -ENOMEM;
5284 		mlog_errno(ret);
5285 		goto out;
5286 	}
5287 
5288 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5289 				  &num_clusters, el);
5290 	if (ret) {
5291 		mlog_errno(ret);
5292 		goto out;
5293 	}
5294 
5295 	ret = ocfs2_read_xattr_bucket(first, p_blkno);
5296 	if (ret) {
5297 		mlog_errno(ret);
5298 		goto out;
5299 	}
5300 
5301 	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5302 	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5303 		/*
5304 		 * This can move first+target if the target bucket moves
5305 		 * to the new extent.
5306 		 */
5307 		ret = ocfs2_add_new_xattr_cluster(inode,
5308 						  xb_bh,
5309 						  first,
5310 						  target,
5311 						  &num_clusters,
5312 						  e_cpos,
5313 						  &extend,
5314 						  ctxt);
5315 		if (ret) {
5316 			mlog_errno(ret);
5317 			goto out;
5318 		}
5319 	}
5320 
5321 	if (extend) {
5322 		ret = ocfs2_extend_xattr_bucket(inode,
5323 						ctxt->handle,
5324 						first,
5325 						bucket_blkno(target),
5326 						num_clusters);
5327 		if (ret)
5328 			mlog_errno(ret);
5329 	}
5330 
5331 out:
5332 	ocfs2_xattr_bucket_free(first);
5333 
5334 	return ret;
5335 }
5336 
5337 /*
5338  * Truncate the specified xe_off entry in xattr bucket.
5339  * bucket is indicated by header_bh and len is the new length.
5340  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5341  *
5342  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5343  */
5344 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5345 					     struct ocfs2_xattr_bucket *bucket,
5346 					     int xe_off,
5347 					     int len,
5348 					     struct ocfs2_xattr_set_ctxt *ctxt)
5349 {
5350 	int ret, offset;
5351 	u64 value_blk;
5352 	struct ocfs2_xattr_entry *xe;
5353 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5354 	size_t blocksize = inode->i_sb->s_blocksize;
5355 	struct ocfs2_xattr_value_buf vb = {
5356 		.vb_access = ocfs2_journal_access,
5357 	};
5358 
5359 	xe = &xh->xh_entries[xe_off];
5360 
5361 	BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5362 
5363 	offset = le16_to_cpu(xe->xe_name_offset) +
5364 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
5365 
5366 	value_blk = offset / blocksize;
5367 
5368 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
5369 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5370 
5371 	vb.vb_bh = bucket->bu_bhs[value_blk];
5372 	BUG_ON(!vb.vb_bh);
5373 
5374 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
5375 		(vb.vb_bh->b_data + offset % blocksize);
5376 
5377 	/*
5378 	 * From here on out we have to dirty the bucket.  The generic
5379 	 * value calls only modify one of the bucket's bhs, but we need
5380 	 * to send the bucket at once.  So if they error, they *could* have
5381 	 * modified something.  We have to assume they did, and dirty
5382 	 * the whole bucket.  This leaves us in a consistent state.
5383 	 */
5384 	trace_ocfs2_xattr_bucket_value_truncate(
5385 			(unsigned long long)bucket_blkno(bucket), xe_off, len);
5386 	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5387 	if (ret) {
5388 		mlog_errno(ret);
5389 		goto out;
5390 	}
5391 
5392 	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5393 						OCFS2_JOURNAL_ACCESS_WRITE);
5394 	if (ret) {
5395 		mlog_errno(ret);
5396 		goto out;
5397 	}
5398 
5399 	xe->xe_value_size = cpu_to_le64(len);
5400 
5401 	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5402 
5403 out:
5404 	return ret;
5405 }
5406 
5407 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5408 				  struct buffer_head *root_bh,
5409 				  u64 blkno,
5410 				  u32 cpos,
5411 				  u32 len,
5412 				  void *para)
5413 {
5414 	int ret;
5415 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5416 	struct inode *tl_inode = osb->osb_tl_inode;
5417 	handle_t *handle;
5418 	struct ocfs2_xattr_block *xb =
5419 			(struct ocfs2_xattr_block *)root_bh->b_data;
5420 	struct ocfs2_alloc_context *meta_ac = NULL;
5421 	struct ocfs2_cached_dealloc_ctxt dealloc;
5422 	struct ocfs2_extent_tree et;
5423 
5424 	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5425 					  ocfs2_delete_xattr_in_bucket, para);
5426 	if (ret) {
5427 		mlog_errno(ret);
5428 		return ret;
5429 	}
5430 
5431 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5432 
5433 	ocfs2_init_dealloc_ctxt(&dealloc);
5434 
5435 	trace_ocfs2_rm_xattr_cluster(
5436 			(unsigned long long)OCFS2_I(inode)->ip_blkno,
5437 			(unsigned long long)blkno, cpos, len);
5438 
5439 	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5440 					       len);
5441 
5442 	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5443 	if (ret) {
5444 		mlog_errno(ret);
5445 		return ret;
5446 	}
5447 
5448 	inode_lock(tl_inode);
5449 
5450 	if (ocfs2_truncate_log_needs_flush(osb)) {
5451 		ret = __ocfs2_flush_truncate_log(osb);
5452 		if (ret < 0) {
5453 			mlog_errno(ret);
5454 			goto out;
5455 		}
5456 	}
5457 
5458 	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5459 	if (IS_ERR(handle)) {
5460 		ret = -ENOMEM;
5461 		mlog_errno(ret);
5462 		goto out;
5463 	}
5464 
5465 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5466 				      OCFS2_JOURNAL_ACCESS_WRITE);
5467 	if (ret) {
5468 		mlog_errno(ret);
5469 		goto out_commit;
5470 	}
5471 
5472 	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5473 				  &dealloc);
5474 	if (ret) {
5475 		mlog_errno(ret);
5476 		goto out_commit;
5477 	}
5478 
5479 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5480 	ocfs2_journal_dirty(handle, root_bh);
5481 
5482 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5483 	if (ret)
5484 		mlog_errno(ret);
5485 	ocfs2_update_inode_fsync_trans(handle, inode, 0);
5486 
5487 out_commit:
5488 	ocfs2_commit_trans(osb, handle);
5489 out:
5490 	ocfs2_schedule_truncate_log_flush(osb, 1);
5491 
5492 	inode_unlock(tl_inode);
5493 
5494 	if (meta_ac)
5495 		ocfs2_free_alloc_context(meta_ac);
5496 
5497 	ocfs2_run_deallocs(osb, &dealloc);
5498 
5499 	return ret;
5500 }
5501 
5502 /*
5503  * check whether the xattr bucket is filled up with the same hash value.
5504  * If we want to insert the xattr with the same hash, return -ENOSPC.
5505  * If we want to insert a xattr with different hash value, go ahead
5506  * and ocfs2_divide_xattr_bucket will handle this.
5507  */
5508 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5509 					      struct ocfs2_xattr_bucket *bucket,
5510 					      const char *name)
5511 {
5512 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5513 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5514 
5515 	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5516 		return 0;
5517 
5518 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5519 	    xh->xh_entries[0].xe_name_hash) {
5520 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5521 		     "hash = %u\n",
5522 		     (unsigned long long)bucket_blkno(bucket),
5523 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5524 		return -ENOSPC;
5525 	}
5526 
5527 	return 0;
5528 }
5529 
5530 /*
5531  * Try to set the entry in the current bucket.  If we fail, the caller
5532  * will handle getting us another bucket.
5533  */
5534 static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5535 					struct ocfs2_xattr_info *xi,
5536 					struct ocfs2_xattr_search *xs,
5537 					struct ocfs2_xattr_set_ctxt *ctxt)
5538 {
5539 	int ret;
5540 	struct ocfs2_xa_loc loc;
5541 
5542 	trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
5543 
5544 	ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5545 				       xs->not_found ? NULL : xs->here);
5546 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5547 	if (!ret) {
5548 		xs->here = loc.xl_entry;
5549 		goto out;
5550 	}
5551 	if (ret != -ENOSPC) {
5552 		mlog_errno(ret);
5553 		goto out;
5554 	}
5555 
5556 	/* Ok, we need space.  Let's try defragmenting the bucket. */
5557 	ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5558 					xs->bucket);
5559 	if (ret) {
5560 		mlog_errno(ret);
5561 		goto out;
5562 	}
5563 
5564 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5565 	if (!ret) {
5566 		xs->here = loc.xl_entry;
5567 		goto out;
5568 	}
5569 	if (ret != -ENOSPC)
5570 		mlog_errno(ret);
5571 
5572 
5573 out:
5574 	return ret;
5575 }
5576 
5577 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5578 					     struct ocfs2_xattr_info *xi,
5579 					     struct ocfs2_xattr_search *xs,
5580 					     struct ocfs2_xattr_set_ctxt *ctxt)
5581 {
5582 	int ret;
5583 
5584 	trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);
5585 
5586 	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5587 	if (!ret)
5588 		goto out;
5589 	if (ret != -ENOSPC) {
5590 		mlog_errno(ret);
5591 		goto out;
5592 	}
5593 
5594 	/* Ack, need more space.  Let's try to get another bucket! */
5595 
5596 	/*
5597 	 * We do not allow for overlapping ranges between buckets. And
5598 	 * the maximum number of collisions we will allow for then is
5599 	 * one bucket's worth, so check it here whether we need to
5600 	 * add a new bucket for the insert.
5601 	 */
5602 	ret = ocfs2_check_xattr_bucket_collision(inode,
5603 						 xs->bucket,
5604 						 xi->xi_name);
5605 	if (ret) {
5606 		mlog_errno(ret);
5607 		goto out;
5608 	}
5609 
5610 	ret = ocfs2_add_new_xattr_bucket(inode,
5611 					 xs->xattr_bh,
5612 					 xs->bucket,
5613 					 ctxt);
5614 	if (ret) {
5615 		mlog_errno(ret);
5616 		goto out;
5617 	}
5618 
5619 	/*
5620 	 * ocfs2_add_new_xattr_bucket() will have updated
5621 	 * xs->bucket if it moved, but it will not have updated
5622 	 * any of the other search fields.  Thus, we drop it and
5623 	 * re-search.  Everything should be cached, so it'll be
5624 	 * quick.
5625 	 */
5626 	ocfs2_xattr_bucket_relse(xs->bucket);
5627 	ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5628 					   xi->xi_name_index,
5629 					   xi->xi_name, xs);
5630 	if (ret && ret != -ENODATA)
5631 		goto out;
5632 	xs->not_found = ret;
5633 
5634 	/* Ok, we have a new bucket, let's try again */
5635 	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5636 	if (ret && (ret != -ENOSPC))
5637 		mlog_errno(ret);
5638 
5639 out:
5640 	return ret;
5641 }
5642 
5643 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5644 					struct ocfs2_xattr_bucket *bucket,
5645 					void *para)
5646 {
5647 	int ret = 0, ref_credits;
5648 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5649 	u16 i;
5650 	struct ocfs2_xattr_entry *xe;
5651 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5652 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5653 	int credits = ocfs2_remove_extent_credits(osb->sb) +
5654 		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5655 	struct ocfs2_xattr_value_root *xv;
5656 	struct ocfs2_rm_xattr_bucket_para *args =
5657 			(struct ocfs2_rm_xattr_bucket_para *)para;
5658 
5659 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5660 
5661 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5662 		xe = &xh->xh_entries[i];
5663 		if (ocfs2_xattr_is_local(xe))
5664 			continue;
5665 
5666 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5667 						      i, &xv, NULL);
5668 		if (ret) {
5669 			mlog_errno(ret);
5670 			break;
5671 		}
5672 
5673 		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5674 							 args->ref_ci,
5675 							 args->ref_root_bh,
5676 							 &ctxt.meta_ac,
5677 							 &ref_credits);
5678 
5679 		ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5680 		if (IS_ERR(ctxt.handle)) {
5681 			ret = PTR_ERR(ctxt.handle);
5682 			mlog_errno(ret);
5683 			break;
5684 		}
5685 
5686 		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5687 							i, 0, &ctxt);
5688 
5689 		ocfs2_commit_trans(osb, ctxt.handle);
5690 		if (ctxt.meta_ac) {
5691 			ocfs2_free_alloc_context(ctxt.meta_ac);
5692 			ctxt.meta_ac = NULL;
5693 		}
5694 		if (ret) {
5695 			mlog_errno(ret);
5696 			break;
5697 		}
5698 	}
5699 
5700 	if (ctxt.meta_ac)
5701 		ocfs2_free_alloc_context(ctxt.meta_ac);
5702 	ocfs2_schedule_truncate_log_flush(osb, 1);
5703 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
5704 	return ret;
5705 }
5706 
5707 /*
5708  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5709  * or change the extent record flag), we need to recalculate
5710  * the metaecc for the whole bucket. So it is done here.
5711  *
5712  * Note:
5713  * We have to give the extra credits for the caller.
5714  */
5715 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5716 					    handle_t *handle,
5717 					    void *para)
5718 {
5719 	int ret;
5720 	struct ocfs2_xattr_bucket *bucket =
5721 			(struct ocfs2_xattr_bucket *)para;
5722 
5723 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5724 						OCFS2_JOURNAL_ACCESS_WRITE);
5725 	if (ret) {
5726 		mlog_errno(ret);
5727 		return ret;
5728 	}
5729 
5730 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5731 
5732 	return 0;
5733 }
5734 
5735 /*
5736  * Special action we need if the xattr value is refcounted.
5737  *
5738  * 1. If the xattr is refcounted, lock the tree.
5739  * 2. CoW the xattr if we are setting the new value and the value
5740  *    will be stored outside.
5741  * 3. In other case, decrease_refcount will work for us, so just
5742  *    lock the refcount tree, calculate the meta and credits is OK.
5743  *
5744  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5745  * currently CoW is a completed transaction, while this function
5746  * will also lock the allocators and let us deadlock. So we will
5747  * CoW the whole xattr value.
5748  */
5749 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5750 					struct ocfs2_dinode *di,
5751 					struct ocfs2_xattr_info *xi,
5752 					struct ocfs2_xattr_search *xis,
5753 					struct ocfs2_xattr_search *xbs,
5754 					struct ocfs2_refcount_tree **ref_tree,
5755 					int *meta_add,
5756 					int *credits)
5757 {
5758 	int ret = 0;
5759 	struct ocfs2_xattr_block *xb;
5760 	struct ocfs2_xattr_entry *xe;
5761 	char *base;
5762 	u32 p_cluster, num_clusters;
5763 	unsigned int ext_flags;
5764 	int name_offset, name_len;
5765 	struct ocfs2_xattr_value_buf vb;
5766 	struct ocfs2_xattr_bucket *bucket = NULL;
5767 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5768 	struct ocfs2_post_refcount refcount;
5769 	struct ocfs2_post_refcount *p = NULL;
5770 	struct buffer_head *ref_root_bh = NULL;
5771 
5772 	if (!xis->not_found) {
5773 		xe = xis->here;
5774 		name_offset = le16_to_cpu(xe->xe_name_offset);
5775 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5776 		base = xis->base;
5777 		vb.vb_bh = xis->inode_bh;
5778 		vb.vb_access = ocfs2_journal_access_di;
5779 	} else {
5780 		int i, block_off = 0;
5781 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5782 		xe = xbs->here;
5783 		name_offset = le16_to_cpu(xe->xe_name_offset);
5784 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5785 		i = xbs->here - xbs->header->xh_entries;
5786 
5787 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5788 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5789 							bucket_xh(xbs->bucket),
5790 							i, &block_off,
5791 							&name_offset);
5792 			if (ret) {
5793 				mlog_errno(ret);
5794 				goto out;
5795 			}
5796 			base = bucket_block(xbs->bucket, block_off);
5797 			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5798 			vb.vb_access = ocfs2_journal_access;
5799 
5800 			if (ocfs2_meta_ecc(osb)) {
5801 				/*create parameters for ocfs2_post_refcount. */
5802 				bucket = xbs->bucket;
5803 				refcount.credits = bucket->bu_blocks;
5804 				refcount.para = bucket;
5805 				refcount.func =
5806 					ocfs2_xattr_bucket_post_refcount;
5807 				p = &refcount;
5808 			}
5809 		} else {
5810 			base = xbs->base;
5811 			vb.vb_bh = xbs->xattr_bh;
5812 			vb.vb_access = ocfs2_journal_access_xb;
5813 		}
5814 	}
5815 
5816 	if (ocfs2_xattr_is_local(xe))
5817 		goto out;
5818 
5819 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
5820 				(base + name_offset + name_len);
5821 
5822 	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5823 				       &num_clusters, &vb.vb_xv->xr_list,
5824 				       &ext_flags);
5825 	if (ret) {
5826 		mlog_errno(ret);
5827 		goto out;
5828 	}
5829 
5830 	/*
5831 	 * We just need to check the 1st extent record, since we always
5832 	 * CoW the whole xattr. So there shouldn't be a xattr with
5833 	 * some REFCOUNT extent recs after the 1st one.
5834 	 */
5835 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5836 		goto out;
5837 
5838 	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5839 				       1, ref_tree, &ref_root_bh);
5840 	if (ret) {
5841 		mlog_errno(ret);
5842 		goto out;
5843 	}
5844 
5845 	/*
5846 	 * If we are deleting the xattr or the new size will be stored inside,
5847 	 * cool, leave it there, the xattr truncate process will remove them
5848 	 * for us(it still needs the refcount tree lock and the meta, credits).
5849 	 * And the worse case is that every cluster truncate will split the
5850 	 * refcount tree, and make the original extent become 3. So we will need
5851 	 * 2 * cluster more extent recs at most.
5852 	 */
5853 	if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5854 
5855 		ret = ocfs2_refcounted_xattr_delete_need(inode,
5856 							 &(*ref_tree)->rf_ci,
5857 							 ref_root_bh, vb.vb_xv,
5858 							 meta_add, credits);
5859 		if (ret)
5860 			mlog_errno(ret);
5861 		goto out;
5862 	}
5863 
5864 	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5865 				       *ref_tree, ref_root_bh, 0,
5866 				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
5867 	if (ret)
5868 		mlog_errno(ret);
5869 
5870 out:
5871 	brelse(ref_root_bh);
5872 	return ret;
5873 }
5874 
5875 /*
5876  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5877  * The physical clusters will be added to refcount tree.
5878  */
5879 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5880 				struct ocfs2_xattr_value_root *xv,
5881 				struct ocfs2_extent_tree *value_et,
5882 				struct ocfs2_caching_info *ref_ci,
5883 				struct buffer_head *ref_root_bh,
5884 				struct ocfs2_cached_dealloc_ctxt *dealloc,
5885 				struct ocfs2_post_refcount *refcount)
5886 {
5887 	int ret = 0;
5888 	u32 clusters = le32_to_cpu(xv->xr_clusters);
5889 	u32 cpos, p_cluster, num_clusters;
5890 	struct ocfs2_extent_list *el = &xv->xr_list;
5891 	unsigned int ext_flags;
5892 
5893 	cpos = 0;
5894 	while (cpos < clusters) {
5895 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5896 					       &num_clusters, el, &ext_flags);
5897 		if (ret) {
5898 			mlog_errno(ret);
5899 			break;
5900 		}
5901 
5902 		cpos += num_clusters;
5903 		if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5904 			continue;
5905 
5906 		BUG_ON(!p_cluster);
5907 
5908 		ret = ocfs2_add_refcount_flag(inode, value_et,
5909 					      ref_ci, ref_root_bh,
5910 					      cpos - num_clusters,
5911 					      p_cluster, num_clusters,
5912 					      dealloc, refcount);
5913 		if (ret) {
5914 			mlog_errno(ret);
5915 			break;
5916 		}
5917 	}
5918 
5919 	return ret;
5920 }
5921 
5922 /*
5923  * Given a normal ocfs2_xattr_header, refcount all the entries which
5924  * have value stored outside.
5925  * Used for xattrs stored in inode and ocfs2_xattr_block.
5926  */
5927 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5928 				struct ocfs2_xattr_value_buf *vb,
5929 				struct ocfs2_xattr_header *header,
5930 				struct ocfs2_caching_info *ref_ci,
5931 				struct buffer_head *ref_root_bh,
5932 				struct ocfs2_cached_dealloc_ctxt *dealloc)
5933 {
5934 
5935 	struct ocfs2_xattr_entry *xe;
5936 	struct ocfs2_xattr_value_root *xv;
5937 	struct ocfs2_extent_tree et;
5938 	int i, ret = 0;
5939 
5940 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5941 		xe = &header->xh_entries[i];
5942 
5943 		if (ocfs2_xattr_is_local(xe))
5944 			continue;
5945 
5946 		xv = (struct ocfs2_xattr_value_root *)((void *)header +
5947 			le16_to_cpu(xe->xe_name_offset) +
5948 			OCFS2_XATTR_SIZE(xe->xe_name_len));
5949 
5950 		vb->vb_xv = xv;
5951 		ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5952 
5953 		ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5954 							ref_ci, ref_root_bh,
5955 							dealloc, NULL);
5956 		if (ret) {
5957 			mlog_errno(ret);
5958 			break;
5959 		}
5960 	}
5961 
5962 	return ret;
5963 }
5964 
5965 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5966 				struct buffer_head *fe_bh,
5967 				struct ocfs2_caching_info *ref_ci,
5968 				struct buffer_head *ref_root_bh,
5969 				struct ocfs2_cached_dealloc_ctxt *dealloc)
5970 {
5971 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5972 	struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5973 				(fe_bh->b_data + inode->i_sb->s_blocksize -
5974 				le16_to_cpu(di->i_xattr_inline_size));
5975 	struct ocfs2_xattr_value_buf vb = {
5976 		.vb_bh = fe_bh,
5977 		.vb_access = ocfs2_journal_access_di,
5978 	};
5979 
5980 	return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5981 						  ref_ci, ref_root_bh, dealloc);
5982 }
5983 
/*
 * Arguments carried through the xattr index block iteration to
 * ocfs2_xattr_bucket_value_refcount(), which forwards them to
 * ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
	/* Passed on as the 'ref_ci' argument. */
	struct ocfs2_caching_info *ref_ci;
	/* Passed on as the 'ref_root_bh' argument. */
	struct buffer_head *ref_root_bh;
	/* Passed on as the 'dealloc' argument. */
	struct ocfs2_cached_dealloc_ctxt *dealloc;
};
5989 
5990 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5991 					   struct ocfs2_xattr_bucket *bucket,
5992 					   int offset,
5993 					   struct ocfs2_xattr_value_root **xv,
5994 					   struct buffer_head **bh)
5995 {
5996 	int ret, block_off, name_offset;
5997 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5998 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5999 	void *base;
6000 
6001 	ret = ocfs2_xattr_bucket_get_name_value(sb,
6002 						bucket_xh(bucket),
6003 						offset,
6004 						&block_off,
6005 						&name_offset);
6006 	if (ret) {
6007 		mlog_errno(ret);
6008 		goto out;
6009 	}
6010 
6011 	base = bucket_block(bucket, block_off);
6012 
6013 	*xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6014 			 OCFS2_XATTR_SIZE(xe->xe_name_len));
6015 
6016 	if (bh)
6017 		*bh = bucket->bu_bhs[block_off];
6018 out:
6019 	return ret;
6020 }
6021 
6022 /*
6023  * For a given xattr bucket, refcount all the entries which
6024  * have value stored outside.
6025  */
6026 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6027 					     struct ocfs2_xattr_bucket *bucket,
6028 					     void *para)
6029 {
6030 	int i, ret = 0;
6031 	struct ocfs2_extent_tree et;
6032 	struct ocfs2_xattr_tree_value_refcount_para *ref =
6033 			(struct ocfs2_xattr_tree_value_refcount_para *)para;
6034 	struct ocfs2_xattr_header *xh =
6035 			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6036 	struct ocfs2_xattr_entry *xe;
6037 	struct ocfs2_xattr_value_buf vb = {
6038 		.vb_access = ocfs2_journal_access,
6039 	};
6040 	struct ocfs2_post_refcount refcount = {
6041 		.credits = bucket->bu_blocks,
6042 		.para = bucket,
6043 		.func = ocfs2_xattr_bucket_post_refcount,
6044 	};
6045 	struct ocfs2_post_refcount *p = NULL;
6046 
6047 	/* We only need post_refcount if we support metaecc. */
6048 	if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6049 		p = &refcount;
6050 
6051 	trace_ocfs2_xattr_bucket_value_refcount(
6052 				(unsigned long long)bucket_blkno(bucket),
6053 				le16_to_cpu(xh->xh_count));
6054 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6055 		xe = &xh->xh_entries[i];
6056 
6057 		if (ocfs2_xattr_is_local(xe))
6058 			continue;
6059 
6060 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6061 						      &vb.vb_xv, &vb.vb_bh);
6062 		if (ret) {
6063 			mlog_errno(ret);
6064 			break;
6065 		}
6066 
6067 		ocfs2_init_xattr_value_extent_tree(&et,
6068 						   INODE_CACHE(inode), &vb);
6069 
6070 		ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6071 							&et, ref->ref_ci,
6072 							ref->ref_root_bh,
6073 							ref->dealloc, p);
6074 		if (ret) {
6075 			mlog_errno(ret);
6076 			break;
6077 		}
6078 	}
6079 
6080 	return ret;
6081 
6082 }
6083 
6084 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6085 				     struct buffer_head *root_bh,
6086 				     u64 blkno, u32 cpos, u32 len, void *para)
6087 {
6088 	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6089 					   ocfs2_xattr_bucket_value_refcount,
6090 					   para);
6091 }
6092 
6093 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6094 				struct buffer_head *blk_bh,
6095 				struct ocfs2_caching_info *ref_ci,
6096 				struct buffer_head *ref_root_bh,
6097 				struct ocfs2_cached_dealloc_ctxt *dealloc)
6098 {
6099 	int ret = 0;
6100 	struct ocfs2_xattr_block *xb =
6101 				(struct ocfs2_xattr_block *)blk_bh->b_data;
6102 
6103 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6104 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6105 		struct ocfs2_xattr_value_buf vb = {
6106 			.vb_bh = blk_bh,
6107 			.vb_access = ocfs2_journal_access_xb,
6108 		};
6109 
6110 		ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6111 							 ref_ci, ref_root_bh,
6112 							 dealloc);
6113 	} else {
6114 		struct ocfs2_xattr_tree_value_refcount_para para = {
6115 			.ref_ci = ref_ci,
6116 			.ref_root_bh = ref_root_bh,
6117 			.dealloc = dealloc,
6118 		};
6119 
6120 		ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6121 						ocfs2_refcount_xattr_tree_rec,
6122 						&para);
6123 	}
6124 
6125 	return ret;
6126 }
6127 
6128 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6129 				     struct buffer_head *fe_bh,
6130 				     struct ocfs2_caching_info *ref_ci,
6131 				     struct buffer_head *ref_root_bh,
6132 				     struct ocfs2_cached_dealloc_ctxt *dealloc)
6133 {
6134 	int ret = 0;
6135 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
6136 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6137 	struct buffer_head *blk_bh = NULL;
6138 
6139 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6140 		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6141 							 ref_ci, ref_root_bh,
6142 							 dealloc);
6143 		if (ret) {
6144 			mlog_errno(ret);
6145 			goto out;
6146 		}
6147 	}
6148 
6149 	if (!di->i_xattr_loc)
6150 		goto out;
6151 
6152 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6153 				     &blk_bh);
6154 	if (ret < 0) {
6155 		mlog_errno(ret);
6156 		goto out;
6157 	}
6158 
6159 	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6160 						ref_root_bh, dealloc);
6161 	if (ret)
6162 		mlog_errno(ret);
6163 
6164 	brelse(blk_bh);
6165 out:
6166 
6167 	return ret;
6168 }
6169 
/*
 * Filter deciding whether one xattr entry is carried over by reflink.
 * A zero return makes ocfs2_reflink_xattr_header() drop the entry from
 * the new header; a nonzero return keeps it.
 */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
/*
 * Store the information we need in xattr reflink.
 * old_bh and new_bh are inode bh for the old and new inode.
 */
struct ocfs2_xattr_reflink {
	/* Source inode whose xattrs are being reflinked. */
	struct inode *old_inode;
	/* Destination inode receiving the reflinked xattrs. */
	struct inode *new_inode;
	struct buffer_head *old_bh;
	struct buffer_head *new_bh;
	/* Refcount tree context passed on to ocfs2_increase_refcount(). */
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
	struct ocfs2_cached_dealloc_ctxt *dealloc;
	/* Optional filter; when NULL every entry is reflinked. */
	should_xattr_reflinked *xattr_reflinked;
};
6185 
/*
 * Given a xattr header and xe offset,
 * return the proper xv and the corresponding bh.
 * xattr in inode, block and xattr tree have different implementations.
 *
 * @offset is the index of the entry within xh->xh_entries.
 * @ret_bh may be NULL when the caller does not need the buffer head.
 * @para is implementation-private context supplied by the caller.
 * Returns 0 on success, a negative errno otherwise.
 */
typedef int (get_xattr_value_root)(struct super_block *sb,
				   struct buffer_head *bh,
				   struct ocfs2_xattr_header *xh,
				   int offset,
				   struct ocfs2_xattr_value_root **xv,
				   struct buffer_head **ret_bh,
				   void *para);
6198 
6199 /*
6200  * Calculate all the xattr value root metadata stored in this xattr header and
6201  * credits we need if we create them from the scratch.
6202  * We use get_xattr_value_root so that all types of xattr container can use it.
6203  */
6204 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6205 					     struct buffer_head *bh,
6206 					     struct ocfs2_xattr_header *xh,
6207 					     int *metas, int *credits,
6208 					     int *num_recs,
6209 					     get_xattr_value_root *func,
6210 					     void *para)
6211 {
6212 	int i, ret = 0;
6213 	struct ocfs2_xattr_value_root *xv;
6214 	struct ocfs2_xattr_entry *xe;
6215 
6216 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6217 		xe = &xh->xh_entries[i];
6218 		if (ocfs2_xattr_is_local(xe))
6219 			continue;
6220 
6221 		ret = func(sb, bh, xh, i, &xv, NULL, para);
6222 		if (ret) {
6223 			mlog_errno(ret);
6224 			break;
6225 		}
6226 
6227 		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6228 			  le16_to_cpu(xv->xr_list.l_next_free_rec);
6229 
6230 		*credits += ocfs2_calc_extend_credits(sb,
6231 						&def_xv.xv.xr_list);
6232 
6233 		/*
6234 		 * If the value is a tree with depth > 1, We don't go deep
6235 		 * to the extent block, so just calculate a maximum record num.
6236 		 */
6237 		if (!xv->xr_list.l_tree_depth)
6238 			*num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6239 		else
6240 			*num_recs += ocfs2_clusters_for_bytes(sb,
6241 							      XATTR_SIZE_MAX);
6242 	}
6243 
6244 	return ret;
6245 }
6246 
6247 /* Used by xattr inode and block to return the right xv and buffer_head. */
6248 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6249 				      struct buffer_head *bh,
6250 				      struct ocfs2_xattr_header *xh,
6251 				      int offset,
6252 				      struct ocfs2_xattr_value_root **xv,
6253 				      struct buffer_head **ret_bh,
6254 				      void *para)
6255 {
6256 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6257 
6258 	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6259 		le16_to_cpu(xe->xe_name_offset) +
6260 		OCFS2_XATTR_SIZE(xe->xe_name_len));
6261 
6262 	if (ret_bh)
6263 		*ret_bh = bh;
6264 
6265 	return 0;
6266 }
6267 
6268 /*
6269  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6270  * It is only used for inline xattr and xattr block.
6271  */
6272 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6273 					struct ocfs2_xattr_header *xh,
6274 					struct buffer_head *ref_root_bh,
6275 					int *credits,
6276 					struct ocfs2_alloc_context **meta_ac)
6277 {
6278 	int ret, meta_add = 0, num_recs = 0;
6279 	struct ocfs2_refcount_block *rb =
6280 			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
6281 
6282 	*credits = 0;
6283 
6284 	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6285 						&meta_add, credits, &num_recs,
6286 						ocfs2_get_xattr_value_root,
6287 						NULL);
6288 	if (ret) {
6289 		mlog_errno(ret);
6290 		goto out;
6291 	}
6292 
6293 	/*
6294 	 * We need to add/modify num_recs in refcount tree, so just calculate
6295 	 * an approximate number we need for refcount tree change.
6296 	 * Sometimes we need to split the tree, and after split,  half recs
6297 	 * will be moved to the new block, and a new block can only provide
6298 	 * half number of recs. So we multiple new blocks by 2.
6299 	 */
6300 	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6301 	meta_add += num_recs;
6302 	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6303 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6304 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6305 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6306 	else
6307 		*credits += 1;
6308 
6309 	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6310 	if (ret)
6311 		mlog_errno(ret);
6312 
6313 out:
6314 	return ret;
6315 }
6316 
6317 /*
6318  * Given a xattr header, reflink all the xattrs in this container.
6319  * It can be used for inode, block and bucket.
6320  *
6321  * NOTE:
6322  * Before we call this function, the caller has memcpy the xattr in
6323  * old_xh to the new_xh.
6324  *
6325  * If args.xattr_reflinked is set, call it to decide whether the xe should
6326  * be reflinked or not. If not, remove it from the new xattr header.
6327  */
6328 static int ocfs2_reflink_xattr_header(handle_t *handle,
6329 				      struct ocfs2_xattr_reflink *args,
6330 				      struct buffer_head *old_bh,
6331 				      struct ocfs2_xattr_header *xh,
6332 				      struct buffer_head *new_bh,
6333 				      struct ocfs2_xattr_header *new_xh,
6334 				      struct ocfs2_xattr_value_buf *vb,
6335 				      struct ocfs2_alloc_context *meta_ac,
6336 				      get_xattr_value_root *func,
6337 				      void *para)
6338 {
6339 	int ret = 0, i, j;
6340 	struct super_block *sb = args->old_inode->i_sb;
6341 	struct buffer_head *value_bh;
6342 	struct ocfs2_xattr_entry *xe, *last;
6343 	struct ocfs2_xattr_value_root *xv, *new_xv;
6344 	struct ocfs2_extent_tree data_et;
6345 	u32 clusters, cpos, p_cluster, num_clusters;
6346 	unsigned int ext_flags = 0;
6347 
6348 	trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
6349 					 le16_to_cpu(xh->xh_count));
6350 
6351 	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6352 	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6353 		xe = &xh->xh_entries[i];
6354 
6355 		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6356 			xe = &new_xh->xh_entries[j];
6357 
6358 			le16_add_cpu(&new_xh->xh_count, -1);
6359 			if (new_xh->xh_count) {
6360 				memmove(xe, xe + 1,
6361 					(void *)last - (void *)xe);
6362 				memset(last, 0,
6363 				       sizeof(struct ocfs2_xattr_entry));
6364 			}
6365 
6366 			/*
6367 			 * We don't want j to increase in the next round since
6368 			 * it is already moved ahead.
6369 			 */
6370 			j--;
6371 			continue;
6372 		}
6373 
6374 		if (ocfs2_xattr_is_local(xe))
6375 			continue;
6376 
6377 		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6378 		if (ret) {
6379 			mlog_errno(ret);
6380 			break;
6381 		}
6382 
6383 		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6384 		if (ret) {
6385 			mlog_errno(ret);
6386 			break;
6387 		}
6388 
6389 		/*
6390 		 * For the xattr which has l_tree_depth = 0, all the extent
6391 		 * recs have already be copied to the new xh with the
6392 		 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6393 		 * increase the refount count int the refcount tree.
6394 		 *
6395 		 * For the xattr which has l_tree_depth > 0, we need
6396 		 * to initialize it to the empty default value root,
6397 		 * and then insert the extents one by one.
6398 		 */
6399 		if (xv->xr_list.l_tree_depth) {
6400 			memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE);
6401 			vb->vb_xv = new_xv;
6402 			vb->vb_bh = value_bh;
6403 			ocfs2_init_xattr_value_extent_tree(&data_et,
6404 					INODE_CACHE(args->new_inode), vb);
6405 		}
6406 
6407 		clusters = le32_to_cpu(xv->xr_clusters);
6408 		cpos = 0;
6409 		while (cpos < clusters) {
6410 			ret = ocfs2_xattr_get_clusters(args->old_inode,
6411 						       cpos,
6412 						       &p_cluster,
6413 						       &num_clusters,
6414 						       &xv->xr_list,
6415 						       &ext_flags);
6416 			if (ret) {
6417 				mlog_errno(ret);
6418 				goto out;
6419 			}
6420 
6421 			BUG_ON(!p_cluster);
6422 
6423 			if (xv->xr_list.l_tree_depth) {
6424 				ret = ocfs2_insert_extent(handle,
6425 						&data_et, cpos,
6426 						ocfs2_clusters_to_blocks(
6427 							args->old_inode->i_sb,
6428 							p_cluster),
6429 						num_clusters, ext_flags,
6430 						meta_ac);
6431 				if (ret) {
6432 					mlog_errno(ret);
6433 					goto out;
6434 				}
6435 			}
6436 
6437 			ret = ocfs2_increase_refcount(handle, args->ref_ci,
6438 						      args->ref_root_bh,
6439 						      p_cluster, num_clusters,
6440 						      meta_ac, args->dealloc);
6441 			if (ret) {
6442 				mlog_errno(ret);
6443 				goto out;
6444 			}
6445 
6446 			cpos += num_clusters;
6447 		}
6448 	}
6449 
6450 out:
6451 	return ret;
6452 }
6453 
6454 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6455 {
6456 	int ret = 0, credits = 0;
6457 	handle_t *handle;
6458 	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6459 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6460 	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6461 	int header_off = osb->sb->s_blocksize - inline_size;
6462 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6463 					(args->old_bh->b_data + header_off);
6464 	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6465 					(args->new_bh->b_data + header_off);
6466 	struct ocfs2_alloc_context *meta_ac = NULL;
6467 	struct ocfs2_inode_info *new_oi;
6468 	struct ocfs2_dinode *new_di;
6469 	struct ocfs2_xattr_value_buf vb = {
6470 		.vb_bh = args->new_bh,
6471 		.vb_access = ocfs2_journal_access_di,
6472 	};
6473 
6474 	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6475 						  &credits, &meta_ac);
6476 	if (ret) {
6477 		mlog_errno(ret);
6478 		goto out;
6479 	}
6480 
6481 	handle = ocfs2_start_trans(osb, credits);
6482 	if (IS_ERR(handle)) {
6483 		ret = PTR_ERR(handle);
6484 		mlog_errno(ret);
6485 		goto out;
6486 	}
6487 
6488 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6489 				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6490 	if (ret) {
6491 		mlog_errno(ret);
6492 		goto out_commit;
6493 	}
6494 
6495 	memcpy(args->new_bh->b_data + header_off,
6496 	       args->old_bh->b_data + header_off, inline_size);
6497 
6498 	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6499 	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6500 
6501 	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6502 					 args->new_bh, new_xh, &vb, meta_ac,
6503 					 ocfs2_get_xattr_value_root, NULL);
6504 	if (ret) {
6505 		mlog_errno(ret);
6506 		goto out_commit;
6507 	}
6508 
6509 	new_oi = OCFS2_I(args->new_inode);
6510 	/*
6511 	 * Adjust extent record count to reserve space for extended attribute.
6512 	 * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
6513 	 */
6514 	if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
6515 	    !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
6516 		struct ocfs2_extent_list *el = &new_di->id2.i_list;
6517 		le16_add_cpu(&el->l_count, -(inline_size /
6518 					sizeof(struct ocfs2_extent_rec)));
6519 	}
6520 	spin_lock(&new_oi->ip_lock);
6521 	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6522 	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6523 	spin_unlock(&new_oi->ip_lock);
6524 
6525 	ocfs2_journal_dirty(handle, args->new_bh);
6526 
6527 out_commit:
6528 	ocfs2_commit_trans(osb, handle);
6529 
6530 out:
6531 	if (meta_ac)
6532 		ocfs2_free_alloc_context(meta_ac);
6533 	return ret;
6534 }
6535 
6536 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6537 					  struct buffer_head *fe_bh,
6538 					  struct buffer_head **ret_bh,
6539 					  int indexed)
6540 {
6541 	int ret;
6542 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6543 	struct ocfs2_xattr_set_ctxt ctxt;
6544 
6545 	memset(&ctxt, 0, sizeof(ctxt));
6546 	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6547 	if (ret < 0) {
6548 		mlog_errno(ret);
6549 		return ret;
6550 	}
6551 
6552 	ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6553 	if (IS_ERR(ctxt.handle)) {
6554 		ret = PTR_ERR(ctxt.handle);
6555 		mlog_errno(ret);
6556 		goto out;
6557 	}
6558 
6559 	trace_ocfs2_create_empty_xattr_block(
6560 				(unsigned long long)fe_bh->b_blocknr, indexed);
6561 	ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6562 				       ret_bh);
6563 	if (ret)
6564 		mlog_errno(ret);
6565 
6566 	ocfs2_commit_trans(osb, ctxt.handle);
6567 out:
6568 	ocfs2_free_alloc_context(ctxt.meta_ac);
6569 	return ret;
6570 }
6571 
6572 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6573 				     struct buffer_head *blk_bh,
6574 				     struct buffer_head *new_blk_bh)
6575 {
6576 	int ret = 0, credits = 0;
6577 	handle_t *handle;
6578 	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6579 	struct ocfs2_dinode *new_di;
6580 	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6581 	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6582 	struct ocfs2_xattr_block *xb =
6583 			(struct ocfs2_xattr_block *)blk_bh->b_data;
6584 	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6585 	struct ocfs2_xattr_block *new_xb =
6586 			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
6587 	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6588 	struct ocfs2_alloc_context *meta_ac;
6589 	struct ocfs2_xattr_value_buf vb = {
6590 		.vb_bh = new_blk_bh,
6591 		.vb_access = ocfs2_journal_access_xb,
6592 	};
6593 
6594 	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6595 						  &credits, &meta_ac);
6596 	if (ret) {
6597 		mlog_errno(ret);
6598 		return ret;
6599 	}
6600 
6601 	/* One more credits in case we need to add xattr flags in new inode. */
6602 	handle = ocfs2_start_trans(osb, credits + 1);
6603 	if (IS_ERR(handle)) {
6604 		ret = PTR_ERR(handle);
6605 		mlog_errno(ret);
6606 		goto out;
6607 	}
6608 
6609 	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6610 		ret = ocfs2_journal_access_di(handle,
6611 					      INODE_CACHE(args->new_inode),
6612 					      args->new_bh,
6613 					      OCFS2_JOURNAL_ACCESS_WRITE);
6614 		if (ret) {
6615 			mlog_errno(ret);
6616 			goto out_commit;
6617 		}
6618 	}
6619 
6620 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6621 				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6622 	if (ret) {
6623 		mlog_errno(ret);
6624 		goto out_commit;
6625 	}
6626 
6627 	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6628 	       osb->sb->s_blocksize - header_off);
6629 
6630 	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6631 					 new_blk_bh, new_xh, &vb, meta_ac,
6632 					 ocfs2_get_xattr_value_root, NULL);
6633 	if (ret) {
6634 		mlog_errno(ret);
6635 		goto out_commit;
6636 	}
6637 
6638 	ocfs2_journal_dirty(handle, new_blk_bh);
6639 
6640 	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6641 		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6642 		spin_lock(&new_oi->ip_lock);
6643 		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6644 		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6645 		spin_unlock(&new_oi->ip_lock);
6646 
6647 		ocfs2_journal_dirty(handle, args->new_bh);
6648 	}
6649 
6650 out_commit:
6651 	ocfs2_commit_trans(osb, handle);
6652 
6653 out:
6654 	ocfs2_free_alloc_context(meta_ac);
6655 	return ret;
6656 }
6657 
/*
 * Context handed to the callbacks that reflink an indexed xattr tree.
 * Filled in by ocfs2_reflink_xattr_tree().
 */
struct ocfs2_reflink_xattr_tree_args {
	/* The overall reflink request this tree walk belongs to. */
	struct ocfs2_xattr_reflink *reflink;
	/* Xattr block of the old inode. */
	struct buffer_head *old_blk_bh;
	/* Xattr block of the new inode; root of the new xattr tree. */
	struct buffer_head *new_blk_bh;
	/* Scratch buckets, re-read/re-initialized for each bucket copied. */
	struct ocfs2_xattr_bucket *old_bucket;
	struct ocfs2_xattr_bucket *new_bucket;
};
6665 
6666 /*
6667  * NOTE:
6668  * We have to handle the case that both old bucket and new bucket
6669  * will call this function to get the right ret_bh.
6670  * So The caller must give us the right bh.
6671  */
6672 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6673 					struct buffer_head *bh,
6674 					struct ocfs2_xattr_header *xh,
6675 					int offset,
6676 					struct ocfs2_xattr_value_root **xv,
6677 					struct buffer_head **ret_bh,
6678 					void *para)
6679 {
6680 	struct ocfs2_reflink_xattr_tree_args *args =
6681 			(struct ocfs2_reflink_xattr_tree_args *)para;
6682 	struct ocfs2_xattr_bucket *bucket;
6683 
6684 	if (bh == args->old_bucket->bu_bhs[0])
6685 		bucket = args->old_bucket;
6686 	else
6687 		bucket = args->new_bucket;
6688 
6689 	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6690 					       xv, ret_bh);
6691 }
6692 
/*
 * Running totals accumulated over all buckets of one xattr extent record
 * by ocfs2_calc_value_tree_metas().
 */
struct ocfs2_value_tree_metas {
	/* Metadata blocks to reserve. */
	int num_metas;
	/* Journal credits needed. */
	int credits;
	/* Extent records that will need refcounting. */
	int num_recs;
};
6698 
/*
 * get_xattr_value_root implementation for the metadata-counting pass:
 * @para is the bucket itself, so @bh and @xh are not consulted.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *owner = para;
	int status;

	status = ocfs2_get_xattr_tree_value_root(sb, owner, offset,
						 xv, ret_bh);
	return status;
}
6713 
6714 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6715 				      struct ocfs2_xattr_bucket *bucket,
6716 				      void *para)
6717 {
6718 	struct ocfs2_value_tree_metas *metas =
6719 			(struct ocfs2_value_tree_metas *)para;
6720 	struct ocfs2_xattr_header *xh =
6721 			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6722 
6723 	/* Add the credits for this bucket first. */
6724 	metas->credits += bucket->bu_blocks;
6725 	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6726 					xh, &metas->num_metas,
6727 					&metas->credits, &metas->num_recs,
6728 					ocfs2_value_tree_metas_in_bucket,
6729 					bucket);
6730 }
6731 
6732 /*
6733  * Given a xattr extent rec starting from blkno and having len clusters,
6734  * iterate all the buckets calculate how much metadata we need for reflinking
6735  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6736  */
6737 static int ocfs2_lock_reflink_xattr_rec_allocators(
6738 				struct ocfs2_reflink_xattr_tree_args *args,
6739 				struct ocfs2_extent_tree *xt_et,
6740 				u64 blkno, u32 len, int *credits,
6741 				struct ocfs2_alloc_context **meta_ac,
6742 				struct ocfs2_alloc_context **data_ac)
6743 {
6744 	int ret, num_free_extents;
6745 	struct ocfs2_value_tree_metas metas;
6746 	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6747 	struct ocfs2_refcount_block *rb;
6748 
6749 	memset(&metas, 0, sizeof(metas));
6750 
6751 	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6752 					  ocfs2_calc_value_tree_metas, &metas);
6753 	if (ret) {
6754 		mlog_errno(ret);
6755 		goto out;
6756 	}
6757 
6758 	*credits = metas.credits;
6759 
6760 	/*
6761 	 * Calculate we need for refcount tree change.
6762 	 *
6763 	 * We need to add/modify num_recs in refcount tree, so just calculate
6764 	 * an approximate number we need for refcount tree change.
6765 	 * Sometimes we need to split the tree, and after split,  half recs
6766 	 * will be moved to the new block, and a new block can only provide
6767 	 * half number of recs. So we multiple new blocks by 2.
6768 	 * In the end, we have to add credits for modifying the already
6769 	 * existed refcount block.
6770 	 */
6771 	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6772 	metas.num_recs =
6773 		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6774 		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6775 	metas.num_metas += metas.num_recs;
6776 	*credits += metas.num_recs +
6777 		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6778 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6779 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6780 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6781 	else
6782 		*credits += 1;
6783 
6784 	/* count in the xattr tree change. */
6785 	num_free_extents = ocfs2_num_free_extents(xt_et);
6786 	if (num_free_extents < 0) {
6787 		ret = num_free_extents;
6788 		mlog_errno(ret);
6789 		goto out;
6790 	}
6791 
6792 	if (num_free_extents < len)
6793 		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6794 
6795 	*credits += ocfs2_calc_extend_credits(osb->sb,
6796 					      xt_et->et_root_el);
6797 
6798 	if (metas.num_metas) {
6799 		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6800 							meta_ac);
6801 		if (ret) {
6802 			mlog_errno(ret);
6803 			goto out;
6804 		}
6805 	}
6806 
6807 	if (len) {
6808 		ret = ocfs2_reserve_clusters(osb, len, data_ac);
6809 		if (ret)
6810 			mlog_errno(ret);
6811 	}
6812 out:
6813 	if (ret) {
6814 		if (*meta_ac) {
6815 			ocfs2_free_alloc_context(*meta_ac);
6816 			*meta_ac = NULL;
6817 		}
6818 	}
6819 
6820 	return ret;
6821 }
6822 
6823 static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6824 				u64 blkno, u64 new_blkno, u32 clusters,
6825 				u32 *cpos, int num_buckets,
6826 				struct ocfs2_alloc_context *meta_ac,
6827 				struct ocfs2_alloc_context *data_ac,
6828 				struct ocfs2_reflink_xattr_tree_args *args)
6829 {
6830 	int i, j, ret = 0;
6831 	struct super_block *sb = args->reflink->old_inode->i_sb;
6832 	int bpb = args->old_bucket->bu_blocks;
6833 	struct ocfs2_xattr_value_buf vb = {
6834 		.vb_access = ocfs2_journal_access,
6835 	};
6836 
6837 	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6838 		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6839 		if (ret) {
6840 			mlog_errno(ret);
6841 			break;
6842 		}
6843 
6844 		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
6845 		if (ret) {
6846 			mlog_errno(ret);
6847 			break;
6848 		}
6849 
6850 		ret = ocfs2_xattr_bucket_journal_access(handle,
6851 						args->new_bucket,
6852 						OCFS2_JOURNAL_ACCESS_CREATE);
6853 		if (ret) {
6854 			mlog_errno(ret);
6855 			break;
6856 		}
6857 
6858 		for (j = 0; j < bpb; j++)
6859 			memcpy(bucket_block(args->new_bucket, j),
6860 			       bucket_block(args->old_bucket, j),
6861 			       sb->s_blocksize);
6862 
6863 		/*
6864 		 * Record the start cpos so that we can use it to initialize
6865 		 * our xattr tree we also set the xh_num_bucket for the new
6866 		 * bucket.
6867 		 */
6868 		if (i == 0) {
6869 			*cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6870 					    xh_entries[0].xe_name_hash);
6871 			bucket_xh(args->new_bucket)->xh_num_buckets =
6872 				cpu_to_le16(num_buckets);
6873 		}
6874 
6875 		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6876 
6877 		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6878 					args->old_bucket->bu_bhs[0],
6879 					bucket_xh(args->old_bucket),
6880 					args->new_bucket->bu_bhs[0],
6881 					bucket_xh(args->new_bucket),
6882 					&vb, meta_ac,
6883 					ocfs2_get_reflink_xattr_value_root,
6884 					args);
6885 		if (ret) {
6886 			mlog_errno(ret);
6887 			break;
6888 		}
6889 
6890 		/*
6891 		 * Re-access and dirty the bucket to calculate metaecc.
6892 		 * Because we may extend the transaction in reflink_xattr_header
6893 		 * which will let the already accessed block gone.
6894 		 */
6895 		ret = ocfs2_xattr_bucket_journal_access(handle,
6896 						args->new_bucket,
6897 						OCFS2_JOURNAL_ACCESS_WRITE);
6898 		if (ret) {
6899 			mlog_errno(ret);
6900 			break;
6901 		}
6902 
6903 		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6904 
6905 		ocfs2_xattr_bucket_relse(args->old_bucket);
6906 		ocfs2_xattr_bucket_relse(args->new_bucket);
6907 	}
6908 
6909 	ocfs2_xattr_bucket_relse(args->old_bucket);
6910 	ocfs2_xattr_bucket_relse(args->new_bucket);
6911 	return ret;
6912 }
6913 
6914 static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6915 				struct inode *inode,
6916 				struct ocfs2_reflink_xattr_tree_args *args,
6917 				struct ocfs2_extent_tree *et,
6918 				struct ocfs2_alloc_context *meta_ac,
6919 				struct ocfs2_alloc_context *data_ac,
6920 				u64 blkno, u32 cpos, u32 len)
6921 {
6922 	int ret, first_inserted = 0;
6923 	u32 p_cluster, num_clusters, reflink_cpos = 0;
6924 	u64 new_blkno;
6925 	unsigned int num_buckets, reflink_buckets;
6926 	unsigned int bpc =
6927 		ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6928 
6929 	ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6930 	if (ret) {
6931 		mlog_errno(ret);
6932 		goto out;
6933 	}
6934 	num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6935 	ocfs2_xattr_bucket_relse(args->old_bucket);
6936 
6937 	while (len && num_buckets) {
6938 		ret = ocfs2_claim_clusters(handle, data_ac,
6939 					   1, &p_cluster, &num_clusters);
6940 		if (ret) {
6941 			mlog_errno(ret);
6942 			goto out;
6943 		}
6944 
6945 		new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6946 		reflink_buckets = min(num_buckets, bpc * num_clusters);
6947 
6948 		ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6949 						 new_blkno, num_clusters,
6950 						 &reflink_cpos, reflink_buckets,
6951 						 meta_ac, data_ac, args);
6952 		if (ret) {
6953 			mlog_errno(ret);
6954 			goto out;
6955 		}
6956 
6957 		/*
6958 		 * For the 1st allocated cluster, we make it use the same cpos
6959 		 * so that the xattr tree looks the same as the original one
6960 		 * in the most case.
6961 		 */
6962 		if (!first_inserted) {
6963 			reflink_cpos = cpos;
6964 			first_inserted = 1;
6965 		}
6966 		ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6967 					  num_clusters, 0, meta_ac);
6968 		if (ret)
6969 			mlog_errno(ret);
6970 
6971 		trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
6972 						  num_clusters, reflink_cpos);
6973 
6974 		len -= num_clusters;
6975 		blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6976 		num_buckets -= reflink_buckets;
6977 	}
6978 out:
6979 	return ret;
6980 }
6981 
6982 /*
6983  * Create the same xattr extent record in the new inode's xattr tree.
6984  */
6985 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6986 				   struct buffer_head *root_bh,
6987 				   u64 blkno,
6988 				   u32 cpos,
6989 				   u32 len,
6990 				   void *para)
6991 {
6992 	int ret, credits = 0;
6993 	handle_t *handle;
6994 	struct ocfs2_reflink_xattr_tree_args *args =
6995 			(struct ocfs2_reflink_xattr_tree_args *)para;
6996 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6997 	struct ocfs2_alloc_context *meta_ac = NULL;
6998 	struct ocfs2_alloc_context *data_ac = NULL;
6999 	struct ocfs2_extent_tree et;
7000 
7001 	trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
7002 
7003 	ocfs2_init_xattr_tree_extent_tree(&et,
7004 					  INODE_CACHE(args->reflink->new_inode),
7005 					  args->new_blk_bh);
7006 
7007 	ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
7008 						      len, &credits,
7009 						      &meta_ac, &data_ac);
7010 	if (ret) {
7011 		mlog_errno(ret);
7012 		goto out;
7013 	}
7014 
7015 	handle = ocfs2_start_trans(osb, credits);
7016 	if (IS_ERR(handle)) {
7017 		ret = PTR_ERR(handle);
7018 		mlog_errno(ret);
7019 		goto out;
7020 	}
7021 
7022 	ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
7023 					  meta_ac, data_ac,
7024 					  blkno, cpos, len);
7025 	if (ret)
7026 		mlog_errno(ret);
7027 
7028 	ocfs2_commit_trans(osb, handle);
7029 
7030 out:
7031 	if (meta_ac)
7032 		ocfs2_free_alloc_context(meta_ac);
7033 	if (data_ac)
7034 		ocfs2_free_alloc_context(data_ac);
7035 	return ret;
7036 }
7037 
7038 /*
7039  * Create reflinked xattr buckets.
7040  * We will add bucket one by one, and refcount all the xattrs in the bucket
7041  * if they are stored outside.
7042  */
7043 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7044 				    struct buffer_head *blk_bh,
7045 				    struct buffer_head *new_blk_bh)
7046 {
7047 	int ret;
7048 	struct ocfs2_reflink_xattr_tree_args para;
7049 
7050 	memset(&para, 0, sizeof(para));
7051 	para.reflink = args;
7052 	para.old_blk_bh = blk_bh;
7053 	para.new_blk_bh = new_blk_bh;
7054 
7055 	para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7056 	if (!para.old_bucket) {
7057 		mlog_errno(-ENOMEM);
7058 		return -ENOMEM;
7059 	}
7060 
7061 	para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7062 	if (!para.new_bucket) {
7063 		ret = -ENOMEM;
7064 		mlog_errno(ret);
7065 		goto out;
7066 	}
7067 
7068 	ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7069 					      ocfs2_reflink_xattr_rec,
7070 					      &para);
7071 	if (ret)
7072 		mlog_errno(ret);
7073 
7074 out:
7075 	ocfs2_xattr_bucket_free(para.old_bucket);
7076 	ocfs2_xattr_bucket_free(para.new_bucket);
7077 	return ret;
7078 }
7079 
7080 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7081 					struct buffer_head *blk_bh)
7082 {
7083 	int ret, indexed = 0;
7084 	struct buffer_head *new_blk_bh = NULL;
7085 	struct ocfs2_xattr_block *xb =
7086 			(struct ocfs2_xattr_block *)blk_bh->b_data;
7087 
7088 
7089 	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7090 		indexed = 1;
7091 
7092 	ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7093 					     &new_blk_bh, indexed);
7094 	if (ret) {
7095 		mlog_errno(ret);
7096 		goto out;
7097 	}
7098 
7099 	if (!indexed)
7100 		ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7101 	else
7102 		ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7103 	if (ret)
7104 		mlog_errno(ret);
7105 
7106 out:
7107 	brelse(new_blk_bh);
7108 	return ret;
7109 }
7110 
7111 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7112 {
7113 	int type = ocfs2_xattr_get_type(xe);
7114 
7115 	return type != OCFS2_XATTR_INDEX_SECURITY &&
7116 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7117 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7118 }
7119 
7120 int ocfs2_reflink_xattrs(struct inode *old_inode,
7121 			 struct buffer_head *old_bh,
7122 			 struct inode *new_inode,
7123 			 struct buffer_head *new_bh,
7124 			 bool preserve_security)
7125 {
7126 	int ret;
7127 	struct ocfs2_xattr_reflink args;
7128 	struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7129 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7130 	struct buffer_head *blk_bh = NULL;
7131 	struct ocfs2_cached_dealloc_ctxt dealloc;
7132 	struct ocfs2_refcount_tree *ref_tree;
7133 	struct buffer_head *ref_root_bh = NULL;
7134 
7135 	ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7136 				       le64_to_cpu(di->i_refcount_loc),
7137 				       1, &ref_tree, &ref_root_bh);
7138 	if (ret) {
7139 		mlog_errno(ret);
7140 		goto out;
7141 	}
7142 
7143 	ocfs2_init_dealloc_ctxt(&dealloc);
7144 
7145 	args.old_inode = old_inode;
7146 	args.new_inode = new_inode;
7147 	args.old_bh = old_bh;
7148 	args.new_bh = new_bh;
7149 	args.ref_ci = &ref_tree->rf_ci;
7150 	args.ref_root_bh = ref_root_bh;
7151 	args.dealloc = &dealloc;
7152 	if (preserve_security)
7153 		args.xattr_reflinked = NULL;
7154 	else
7155 		args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7156 
7157 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7158 		ret = ocfs2_reflink_xattr_inline(&args);
7159 		if (ret) {
7160 			mlog_errno(ret);
7161 			goto out_unlock;
7162 		}
7163 	}
7164 
7165 	if (!di->i_xattr_loc)
7166 		goto out_unlock;
7167 
7168 	ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7169 				     &blk_bh);
7170 	if (ret < 0) {
7171 		mlog_errno(ret);
7172 		goto out_unlock;
7173 	}
7174 
7175 	ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7176 	if (ret)
7177 		mlog_errno(ret);
7178 
7179 	brelse(blk_bh);
7180 
7181 out_unlock:
7182 	ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7183 				   ref_tree, 1);
7184 	brelse(ref_root_bh);
7185 
7186 	if (ocfs2_dealloc_has_cluster(&dealloc)) {
7187 		ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7188 		ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7189 	}
7190 
7191 out:
7192 	return ret;
7193 }
7194 
7195 /*
7196  * Initialize security and acl for a already created inode.
7197  * Used for reflink a non-preserve-security file.
7198  *
7199  * It uses common api like ocfs2_xattr_set, so the caller
7200  * must not hold any lock expect i_mutex.
7201  */
7202 int ocfs2_init_security_and_acl(struct inode *dir,
7203 				struct inode *inode,
7204 				const struct qstr *qstr)
7205 {
7206 	int ret = 0;
7207 	struct buffer_head *dir_bh = NULL;
7208 
7209 	ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7210 	if (ret) {
7211 		mlog_errno(ret);
7212 		goto leave;
7213 	}
7214 
7215 	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7216 	if (ret) {
7217 		mlog_errno(ret);
7218 		goto leave;
7219 	}
7220 	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7221 	if (ret)
7222 		mlog_errno(ret);
7223 
7224 	ocfs2_inode_unlock(dir, 0);
7225 	brelse(dir_bh);
7226 leave:
7227 	return ret;
7228 }
7229 
7230 /*
7231  * 'security' attributes support
7232  */
7233 static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
7234 				    struct dentry *unused, struct inode *inode,
7235 				    const char *name, void *buffer, size_t size)
7236 {
7237 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY,
7238 			       name, buffer, size);
7239 }
7240 
7241 static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
7242 				    struct dentry *unused, struct inode *inode,
7243 				    const char *name, const void *value,
7244 				    size_t size, int flags)
7245 {
7246 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7247 			       name, value, size, flags);
7248 }
7249 
7250 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
7251 		     void *fs_info)
7252 {
7253 	const struct xattr *xattr;
7254 	int err = 0;
7255 
7256 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
7257 		err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7258 				      xattr->name, xattr->value,
7259 				      xattr->value_len, XATTR_CREATE);
7260 		if (err)
7261 			break;
7262 	}
7263 	return err;
7264 }
7265 
7266 int ocfs2_init_security_get(struct inode *inode,
7267 			    struct inode *dir,
7268 			    const struct qstr *qstr,
7269 			    struct ocfs2_security_xattr_info *si)
7270 {
7271 	/* check whether ocfs2 support feature xattr */
7272 	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7273 		return -EOPNOTSUPP;
7274 	if (si)
7275 		return security_old_inode_init_security(inode, dir, qstr,
7276 							&si->name, &si->value,
7277 							&si->value_len);
7278 
7279 	return security_inode_init_security(inode, dir, qstr,
7280 					    &ocfs2_initxattrs, NULL);
7281 }
7282 
7283 int ocfs2_init_security_set(handle_t *handle,
7284 			    struct inode *inode,
7285 			    struct buffer_head *di_bh,
7286 			    struct ocfs2_security_xattr_info *si,
7287 			    struct ocfs2_alloc_context *xattr_ac,
7288 			    struct ocfs2_alloc_context *data_ac)
7289 {
7290 	return ocfs2_xattr_set_handle(handle, inode, di_bh,
7291 				     OCFS2_XATTR_INDEX_SECURITY,
7292 				     si->name, si->value, si->value_len, 0,
7293 				     xattr_ac, data_ac);
7294 }
7295 
7296 const struct xattr_handler ocfs2_xattr_security_handler = {
7297 	.prefix	= XATTR_SECURITY_PREFIX,
7298 	.get	= ocfs2_xattr_security_get,
7299 	.set	= ocfs2_xattr_security_set,
7300 };
7301 
7302 /*
7303  * 'trusted' attributes support
7304  */
7305 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
7306 				   struct dentry *unused, struct inode *inode,
7307 				   const char *name, void *buffer, size_t size)
7308 {
7309 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED,
7310 			       name, buffer, size);
7311 }
7312 
7313 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
7314 				   struct dentry *unused, struct inode *inode,
7315 				   const char *name, const void *value,
7316 				   size_t size, int flags)
7317 {
7318 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED,
7319 			       name, value, size, flags);
7320 }
7321 
7322 const struct xattr_handler ocfs2_xattr_trusted_handler = {
7323 	.prefix	= XATTR_TRUSTED_PREFIX,
7324 	.get	= ocfs2_xattr_trusted_get,
7325 	.set	= ocfs2_xattr_trusted_set,
7326 };
7327 
7328 /*
7329  * 'user' attributes support
7330  */
7331 static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
7332 				struct dentry *unused, struct inode *inode,
7333 				const char *name, void *buffer, size_t size)
7334 {
7335 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7336 
7337 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7338 		return -EOPNOTSUPP;
7339 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
7340 			       buffer, size);
7341 }
7342 
7343 static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
7344 				struct dentry *unused, struct inode *inode,
7345 				const char *name, const void *value,
7346 				size_t size, int flags)
7347 {
7348 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7349 
7350 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7351 		return -EOPNOTSUPP;
7352 
7353 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER,
7354 			       name, value, size, flags);
7355 }
7356 
7357 const struct xattr_handler ocfs2_xattr_user_handler = {
7358 	.prefix	= XATTR_USER_PREFIX,
7359 	.get	= ocfs2_xattr_user_get,
7360 	.set	= ocfs2_xattr_user_set,
7361 };
7362