xref: /linux/fs/ocfs2/xattr.c (revision 9d9c1cfec01cdbf24bd9322ed555713a20422115)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * xattr.c
4  *
5  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
6  *
7  * CREDITS:
8  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
9  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
10  */
11 
12 #include <linux/capability.h>
13 #include <linux/fs.h>
14 #include <linux/types.h>
15 #include <linux/slab.h>
16 #include <linux/highmem.h>
17 #include <linux/pagemap.h>
18 #include <linux/uio.h>
19 #include <linux/sched.h>
20 #include <linux/splice.h>
21 #include <linux/mount.h>
22 #include <linux/writeback.h>
23 #include <linux/falloc.h>
24 #include <linux/sort.h>
25 #include <linux/init.h>
26 #include <linux/module.h>
27 #include <linux/string.h>
28 #include <linux/security.h>
29 
30 #include <cluster/masklog.h>
31 
32 #include "ocfs2.h"
33 #include "alloc.h"
34 #include "blockcheck.h"
35 #include "dlmglue.h"
36 #include "file.h"
37 #include "symlink.h"
38 #include "sysfile.h"
39 #include "inode.h"
40 #include "journal.h"
41 #include "ocfs2_fs.h"
42 #include "suballoc.h"
43 #include "uptodate.h"
44 #include "buffer_head_io.h"
45 #include "super.h"
46 #include "xattr.h"
47 #include "refcounttree.h"
48 #include "acl.h"
49 #include "ocfs2_trace.h"
50 
/*
 * An xattr value root (@xv) together with one extent record (@er).
 *
 * The static_assert below checks that @er starts at the same offset as
 * the xv.xr_list.l_recs flexible array, i.e. @er occupies the first slot
 * of that record list.
 */
struct ocfs2_xattr_def_value_root {
	/* Must be last as it ends in a flexible-array member. */
	TRAILING_OVERLAP(struct ocfs2_xattr_value_root, xv, xr_list.l_recs,
		struct ocfs2_extent_rec		er;
	);
};
/* Build-time check: @er must alias the start of xv.xr_list.l_recs. */
static_assert(offsetof(struct ocfs2_xattr_def_value_root, xv.xr_list.l_recs) ==
	      offsetof(struct ocfs2_xattr_def_value_root, er));
59 
/*
 * In-memory handle for one xattr bucket: the buffer_heads that back it
 * plus the inode it belongs to.  Allocated by ocfs2_xattr_bucket_new()
 * and released by ocfs2_xattr_bucket_free().
 */
struct ocfs2_xattr_bucket {
	/* The inode these xattrs are associated with */
	struct inode *bu_inode;

	/*
	 * The actual buffers that make up the bucket.  Only the first
	 * bu_blocks slots are used; bucket_xh() takes the xattr header
	 * from slot 0.
	 */
	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

	/* How many blocks make up one bucket for this filesystem */
	int bu_blocks;
};
70 
/*
 * Context handed down through the xattr modification helpers: the
 * journal handle plus the allocation/deallocation state they share.
 */
struct ocfs2_xattr_set_ctxt {
	/* Journal handle the modifications are made under */
	handle_t *handle;
	/* Metadata allocation context passed to the extent-tree helpers */
	struct ocfs2_alloc_context *meta_ac;
	/* Data (cluster) allocation context used when extending a value */
	struct ocfs2_alloc_context *data_ac;
	/* Collects clusters released while removing extents */
	struct ocfs2_cached_dealloc_ctxt dealloc;
	/*
	 * NOTE(review): not referenced in this part of the file; presumably
	 * flags that the set operation must be aborted -- confirm at the users.
	 */
	int set_abort;
};
78 
/* Size of a value root that carries a single extent record. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Values longer than this many bytes are accounted as a value root
 * instead of their own length (see namevalue_size()).
 */
#define OCFS2_XATTR_INLINE_SIZE	80
/* Bytes subtracted from the free-space estimates below. */
#define OCFS2_XATTR_HEADER_GAP	4
/*
 * Free-space estimate for the inode body: the minimum inline xattr size
 * less the xattr header and the gap.
 */
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
/*
 * Free-space estimate for one xattr block of the filesystem that @ptr
 * (an inode) lives on: the block size less the block header, the xattr
 * header and the gap.
 */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
89 
/*
 * Default value root: everything zero except the extent list capacity,
 * which is set to one record (stored little-endian).
 */
static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};
93 
/*
 * NULL-terminated list of the xattr handlers this file exports
 * (user, trusted and security namespaces).
 */
const struct xattr_handler * const ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
	&ocfs2_xattr_trusted_handler,
	&ocfs2_xattr_security_handler,
	NULL
};
100 
/*
 * Handler lookup table indexed by OCFS2_XATTR_INDEX_*.  Slots without an
 * initializer stay NULL; ocfs2_xattr_prefix() uses this table to turn a
 * name index into its prefix string.
 */
static const struct xattr_handler * const ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
	[OCFS2_XATTR_INDEX_USER]		= &ocfs2_xattr_user_handler,
	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]	= &nop_posix_acl_access,
	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]	= &nop_posix_acl_default,
	[OCFS2_XATTR_INDEX_TRUSTED]		= &ocfs2_xattr_trusted_handler,
	[OCFS2_XATTR_INDEX_SECURITY]		= &ocfs2_xattr_security_handler,
};
108 
/*
 * Describes one extended attribute as a name/value pair in memory.
 * namevalue_size_xi() derives the storage it needs from xi_name_len
 * and xi_value_len.
 */
struct ocfs2_xattr_info {
	/* Name index; presumably one of OCFS2_XATTR_INDEX_* -- confirm */
	int		xi_name_index;
	/* Attribute name and its length in bytes */
	const char	*xi_name;
	int		xi_name_len;
	/* Attribute value and its length in bytes */
	const void	*xi_value;
	size_t		xi_value_len;
};
116 
/*
 * State of a lookup for one extended attribute.
 * NOTE(review): the meaning of base/end/here/not_found is inferred from
 * their names only; confirm against the find helpers further down.
 */
struct ocfs2_xattr_search {
	/* Buffer holding the inode block */
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh points to the block buffer head that holds the extended
	 * attribute.  When the extended attribute lives in the inode,
	 * xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	/* Header of the xattr storage being searched */
	struct ocfs2_xattr_header *header;
	/* Bucket being searched, for bucket-based storage */
	struct ocfs2_xattr_bucket *bucket;
	/* Presumably the start and end of the searched storage area */
	void *base;
	void *end;
	/* Presumably the entry found, or where it would be inserted */
	struct ocfs2_xattr_entry *here;
	/* Presumably non-zero when the attribute was not found */
	int not_found;
};
131 
/*
 * Operations on struct ocfs2_xa_loc.  Each storage type that can hold an
 * xattr entry supplies one table of these callbacks; every callback
 * receives the location it operates on.
 */
struct ocfs2_xa_loc;
struct ocfs2_xa_loc_operations {
	/*
	 * Journal functions
	 */
	int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
				  int type);
	void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);

	/*
	 * Return a pointer to the appropriate buffer in loc->xl_storage
	 * at the given offset from loc->xl_header.
	 */
	void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);

	/* Can we reuse the existing entry for the new value? */
	int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
			     struct ocfs2_xattr_info *xi);

	/* How much space is needed for the new value? */
	int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
			       struct ocfs2_xattr_info *xi);

	/*
	 * Return the offset of the first name+value pair.  This is
	 * the start of our downward-filling free space.
	 */
	int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);

	/*
	 * Remove the name+value at this location.  Do whatever is
	 * appropriate with the remaining name+value pairs.
	 */
	void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);

	/* Fill xl_entry with a new entry */
	void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);

	/* Add name+value storage to an entry */
	void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);

	/*
	 * Initialize the value buf's access and bh fields for this entry.
	 * ocfs2_xa_fill_value_buf() will handle the xv pointer.
	 */
	void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
				   struct ocfs2_xattr_value_buf *vb);
};
181 
/*
 * Describes an xattr entry location.  This is a memory structure
 * tracking the on-disk structure.  All storage-specific behaviour is
 * reached through xl_ops.
 */
struct ocfs2_xa_loc {
	/* This xattr belongs to this inode */
	struct inode *xl_inode;

	/* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
	struct ocfs2_xattr_header *xl_header;

	/* Bytes from xl_header to the end of the storage */
	int xl_size;

	/*
	 * The ocfs2_xattr_entry this location describes.  If this is
	 * NULL, this location describes the on-disk structure where it
	 * would have been.
	 */
	struct ocfs2_xattr_entry *xl_entry;

	/*
	 * Internal housekeeping
	 */

	/*
	 * Buffer(s) containing this entry.  Untyped because the concrete
	 * type depends on the storage; it is interpreted by xl_ops.
	 */
	void *xl_storage;

	/* Operations on the storage backing this location */
	const struct ocfs2_xa_loc_operations *xl_ops;
};
213 
214 /*
215  * Convenience functions to calculate how much space is needed for a
216  * given name+value pair
217  */
/*
 * Storage needed for a name of @name_len bytes plus a value of
 * @value_len bytes.  A value that fits within OCFS2_XATTR_INLINE_SIZE is
 * counted at its own (rounded) size; a larger one is counted as a value
 * root of OCFS2_XATTR_ROOT_SIZE bytes.
 */
static int namevalue_size(int name_len, uint64_t value_len)
{
	if (value_len <= OCFS2_XATTR_INLINE_SIZE)
		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);

	/* Too large to sit next to the name: only the root is stored here. */
	return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
}
225 
namevalue_size_xi(struct ocfs2_xattr_info * xi)226 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
227 {
228 	return namevalue_size(xi->xi_name_len, xi->xi_value_len);
229 }
230 
namevalue_size_xe(struct ocfs2_xattr_entry * xe)231 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
232 {
233 	u64 value_len = le64_to_cpu(xe->xe_value_size);
234 
235 	BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
236 	       ocfs2_xattr_is_local(xe));
237 	return namevalue_size(xe->xe_name_len, value_len);
238 }
239 
240 
/*
 * Forward declarations for helpers that are defined later in this file
 * but referenced before their definition.
 */

/* Bucket name/value lookup */
static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset);

/* Lookup of a named xattr in block and indexed (tree) storage */
static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs);
static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs);

/* Listing of xattr names held in an indexed block */
static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					struct buffer_head *blk_bh,
					char *buffer,
					size_t buffer_size);

/* Creation of, and insertion into, an indexed block */
static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs,
					  struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs,
					     struct ocfs2_xattr_set_ctxt *ctxt);

/*
 * Callback type for ocfs2_iterate_xattr_index_block(); @para is an
 * opaque argument passed through from the caller of the iterator.
 */
typedef int (xattr_tree_rec_func)(struct inode *inode,
				  struct buffer_head *root_bh,
				  u64 blkno, u32 cpos, u32 len, void *para);
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
					   struct buffer_head *root_bh,
					   xattr_tree_rec_func *rec_func,
					   void *para);
static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
					struct ocfs2_xattr_bucket *bucket,
					void *para);
/* Has the xattr_tree_rec_func signature */
static int ocfs2_rm_xattr_cluster(struct inode *inode,
				  struct buffer_head *root_bh,
				  u64 blkno,
				  u32 cpos,
				  u32 len,
				  void *para);

static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
				  u64 src_blk, u64 last_blk, u64 to_blk,
				  unsigned int start_bucket,
				  u32 *first_hash);
static int ocfs2_prepare_refcount_xattr(struct inode *inode,
					struct ocfs2_dinode *di,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xis,
					struct ocfs2_xattr_search *xbs,
					struct ocfs2_refcount_tree **ref_tree,
					int *meta_need,
					int *credits);
static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
					   struct ocfs2_xattr_bucket *bucket,
					   int offset,
					   struct ocfs2_xattr_value_root **xv,
					   struct buffer_head **bh);
305 
ocfs2_xattr_buckets_per_cluster(struct ocfs2_super * osb)306 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
307 {
308 	return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
309 }
310 
ocfs2_blocks_per_xattr_bucket(struct super_block * sb)311 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
312 {
313 	return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
314 }
315 
/* Block number of the first buffer of bucket _b. */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data of the _n-th buffer of bucket _b. */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The xattr header of bucket _b, i.e. the start of its first block. */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
319 
ocfs2_xattr_bucket_new(struct inode * inode)320 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
321 {
322 	struct ocfs2_xattr_bucket *bucket;
323 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
324 
325 	BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
326 
327 	bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
328 	if (bucket) {
329 		bucket->bu_inode = inode;
330 		bucket->bu_blocks = blks;
331 	}
332 
333 	return bucket;
334 }
335 
ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket * bucket)336 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
337 {
338 	int i;
339 
340 	for (i = 0; i < bucket->bu_blocks; i++) {
341 		brelse(bucket->bu_bhs[i]);
342 		bucket->bu_bhs[i] = NULL;
343 	}
344 }
345 
ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket * bucket)346 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
347 {
348 	if (bucket) {
349 		ocfs2_xattr_bucket_relse(bucket);
350 		bucket->bu_inode = NULL;
351 		kfree(bucket);
352 	}
353 }
354 
355 /*
356  * A bucket that has never been written to disk doesn't need to be
357  * read.  We just need the buffer_heads.  Don't call this for
358  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
359  * them fully.
360  */
ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket * bucket,u64 xb_blkno,int new)361 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
362 				   u64 xb_blkno, int new)
363 {
364 	int i, rc = 0;
365 
366 	for (i = 0; i < bucket->bu_blocks; i++) {
367 		bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
368 					      xb_blkno + i);
369 		if (!bucket->bu_bhs[i]) {
370 			rc = -ENOMEM;
371 			mlog_errno(rc);
372 			break;
373 		}
374 
375 		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
376 					   bucket->bu_bhs[i])) {
377 			if (new)
378 				ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
379 							      bucket->bu_bhs[i]);
380 			else {
381 				set_buffer_uptodate(bucket->bu_bhs[i]);
382 				ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
383 							  bucket->bu_bhs[i]);
384 			}
385 		}
386 	}
387 
388 	if (rc)
389 		ocfs2_xattr_bucket_relse(bucket);
390 	return rc;
391 }
392 
393 /* Read the xattr bucket at xb_blkno */
ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket * bucket,u64 xb_blkno)394 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
395 				   u64 xb_blkno)
396 {
397 	int rc;
398 
399 	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
400 			       bucket->bu_blocks, bucket->bu_bhs, 0,
401 			       NULL);
402 	if (!rc) {
403 		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
404 		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
405 						 bucket->bu_bhs,
406 						 bucket->bu_blocks,
407 						 &bucket_xh(bucket)->xh_check);
408 		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
409 		if (rc)
410 			mlog_errno(rc);
411 	}
412 
413 	if (rc)
414 		ocfs2_xattr_bucket_relse(bucket);
415 	return rc;
416 }
417 
/*
 * Request journal access of the given @type for every buffer of
 * @bucket.  Stops at, logs and returns the first error; returns 0 when
 * all buffers were accepted.
 */
static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
					     struct ocfs2_xattr_bucket *bucket,
					     int type)
{
	int idx;

	for (idx = 0; idx < bucket->bu_blocks; idx++) {
		int err = ocfs2_journal_access(handle,
					       INODE_CACHE(bucket->bu_inode),
					       bucket->bu_bhs[idx], type);

		if (err) {
			mlog_errno(err);
			return err;
		}
	}

	return 0;
}
436 
/*
 * Recompute the metadata ECC of @bucket (under osb_xattr_lock, using the
 * xh_check field of the bucket header) and then mark each of its
 * buffers dirty in the journal.
 */
static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
					     struct ocfs2_xattr_bucket *bucket)
{
	struct super_block *sb = bucket->bu_inode->i_sb;
	struct ocfs2_super *osb = OCFS2_SB(sb);
	int idx;

	spin_lock(&osb->osb_xattr_lock);
	ocfs2_compute_meta_ecc_bhs(sb, bucket->bu_bhs, bucket->bu_blocks,
				   &bucket_xh(bucket)->xh_check);
	spin_unlock(&osb->osb_xattr_lock);

	for (idx = 0; idx < bucket->bu_blocks; idx++) {
		struct buffer_head *bh = bucket->bu_bhs[idx];

		ocfs2_journal_dirty(handle, bh);
	}
}
451 
ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket * dest,struct ocfs2_xattr_bucket * src)452 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
453 					 struct ocfs2_xattr_bucket *src)
454 {
455 	int i;
456 	int blocksize = src->bu_inode->i_sb->s_blocksize;
457 
458 	BUG_ON(dest->bu_blocks != src->bu_blocks);
459 	BUG_ON(dest->bu_inode != src->bu_inode);
460 
461 	for (i = 0; i < src->bu_blocks; i++) {
462 		memcpy(bucket_block(dest, i), bucket_block(src, i),
463 		       blocksize);
464 	}
465 }
466 
ocfs2_validate_xattr_block(struct super_block * sb,struct buffer_head * bh)467 static int ocfs2_validate_xattr_block(struct super_block *sb,
468 				      struct buffer_head *bh)
469 {
470 	int rc;
471 	struct ocfs2_xattr_block *xb =
472 		(struct ocfs2_xattr_block *)bh->b_data;
473 
474 	trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
475 
476 	BUG_ON(!buffer_uptodate(bh));
477 
478 	/*
479 	 * If the ecc fails, we return the error but otherwise
480 	 * leave the filesystem running.  We know any error is
481 	 * local to this block.
482 	 */
483 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
484 	if (rc)
485 		return rc;
486 
487 	/*
488 	 * Errors after here are fatal
489 	 */
490 
491 	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
492 		return ocfs2_error(sb,
493 				   "Extended attribute block #%llu has bad signature %.*s\n",
494 				   (unsigned long long)bh->b_blocknr, 7,
495 				   xb->xb_signature);
496 	}
497 
498 	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
499 		return ocfs2_error(sb,
500 				   "Extended attribute block #%llu has an invalid xb_blkno of %llu\n",
501 				   (unsigned long long)bh->b_blocknr,
502 				   (unsigned long long)le64_to_cpu(xb->xb_blkno));
503 	}
504 
505 	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
506 		return ocfs2_error(sb,
507 				   "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n",
508 				   (unsigned long long)bh->b_blocknr,
509 				   le32_to_cpu(xb->xb_fs_generation));
510 	}
511 
512 	return 0;
513 }
514 
/*
 * Read the xattr block at @xb_blkno for @inode, validating it with
 * ocfs2_validate_xattr_block().
 *
 * @bh may point to an existing buffer_head or to NULL.  If it points to
 * NULL and the read succeeds, the buffer_head obtained by
 * ocfs2_read_block() is stored in *@bh.  Returns the status of the read.
 */
static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
				  struct buffer_head **bh)
{
	struct buffer_head *result_bh = *bh;
	int err;

	err = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &result_bh,
			       ocfs2_validate_xattr_block);
	if (err)
		return err;

	/* The caller passed no buffer, so hand up the one we were given. */
	if (!*bh)
		*bh = result_bh;

	return 0;
}
530 
ocfs2_xattr_prefix(int name_index)531 static inline const char *ocfs2_xattr_prefix(int name_index)
532 {
533 	const struct xattr_handler *handler = NULL;
534 
535 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
536 		handler = ocfs2_xattr_handler_map[name_index];
537 	return handler ? xattr_prefix(handler) : NULL;
538 }
539 
ocfs2_xattr_name_hash(struct inode * inode,const char * name,int name_len)540 static u32 ocfs2_xattr_name_hash(struct inode *inode,
541 				 const char *name,
542 				 int name_len)
543 {
544 	/* Get hash value of uuid from super block */
545 	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
546 	int i;
547 
548 	/* hash extended attribute name */
549 	for (i = 0; i < name_len; i++) {
550 		hash = (hash << OCFS2_HASH_SHIFT) ^
551 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
552 		       *name++;
553 	}
554 
555 	return hash;
556 }
557 
/*
 * Total space one xattr takes: its name+value storage plus the
 * struct ocfs2_xattr_entry that describes it.
 */
static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
{
	int payload = namevalue_size(name_len, value_len);

	return payload + sizeof(struct ocfs2_xattr_entry);
}
563 
ocfs2_xi_entry_usage(struct ocfs2_xattr_info * xi)564 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
565 {
566 	return namevalue_size_xi(xi) +
567 		sizeof(struct ocfs2_xattr_entry);
568 }
569 
ocfs2_xe_entry_usage(struct ocfs2_xattr_entry * xe)570 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
571 {
572 	return namevalue_size_xe(xe) +
573 		sizeof(struct ocfs2_xattr_entry);
574 }
575 
ocfs2_calc_security_init(struct inode * dir,struct ocfs2_security_xattr_info * si,int * want_clusters,int * xattr_credits,struct ocfs2_alloc_context ** xattr_ac)576 int ocfs2_calc_security_init(struct inode *dir,
577 			     struct ocfs2_security_xattr_info *si,
578 			     int *want_clusters,
579 			     int *xattr_credits,
580 			     struct ocfs2_alloc_context **xattr_ac)
581 {
582 	int ret = 0;
583 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
584 	int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
585 						 si->value_len);
586 
587 	/*
588 	 * The max space of security xattr taken inline is
589 	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
590 	 * So reserve one metadata block for it is ok.
591 	 */
592 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
593 	    s_size > OCFS2_XATTR_FREE_IN_IBODY) {
594 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
595 		if (ret) {
596 			mlog_errno(ret);
597 			return ret;
598 		}
599 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
600 	}
601 
602 	/* reserve clusters for xattr value which will be set in B tree*/
603 	if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
604 		int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
605 							    si->value_len);
606 
607 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
608 							   new_clusters);
609 		*want_clusters += new_clusters;
610 	}
611 	return ret;
612 }
613 
/*
 * Estimate the resources needed for the initial xattrs (security xattr
 * and inherited ACLs) of a new inode of type @mode created under @dir.
 *
 * The size of the security xattr is taken from @si when si->enable is
 * set.  With POSIX ACLs enabled on the mount, the size of @dir's default
 * ACL is looked up under ip_xattr_sem and counted twice for a new
 * directory.
 *
 * On return *@want_meta, *@want_clusters and *@xattr_credits have been
 * increased by the estimated metadata blocks, clusters and journal
 * credits.  Returns 0, or the error from the ACL lookup (a missing ACL,
 * -ENODATA, is not an error).
 */
int ocfs2_calc_xattr_init(struct inode *dir,
			  struct buffer_head *dir_bh,
			  umode_t mode,
			  struct ocfs2_security_xattr_info *si,
			  int *want_clusters,
			  int *xattr_credits,
			  int *want_meta)
{
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	int sec_size = 0, acl_size = 0, acl_len = 0;
	int total_size, new_clusters;

	if (si->enable)
		sec_size = ocfs2_xattr_entry_real_size(strlen(si->name),
						       si->value_len);

	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
		down_read(&OCFS2_I(dir)->ip_xattr_sem);
		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
					"", NULL, 0);
		up_read(&OCFS2_I(dir)->ip_xattr_sem);

		if (acl_len > 0) {
			acl_size = ocfs2_xattr_entry_real_size(0, acl_len);
			if (S_ISDIR(mode))
				acl_size <<= 1;
		} else if (acl_len != 0 && acl_len != -ENODATA) {
			mlog_errno(acl_len);
			return acl_len;
		}
	}

	total_size = sec_size + acl_size;
	if (!total_size)
		return 0;

	/*
	 * Inline, the security xattr takes at most
	 * 256(name) + 80(value) + 16(entry) = 352 bytes and the acl xattr
	 * at most 80(value) + 16(entry) * 2(if directory) = 192 bytes.
	 * With blocksize = 512 one more cluster may be needed for an xattr
	 * bucket; otherwise one metadata block is enough for both.
	 * A new directory with inline data keeps its whole inline area
	 * for directory contents, so an external xattr block is forced.
	 */
	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
	    total_size > OCFS2_XATTR_FREE_IN_IBODY) {
		(*want_meta)++;
		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
	}

	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
	    total_size > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
		*want_clusters += 1;
		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
	}

	/*
	 * Values too large to be stored inline live outside the entry:
	 * reserve clusters and credits for each of them.
	 */
	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
							si->value_len);
		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
							   new_clusters);
		*want_clusters += new_clusters;
	}

	if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) &&
	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
		/* A directory gets both a DEFAULT and an ACCESS acl. */
		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
							   new_clusters);
		*want_clusters += new_clusters;
	}

	return 0;
}
698 
/*
 * Add @clusters_to_add clusters to the extent tree of the xattr value
 * described by @vb, using the journal handle and allocation contexts
 * carried in @ctxt.
 *
 * Clusters are added in a loop because one call to
 * ocfs2_add_clusters_in_btree() may add fewer than requested.  When it
 * asks for a restart and clusters are still missing, the transaction is
 * extended by the credits needed for another round.  A restart for
 * metadata (RESTART_META) is a BUG here.
 *
 * Returns 0 on success or a negative errno.  -ENOSPC from the allocator
 * is returned without being logged.  A failure to extend the transaction
 * is returned with its real error code.
 */
static int ocfs2_xattr_extend_allocation(struct inode *inode,
					 u32 clusters_to_add,
					 struct ocfs2_xattr_value_buf *vb,
					 struct ocfs2_xattr_set_ctxt *ctxt)
{
	int status = 0, credits;
	handle_t *handle = ctxt->handle;
	enum ocfs2_alloc_restarted why;
	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
	struct ocfs2_extent_tree et;

	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

	while (clusters_to_add) {
		trace_ocfs2_xattr_extend_allocation(clusters_to_add);

		status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
				       OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			break;
		}

		/* Remember the size so we can tell how much was really added. */
		prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
		status = ocfs2_add_clusters_in_btree(handle,
						     &et,
						     &logical_start,
						     clusters_to_add,
						     0,
						     ctxt->data_ac,
						     ctxt->meta_ac,
						     &why);
		/* -EAGAIN only means "call again"; handled via 'why' below. */
		if ((status < 0) && (status != -EAGAIN)) {
			if (status != -ENOSPC)
				mlog_errno(status);
			break;
		}

		ocfs2_journal_dirty(handle, vb->vb_bh);

		clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
					 prev_clusters;

		if (why != RESTART_NONE && clusters_to_add) {
			/*
			 * We can only fail in case the alloc file doesn't give
			 * up enough clusters.
			 */
			BUG_ON(why == RESTART_META);

			credits = ocfs2_calc_extend_credits(inode->i_sb,
							    &vb->vb_xv->xr_list);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/*
				 * Log and return the real failure rather
				 * than replacing it with -ENOMEM, so the
				 * cause is not lost.
				 */
				mlog_errno(status);
				break;
			}
		}
	}

	return status;
}
762 
/*
 * Remove @len clusters starting at logical cluster @cpos (physical
 * cluster @phys_cpos) from the extent tree of the xattr value described
 * by @vb, and update the value's cluster count.
 *
 * The freed clusters are either dropped from the refcount tree, when
 * @ext_flags has OCFS2_EXT_REFCOUNTED, or queued on ctxt->dealloc.
 *
 * Returns 0 on success or the first error encountered (logged).
 */
static int __ocfs2_remove_xattr_range(struct inode *inode,
				      struct ocfs2_xattr_value_buf *vb,
				      u32 cpos, u32 phys_cpos, u32 len,
				      unsigned int ext_flags,
				      struct ocfs2_xattr_set_ctxt *ctxt)
{
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	handle_t *handle = ctxt->handle;
	struct ocfs2_extent_tree et;
	int err;

	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

	err = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
			    OCFS2_JOURNAL_ACCESS_WRITE);
	if (err) {
		mlog_errno(err);
		return err;
	}

	err = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
				  &ctxt->dealloc);
	if (err) {
		mlog_errno(err);
		return err;
	}

	/* The extent is gone from the tree: account for it in the root. */
	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
	ocfs2_journal_dirty(handle, vb->vb_bh);

	if (ext_flags & OCFS2_EXT_REFCOUNTED) {
		err = ocfs2_decrease_refcount(inode, handle,
					ocfs2_blocks_to_clusters(inode->i_sb,
								 phys_blkno),
					len, ctxt->meta_ac, &ctxt->dealloc, 1);
	} else {
		err = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
						  phys_blkno, len);
	}
	if (err)
		mlog_errno(err);

	return err;
}
807 
/*
 * Shrink the xattr value described by @vb from @old_clusters to
 * @new_clusters clusters.  Nothing is done unless @old_clusters is
 * larger.
 *
 * The range to drop is walked extent by extent, starting at logical
 * cluster @new_clusters.  Each piece is removed with
 * __ocfs2_remove_xattr_range() and its blocks are dropped from the
 * inode's metadata cache.
 *
 * Returns 0 on success or the first error encountered (logged).
 */
static int ocfs2_xattr_shrink_size(struct inode *inode,
				   u32 old_clusters,
				   u32 new_clusters,
				   struct ocfs2_xattr_value_buf *vb,
				   struct ocfs2_xattr_set_ctxt *ctxt)
{
	unsigned int ext_flags;
	u32 remaining, cpos, phys_cpos, chunk;
	u64 first_block;
	int err = 0;

	if (old_clusters <= new_clusters)
		return 0;

	cpos = new_clusters;
	remaining = old_clusters - new_clusters;

	while (remaining) {
		err = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
					       &chunk,
					       &vb->vb_xv->xr_list, &ext_flags);
		if (err) {
			mlog_errno(err);
			break;
		}

		/* Never remove more than is left of the requested range. */
		if (chunk > remaining)
			chunk = remaining;

		err = __ocfs2_remove_xattr_range(inode, vb, cpos,
						 phys_cpos, chunk,
						 ext_flags, ctxt);
		if (err) {
			mlog_errno(err);
			break;
		}

		first_block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
						       first_block, chunk);

		cpos += chunk;
		remaining -= chunk;
	}

	return err;
}
854 
ocfs2_xattr_value_truncate(struct inode * inode,struct ocfs2_xattr_value_buf * vb,int len,struct ocfs2_xattr_set_ctxt * ctxt)855 static int ocfs2_xattr_value_truncate(struct inode *inode,
856 				      struct ocfs2_xattr_value_buf *vb,
857 				      int len,
858 				      struct ocfs2_xattr_set_ctxt *ctxt)
859 {
860 	int ret;
861 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
862 	u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
863 
864 	if (new_clusters == old_clusters)
865 		return 0;
866 
867 	if (new_clusters > old_clusters)
868 		ret = ocfs2_xattr_extend_allocation(inode,
869 						    new_clusters - old_clusters,
870 						    vb, ctxt);
871 	else
872 		ret = ocfs2_xattr_shrink_size(inode,
873 					      old_clusters, new_clusters,
874 					      vb, ctxt);
875 
876 	return ret;
877 }
878 
/*
 * Append one "<prefix><name>\0" string to a name listing.
 *
 * @buffer/@size: the output buffer; @size == 0 means the caller only
 *                wants to know the space required.
 * @result:       running total of bytes used, advanced by the length of
 *                this entry (including its NUL).
 * @type:         name index of the entry, selects the prefix.
 * @name/@name_len: the unprefixed name.
 *
 * Entries are silently skipped (0 is returned and *@result untouched)
 * when user xattrs are disabled by mount option, when POSIX ACLs are not
 * enabled on the superblock, when a trusted xattr is listed without
 * CAP_SYS_ADMIN, or when @type has no prefix.
 *
 * Returns 0, or -ERANGE if the entry does not fit in @buffer.
 */
static int ocfs2_xattr_list_entry(struct super_block *sb,
				  char *buffer, size_t size,
				  size_t *result, int type,
				  const char *name, int name_len)
{
	size_t start = *result;
	const char *prefix;
	bool visible;
	int prefix_len;
	int entry_len;
	char *dst;

	switch (type) {
	case OCFS2_XATTR_INDEX_USER:
		visible = !(OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR);
		break;
	case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
	case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
		visible = !!(sb->s_flags & SB_POSIXACL);
		break;
	case OCFS2_XATTR_INDEX_TRUSTED:
		visible = capable(CAP_SYS_ADMIN);
		break;
	default:
		visible = true;
		break;
	}
	if (!visible)
		return 0;

	prefix = ocfs2_xattr_prefix(type);
	if (!prefix)
		return 0;

	prefix_len = strlen(prefix);
	entry_len = prefix_len + name_len + 1;
	*result += entry_len;

	/* Size query only: the total has been updated, nothing to copy. */
	if (!size)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	memcpy(dst + prefix_len, name, name_len);
	dst[prefix_len + name_len] = '\0';

	return 0;
}
927 
/*
 * List the names of all entries under @header into @buffer.
 *
 * Each entry's name is located at xe_name_offset bytes from @header and
 * handed to ocfs2_xattr_list_entry().  Returns the number of bytes used
 * (or required, when @buffer_size is 0), or the first error reported for
 * an entry.
 */
static int ocfs2_xattr_list_entries(struct inode *inode,
				    struct ocfs2_xattr_header *header,
				    char *buffer, size_t buffer_size)
{
	size_t used = 0;
	int idx;

	for (idx = 0; idx < le16_to_cpu(header->xh_count); idx++) {
		struct ocfs2_xattr_entry *xe = header->xh_entries + idx;
		int xe_type = ocfs2_xattr_get_type(xe);
		const char *xe_name = (const char *)header +
				      le16_to_cpu(xe->xe_name_offset);
		int err;

		err = ocfs2_xattr_list_entry(inode->i_sb, buffer, buffer_size,
					     &used, xe_type, xe_name,
					     xe->xe_name_len);
		if (err)
			return err;
	}

	return used;
}
952 
ocfs2_has_inline_xattr_value_outside(struct inode * inode,struct ocfs2_dinode * di)953 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
954 					 struct ocfs2_dinode *di)
955 {
956 	struct ocfs2_xattr_header *xh;
957 	int i;
958 
959 	xh = (struct ocfs2_xattr_header *)
960 		 ((void *)di + inode->i_sb->s_blocksize -
961 		 le16_to_cpu(di->i_xattr_inline_size));
962 
963 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
964 		if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
965 			return 1;
966 
967 	return 0;
968 }
969 
/*
 * List the xattr names stored inline in the inode block.
 *
 * Returns 0 when OCFS2_INLINE_XATTR_FL is not set for the inode,
 * -EFSCORRUPTED when the recorded inline size or entry count cannot be
 * valid, and otherwise the result of ocfs2_xattr_list_entries().
 */
static int ocfs2_xattr_ibody_list(struct inode *inode,
				  struct ocfs2_dinode *di,
				  char *buffer,
				  size_t buffer_size)
{
	struct super_block *sb = inode->i_sb;
	struct ocfs2_xattr_header *xh;
	size_t entry_cap;
	u16 area_size, nr_entries;

	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return 0;

	area_size = le16_to_cpu(di->i_xattr_inline_size);

	/* The area must fit in the block and hold at least a header. */
	if (area_size < sizeof(struct ocfs2_xattr_header) ||
	    area_size > sb->s_blocksize) {
		ocfs2_error(sb,
			    "Invalid xattr inline size %u in inode %llu\n",
			    area_size,
			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
		return -EFSCORRUPTED;
	}

	/* The inline xattr area sits at the very end of the inode block. */
	xh = (struct ocfs2_xattr_header *)
		((void *)di + sb->s_blocksize - area_size);

	/* The entry array cannot hold more than fits after the header. */
	nr_entries = le16_to_cpu(xh->xh_count);
	entry_cap = (area_size - sizeof(struct ocfs2_xattr_header)) /
		    sizeof(struct ocfs2_xattr_entry);
	if (nr_entries > entry_cap) {
		ocfs2_error(sb,
			    "xattr entry count %u exceeds maximum %zu in inode %llu\n",
			    nr_entries, entry_cap,
			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
		return -EFSCORRUPTED;
	}

	return ocfs2_xattr_list_entries(inode, xh, buffer, buffer_size);
}
1016 
/*
 * List the xattr names stored in the external xattr block referenced by
 * di->i_xattr_loc.
 *
 * Returns 0 when the inode has no xattr block, a negative errno when the
 * block cannot be read, and otherwise the result of the listing helper
 * matching the block layout (indexed tree or plain entry list).
 */
static int ocfs2_xattr_block_list(struct inode *inode,
				  struct ocfs2_dinode *di,
				  char *buffer,
				  size_t buffer_size)
{
	struct buffer_head *xb_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int status;

	if (!di->i_xattr_loc)
		return 0;

	status = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
					&xb_bh);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	xb = (struct ocfs2_xattr_block *)xb_bh->b_data;
	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
		status = ocfs2_xattr_tree_list_index_block(inode, xb_bh,
							   buffer,
							   buffer_size);
	else
		status = ocfs2_xattr_list_entries(inode,
						  &xb->xb_attrs.xb_header,
						  buffer, buffer_size);

	brelse(xb_bh);

	return status;
}
1049 
ocfs2_listxattr(struct dentry * dentry,char * buffer,size_t size)1050 ssize_t ocfs2_listxattr(struct dentry *dentry,
1051 			char *buffer,
1052 			size_t size)
1053 {
1054 	int ret = 0, i_ret = 0, b_ret = 0;
1055 	struct buffer_head *di_bh = NULL;
1056 	struct ocfs2_dinode *di = NULL;
1057 	struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));
1058 
1059 	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1060 		return -EOPNOTSUPP;
1061 
1062 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1063 		return ret;
1064 
1065 	ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
1066 	if (ret < 0) {
1067 		mlog_errno(ret);
1068 		return ret;
1069 	}
1070 
1071 	di = (struct ocfs2_dinode *)di_bh->b_data;
1072 
1073 	down_read(&oi->ip_xattr_sem);
1074 	i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
1075 	if (i_ret < 0)
1076 		b_ret = 0;
1077 	else {
1078 		if (buffer) {
1079 			buffer += i_ret;
1080 			size -= i_ret;
1081 		}
1082 		b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
1083 					       buffer, size);
1084 		if (b_ret < 0)
1085 			i_ret = 0;
1086 	}
1087 	up_read(&oi->ip_xattr_sem);
1088 	ocfs2_inode_unlock(d_inode(dentry), 0);
1089 
1090 	brelse(di_bh);
1091 
1092 	return i_ret + b_ret;
1093 }
1094 
ocfs2_xattr_find_entry(struct inode * inode,int name_index,const char * name,struct ocfs2_xattr_search * xs)1095 static int ocfs2_xattr_find_entry(struct inode *inode, int name_index,
1096 				  const char *name,
1097 				  struct ocfs2_xattr_search *xs)
1098 {
1099 	struct ocfs2_xattr_entry *entry;
1100 	size_t name_len;
1101 	int i, name_offset, cmp = 1;
1102 
1103 	if (name == NULL)
1104 		return -EINVAL;
1105 
1106 	name_len = strlen(name);
1107 	entry = xs->here;
1108 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1109 		if ((void *)entry >= xs->end) {
1110 			ocfs2_error(inode->i_sb, "corrupted xattr entries");
1111 			return -EFSCORRUPTED;
1112 		}
1113 		cmp = name_index - ocfs2_xattr_get_type(entry);
1114 		if (!cmp)
1115 			cmp = name_len - entry->xe_name_len;
1116 		if (!cmp) {
1117 			name_offset = le16_to_cpu(entry->xe_name_offset);
1118 			if ((xs->base + name_offset + name_len) > xs->end) {
1119 				ocfs2_error(inode->i_sb,
1120 					    "corrupted xattr entries");
1121 				return -EFSCORRUPTED;
1122 			}
1123 			cmp = memcmp(name, (xs->base + name_offset), name_len);
1124 		}
1125 		if (cmp == 0)
1126 			break;
1127 		entry += 1;
1128 	}
1129 	xs->here = entry;
1130 
1131 	return cmp ? -ENODATA : 0;
1132 }
1133 
/*
 * Copy @len bytes of an xattr value stored in clusters of its own
 * (described by @xv) into @buffer.
 *
 * The value's extent list is walked cluster range by cluster range; each
 * block is read and at most one block's worth of data is copied from it.
 *
 * Returns 0 on success or the error reported by the cluster lookup or
 * block read.
 */
static int ocfs2_xattr_get_value_outside(struct inode *inode,
					 struct ocfs2_xattr_value_root *xv,
					 void *buffer,
					 size_t len)
{
	struct super_block *sb = inode->i_sb;
	struct ocfs2_extent_list *el = &xv->xr_list;
	struct buffer_head *bh = NULL;
	u32 total_clusters = le32_to_cpu(xv->xr_clusters);
	u32 blocks_per_cluster = ocfs2_clusters_to_blocks(sb, 1);
	size_t blocksize = sb->s_blocksize;
	u32 cpos, p_cluster, num_clusters;
	int status = 0;

	for (cpos = 0; cpos < total_clusters; cpos += num_clusters) {
		u64 blkno;
		int i;

		status = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
						  &num_clusters, el, NULL);
		if (status) {
			mlog_errno(status);
			return status;
		}

		blkno = ocfs2_clusters_to_blocks(sb, p_cluster);
		/* Copy the value out block by block. */
		for (i = 0; i < num_clusters * blocks_per_cluster;
		     i++, blkno++) {
			size_t chunk;

			status = ocfs2_read_block(INODE_CACHE(inode), blkno,
						  &bh, NULL);
			if (status) {
				mlog_errno(status);
				return status;
			}

			chunk = len < blocksize ? len : blocksize;
			memcpy(buffer, bh->b_data, chunk);
			buffer += chunk;
			len -= chunk;

			brelse(bh);
			bh = NULL;
			if (!len)
				break;
		}
	}

	return status;
}
1185 
/*
 * Look up the xattr (@name_index, @name) in the inode's inline xattr
 * area and, if @buffer is non-NULL, copy its value there.
 *
 * @xs: search state; xs->inode_bh must hold the inode block.  On return
 *      xs->end/header/base/here describe the inline area and the entry
 *      that was found.
 *
 * Returns the value size on success, -ENODATA if the inode has no inline
 * xattrs or the name is absent, -ERANGE if the value does not fit in
 * @buffer_size, -EFSCORRUPTED if the on-disk inline area is inconsistent,
 * or a negative errno from reading an externally stored value.
 */
static int ocfs2_xattr_ibody_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct ocfs2_xattr_value_root *xv;
	size_t size, value_off, in_body;
	u16 inline_size;
	int ret = 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return -ENODATA;

	inline_size = le16_to_cpu(di->i_xattr_inline_size);

	/*
	 * Same sanity check as ocfs2_xattr_ibody_list(): the inline area
	 * must fit inside the inode block and hold at least a header,
	 * otherwise the header pointer computed below is out of bounds.
	 */
	if (inline_size > inode->i_sb->s_blocksize ||
	    inline_size < sizeof(struct ocfs2_xattr_header)) {
		ocfs2_error(inode->i_sb,
			    "Invalid xattr inline size %u in inode %llu\n",
			    inline_size,
			    (unsigned long long)oi->ip_blkno);
		return -EFSCORRUPTED;
	}

	xs->end = (void *)di + inode->i_sb->s_blocksize;
	xs->header = (struct ocfs2_xattr_header *)(xs->end - inline_size);
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
	if (ret)
		return ret;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		if (size > buffer_size)
			return -ERANGE;

		/* The value (or its root) follows the padded name. */
		value_off = le16_to_cpu(xs->here->xe_name_offset) +
			    OCFS2_XATTR_SIZE(xs->here->xe_name_len);
		in_body = ocfs2_xattr_is_local(xs->here) ?
				size : OCFS2_XATTR_ROOT_SIZE;

		/*
		 * Whatever we are about to read from the inline area must
		 * lie inside it; written to avoid overflow in the sum.
		 */
		if (value_off > inline_size ||
		    in_body > inline_size - value_off) {
			ocfs2_error(inode->i_sb, "corrupted xattr entries");
			return -EFSCORRUPTED;
		}

		if (ocfs2_xattr_is_local(xs->here)) {
			memcpy(buffer, (void *)xs->base + value_off, size);
		} else {
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + value_off);
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				return ret;
			}
		}
	}

	return size;
}
1235 
/*
 * Look up the xattr (@name_index, @name) in the inode's external xattr
 * block and, if @buffer is non-NULL, copy its value there.
 *
 * A bucket is allocated into xs->bucket for the lookup; it is freed and
 * xs->xattr_bh is released and cleared before returning.
 *
 * Returns the value size on success, -ENODATA if the name is absent,
 * -ERANGE if the value does not fit in @buffer_size, -ENOMEM if the
 * bucket cannot be allocated, or an error from the lookup/read helpers.
 */
static int ocfs2_xattr_block_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xattr_value_root *xv;
	size_t value_len;
	int status, nv_offset, padded_name_len, entry_idx;
	int block_off;

	xs->bucket = ocfs2_xattr_bucket_new(inode);
	if (!xs->bucket) {
		status = -ENOMEM;
		mlog_errno(status);
		goto cleanup;
	}

	status = ocfs2_xattr_block_find(inode, name_index, name, xs);
	if (status) {
		mlog_errno(status);
		goto cleanup;
	}

	if (xs->not_found) {
		status = -ENODATA;
		goto cleanup;
	}

	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	value_len = le64_to_cpu(xs->here->xe_value_size);

	/* Without a buffer the caller only wants the size. */
	if (!buffer)
		goto report_size;

	if (value_len > buffer_size) {
		status = -ERANGE;
		goto cleanup;
	}

	nv_offset = le16_to_cpu(xs->here->xe_name_offset);
	padded_name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
	entry_idx = xs->here - xs->header->xh_entries;

	/*
	 * For an indexed block the name+value lives in one of the bucket's
	 * blocks; resolve which one and rebase onto it.
	 */
	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
		status = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xs->bucket),
							entry_idx,
							&block_off,
							&nv_offset);
		if (status) {
			mlog_errno(status);
			goto cleanup;
		}
		xs->base = bucket_block(xs->bucket, block_off);
	}

	if (!ocfs2_xattr_is_local(xs->here)) {
		xv = (struct ocfs2_xattr_value_root *)
			(xs->base + nv_offset + padded_name_len);
		status = ocfs2_xattr_get_value_outside(inode, xv,
						       buffer, value_len);
		if (status < 0) {
			mlog_errno(status);
			goto cleanup;
		}
	} else {
		memcpy(buffer, (void *)xs->base +
		       nv_offset + padded_name_len, value_len);
	}

report_size:
	status = value_len;
cleanup:
	ocfs2_xattr_bucket_free(xs->bucket);

	brelse(xs->xattr_bh);
	xs->xattr_bh = NULL;
	return status;
}
1312 
/*
 * Fetch an xattr value using an inode block (@di_bh) the caller already
 * holds; no locks are taken in this function.
 *
 * The inline area is searched first.  Only when that reports -ENODATA
 * and the inode references an xattr block is the block searched too.
 *
 * Returns the result of the search that produced the answer,
 * -EOPNOTSUPP if the filesystem lacks xattr support, or -ENODATA if the
 * inode has no xattrs at all.
 */
int ocfs2_xattr_get_nolock(struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};
	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};
	struct ocfs2_dinode *di;
	int status;

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return -ENODATA;

	xbs.inode_bh = di_bh;
	xis.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	status = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
				       buffer_size, &xis);
	if (status != -ENODATA || !di->i_xattr_loc)
		return status;

	return ocfs2_xattr_block_get(inode, name_index, name, buffer,
				     buffer_size, &xbs);
}
1347 
1348 /* ocfs2_xattr_get()
1349  *
1350  * Copy an extended attribute into the buffer provided.
1351  * Buffer is NULL to compute the size of buffer required.
1352  */
ocfs2_xattr_get(struct inode * inode,int name_index,const char * name,void * buffer,size_t buffer_size)1353 static int ocfs2_xattr_get(struct inode *inode,
1354 			   int name_index,
1355 			   const char *name,
1356 			   void *buffer,
1357 			   size_t buffer_size)
1358 {
1359 	int ret, had_lock;
1360 	struct buffer_head *di_bh = NULL;
1361 	struct ocfs2_lock_holder oh;
1362 
1363 	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
1364 	if (had_lock < 0) {
1365 		mlog_errno(had_lock);
1366 		return had_lock;
1367 	}
1368 	down_read(&OCFS2_I(inode)->ip_xattr_sem);
1369 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1370 				     name, buffer, buffer_size);
1371 	up_read(&OCFS2_I(inode)->ip_xattr_sem);
1372 
1373 	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
1374 
1375 	brelse(di_bh);
1376 
1377 	return ret;
1378 }
1379 
/*
 * Write @value_len bytes of @value into the clusters described by the
 * value root in @vb.
 *
 * Each target block is read, journalled for write, filled with up to one
 * block of data (the remainder of a partially filled block is zeroed)
 * and marked dirty under @handle.
 *
 * Returns 0 on success or the error from the cluster lookup, block read
 * or journal access.
 */
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   handle_t *handle,
					   struct ocfs2_xattr_value_buf *vb,
					   const void *value,
					   int value_len)
{
	struct super_block *sb = inode->i_sb;
	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
	struct buffer_head *bh = NULL;
	u16 blocksize = sb->s_blocksize;
	u32 blocks_per_cluster = ocfs2_clusters_to_blocks(sb, 1);
	u32 needed_clusters = ocfs2_clusters_for_bytes(sb, value_len);
	u32 cpos, p_cluster, num_clusters;
	unsigned int ext_flags;
	u64 blkno;
	int status = 0, i, chunk;

	/* The value tree must already be large enough for the data. */
	BUG_ON(needed_clusters > le32_to_cpu(xv->xr_clusters));

	for (cpos = 0; cpos < needed_clusters; cpos += num_clusters) {
		status = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
						  &num_clusters, &xv->xr_list,
						  &ext_flags);
		if (status) {
			mlog_errno(status);
			goto out;
		}

		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);

		blkno = ocfs2_clusters_to_blocks(sb, p_cluster);

		for (i = 0; i < num_clusters * blocks_per_cluster;
		     i++, blkno++) {
			status = ocfs2_read_block(INODE_CACHE(inode), blkno,
						  &bh, NULL);
			if (status) {
				mlog_errno(status);
				goto out;
			}

			status = ocfs2_journal_access(handle,
						      INODE_CACHE(inode),
						      bh,
						      OCFS2_JOURNAL_ACCESS_WRITE);
			if (status < 0) {
				mlog_errno(status);
				goto out;
			}

			chunk = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, chunk);
			value_len -= chunk;
			value += chunk;
			/* Zero the tail of a partially filled block. */
			if (chunk < blocksize)
				memset(bh->b_data + chunk, 0,
				       blocksize - chunk);

			ocfs2_journal_dirty(handle, bh);
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: open question - should the remaining blocks
			 * of this cluster be emptied as well?
			 */
			if (!value_len)
				break;
		}
	}
out:
	/* bh is only non-NULL here when journal access failed. */
	brelse(bh);

	return status;
}
1454 
ocfs2_xa_check_space_helper(int needed_space,int free_start,int num_entries)1455 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1456 				       int num_entries)
1457 {
1458 	int free_space;
1459 
1460 	if (!needed_space)
1461 		return 0;
1462 
1463 	free_space = free_start -
1464 		sizeof(struct ocfs2_xattr_header) -
1465 		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
1466 		OCFS2_XATTR_HEADER_GAP;
1467 	if (free_space < 0)
1468 		return -EIO;
1469 	if (free_space < needed_space)
1470 		return -ENOSPC;
1471 
1472 	return 0;
1473 }
1474 
/* Forward a journal-access request to the storage-specific handler. */
static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
				   int type)
{
	const struct ocfs2_xa_loc_operations *ops = loc->xl_ops;

	return ops->xlo_journal_access(handle, loc, type);
}
1480 
/* Forward a journal-dirty request to the storage-specific handler. */
static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
{
	const struct ocfs2_xa_loc_operations *ops = loc->xl_ops;

	ops->xlo_journal_dirty(handle, loc);
}
1485 
1486 /* Give a pointer into the storage for the given offset */
ocfs2_xa_offset_pointer(struct ocfs2_xa_loc * loc,int offset)1487 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1488 {
1489 	BUG_ON(offset >= loc->xl_size);
1490 	return loc->xl_ops->xlo_offset_pointer(loc, offset);
1491 }
1492 
1493 /*
1494  * Wipe the name+value pair and allow the storage to reclaim it.  This
1495  * must be followed by either removal of the entry or a call to
1496  * ocfs2_xa_add_namevalue().
1497  */
ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc * loc)1498 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1499 {
1500 	loc->xl_ops->xlo_wipe_namevalue(loc);
1501 }
1502 
1503 /*
1504  * Find lowest offset to a name+value pair.  This is the start of our
1505  * downward-growing free space.
1506  */
ocfs2_xa_get_free_start(struct ocfs2_xa_loc * loc)1507 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1508 {
1509 	return loc->xl_ops->xlo_get_free_start(loc);
1510 }
1511 
1512 /* Can we reuse loc->xl_entry for xi? */
ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi)1513 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1514 				    struct ocfs2_xattr_info *xi)
1515 {
1516 	return loc->xl_ops->xlo_can_reuse(loc, xi);
1517 }
1518 
1519 /* How much free space is needed to set the new value */
ocfs2_xa_check_space(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi)1520 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1521 				struct ocfs2_xattr_info *xi)
1522 {
1523 	return loc->xl_ops->xlo_check_space(loc, xi);
1524 }
1525 
/*
 * Have the storage create a new entry (left in loc->xl_entry) and give
 * it its name hash and a placeholder name offset.
 */
static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	struct ocfs2_xattr_entry *xe;

	loc->xl_ops->xlo_add_entry(loc, name_hash);
	xe = loc->xl_entry;

	/*
	 * A zero xe_name_offset on the new entry would confuse
	 * add_namevalue().  Use the size of the storage instead, so the
	 * new entry's offset can never be lower than any other entry's.
	 */
	xe->xe_name_offset = cpu_to_le16(loc->xl_size);
	xe->xe_name_hash = cpu_to_le32(name_hash);
}
1537 
ocfs2_xa_add_namevalue(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi)1538 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1539 				   struct ocfs2_xattr_info *xi)
1540 {
1541 	int size = namevalue_size_xi(xi);
1542 	int nameval_offset;
1543 	char *nameval_buf;
1544 
1545 	loc->xl_ops->xlo_add_namevalue(loc, size);
1546 	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1547 	loc->xl_entry->xe_name_len = xi->xi_name_len;
1548 	ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1549 	ocfs2_xattr_set_local(loc->xl_entry,
1550 			      xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1551 
1552 	nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1553 	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1554 	memset(nameval_buf, 0, size);
1555 	memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1556 }
1557 
ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_value_buf * vb)1558 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1559 				    struct ocfs2_xattr_value_buf *vb)
1560 {
1561 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1562 	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1563 
1564 	/* Value bufs are for value trees */
1565 	BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1566 	BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1567 	       (name_size + OCFS2_XATTR_ROOT_SIZE));
1568 
1569 	loc->xl_ops->xlo_fill_value_buf(loc, vb);
1570 	vb->vb_xv =
1571 		(struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1572 							nameval_offset +
1573 							name_size);
1574 }
1575 
/*
 * Journal access for block storage.  The storage is a buffer_head; when
 * xl_size equals the space after the xattr block header it is journalled
 * as an xattr block, otherwise as an inode block.
 */
static int ocfs2_xa_block_journal_access(handle_t *handle,
					 struct ocfs2_xa_loc *loc, int type)
{
	struct buffer_head *bh = loc->xl_storage;
	size_t xb_space = bh->b_size - offsetof(struct ocfs2_xattr_block,
						xb_attrs.xb_header);
	ocfs2_journal_access_func access;

	access = (loc->xl_size == xb_space) ? ocfs2_journal_access_xb :
					      ocfs2_journal_access_di;

	return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
}
1590 
/* Mark the buffer_head backing this block location dirty in @handle. */
static void ocfs2_xa_block_journal_dirty(handle_t *handle,
					 struct ocfs2_xa_loc *loc)
{
	struct buffer_head *storage_bh = loc->xl_storage;

	ocfs2_journal_dirty(handle, storage_bh);
}
1598 
ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc * loc,int offset)1599 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1600 					   int offset)
1601 {
1602 	return (char *)loc->xl_header + offset;
1603 }
1604 
ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi)1605 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1606 				    struct ocfs2_xattr_info *xi)
1607 {
1608 	/*
1609 	 * Block storage is strict.  If the sizes aren't exact, we will
1610 	 * remove the old one and reinsert the new.
1611 	 */
1612 	return namevalue_size_xe(loc->xl_entry) ==
1613 		namevalue_size_xi(xi);
1614 }
1615 
ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc * loc)1616 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1617 {
1618 	struct ocfs2_xattr_header *xh = loc->xl_header;
1619 	int i, count = le16_to_cpu(xh->xh_count);
1620 	int offset, free_start = loc->xl_size;
1621 
1622 	for (i = 0; i < count; i++) {
1623 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1624 		if (offset < free_start)
1625 			free_start = offset;
1626 	}
1627 
1628 	return free_start;
1629 }
1630 
ocfs2_xa_block_check_space(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi)1631 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1632 				      struct ocfs2_xattr_info *xi)
1633 {
1634 	int count = le16_to_cpu(loc->xl_header->xh_count);
1635 	int free_start = ocfs2_xa_get_free_start(loc);
1636 	int needed_space = ocfs2_xi_entry_usage(xi);
1637 
1638 	/*
1639 	 * Block storage will reclaim the original entry before inserting
1640 	 * the new value, so we only need the difference.  If the new
1641 	 * entry is smaller than the old one, we don't need anything.
1642 	 */
1643 	if (loc->xl_entry) {
1644 		/* Don't need space if we're reusing! */
1645 		if (ocfs2_xa_can_reuse_entry(loc, xi))
1646 			needed_space = 0;
1647 		else
1648 			needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1649 	}
1650 	if (needed_space < 0)
1651 		needed_space = 0;
1652 	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1653 }
1654 
1655 /*
1656  * Block storage for xattrs keeps the name+value pairs compacted.  When
1657  * we remove one, we have to shift any that preceded it towards the end.
1658  */
ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc * loc)1659 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1660 {
1661 	int i, offset;
1662 	int namevalue_offset, first_namevalue_offset, namevalue_size;
1663 	struct ocfs2_xattr_entry *entry = loc->xl_entry;
1664 	struct ocfs2_xattr_header *xh = loc->xl_header;
1665 	int count = le16_to_cpu(xh->xh_count);
1666 
1667 	namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1668 	namevalue_size = namevalue_size_xe(entry);
1669 	first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1670 
1671 	/* Shift the name+value pairs */
1672 	memmove((char *)xh + first_namevalue_offset + namevalue_size,
1673 		(char *)xh + first_namevalue_offset,
1674 		namevalue_offset - first_namevalue_offset);
1675 	memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1676 
1677 	/* Now tell xh->xh_entries about it */
1678 	for (i = 0; i < count; i++) {
1679 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1680 		if (offset <= namevalue_offset)
1681 			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1682 				     namevalue_size);
1683 	}
1684 
1685 	/*
1686 	 * Note that we don't update xh_free_start or xh_name_value_len
1687 	 * because they're not used in block-stored xattrs.
1688 	 */
1689 }
1690 
/*
 * Block storage: append a zeroed entry at the end of the entry array,
 * bump xh_count and leave the new entry in loc->xl_entry.  @name_hash is
 * not used here.
 */
static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int nr = le16_to_cpu(xh->xh_count);

	loc->xl_entry = &xh->xh_entries[nr];
	le16_add_cpu(&xh->xh_count, 1);
	memset(loc->xl_entry, 0, sizeof(*loc->xl_entry));
}
1698 
ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc * loc,int size)1699 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1700 {
1701 	int free_start = ocfs2_xa_get_free_start(loc);
1702 
1703 	loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1704 }
1705 
ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_value_buf * vb)1706 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1707 					  struct ocfs2_xattr_value_buf *vb)
1708 {
1709 	struct buffer_head *bh = loc->xl_storage;
1710 
1711 	if (loc->xl_size == (bh->b_size -
1712 			     offsetof(struct ocfs2_xattr_block,
1713 				      xb_attrs.xb_header)))
1714 		vb->vb_access = ocfs2_journal_access_xb;
1715 	else
1716 		vb->vb_access = ocfs2_journal_access_di;
1717 	vb->vb_bh = bh;
1718 }
1719 
1720 /*
1721  * Operations for xattrs stored in blocks.  This includes inline inode
1722  * storage and unindexed ocfs2_xattr_blocks.
1723  */
1724 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1725 	.xlo_journal_access	= ocfs2_xa_block_journal_access,
1726 	.xlo_journal_dirty	= ocfs2_xa_block_journal_dirty,
1727 	.xlo_offset_pointer	= ocfs2_xa_block_offset_pointer,
1728 	.xlo_check_space	= ocfs2_xa_block_check_space,
1729 	.xlo_can_reuse		= ocfs2_xa_block_can_reuse,
1730 	.xlo_get_free_start	= ocfs2_xa_block_get_free_start,
1731 	.xlo_wipe_namevalue	= ocfs2_xa_block_wipe_namevalue,
1732 	.xlo_add_entry		= ocfs2_xa_block_add_entry,
1733 	.xlo_add_namevalue	= ocfs2_xa_block_add_namevalue,
1734 	.xlo_fill_value_buf	= ocfs2_xa_block_fill_value_buf,
1735 };
1736 
/* Bucket storage: request journal access for the whole bucket. */
static int ocfs2_xa_bucket_journal_access(handle_t *handle,
					  struct ocfs2_xa_loc *loc, int type)
{
	struct ocfs2_xattr_bucket *bkt = loc->xl_storage;

	return ocfs2_xattr_bucket_journal_access(handle, bkt, type);
}
1744 
/* Bucket storage: mark the whole bucket dirty in @handle. */
static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
					  struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bkt = loc->xl_storage;

	ocfs2_xattr_bucket_journal_dirty(handle, bkt);
}
1752 
ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc * loc,int offset)1753 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1754 					    int offset)
1755 {
1756 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1757 	int block, block_offset;
1758 
1759 	/* The header is at the front of the bucket */
1760 	block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1761 	block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1762 
1763 	return bucket_block(bucket, block) + block_offset;
1764 }
1765 
ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi)1766 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1767 				     struct ocfs2_xattr_info *xi)
1768 {
1769 	return namevalue_size_xe(loc->xl_entry) >=
1770 		namevalue_size_xi(xi);
1771 }
1772 
ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc * loc)1773 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1774 {
1775 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1776 	return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1777 }
1778 
ocfs2_bucket_align_free_start(struct super_block * sb,int free_start,int size)1779 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1780 					 int free_start, int size)
1781 {
1782 	/*
1783 	 * We need to make sure that the name+value pair fits within
1784 	 * one block.
1785 	 */
1786 	if (((free_start - size) >> sb->s_blocksize_bits) !=
1787 	    ((free_start - 1) >> sb->s_blocksize_bits))
1788 		free_start -= free_start % sb->s_blocksize;
1789 
1790 	return free_start;
1791 }
1792 
ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi)1793 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1794 				       struct ocfs2_xattr_info *xi)
1795 {
1796 	int rc;
1797 	int count = le16_to_cpu(loc->xl_header->xh_count);
1798 	int free_start = ocfs2_xa_get_free_start(loc);
1799 	int needed_space = ocfs2_xi_entry_usage(xi);
1800 	int size = namevalue_size_xi(xi);
1801 	struct super_block *sb = loc->xl_inode->i_sb;
1802 
1803 	/*
1804 	 * Bucket storage does not reclaim name+value pairs it cannot
1805 	 * reuse.  They live as holes until the bucket fills, and then
1806 	 * the bucket is defragmented.  However, the bucket can reclaim
1807 	 * the ocfs2_xattr_entry.
1808 	 */
1809 	if (loc->xl_entry) {
1810 		/* Don't need space if we're reusing! */
1811 		if (ocfs2_xa_can_reuse_entry(loc, xi))
1812 			needed_space = 0;
1813 		else
1814 			needed_space -= sizeof(struct ocfs2_xattr_entry);
1815 	}
1816 	BUG_ON(needed_space < 0);
1817 
1818 	if (free_start < size) {
1819 		if (needed_space)
1820 			return -ENOSPC;
1821 	} else {
1822 		/*
1823 		 * First we check if it would fit in the first place.
1824 		 * Below, we align the free start to a block.  This may
1825 		 * slide us below the minimum gap.  By checking unaligned
1826 		 * first, we avoid that error.
1827 		 */
1828 		rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1829 						 count);
1830 		if (rc)
1831 			return rc;
1832 		free_start = ocfs2_bucket_align_free_start(sb, free_start,
1833 							   size);
1834 	}
1835 	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1836 }
1837 
ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc * loc)1838 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1839 {
1840 	le16_add_cpu(&loc->xl_header->xh_name_value_len,
1841 		     -namevalue_size_xe(loc->xl_entry));
1842 }
1843 
/*
 * Bucket storage: entries are kept sorted by name hash.  Binary-search
 * for the slot where @name_hash belongs, shift the later entries up by
 * one, bump xh_count and leave a zeroed entry in loc->xl_entry.
 */
static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int nr = le16_to_cpu(xh->xh_count);
	int lo = 0, hi = nr - 1;

	while (nr && lo <= hi) {
		int mid = (lo + hi) / 2;
		u32 mid_hash = le32_to_cpu(xh->xh_entries[mid].xe_name_hash);

		if (name_hash > mid_hash) {
			lo = mid + 1;
		} else if (name_hash < mid_hash) {
			hi = mid - 1;
		} else {
			/* Equal hash: insert right at this position. */
			lo = mid;
			break;
		}
	}

	/* Open a gap unless the new entry goes at the very end. */
	if (lo != nr)
		memmove(&xh->xh_entries[lo + 1],
			&xh->xh_entries[lo],
			((nr - lo) * sizeof(struct ocfs2_xattr_entry)));

	le16_add_cpu(&xh->xh_count, 1);
	loc->xl_entry = &xh->xh_entries[lo];
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}
1878 
ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc * loc,int size)1879 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1880 {
1881 	int free_start = ocfs2_xa_get_free_start(loc);
1882 	struct ocfs2_xattr_header *xh = loc->xl_header;
1883 	struct super_block *sb = loc->xl_inode->i_sb;
1884 	int nameval_offset;
1885 
1886 	free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1887 	nameval_offset = free_start - size;
1888 	loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1889 	xh->xh_free_start = cpu_to_le16(nameval_offset);
1890 	le16_add_cpu(&xh->xh_name_value_len, size);
1891 
1892 }
1893 
ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_value_buf * vb)1894 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1895 					   struct ocfs2_xattr_value_buf *vb)
1896 {
1897 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1898 	struct super_block *sb = loc->xl_inode->i_sb;
1899 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1900 	int size = namevalue_size_xe(loc->xl_entry);
1901 	int block_offset = nameval_offset >> sb->s_blocksize_bits;
1902 
1903 	/* Values are not allowed to straddle block boundaries */
1904 	BUG_ON(block_offset !=
1905 	       ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1906 	/* We expect the bucket to be filled in */
1907 	BUG_ON(!bucket->bu_bhs[block_offset]);
1908 
1909 	vb->vb_access = ocfs2_journal_access;
1910 	vb->vb_bh = bucket->bu_bhs[block_offset];
1911 }
1912 
1913 /* Operations for xattrs stored in buckets. */
1914 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1915 	.xlo_journal_access	= ocfs2_xa_bucket_journal_access,
1916 	.xlo_journal_dirty	= ocfs2_xa_bucket_journal_dirty,
1917 	.xlo_offset_pointer	= ocfs2_xa_bucket_offset_pointer,
1918 	.xlo_check_space	= ocfs2_xa_bucket_check_space,
1919 	.xlo_can_reuse		= ocfs2_xa_bucket_can_reuse,
1920 	.xlo_get_free_start	= ocfs2_xa_bucket_get_free_start,
1921 	.xlo_wipe_namevalue	= ocfs2_xa_bucket_wipe_namevalue,
1922 	.xlo_add_entry		= ocfs2_xa_bucket_add_entry,
1923 	.xlo_add_namevalue	= ocfs2_xa_bucket_add_namevalue,
1924 	.xlo_fill_value_buf	= ocfs2_xa_bucket_fill_value_buf,
1925 };
1926 
ocfs2_xa_value_clusters(struct ocfs2_xa_loc * loc)1927 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
1928 {
1929 	struct ocfs2_xattr_value_buf vb;
1930 
1931 	if (ocfs2_xattr_is_local(loc->xl_entry))
1932 		return 0;
1933 
1934 	ocfs2_xa_fill_value_buf(loc, &vb);
1935 	return le32_to_cpu(vb.vb_xv->xr_clusters);
1936 }
1937 
/*
 * Resize the external value of the current entry to @bytes and then
 * re-establish journal write access on the whole location.
 *
 * Returns the truncate error if there was one, otherwise the result of
 * the journal access call.
 */
static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
				   struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct ocfs2_xattr_value_buf vb;
	int trunc_rc, access_rc;

	ocfs2_xa_fill_value_buf(loc, &vb);
	trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, ctxt);

	/*
	 * Our caller already did ocfs2_xa_journal_access() on the loc, but
	 * the truncate path calls ocfs2_extend_trans(), which may commit
	 * that transaction and start a fresh one.  For a bucket this could
	 * leave only vb->vb_bh prepared for journaling while the caller
	 * still intends to dirty the entire bucket.  Redo the access here.
	 * This is done even when truncate failed, because the failure may
	 * have happened after the extend was committed.
	 */
	access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
					    OCFS2_JOURNAL_ACCESS_WRITE);

	/* A truncate failure wins over a journal access failure. */
	if (trunc_rc)
		return trunc_rc;

	return access_rc;
}
1964 
ocfs2_xa_remove_entry(struct ocfs2_xa_loc * loc)1965 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1966 {
1967 	int index, count;
1968 	struct ocfs2_xattr_header *xh = loc->xl_header;
1969 	struct ocfs2_xattr_entry *entry = loc->xl_entry;
1970 
1971 	ocfs2_xa_wipe_namevalue(loc);
1972 	loc->xl_entry = NULL;
1973 
1974 	le16_add_cpu(&xh->xh_count, -1);
1975 	count = le16_to_cpu(xh->xh_count);
1976 
1977 	/*
1978 	 * Only zero out the entry if there are more remaining.  This is
1979 	 * important for an empty bucket, as it keeps track of the
1980 	 * bucket's hash value.  It doesn't hurt empty block storage.
1981 	 */
1982 	if (count) {
1983 		index = ((char *)entry - (char *)&xh->xh_entries) /
1984 			sizeof(struct ocfs2_xattr_entry);
1985 		memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1986 			(count - index) * sizeof(struct ocfs2_xattr_entry));
1987 		memset(&xh->xh_entries[count], 0,
1988 		       sizeof(struct ocfs2_xattr_entry));
1989 	}
1990 }
1991 
1992 /*
1993  * If we have a problem adjusting the size of an external value during
1994  * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1995  * in an intermediate state.  For example, the value may be partially
1996  * truncated.
1997  *
1998  * If the value tree hasn't changed, the extend/truncate went nowhere.
1999  * We have nothing to do.  The caller can treat it as a straight error.
2000  *
2001  * If the value tree got partially truncated, we now have a corrupted
2002  * extended attribute.  We're going to wipe its entry and leak the
2003  * clusters.  Better to leak some storage than leave a corrupt entry.
2004  *
2005  * If the value tree grew, it obviously didn't grow enough for the
2006  * new entry.  We're not going to try and reclaim those clusters either.
2007  * If there was already an external value there (orig_clusters != 0),
2008  * the new clusters are attached safely and we can just leave the old
2009  * value in place.  If there was no external value there, we remove
2010  * the entry.
2011  *
2012  * This way, the xattr block we store in the journal will be consistent.
2013  * If the size change broke because of the journal, no changes will hit
2014  * disk anyway.
2015  */
ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc * loc,const char * what,unsigned int orig_clusters)2016 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
2017 					    const char *what,
2018 					    unsigned int orig_clusters)
2019 {
2020 	unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
2021 	char *nameval_buf = ocfs2_xa_offset_pointer(loc,
2022 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2023 
2024 	if (new_clusters < orig_clusters) {
2025 		mlog(ML_ERROR,
2026 		     "Partial truncate while %s xattr %.*s.  Leaking "
2027 		     "%u clusters and removing the entry\n",
2028 		     what, loc->xl_entry->xe_name_len, nameval_buf,
2029 		     orig_clusters - new_clusters);
2030 		ocfs2_xa_remove_entry(loc);
2031 	} else if (!orig_clusters) {
2032 		mlog(ML_ERROR,
2033 		     "Unable to allocate an external value for xattr "
2034 		     "%.*s safely.  Leaking %u clusters and removing the "
2035 		     "entry\n",
2036 		     loc->xl_entry->xe_name_len, nameval_buf,
2037 		     new_clusters - orig_clusters);
2038 		ocfs2_xa_remove_entry(loc);
2039 	} else if (new_clusters > orig_clusters)
2040 		mlog(ML_ERROR,
2041 		     "Unable to grow xattr %.*s safely.  %u new clusters "
2042 		     "have been added, but the value will not be "
2043 		     "modified\n",
2044 		     loc->xl_entry->xe_name_len, nameval_buf,
2045 		     new_clusters - orig_clusters);
2046 }
2047 
ocfs2_xa_remove(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_set_ctxt * ctxt)2048 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
2049 			   struct ocfs2_xattr_set_ctxt *ctxt)
2050 {
2051 	int rc = 0;
2052 	unsigned int orig_clusters;
2053 
2054 	if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2055 		orig_clusters = ocfs2_xa_value_clusters(loc);
2056 		rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2057 		if (rc) {
2058 			mlog_errno(rc);
2059 			/*
2060 			 * Since this is remove, we can return 0 if
2061 			 * ocfs2_xa_cleanup_value_truncate() is going to
2062 			 * wipe the entry anyway.  So we check the
2063 			 * cluster count as well.
2064 			 */
2065 			if (orig_clusters != ocfs2_xa_value_clusters(loc))
2066 				rc = 0;
2067 			ocfs2_xa_cleanup_value_truncate(loc, "removing",
2068 							orig_clusters);
2069 			goto out;
2070 		}
2071 	}
2072 
2073 	ocfs2_xa_remove_entry(loc);
2074 
2075 out:
2076 	return rc;
2077 }
2078 
ocfs2_xa_install_value_root(struct ocfs2_xa_loc * loc)2079 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2080 {
2081 	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2082 	char *nameval_buf;
2083 
2084 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2085 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2086 	memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2087 }
2088 
2089 /*
2090  * Take an existing entry and make it ready for the new value.  This
2091  * won't allocate space, but it may free space.  It should be ready for
2092  * ocfs2_xa_prepare_entry() to finish the work.
2093  */
ocfs2_xa_reuse_entry(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_set_ctxt * ctxt)2094 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2095 				struct ocfs2_xattr_info *xi,
2096 				struct ocfs2_xattr_set_ctxt *ctxt)
2097 {
2098 	int rc = 0;
2099 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2100 	unsigned int orig_clusters;
2101 	char *nameval_buf;
2102 	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2103 	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2104 
2105 	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2106 	       name_size);
2107 
2108 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2109 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2110 	if (xe_local) {
2111 		memset(nameval_buf + name_size, 0,
2112 		       namevalue_size_xe(loc->xl_entry) - name_size);
2113 		if (!xi_local)
2114 			ocfs2_xa_install_value_root(loc);
2115 	} else {
2116 		orig_clusters = ocfs2_xa_value_clusters(loc);
2117 		if (xi_local) {
2118 			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2119 			if (rc < 0)
2120 				mlog_errno(rc);
2121 			else
2122 				memset(nameval_buf + name_size, 0,
2123 				       namevalue_size_xe(loc->xl_entry) -
2124 				       name_size);
2125 		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2126 			   xi->xi_value_len) {
2127 			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2128 						     ctxt);
2129 			if (rc < 0)
2130 				mlog_errno(rc);
2131 		}
2132 
2133 		if (rc) {
2134 			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2135 							orig_clusters);
2136 			goto out;
2137 		}
2138 	}
2139 
2140 	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2141 	ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2142 
2143 out:
2144 	return rc;
2145 }
2146 
2147 /*
2148  * Prepares loc->xl_entry to receive the new xattr.  This includes
2149  * properly setting up the name+value pair region.  If loc->xl_entry
2150  * already exists, it will take care of modifying it appropriately.
2151  *
2152  * Note that this modifies the data.  You did journal_access already,
2153  * right?
2154  */
ocfs2_xa_prepare_entry(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi,u32 name_hash,struct ocfs2_xattr_set_ctxt * ctxt)2155 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2156 				  struct ocfs2_xattr_info *xi,
2157 				  u32 name_hash,
2158 				  struct ocfs2_xattr_set_ctxt *ctxt)
2159 {
2160 	int rc = 0;
2161 	unsigned int orig_clusters;
2162 	__le64 orig_value_size = 0;
2163 
2164 	rc = ocfs2_xa_check_space(loc, xi);
2165 	if (rc)
2166 		goto out;
2167 
2168 	if (loc->xl_entry) {
2169 		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2170 			orig_value_size = loc->xl_entry->xe_value_size;
2171 			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2172 			if (rc)
2173 				goto out;
2174 			goto alloc_value;
2175 		}
2176 
2177 		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2178 			orig_clusters = ocfs2_xa_value_clusters(loc);
2179 			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2180 			if (rc) {
2181 				mlog_errno(rc);
2182 				ocfs2_xa_cleanup_value_truncate(loc,
2183 								"overwriting",
2184 								orig_clusters);
2185 				goto out;
2186 			}
2187 		}
2188 		ocfs2_xa_wipe_namevalue(loc);
2189 	} else
2190 		ocfs2_xa_add_entry(loc, name_hash);
2191 
2192 	/*
2193 	 * If we get here, we have a blank entry.  Fill it.  We grow our
2194 	 * name+value pair back from the end.
2195 	 */
2196 	ocfs2_xa_add_namevalue(loc, xi);
2197 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2198 		ocfs2_xa_install_value_root(loc);
2199 
2200 alloc_value:
2201 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2202 		orig_clusters = ocfs2_xa_value_clusters(loc);
2203 		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2204 		if (rc < 0) {
2205 			ctxt->set_abort = 1;
2206 			ocfs2_xa_cleanup_value_truncate(loc, "growing",
2207 							orig_clusters);
2208 			/*
2209 			 * If we were growing an existing value,
2210 			 * ocfs2_xa_cleanup_value_truncate() won't remove
2211 			 * the entry. We need to restore the original value
2212 			 * size.
2213 			 */
2214 			if (loc->xl_entry) {
2215 				BUG_ON(!orig_value_size);
2216 				loc->xl_entry->xe_value_size = orig_value_size;
2217 			}
2218 			mlog_errno(rc);
2219 		}
2220 	}
2221 
2222 out:
2223 	return rc;
2224 }
2225 
2226 /*
2227  * Store the value portion of the name+value pair.  This will skip
2228  * values that are stored externally.  Their tree roots were set up
2229  * by ocfs2_xa_prepare_entry().
2230  */
ocfs2_xa_store_value(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_set_ctxt * ctxt)2231 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2232 				struct ocfs2_xattr_info *xi,
2233 				struct ocfs2_xattr_set_ctxt *ctxt)
2234 {
2235 	int rc = 0;
2236 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2237 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2238 	char *nameval_buf;
2239 	struct ocfs2_xattr_value_buf vb;
2240 
2241 	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2242 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2243 		ocfs2_xa_fill_value_buf(loc, &vb);
2244 		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2245 						     ctxt->handle, &vb,
2246 						     xi->xi_value,
2247 						     xi->xi_value_len);
2248 	} else
2249 		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2250 
2251 	return rc;
2252 }
2253 
ocfs2_xa_set(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_set_ctxt * ctxt)2254 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2255 			struct ocfs2_xattr_info *xi,
2256 			struct ocfs2_xattr_set_ctxt *ctxt)
2257 {
2258 	int ret;
2259 	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2260 					      xi->xi_name_len);
2261 
2262 	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2263 				      OCFS2_JOURNAL_ACCESS_WRITE);
2264 	if (ret) {
2265 		mlog_errno(ret);
2266 		goto out;
2267 	}
2268 
2269 	/*
2270 	 * From here on out, everything is going to modify the buffer a
2271 	 * little.  Errors are going to leave the xattr header in a
2272 	 * sane state.  Thus, even with errors we dirty the sucker.
2273 	 */
2274 
2275 	/* Don't worry, we are never called with !xi_value and !xl_entry */
2276 	if (!xi->xi_value) {
2277 		ret = ocfs2_xa_remove(loc, ctxt);
2278 		goto out_dirty;
2279 	}
2280 
2281 	ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2282 	if (ret) {
2283 		if (ret != -ENOSPC)
2284 			mlog_errno(ret);
2285 		goto out_dirty;
2286 	}
2287 
2288 	ret = ocfs2_xa_store_value(loc, xi, ctxt);
2289 	if (ret)
2290 		mlog_errno(ret);
2291 
2292 out_dirty:
2293 	ocfs2_xa_journal_dirty(ctxt->handle, loc);
2294 
2295 out:
2296 	return ret;
2297 }
2298 
ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc * loc,struct inode * inode,struct buffer_head * bh,struct ocfs2_xattr_entry * entry)2299 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2300 				     struct inode *inode,
2301 				     struct buffer_head *bh,
2302 				     struct ocfs2_xattr_entry *entry)
2303 {
2304 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2305 
2306 	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2307 
2308 	loc->xl_inode = inode;
2309 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2310 	loc->xl_storage = bh;
2311 	loc->xl_entry = entry;
2312 	loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2313 	loc->xl_header =
2314 		(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2315 					      loc->xl_size);
2316 }
2317 
ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc * loc,struct inode * inode,struct buffer_head * bh,struct ocfs2_xattr_entry * entry)2318 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2319 					  struct inode *inode,
2320 					  struct buffer_head *bh,
2321 					  struct ocfs2_xattr_entry *entry)
2322 {
2323 	struct ocfs2_xattr_block *xb =
2324 		(struct ocfs2_xattr_block *)bh->b_data;
2325 
2326 	BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2327 
2328 	loc->xl_inode = inode;
2329 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2330 	loc->xl_storage = bh;
2331 	loc->xl_header = &(xb->xb_attrs.xb_header);
2332 	loc->xl_entry = entry;
2333 	loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2334 					     xb_attrs.xb_header);
2335 }
2336 
ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc * loc,struct ocfs2_xattr_bucket * bucket,struct ocfs2_xattr_entry * entry)2337 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2338 					   struct ocfs2_xattr_bucket *bucket,
2339 					   struct ocfs2_xattr_entry *entry)
2340 {
2341 	loc->xl_inode = bucket->bu_inode;
2342 	loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2343 	loc->xl_storage = bucket;
2344 	loc->xl_header = bucket_xh(bucket);
2345 	loc->xl_entry = entry;
2346 	loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2347 }
2348 
2349 /*
2350  * In xattr remove, if it is stored outside and refcounted, we may have
2351  * the chance to split the refcount tree. So need the allocators.
2352  */
ocfs2_lock_xattr_remove_allocators(struct inode * inode,struct ocfs2_xattr_value_root * xv,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh,struct ocfs2_alloc_context ** meta_ac,int * ref_credits)2353 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2354 					struct ocfs2_xattr_value_root *xv,
2355 					struct ocfs2_caching_info *ref_ci,
2356 					struct buffer_head *ref_root_bh,
2357 					struct ocfs2_alloc_context **meta_ac,
2358 					int *ref_credits)
2359 {
2360 	int ret, meta_add = 0;
2361 	u32 p_cluster, num_clusters;
2362 	unsigned int ext_flags;
2363 
2364 	*ref_credits = 0;
2365 	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2366 				       &num_clusters,
2367 				       &xv->xr_list,
2368 				       &ext_flags);
2369 	if (ret) {
2370 		mlog_errno(ret);
2371 		goto out;
2372 	}
2373 
2374 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2375 		goto out;
2376 
2377 	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2378 						 ref_root_bh, xv,
2379 						 &meta_add, ref_credits);
2380 	if (ret) {
2381 		mlog_errno(ret);
2382 		goto out;
2383 	}
2384 
2385 	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2386 						meta_add, meta_ac);
2387 	if (ret)
2388 		mlog_errno(ret);
2389 
2390 out:
2391 	return ret;
2392 }
2393 
ocfs2_remove_value_outside(struct inode * inode,struct ocfs2_xattr_value_buf * vb,struct ocfs2_xattr_header * header,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh)2394 static int ocfs2_remove_value_outside(struct inode*inode,
2395 				      struct ocfs2_xattr_value_buf *vb,
2396 				      struct ocfs2_xattr_header *header,
2397 				      struct ocfs2_caching_info *ref_ci,
2398 				      struct buffer_head *ref_root_bh)
2399 {
2400 	int ret = 0, i, ref_credits;
2401 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2402 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2403 	void *val;
2404 
2405 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2406 
2407 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2408 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2409 
2410 		if (ocfs2_xattr_is_local(entry))
2411 			continue;
2412 
2413 		val = (void *)header +
2414 			le16_to_cpu(entry->xe_name_offset);
2415 		vb->vb_xv = (struct ocfs2_xattr_value_root *)
2416 			(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2417 
2418 		ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2419 							 ref_ci, ref_root_bh,
2420 							 &ctxt.meta_ac,
2421 							 &ref_credits);
2422 
2423 		ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2424 					ocfs2_remove_extent_credits(osb->sb));
2425 		if (IS_ERR(ctxt.handle)) {
2426 			ret = PTR_ERR(ctxt.handle);
2427 			mlog_errno(ret);
2428 			break;
2429 		}
2430 
2431 		ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2432 
2433 		ocfs2_commit_trans(osb, ctxt.handle);
2434 		if (ctxt.meta_ac) {
2435 			ocfs2_free_alloc_context(ctxt.meta_ac);
2436 			ctxt.meta_ac = NULL;
2437 		}
2438 
2439 		if (ret < 0) {
2440 			mlog_errno(ret);
2441 			break;
2442 		}
2443 
2444 	}
2445 
2446 	if (ctxt.meta_ac)
2447 		ocfs2_free_alloc_context(ctxt.meta_ac);
2448 	ocfs2_schedule_truncate_log_flush(osb, 1);
2449 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
2450 	return ret;
2451 }
2452 
ocfs2_xattr_ibody_remove(struct inode * inode,struct buffer_head * di_bh,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh)2453 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2454 				    struct buffer_head *di_bh,
2455 				    struct ocfs2_caching_info *ref_ci,
2456 				    struct buffer_head *ref_root_bh)
2457 {
2458 
2459 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2460 	struct ocfs2_xattr_header *header;
2461 	int ret;
2462 	struct ocfs2_xattr_value_buf vb = {
2463 		.vb_bh = di_bh,
2464 		.vb_access = ocfs2_journal_access_di,
2465 	};
2466 
2467 	header = (struct ocfs2_xattr_header *)
2468 		 ((void *)di + inode->i_sb->s_blocksize -
2469 		 le16_to_cpu(di->i_xattr_inline_size));
2470 
2471 	ret = ocfs2_remove_value_outside(inode, &vb, header,
2472 					 ref_ci, ref_root_bh);
2473 
2474 	return ret;
2475 }
2476 
/*
 * Argument bundle that ocfs2_xattr_block_remove() passes to
 * ocfs2_iterate_xattr_index_block() together with the
 * ocfs2_rm_xattr_cluster callback.
 */
struct ocfs2_rm_xattr_bucket_para {
	/* Copied from the ref_ci argument of ocfs2_xattr_block_remove(). */
	struct ocfs2_caching_info *ref_ci;
	/* Copied from the ref_root_bh argument of the same function. */
	struct buffer_head *ref_root_bh;
};
2481 
ocfs2_xattr_block_remove(struct inode * inode,struct buffer_head * blk_bh,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh)2482 static int ocfs2_xattr_block_remove(struct inode *inode,
2483 				    struct buffer_head *blk_bh,
2484 				    struct ocfs2_caching_info *ref_ci,
2485 				    struct buffer_head *ref_root_bh)
2486 {
2487 	struct ocfs2_xattr_block *xb;
2488 	int ret = 0;
2489 	struct ocfs2_xattr_value_buf vb = {
2490 		.vb_bh = blk_bh,
2491 		.vb_access = ocfs2_journal_access_xb,
2492 	};
2493 	struct ocfs2_rm_xattr_bucket_para args = {
2494 		.ref_ci = ref_ci,
2495 		.ref_root_bh = ref_root_bh,
2496 	};
2497 
2498 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2499 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2500 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2501 		ret = ocfs2_remove_value_outside(inode, &vb, header,
2502 						 ref_ci, ref_root_bh);
2503 	} else
2504 		ret = ocfs2_iterate_xattr_index_block(inode,
2505 						blk_bh,
2506 						ocfs2_rm_xattr_cluster,
2507 						&args);
2508 
2509 	return ret;
2510 }
2511 
/*
 * Release everything hanging off the xattr block at @block and then
 * free the block itself back to its suballocator.
 *
 * The block is read, its values are removed via
 * ocfs2_xattr_block_remove(), and its bit is freed in the
 * EXTENT_ALLOC_SYSTEM_INODE of the slot recorded in the block, under
 * that inode's locks and inside a transaction of its own.
 *
 * Returns 0 on success or a negative error.
 */
static int ocfs2_xattr_free_block(struct inode *inode,
				  u64 block,
				  struct ocfs2_caching_info *ref_ci,
				  struct buffer_head *ref_root_bh)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *blk_bh = NULL;
	struct buffer_head *alloc_bh = NULL;
	struct inode *alloc_inode;
	struct ocfs2_xattr_block *xb;
	handle_t *handle;
	u64 blkno, group_blkno;
	u16 suballoc_bit;
	int ret;

	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	blkno = le64_to_cpu(xb->xb_blkno);
	suballoc_bit = le16_to_cpu(xb->xb_suballoc_bit);

	/* Use the recorded group location if set, else derive it. */
	group_blkno = xb->xb_suballoc_loc ?
		      le64_to_cpu(xb->xb_suballoc_loc) :
		      ocfs2_which_suballoc_group(blkno, suballoc_bit);

	alloc_inode = ocfs2_get_system_file_inode(osb,
				EXTENT_ALLOC_SYSTEM_INODE,
				le16_to_cpu(xb->xb_suballoc_slot));
	if (!alloc_inode) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}
	inode_lock(alloc_inode);

	ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh,
				       suballoc_bit, group_blkno, 1);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);

out_unlock:
	ocfs2_inode_unlock(alloc_inode, 1);
	brelse(alloc_bh);
out_mutex:
	inode_unlock(alloc_inode);
	iput(alloc_inode);
out:
	brelse(blk_bh);
	return ret;
}
2586 
2587 /*
2588  * ocfs2_xattr_remove()
2589  *
2590  * Free extended attribute resources associated with this inode.
2591  */
ocfs2_xattr_remove(struct inode * inode,struct buffer_head * di_bh)2592 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2593 {
2594 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2595 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2596 	struct ocfs2_refcount_tree *ref_tree = NULL;
2597 	struct buffer_head *ref_root_bh = NULL;
2598 	struct ocfs2_caching_info *ref_ci = NULL;
2599 	handle_t *handle;
2600 	int ret;
2601 
2602 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2603 		return 0;
2604 
2605 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2606 		return 0;
2607 
2608 	if (ocfs2_is_refcount_inode(inode)) {
2609 		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2610 					       le64_to_cpu(di->i_refcount_loc),
2611 					       1, &ref_tree, &ref_root_bh);
2612 		if (ret) {
2613 			mlog_errno(ret);
2614 			goto out;
2615 		}
2616 		ref_ci = &ref_tree->rf_ci;
2617 
2618 	}
2619 
2620 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2621 		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2622 					       ref_ci, ref_root_bh);
2623 		if (ret < 0) {
2624 			mlog_errno(ret);
2625 			goto out;
2626 		}
2627 	}
2628 
2629 	if (di->i_xattr_loc) {
2630 		ret = ocfs2_xattr_free_block(inode,
2631 					     le64_to_cpu(di->i_xattr_loc),
2632 					     ref_ci, ref_root_bh);
2633 		if (ret < 0) {
2634 			mlog_errno(ret);
2635 			goto out;
2636 		}
2637 	}
2638 
2639 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2640 				   OCFS2_INODE_UPDATE_CREDITS);
2641 	if (IS_ERR(handle)) {
2642 		ret = PTR_ERR(handle);
2643 		mlog_errno(ret);
2644 		goto out;
2645 	}
2646 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2647 				      OCFS2_JOURNAL_ACCESS_WRITE);
2648 	if (ret) {
2649 		mlog_errno(ret);
2650 		goto out_commit;
2651 	}
2652 
2653 	di->i_xattr_loc = 0;
2654 
2655 	spin_lock(&oi->ip_lock);
2656 	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2657 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2658 	spin_unlock(&oi->ip_lock);
2659 	ocfs2_update_inode_fsync_trans(handle, inode, 0);
2660 
2661 	ocfs2_journal_dirty(handle, di_bh);
2662 out_commit:
2663 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2664 out:
2665 	if (ref_tree)
2666 		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2667 	brelse(ref_root_bh);
2668 	return ret;
2669 }
2670 
ocfs2_xattr_has_space_inline(struct inode * inode,struct ocfs2_dinode * di)2671 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2672 					struct ocfs2_dinode *di)
2673 {
2674 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2675 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2676 	int free;
2677 
2678 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2679 		return 0;
2680 
2681 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2682 		struct ocfs2_inline_data *idata = &di->id2.i_data;
2683 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2684 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
2685 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
2686 			le64_to_cpu(di->i_size);
2687 	} else {
2688 		struct ocfs2_extent_list *el = &di->id2.i_list;
2689 		free = (le16_to_cpu(el->l_count) -
2690 			le16_to_cpu(el->l_next_free_rec)) *
2691 			sizeof(struct ocfs2_extent_rec);
2692 	}
2693 	if (free >= xattrsize)
2694 		return 1;
2695 
2696 	return 0;
2697 }
2698 
2699 /*
2700  * ocfs2_xattr_ibody_find()
2701  *
2702  * Find extended attribute in inode block and
2703  * fill search info into struct ocfs2_xattr_search.
2704  */
ocfs2_xattr_ibody_find(struct inode * inode,int name_index,const char * name,struct ocfs2_xattr_search * xs)2705 static int ocfs2_xattr_ibody_find(struct inode *inode,
2706 				  int name_index,
2707 				  const char *name,
2708 				  struct ocfs2_xattr_search *xs)
2709 {
2710 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2711 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2712 	int ret;
2713 	int has_space = 0;
2714 
2715 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2716 		return 0;
2717 
2718 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2719 		down_read(&oi->ip_alloc_sem);
2720 		has_space = ocfs2_xattr_has_space_inline(inode, di);
2721 		up_read(&oi->ip_alloc_sem);
2722 		if (!has_space)
2723 			return 0;
2724 	}
2725 
2726 	xs->xattr_bh = xs->inode_bh;
2727 	xs->end = (void *)di + inode->i_sb->s_blocksize;
2728 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2729 		xs->header = (struct ocfs2_xattr_header *)
2730 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
2731 	else
2732 		xs->header = (struct ocfs2_xattr_header *)
2733 			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2734 	xs->base = (void *)xs->header;
2735 	xs->here = xs->header->xh_entries;
2736 
2737 	/* Find the named attribute. */
2738 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2739 		ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
2740 		if (ret && ret != -ENODATA)
2741 			return ret;
2742 		xs->not_found = ret;
2743 	}
2744 
2745 	return 0;
2746 }
2747 
ocfs2_xattr_ibody_init(struct inode * inode,struct buffer_head * di_bh,struct ocfs2_xattr_set_ctxt * ctxt)2748 static int ocfs2_xattr_ibody_init(struct inode *inode,
2749 				  struct buffer_head *di_bh,
2750 				  struct ocfs2_xattr_set_ctxt *ctxt)
2751 {
2752 	int ret;
2753 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2754 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2755 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2756 	unsigned int xattrsize = osb->s_xattr_inline_size;
2757 
2758 	if (!ocfs2_xattr_has_space_inline(inode, di)) {
2759 		ret = -ENOSPC;
2760 		goto out;
2761 	}
2762 
2763 	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
2764 				      OCFS2_JOURNAL_ACCESS_WRITE);
2765 	if (ret) {
2766 		mlog_errno(ret);
2767 		goto out;
2768 	}
2769 
2770 	/*
2771 	 * Adjust extent record count or inline data size
2772 	 * to reserve space for extended attribute.
2773 	 */
2774 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2775 		struct ocfs2_inline_data *idata = &di->id2.i_data;
2776 		le16_add_cpu(&idata->id_count, -xattrsize);
2777 	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2778 		struct ocfs2_extent_list *el = &di->id2.i_list;
2779 		le16_add_cpu(&el->l_count, -(xattrsize /
2780 					     sizeof(struct ocfs2_extent_rec)));
2781 	}
2782 	di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2783 
2784 	spin_lock(&oi->ip_lock);
2785 	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
2786 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2787 	spin_unlock(&oi->ip_lock);
2788 
2789 	ocfs2_journal_dirty(ctxt->handle, di_bh);
2790 
2791 out:
2792 	return ret;
2793 }
2794 
2795 /*
2796  * ocfs2_xattr_ibody_set()
2797  *
2798  * Set, replace or remove an extended attribute into inode block.
2799  *
2800  */
ocfs2_xattr_ibody_set(struct inode * inode,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xs,struct ocfs2_xattr_set_ctxt * ctxt)2801 static int ocfs2_xattr_ibody_set(struct inode *inode,
2802 				 struct ocfs2_xattr_info *xi,
2803 				 struct ocfs2_xattr_search *xs,
2804 				 struct ocfs2_xattr_set_ctxt *ctxt)
2805 {
2806 	int ret;
2807 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2808 	struct ocfs2_xa_loc loc;
2809 
2810 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2811 		return -ENOSPC;
2812 
2813 	down_write(&oi->ip_alloc_sem);
2814 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2815 		ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2816 		if (ret) {
2817 			if (ret != -ENOSPC)
2818 				mlog_errno(ret);
2819 			goto out;
2820 		}
2821 	}
2822 
2823 	ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2824 				 xs->not_found ? NULL : xs->here);
2825 	ret = ocfs2_xa_set(&loc, xi, ctxt);
2826 	if (ret) {
2827 		if (ret != -ENOSPC)
2828 			mlog_errno(ret);
2829 		goto out;
2830 	}
2831 	xs->here = loc.xl_entry;
2832 
2833 out:
2834 	up_write(&oi->ip_alloc_sem);
2835 
2836 	return ret;
2837 }
2838 
2839 /*
2840  * ocfs2_xattr_block_find()
2841  *
2842  * Find extended attribute in external block and
2843  * fill search info into struct ocfs2_xattr_search.
2844  */
ocfs2_xattr_block_find(struct inode * inode,int name_index,const char * name,struct ocfs2_xattr_search * xs)2845 static int ocfs2_xattr_block_find(struct inode *inode,
2846 				  int name_index,
2847 				  const char *name,
2848 				  struct ocfs2_xattr_search *xs)
2849 {
2850 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2851 	struct buffer_head *blk_bh = NULL;
2852 	struct ocfs2_xattr_block *xb;
2853 	int ret = 0;
2854 
2855 	if (!di->i_xattr_loc)
2856 		return ret;
2857 
2858 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2859 				     &blk_bh);
2860 	if (ret < 0) {
2861 		mlog_errno(ret);
2862 		return ret;
2863 	}
2864 
2865 	xs->xattr_bh = blk_bh;
2866 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2867 
2868 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2869 		xs->header = &xb->xb_attrs.xb_header;
2870 		xs->base = (void *)xs->header;
2871 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2872 		xs->here = xs->header->xh_entries;
2873 
2874 		ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
2875 	} else
2876 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2877 						   name_index,
2878 						   name, xs);
2879 
2880 	if (ret && ret != -ENODATA) {
2881 		xs->xattr_bh = NULL;
2882 		goto cleanup;
2883 	}
2884 	xs->not_found = ret;
2885 	return 0;
2886 cleanup:
2887 	brelse(blk_bh);
2888 
2889 	return ret;
2890 }
2891 
ocfs2_create_xattr_block(struct inode * inode,struct buffer_head * inode_bh,struct ocfs2_xattr_set_ctxt * ctxt,int indexed,struct buffer_head ** ret_bh)2892 static int ocfs2_create_xattr_block(struct inode *inode,
2893 				    struct buffer_head *inode_bh,
2894 				    struct ocfs2_xattr_set_ctxt *ctxt,
2895 				    int indexed,
2896 				    struct buffer_head **ret_bh)
2897 {
2898 	int ret;
2899 	u16 suballoc_bit_start;
2900 	u32 num_got;
2901 	u64 suballoc_loc, first_blkno;
2902 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2903 	struct buffer_head *new_bh = NULL;
2904 	struct ocfs2_xattr_block *xblk;
2905 
2906 	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2907 				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
2908 	if (ret < 0) {
2909 		mlog_errno(ret);
2910 		goto end;
2911 	}
2912 
2913 	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2914 				   &suballoc_loc, &suballoc_bit_start,
2915 				   &num_got, &first_blkno);
2916 	if (ret < 0) {
2917 		mlog_errno(ret);
2918 		goto end;
2919 	}
2920 
2921 	new_bh = sb_getblk(inode->i_sb, first_blkno);
2922 	if (!new_bh) {
2923 		ret = -ENOMEM;
2924 		mlog_errno(ret);
2925 		goto end;
2926 	}
2927 
2928 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2929 
2930 	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
2931 				      new_bh,
2932 				      OCFS2_JOURNAL_ACCESS_CREATE);
2933 	if (ret < 0) {
2934 		mlog_errno(ret);
2935 		goto end;
2936 	}
2937 
2938 	/* Initialize ocfs2_xattr_block */
2939 	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2940 	memset(xblk, 0, inode->i_sb->s_blocksize);
2941 	strscpy(xblk->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE);
2942 	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2943 	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2944 	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2945 	xblk->xb_fs_generation =
2946 		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2947 	xblk->xb_blkno = cpu_to_le64(first_blkno);
2948 	if (indexed) {
2949 		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2950 		xr->xt_clusters = cpu_to_le32(1);
2951 		xr->xt_last_eb_blk = 0;
2952 		xr->xt_list.l_tree_depth = 0;
2953 		xr->xt_list.l_count = cpu_to_le16(
2954 					ocfs2_xattr_recs_per_xb(inode->i_sb));
2955 		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2956 		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2957 	}
2958 	ocfs2_journal_dirty(ctxt->handle, new_bh);
2959 
2960 	/* Add it to the inode */
2961 	di->i_xattr_loc = cpu_to_le64(first_blkno);
2962 
2963 	spin_lock(&OCFS2_I(inode)->ip_lock);
2964 	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
2965 	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
2966 	spin_unlock(&OCFS2_I(inode)->ip_lock);
2967 
2968 	ocfs2_journal_dirty(ctxt->handle, inode_bh);
2969 
2970 	*ret_bh = new_bh;
2971 	new_bh = NULL;
2972 
2973 end:
2974 	brelse(new_bh);
2975 	return ret;
2976 }
2977 
2978 /*
2979  * ocfs2_xattr_block_set()
2980  *
2981  * Set, replace or remove an extended attribute into external block.
2982  *
2983  */
ocfs2_xattr_block_set(struct inode * inode,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xs,struct ocfs2_xattr_set_ctxt * ctxt)2984 static int ocfs2_xattr_block_set(struct inode *inode,
2985 				 struct ocfs2_xattr_info *xi,
2986 				 struct ocfs2_xattr_search *xs,
2987 				 struct ocfs2_xattr_set_ctxt *ctxt)
2988 {
2989 	struct buffer_head *new_bh = NULL;
2990 	struct ocfs2_xattr_block *xblk = NULL;
2991 	int ret;
2992 	struct ocfs2_xa_loc loc;
2993 
2994 	if (!xs->xattr_bh) {
2995 		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2996 					       0, &new_bh);
2997 		if (ret) {
2998 			mlog_errno(ret);
2999 			goto end;
3000 		}
3001 
3002 		xs->xattr_bh = new_bh;
3003 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
3004 		xs->header = &xblk->xb_attrs.xb_header;
3005 		xs->base = (void *)xs->header;
3006 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
3007 		xs->here = xs->header->xh_entries;
3008 	} else
3009 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
3010 
3011 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
3012 		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
3013 					      xs->not_found ? NULL : xs->here);
3014 
3015 		ret = ocfs2_xa_set(&loc, xi, ctxt);
3016 		if (!ret)
3017 			xs->here = loc.xl_entry;
3018 		else if ((ret != -ENOSPC) || ctxt->set_abort)
3019 			goto end;
3020 		else {
3021 			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
3022 			if (ret)
3023 				goto end;
3024 		}
3025 	}
3026 
3027 	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
3028 		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
3029 
3030 end:
3031 	return ret;
3032 }
3033 
3034 /* Check whether the new xattr can be inserted into the inode. */
ocfs2_xattr_can_be_in_inode(struct inode * inode,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xs)3035 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
3036 				       struct ocfs2_xattr_info *xi,
3037 				       struct ocfs2_xattr_search *xs)
3038 {
3039 	struct ocfs2_xattr_entry *last;
3040 	int free, i;
3041 	size_t min_offs = xs->end - xs->base;
3042 
3043 	if (!xs->header)
3044 		return 0;
3045 
3046 	last = xs->header->xh_entries;
3047 
3048 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
3049 		size_t offs = le16_to_cpu(last->xe_name_offset);
3050 		if (offs < min_offs)
3051 			min_offs = offs;
3052 		last += 1;
3053 	}
3054 
3055 	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
3056 	if (free < 0)
3057 		return 0;
3058 
3059 	BUG_ON(!xs->not_found);
3060 
3061 	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3062 		return 1;
3063 
3064 	return 0;
3065 }
3066 
ocfs2_calc_xattr_set_need(struct inode * inode,struct ocfs2_dinode * di,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xis,struct ocfs2_xattr_search * xbs,int * clusters_need,int * meta_need,int * credits_need)3067 static int ocfs2_calc_xattr_set_need(struct inode *inode,
3068 				     struct ocfs2_dinode *di,
3069 				     struct ocfs2_xattr_info *xi,
3070 				     struct ocfs2_xattr_search *xis,
3071 				     struct ocfs2_xattr_search *xbs,
3072 				     int *clusters_need,
3073 				     int *meta_need,
3074 				     int *credits_need)
3075 {
3076 	int ret = 0, old_in_xb = 0;
3077 	int clusters_add = 0, meta_add = 0, credits = 0;
3078 	struct buffer_head *bh = NULL;
3079 	struct ocfs2_xattr_block *xb = NULL;
3080 	struct ocfs2_xattr_entry *xe = NULL;
3081 	struct ocfs2_xattr_value_root *xv = NULL;
3082 	char *base = NULL;
3083 	int name_offset, name_len = 0;
3084 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3085 						    xi->xi_value_len);
3086 	u64 value_size;
3087 
3088 	/*
3089 	 * Calculate the clusters we need to write.
3090 	 * No matter whether we replace an old one or add a new one,
3091 	 * we need this for writing.
3092 	 */
3093 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3094 		credits += new_clusters *
3095 			   ocfs2_clusters_to_blocks(inode->i_sb, 1);
3096 
3097 	if (xis->not_found && xbs->not_found) {
3098 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3099 
3100 		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3101 			clusters_add += new_clusters;
3102 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3103 							&def_xv.xv.xr_list);
3104 		}
3105 
3106 		goto meta_guess;
3107 	}
3108 
3109 	if (!xis->not_found) {
3110 		xe = xis->here;
3111 		name_offset = le16_to_cpu(xe->xe_name_offset);
3112 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3113 		base = xis->base;
3114 		credits += OCFS2_INODE_UPDATE_CREDITS;
3115 	} else {
3116 		int i, block_off = 0;
3117 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3118 		xe = xbs->here;
3119 		name_offset = le16_to_cpu(xe->xe_name_offset);
3120 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3121 		i = xbs->here - xbs->header->xh_entries;
3122 		old_in_xb = 1;
3123 
3124 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3125 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3126 							bucket_xh(xbs->bucket),
3127 							i, &block_off,
3128 							&name_offset);
3129 			base = bucket_block(xbs->bucket, block_off);
3130 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3131 		} else {
3132 			base = xbs->base;
3133 			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3134 		}
3135 	}
3136 
3137 	/*
3138 	 * delete a xattr doesn't need metadata and cluster allocation.
3139 	 * so just calculate the credits and return.
3140 	 *
3141 	 * The credits for removing the value tree will be extended
3142 	 * by ocfs2_remove_extent itself.
3143 	 */
3144 	if (!xi->xi_value) {
3145 		if (!ocfs2_xattr_is_local(xe))
3146 			credits += ocfs2_remove_extent_credits(inode->i_sb);
3147 
3148 		goto out;
3149 	}
3150 
3151 	/* do cluster allocation guess first. */
3152 	value_size = le64_to_cpu(xe->xe_value_size);
3153 
3154 	if (old_in_xb) {
3155 		/*
3156 		 * In xattr set, we always try to set the xe in inode first,
3157 		 * so if it can be inserted into inode successfully, the old
3158 		 * one will be removed from the xattr block, and this xattr
3159 		 * will be inserted into inode as a new xattr in inode.
3160 		 */
3161 		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3162 			clusters_add += new_clusters;
3163 			credits += ocfs2_remove_extent_credits(inode->i_sb) +
3164 				    OCFS2_INODE_UPDATE_CREDITS;
3165 			if (!ocfs2_xattr_is_local(xe))
3166 				credits += ocfs2_calc_extend_credits(
3167 							inode->i_sb,
3168 							&def_xv.xv.xr_list);
3169 			goto out;
3170 		}
3171 	}
3172 
3173 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3174 		/* the new values will be stored outside. */
3175 		u32 old_clusters = 0;
3176 
3177 		if (!ocfs2_xattr_is_local(xe)) {
3178 			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
3179 								 value_size);
3180 			xv = (struct ocfs2_xattr_value_root *)
3181 			     (base + name_offset + name_len);
3182 			value_size = OCFS2_XATTR_ROOT_SIZE;
3183 		} else
3184 			xv = &def_xv.xv;
3185 
3186 		if (old_clusters >= new_clusters) {
3187 			credits += ocfs2_remove_extent_credits(inode->i_sb);
3188 			goto out;
3189 		} else {
3190 			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3191 			clusters_add += new_clusters - old_clusters;
3192 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3193 							     &xv->xr_list);
3194 			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3195 				goto out;
3196 		}
3197 	} else {
3198 		/*
3199 		 * Now the new value will be stored inside. So if the new
3200 		 * value is smaller than the size of value root or the old
3201 		 * value, we don't need any allocation, otherwise we have
3202 		 * to guess metadata allocation.
3203 		 */
3204 		if ((ocfs2_xattr_is_local(xe) &&
3205 		     (value_size >= xi->xi_value_len)) ||
3206 		    (!ocfs2_xattr_is_local(xe) &&
3207 		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3208 			goto out;
3209 	}
3210 
3211 meta_guess:
3212 	/* calculate metadata allocation. */
3213 	if (di->i_xattr_loc) {
3214 		if (!xbs->xattr_bh) {
3215 			ret = ocfs2_read_xattr_block(inode,
3216 						     le64_to_cpu(di->i_xattr_loc),
3217 						     &bh);
3218 			if (ret) {
3219 				mlog_errno(ret);
3220 				goto out;
3221 			}
3222 
3223 			xb = (struct ocfs2_xattr_block *)bh->b_data;
3224 		} else
3225 			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3226 
3227 		/*
3228 		 * If there is already an xattr tree, good, we can calculate
3229 		 * like other b-trees. Otherwise we may have the chance of
3230 		 * create a tree, the credit calculation is borrowed from
3231 		 * ocfs2_calc_extend_credits with root_el = NULL. And the
3232 		 * new tree will be cluster based, so no meta is needed.
3233 		 */
3234 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3235 			struct ocfs2_extent_list *el =
3236 				 &xb->xb_attrs.xb_root.xt_list;
3237 			meta_add += ocfs2_extend_meta_needed(el);
3238 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3239 							     el);
3240 		} else
3241 			credits += OCFS2_SUBALLOC_ALLOC + 1;
3242 
3243 		/*
3244 		 * This cluster will be used either for new bucket or for
3245 		 * new xattr block.
3246 		 * If the cluster size is the same as the bucket size, one
3247 		 * more is needed since we may need to extend the bucket
3248 		 * also.
3249 		 */
3250 		clusters_add += 1;
3251 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3252 		if (OCFS2_XATTR_BUCKET_SIZE ==
3253 			OCFS2_SB(inode->i_sb)->s_clustersize) {
3254 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3255 			clusters_add += 1;
3256 		}
3257 	} else {
3258 		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3259 		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3260 			struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
3261 			meta_add += ocfs2_extend_meta_needed(el);
3262 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3263 							     el);
3264 		} else {
3265 			meta_add += 1;
3266 		}
3267 	}
3268 out:
3269 	if (clusters_need)
3270 		*clusters_need = clusters_add;
3271 	if (meta_need)
3272 		*meta_need = meta_add;
3273 	if (credits_need)
3274 		*credits_need = credits;
3275 	brelse(bh);
3276 	return ret;
3277 }
3278 
ocfs2_init_xattr_set_ctxt(struct inode * inode,struct ocfs2_dinode * di,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xis,struct ocfs2_xattr_search * xbs,struct ocfs2_xattr_set_ctxt * ctxt,int extra_meta,int * credits)3279 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3280 				     struct ocfs2_dinode *di,
3281 				     struct ocfs2_xattr_info *xi,
3282 				     struct ocfs2_xattr_search *xis,
3283 				     struct ocfs2_xattr_search *xbs,
3284 				     struct ocfs2_xattr_set_ctxt *ctxt,
3285 				     int extra_meta,
3286 				     int *credits)
3287 {
3288 	int clusters_add, meta_add, ret;
3289 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3290 
3291 	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3292 
3293 	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3294 
3295 	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3296 					&clusters_add, &meta_add, credits);
3297 	if (ret) {
3298 		mlog_errno(ret);
3299 		return ret;
3300 	}
3301 
3302 	meta_add += extra_meta;
3303 	trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
3304 					clusters_add, *credits);
3305 
3306 	if (meta_add) {
3307 		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3308 							&ctxt->meta_ac);
3309 		if (ret) {
3310 			mlog_errno(ret);
3311 			goto out;
3312 		}
3313 	}
3314 
3315 	if (clusters_add) {
3316 		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3317 		if (ret)
3318 			mlog_errno(ret);
3319 	}
3320 out:
3321 	if (ret) {
3322 		if (ctxt->meta_ac) {
3323 			ocfs2_free_alloc_context(ctxt->meta_ac);
3324 			ctxt->meta_ac = NULL;
3325 		}
3326 
3327 		/*
3328 		 * We cannot have an error and a non null ctxt->data_ac.
3329 		 */
3330 	}
3331 
3332 	return ret;
3333 }
3334 
__ocfs2_xattr_set_handle(struct inode * inode,struct ocfs2_dinode * di,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xis,struct ocfs2_xattr_search * xbs,struct ocfs2_xattr_set_ctxt * ctxt)3335 static int __ocfs2_xattr_set_handle(struct inode *inode,
3336 				    struct ocfs2_dinode *di,
3337 				    struct ocfs2_xattr_info *xi,
3338 				    struct ocfs2_xattr_search *xis,
3339 				    struct ocfs2_xattr_search *xbs,
3340 				    struct ocfs2_xattr_set_ctxt *ctxt)
3341 {
3342 	int ret = 0, credits, old_found;
3343 
3344 	if (!xi->xi_value) {
3345 		/* Remove existing extended attribute */
3346 		if (!xis->not_found)
3347 			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3348 		else if (!xbs->not_found)
3349 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3350 	} else {
3351 		/* We always try to set extended attribute into inode first*/
3352 		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3353 		if (!ret && !xbs->not_found) {
3354 			/*
3355 			 * If succeed and that extended attribute existing in
3356 			 * external block, then we will remove it.
3357 			 */
3358 			xi->xi_value = NULL;
3359 			xi->xi_value_len = 0;
3360 
3361 			old_found = xis->not_found;
3362 			xis->not_found = -ENODATA;
3363 			ret = ocfs2_calc_xattr_set_need(inode,
3364 							di,
3365 							xi,
3366 							xis,
3367 							xbs,
3368 							NULL,
3369 							NULL,
3370 							&credits);
3371 			xis->not_found = old_found;
3372 			if (ret) {
3373 				mlog_errno(ret);
3374 				goto out;
3375 			}
3376 
3377 			ret = ocfs2_extend_trans(ctxt->handle, credits);
3378 			if (ret) {
3379 				mlog_errno(ret);
3380 				goto out;
3381 			}
3382 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3383 		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3384 			if (di->i_xattr_loc && !xbs->xattr_bh) {
3385 				ret = ocfs2_xattr_block_find(inode,
3386 							     xi->xi_name_index,
3387 							     xi->xi_name, xbs);
3388 				if (ret)
3389 					goto out;
3390 
3391 				old_found = xis->not_found;
3392 				xis->not_found = -ENODATA;
3393 				ret = ocfs2_calc_xattr_set_need(inode,
3394 								di,
3395 								xi,
3396 								xis,
3397 								xbs,
3398 								NULL,
3399 								NULL,
3400 								&credits);
3401 				xis->not_found = old_found;
3402 				if (ret) {
3403 					mlog_errno(ret);
3404 					goto out;
3405 				}
3406 
3407 				ret = ocfs2_extend_trans(ctxt->handle, credits);
3408 				if (ret) {
3409 					mlog_errno(ret);
3410 					goto out;
3411 				}
3412 			}
3413 			/*
3414 			 * If no space in inode, we will set extended attribute
3415 			 * into external block.
3416 			 */
3417 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3418 			if (ret)
3419 				goto out;
3420 			if (!xis->not_found) {
3421 				/*
3422 				 * If succeed and that extended attribute
3423 				 * existing in inode, we will remove it.
3424 				 */
3425 				xi->xi_value = NULL;
3426 				xi->xi_value_len = 0;
3427 				xbs->not_found = -ENODATA;
3428 				ret = ocfs2_calc_xattr_set_need(inode,
3429 								di,
3430 								xi,
3431 								xis,
3432 								xbs,
3433 								NULL,
3434 								NULL,
3435 								&credits);
3436 				if (ret) {
3437 					mlog_errno(ret);
3438 					goto out;
3439 				}
3440 
3441 				ret = ocfs2_extend_trans(ctxt->handle, credits);
3442 				if (ret) {
3443 					mlog_errno(ret);
3444 					goto out;
3445 				}
3446 				ret = ocfs2_xattr_ibody_set(inode, xi,
3447 							    xis, ctxt);
3448 			}
3449 		}
3450 	}
3451 
3452 	if (!ret) {
3453 		/* Update inode ctime. */
3454 		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3455 					      xis->inode_bh,
3456 					      OCFS2_JOURNAL_ACCESS_WRITE);
3457 		if (ret) {
3458 			mlog_errno(ret);
3459 			goto out;
3460 		}
3461 
3462 		inode_set_ctime_current(inode);
3463 		di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
3464 		di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
3465 		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3466 	}
3467 out:
3468 	return ret;
3469 }
3470 
3471 /*
3472  * This function only called duing creating inode
3473  * for init security/acl xattrs of the new inode.
3474  * All transanction credits have been reserved in mknod.
3475  */
ocfs2_xattr_set_handle(handle_t * handle,struct inode * inode,struct buffer_head * di_bh,int name_index,const char * name,const void * value,size_t value_len,int flags,struct ocfs2_alloc_context * meta_ac,struct ocfs2_alloc_context * data_ac)3476 int ocfs2_xattr_set_handle(handle_t *handle,
3477 			   struct inode *inode,
3478 			   struct buffer_head *di_bh,
3479 			   int name_index,
3480 			   const char *name,
3481 			   const void *value,
3482 			   size_t value_len,
3483 			   int flags,
3484 			   struct ocfs2_alloc_context *meta_ac,
3485 			   struct ocfs2_alloc_context *data_ac)
3486 {
3487 	struct ocfs2_dinode *di;
3488 	int ret;
3489 
3490 	struct ocfs2_xattr_info xi = {
3491 		.xi_name_index = name_index,
3492 		.xi_name = name,
3493 		.xi_name_len = strlen(name),
3494 		.xi_value = value,
3495 		.xi_value_len = value_len,
3496 	};
3497 
3498 	struct ocfs2_xattr_search xis = {
3499 		.not_found = -ENODATA,
3500 	};
3501 
3502 	struct ocfs2_xattr_search xbs = {
3503 		.not_found = -ENODATA,
3504 	};
3505 
3506 	struct ocfs2_xattr_set_ctxt ctxt = {
3507 		.handle = handle,
3508 		.meta_ac = meta_ac,
3509 		.data_ac = data_ac,
3510 	};
3511 
3512 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3513 		return -EOPNOTSUPP;
3514 
3515 	/*
3516 	 * In extreme situation, may need xattr bucket when
3517 	 * block size is too small. And we have already reserved
3518 	 * the credits for bucket in mknod.
3519 	 */
3520 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3521 		xbs.bucket = ocfs2_xattr_bucket_new(inode);
3522 		if (!xbs.bucket) {
3523 			mlog_errno(-ENOMEM);
3524 			return -ENOMEM;
3525 		}
3526 	}
3527 
3528 	xis.inode_bh = xbs.inode_bh = di_bh;
3529 	di = (struct ocfs2_dinode *)di_bh->b_data;
3530 
3531 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
3532 
3533 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3534 	if (ret)
3535 		goto cleanup;
3536 	if (xis.not_found) {
3537 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3538 		if (ret)
3539 			goto cleanup;
3540 	}
3541 
3542 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3543 
3544 cleanup:
3545 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
3546 	brelse(xbs.xattr_bh);
3547 	ocfs2_xattr_bucket_free(xbs.bucket);
3548 
3549 	return ret;
3550 }
3551 
3552 /*
3553  * ocfs2_xattr_set()
3554  *
3555  * Set, replace or remove an extended attribute for this inode.
3556  * value is NULL to remove an existing extended attribute, else either
3557  * create or replace an extended attribute.
3558  */
ocfs2_xattr_set(struct inode * inode,int name_index,const char * name,const void * value,size_t value_len,int flags)3559 int ocfs2_xattr_set(struct inode *inode,
3560 		    int name_index,
3561 		    const char *name,
3562 		    const void *value,
3563 		    size_t value_len,
3564 		    int flags)
3565 {
3566 	struct buffer_head *di_bh = NULL;
3567 	struct ocfs2_dinode *di;
3568 	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
3569 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3570 	struct inode *tl_inode = osb->osb_tl_inode;
3571 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
3572 	struct ocfs2_refcount_tree *ref_tree = NULL;
3573 	struct ocfs2_lock_holder oh;
3574 
3575 	struct ocfs2_xattr_info xi = {
3576 		.xi_name_index = name_index,
3577 		.xi_name = name,
3578 		.xi_name_len = strlen(name),
3579 		.xi_value = value,
3580 		.xi_value_len = value_len,
3581 	};
3582 
3583 	struct ocfs2_xattr_search xis = {
3584 		.not_found = -ENODATA,
3585 	};
3586 
3587 	struct ocfs2_xattr_search xbs = {
3588 		.not_found = -ENODATA,
3589 	};
3590 
3591 	if (!ocfs2_supports_xattr(osb))
3592 		return -EOPNOTSUPP;
3593 
3594 	/*
3595 	 * Only xbs will be used on indexed trees.  xis doesn't need a
3596 	 * bucket.
3597 	 */
3598 	xbs.bucket = ocfs2_xattr_bucket_new(inode);
3599 	if (!xbs.bucket) {
3600 		mlog_errno(-ENOMEM);
3601 		return -ENOMEM;
3602 	}
3603 
3604 	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
3605 	if (had_lock < 0) {
3606 		ret = had_lock;
3607 		mlog_errno(ret);
3608 		goto cleanup_nolock;
3609 	}
3610 	xis.inode_bh = xbs.inode_bh = di_bh;
3611 	di = (struct ocfs2_dinode *)di_bh->b_data;
3612 
3613 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
3614 	/*
3615 	 * Scan inode and external block to find the same name
3616 	 * extended attribute and collect search information.
3617 	 */
3618 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3619 	if (ret)
3620 		goto cleanup;
3621 	if (xis.not_found) {
3622 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3623 		if (ret)
3624 			goto cleanup;
3625 	}
3626 
3627 	if (xis.not_found && xbs.not_found) {
3628 		ret = -ENODATA;
3629 		if (flags & XATTR_REPLACE)
3630 			goto cleanup;
3631 		ret = 0;
3632 		if (!value)
3633 			goto cleanup;
3634 	} else {
3635 		ret = -EEXIST;
3636 		if (flags & XATTR_CREATE)
3637 			goto cleanup;
3638 	}
3639 
3640 	/* Check whether the value is refcounted and do some preparation. */
3641 	if (ocfs2_is_refcount_inode(inode) &&
3642 	    (!xis.not_found || !xbs.not_found)) {
3643 		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3644 						   &xis, &xbs, &ref_tree,
3645 						   &ref_meta, &ref_credits);
3646 		if (ret) {
3647 			mlog_errno(ret);
3648 			goto cleanup;
3649 		}
3650 	}
3651 
3652 	inode_lock(tl_inode);
3653 
3654 	if (ocfs2_truncate_log_needs_flush(osb)) {
3655 		ret = __ocfs2_flush_truncate_log(osb);
3656 		if (ret < 0) {
3657 			inode_unlock(tl_inode);
3658 			mlog_errno(ret);
3659 			goto cleanup;
3660 		}
3661 	}
3662 	inode_unlock(tl_inode);
3663 
3664 	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3665 					&xbs, &ctxt, ref_meta, &credits);
3666 	if (ret) {
3667 		mlog_errno(ret);
3668 		goto cleanup;
3669 	}
3670 
3671 	/* we need to update inode's ctime field, so add credit for it. */
3672 	credits += OCFS2_INODE_UPDATE_CREDITS;
3673 	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3674 	if (IS_ERR(ctxt.handle)) {
3675 		ret = PTR_ERR(ctxt.handle);
3676 		mlog_errno(ret);
3677 		goto out_free_ac;
3678 	}
3679 
3680 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3681 	ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
3682 
3683 	ocfs2_commit_trans(osb, ctxt.handle);
3684 
3685 out_free_ac:
3686 	if (ctxt.data_ac)
3687 		ocfs2_free_alloc_context(ctxt.data_ac);
3688 	if (ctxt.meta_ac)
3689 		ocfs2_free_alloc_context(ctxt.meta_ac);
3690 	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3691 		ocfs2_schedule_truncate_log_flush(osb, 1);
3692 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
3693 
3694 cleanup:
3695 	if (ref_tree)
3696 		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3697 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
3698 	if (!value && !ret) {
3699 		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3700 		if (ret)
3701 			mlog_errno(ret);
3702 	}
3703 	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
3704 cleanup_nolock:
3705 	brelse(di_bh);
3706 	brelse(xbs.xattr_bh);
3707 	ocfs2_xattr_bucket_free(xbs.bucket);
3708 
3709 	return ret;
3710 }
3711 
3712 /*
3713  * Find the xattr extent rec which may contains name_hash.
3714  * e_cpos will be the first name hash of the xattr rec.
3715  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3716  */
ocfs2_xattr_get_rec(struct inode * inode,u32 name_hash,u64 * p_blkno,u32 * e_cpos,u32 * num_clusters,struct ocfs2_extent_list * el)3717 static int ocfs2_xattr_get_rec(struct inode *inode,
3718 			       u32 name_hash,
3719 			       u64 *p_blkno,
3720 			       u32 *e_cpos,
3721 			       u32 *num_clusters,
3722 			       struct ocfs2_extent_list *el)
3723 {
3724 	int ret = 0, i;
3725 	struct buffer_head *eb_bh = NULL;
3726 	struct ocfs2_extent_block *eb;
3727 	struct ocfs2_extent_rec *rec = NULL;
3728 	u64 e_blkno = 0;
3729 
3730 	if (el->l_tree_depth) {
3731 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3732 				      &eb_bh);
3733 		if (ret) {
3734 			mlog_errno(ret);
3735 			goto out;
3736 		}
3737 
3738 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3739 		el = &eb->h_list;
3740 
3741 		if (el->l_tree_depth) {
3742 			ret = ocfs2_error(inode->i_sb,
3743 					  "Inode %lu has non zero tree depth in xattr tree block %llu\n",
3744 					  inode->i_ino,
3745 					  (unsigned long long)eb_bh->b_blocknr);
3746 			goto out;
3747 		}
3748 	}
3749 
3750 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3751 		rec = &el->l_recs[i];
3752 
3753 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3754 			e_blkno = le64_to_cpu(rec->e_blkno);
3755 			break;
3756 		}
3757 	}
3758 
3759 	if (!e_blkno) {
3760 		ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
3761 				  inode->i_ino,
3762 				  le32_to_cpu(rec->e_cpos),
3763 				  ocfs2_rec_clusters(el, rec));
3764 		goto out;
3765 	}
3766 
3767 	*p_blkno = le64_to_cpu(rec->e_blkno);
3768 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3769 	if (e_cpos)
3770 		*e_cpos = le32_to_cpu(rec->e_cpos);
3771 out:
3772 	brelse(eb_bh);
3773 	return ret;
3774 }
3775 
/*
 * Function type taking an inode, one xattr bucket and an opaque pointer,
 * and returning an int.  Presumably used as a per-bucket callback; its
 * users are outside this part of the file.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket, void *para);
3779 
/*
 * Search one bucket for the xattr (name_index, name) with hash name_hash.
 *
 * On a match *xe_index is set to the entry's index, *found is set to 1
 * and 0 is returned.  When nothing matches, 0 is returned and neither
 * output is written.  A failure of ocfs2_xattr_bucket_get_name_value()
 * is logged and returned.
 */
static int ocfs2_find_xe_in_bucket(struct inode *inode,
				   struct ocfs2_xattr_bucket *bucket,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u16 *xe_index,
				   int *found)
{
	struct ocfs2_xattr_header *hdr = bucket_xh(bucket);
	const size_t want_len = strlen(name);
	int idx, status = 0;

	/*
	 * Linear scan rather than binary search: several entries may share
	 * the same name hash.
	 */
	for (idx = 0; idx < le16_to_cpu(hdr->xh_count); idx++) {
		struct ocfs2_xattr_entry *entry = &hdr->xh_entries[idx];
		u32 entry_hash = le32_to_cpu(entry->xe_name_hash);
		int blk, off;
		char *stored_name;

		/* Not there yet. */
		if (entry_hash < name_hash)
			continue;

		/* Went past the hash we are looking for; stop scanning. */
		if (entry_hash > name_hash)
			break;

		/* Same hash: the type and the name length must agree too. */
		if (name_index != ocfs2_xattr_get_type(entry))
			continue;
		if (want_len != entry->xe_name_len)
			continue;

		status = ocfs2_xattr_bucket_get_name_value(inode->i_sb, hdr,
							   idx, &blk, &off);
		if (status) {
			mlog_errno(status);
			break;
		}

		/* Finally compare the stored name bytes themselves. */
		stored_name = bucket_block(bucket, blk) + off;
		if (memcmp(name, stored_name, want_len) == 0) {
			*xe_index = idx;
			*found = 1;
			status = 0;
			break;
		}
	}

	return status;
}
3834 
3835 /*
3836  * Find the specified xattr entry in a series of buckets.
3837  * This series start from p_blkno and last for num_clusters.
3838  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3839  * the num of the valid buckets.
3840  *
3841  * Return the buffer_head this xattr should reside in. And if the xattr's
3842  * hash is in the gap of 2 buckets, return the lower bucket.
3843  */
ocfs2_xattr_bucket_find(struct inode * inode,int name_index,const char * name,u32 name_hash,u64 p_blkno,u32 first_hash,u32 num_clusters,struct ocfs2_xattr_search * xs)3844 static int ocfs2_xattr_bucket_find(struct inode *inode,
3845 				   int name_index,
3846 				   const char *name,
3847 				   u32 name_hash,
3848 				   u64 p_blkno,
3849 				   u32 first_hash,
3850 				   u32 num_clusters,
3851 				   struct ocfs2_xattr_search *xs)
3852 {
3853 	int ret, found = 0;
3854 	struct ocfs2_xattr_header *xh = NULL;
3855 	struct ocfs2_xattr_entry *xe = NULL;
3856 	u16 index = 0;
3857 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3858 	int low_bucket = 0, bucket, high_bucket;
3859 	struct ocfs2_xattr_bucket *search;
3860 	u64 blkno, lower_blkno = 0;
3861 
3862 	search = ocfs2_xattr_bucket_new(inode);
3863 	if (!search) {
3864 		ret = -ENOMEM;
3865 		mlog_errno(ret);
3866 		goto out;
3867 	}
3868 
3869 	ret = ocfs2_read_xattr_bucket(search, p_blkno);
3870 	if (ret) {
3871 		mlog_errno(ret);
3872 		goto out;
3873 	}
3874 
3875 	xh = bucket_xh(search);
3876 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3877 	while (low_bucket <= high_bucket) {
3878 		ocfs2_xattr_bucket_relse(search);
3879 
3880 		bucket = (low_bucket + high_bucket) / 2;
3881 		blkno = p_blkno + bucket * blk_per_bucket;
3882 		ret = ocfs2_read_xattr_bucket(search, blkno);
3883 		if (ret) {
3884 			mlog_errno(ret);
3885 			goto out;
3886 		}
3887 
3888 		xh = bucket_xh(search);
3889 		xe = &xh->xh_entries[0];
3890 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3891 			high_bucket = bucket - 1;
3892 			continue;
3893 		}
3894 
3895 		/*
3896 		 * Check whether the hash of the last entry in our
3897 		 * bucket is larger than the search one. for an empty
3898 		 * bucket, the last one is also the first one.
3899 		 */
3900 		if (xh->xh_count)
3901 			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3902 
3903 		/* record lower_blkno which may be the insert place. */
3904 		lower_blkno = blkno;
3905 
3906 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3907 			low_bucket = bucket + 1;
3908 			continue;
3909 		}
3910 
3911 		/* the searched xattr should reside in this bucket if exists. */
3912 		ret = ocfs2_find_xe_in_bucket(inode, search,
3913 					      name_index, name, name_hash,
3914 					      &index, &found);
3915 		if (ret) {
3916 			mlog_errno(ret);
3917 			goto out;
3918 		}
3919 		break;
3920 	}
3921 
3922 	/*
3923 	 * Record the bucket we have found.
3924 	 * When the xattr's hash value is in the gap of 2 buckets, we will
3925 	 * always set it to the previous bucket.
3926 	 */
3927 	if (!lower_blkno)
3928 		lower_blkno = p_blkno;
3929 
3930 	/* This should be in cache - we just read it during the search */
3931 	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3932 	if (ret) {
3933 		mlog_errno(ret);
3934 		goto out;
3935 	}
3936 
3937 	xs->header = bucket_xh(xs->bucket);
3938 	xs->base = bucket_block(xs->bucket, 0);
3939 	xs->end = xs->base + inode->i_sb->s_blocksize;
3940 
3941 	if (found) {
3942 		xs->here = &xs->header->xh_entries[index];
3943 		trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
3944 			name, name_index, name_hash,
3945 			(unsigned long long)bucket_blkno(xs->bucket),
3946 			index);
3947 	} else
3948 		ret = -ENODATA;
3949 
3950 out:
3951 	ocfs2_xattr_bucket_free(search);
3952 	return ret;
3953 }
3954 
ocfs2_xattr_index_block_find(struct inode * inode,struct buffer_head * root_bh,int name_index,const char * name,struct ocfs2_xattr_search * xs)3955 static int ocfs2_xattr_index_block_find(struct inode *inode,
3956 					struct buffer_head *root_bh,
3957 					int name_index,
3958 					const char *name,
3959 					struct ocfs2_xattr_search *xs)
3960 {
3961 	int ret;
3962 	struct ocfs2_xattr_block *xb =
3963 			(struct ocfs2_xattr_block *)root_bh->b_data;
3964 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3965 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3966 	u64 p_blkno = 0;
3967 	u32 first_hash, num_clusters = 0;
3968 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3969 
3970 	if (le16_to_cpu(el->l_next_free_rec) == 0)
3971 		return -ENODATA;
3972 
3973 	trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
3974 					name, name_index, name_hash,
3975 					(unsigned long long)root_bh->b_blocknr,
3976 					-1);
3977 
3978 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3979 				  &num_clusters, el);
3980 	if (ret) {
3981 		mlog_errno(ret);
3982 		goto out;
3983 	}
3984 
3985 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3986 
3987 	trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
3988 					name, name_index, first_hash,
3989 					(unsigned long long)p_blkno,
3990 					num_clusters);
3991 
3992 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3993 				      p_blkno, first_hash, num_clusters, xs);
3994 
3995 out:
3996 	return ret;
3997 }
3998 
/*
 * Call func on each bucket of the extent that starts at blkno.
 *
 * The upper bound on the number of buckets is first derived from
 * clusters, then replaced by the xh_num_buckets value stored in the
 * first bucket once that bucket has been read.  Iteration stops at the
 * first read failure or non-zero return from func, and that value is
 * returned.  A NULL func only reads and traces the buckets.
 */
static int ocfs2_iterate_xattr_buckets(struct inode *inode,
				       u64 blkno,
				       u32 clusters,
				       xattr_bucket_func *func,
				       void *para)
{
	u32 per_cluster = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
	u32 total = clusters * per_cluster;
	u32 nr = 0;
	struct ocfs2_xattr_bucket *cur;
	int status = 0;

	cur = ocfs2_xattr_bucket_new(inode);
	if (!cur) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	trace_ocfs2_iterate_xattr_buckets(
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
		(unsigned long long)blkno, clusters);

	while (nr < total) {
		status = ocfs2_read_xattr_bucket(cur, blkno);
		if (status) {
			mlog_errno(status);
			break;
		}

		/*
		 * The real bucket num in this series of blocks is stored
		 * in the 1st bucket.
		 */
		if (nr == 0)
			total = le16_to_cpu(bucket_xh(cur)->xh_num_buckets);

		trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
		     le32_to_cpu(bucket_xh(cur)->xh_entries[0].xe_name_hash));

		if (func) {
			status = func(inode, cur, para);
			/* -ERANGE is passed up without being logged. */
			if (status && status != -ERANGE)
				mlog_errno(status);
		}

		/* Release the bucket whether or not func succeeded. */
		ocfs2_xattr_bucket_relse(cur);
		if (status)
			break;

		nr++;
		blkno += cur->bu_blocks;
	}

	ocfs2_xattr_bucket_free(cur);
	return status;
}
4051 
/*
 * State threaded through the index-tree listing callbacks as the opaque
 * "para" pointer.
 */
struct ocfs2_xattr_tree_list {
	/* Caller-supplied output buffer, handed to ocfs2_xattr_list_entry() */
	char *buffer;
	/* Size of buffer as supplied by the caller */
	size_t buffer_size;
	/* Running total updated by ocfs2_xattr_list_entry(); returned to the caller */
	size_t result;
};
4057 
ocfs2_xattr_bucket_get_name_value(struct super_block * sb,struct ocfs2_xattr_header * xh,int index,int * block_off,int * new_offset)4058 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
4059 					     struct ocfs2_xattr_header *xh,
4060 					     int index,
4061 					     int *block_off,
4062 					     int *new_offset)
4063 {
4064 	u16 name_offset;
4065 
4066 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4067 		return -EINVAL;
4068 
4069 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4070 
4071 	*block_off = name_offset >> sb->s_blocksize_bits;
4072 	*new_offset = name_offset % sb->s_blocksize;
4073 
4074 	return 0;
4075 }
4076 
ocfs2_list_xattr_bucket(struct inode * inode,struct ocfs2_xattr_bucket * bucket,void * para)4077 static int ocfs2_list_xattr_bucket(struct inode *inode,
4078 				   struct ocfs2_xattr_bucket *bucket,
4079 				   void *para)
4080 {
4081 	int ret = 0, type;
4082 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4083 	int i, block_off, new_offset;
4084 	const char *name;
4085 
4086 	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4087 		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4088 		type = ocfs2_xattr_get_type(entry);
4089 
4090 		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4091 							bucket_xh(bucket),
4092 							i,
4093 							&block_off,
4094 							&new_offset);
4095 		if (ret)
4096 			break;
4097 
4098 		name = (const char *)bucket_block(bucket, block_off) +
4099 			new_offset;
4100 		ret = ocfs2_xattr_list_entry(inode->i_sb,
4101 					     xl->buffer,
4102 					     xl->buffer_size,
4103 					     &xl->result,
4104 					     type, name,
4105 					     entry->xe_name_len);
4106 		if (ret)
4107 			break;
4108 	}
4109 
4110 	return ret;
4111 }
4112 
/*
 * Invoke rec_func once for every extent record of the xattr index tree
 * rooted in blk_bh, from the highest hash range down to the record whose
 * e_cpos is 0.
 *
 * Returns 0 when the tree is empty, rec_func is NULL, or every call
 * succeeded; otherwise the first error from ocfs2_xattr_get_rec() or
 * rec_func.  -ERANGE from rec_func is returned without being logged.
 */
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
					   struct buffer_head *blk_bh,
					   xattr_tree_rec_func *rec_func,
					   void *para)
{
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
	int ret = 0;
	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
	u64 p_blkno = 0;

	if (!el->l_next_free_rec || !rec_func)
		return 0;

	/*
	 * Start from the largest possible hash and step down one record
	 * at a time.  The loop must not be bounded by "name_hash > 0":
	 * after a record with e_cpos == 1 the next lookup hash is 0, and
	 * the record starting at e_cpos 0 still has to be visited.
	 *
	 * Termination: a successful lookup returns e_cpos <= name_hash, so
	 * name_hash strictly decreases until e_cpos == 0 ends the walk.
	 */
	for (;;) {
		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
					  &e_cpos, &num_clusters, el);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
			       num_clusters, para);
		if (ret) {
			if (ret != -ERANGE)
				mlog_errno(ret);
			break;
		}

		/* The record at e_cpos 0 is the last one. */
		if (e_cpos == 0)
			break;

		/* Continue with the record just below this one. */
		name_hash = e_cpos - 1;
	}

	return ret;
}
4153 
/*
 * Tree-record callback used for listing: run ocfs2_list_xattr_bucket()
 * over the buckets of the extent starting at blkno and spanning len
 * clusters.  root_bh and cpos are not needed here.
 */
static int ocfs2_list_xattr_tree_rec(struct inode *inode,
				     struct buffer_head *root_bh,
				     u64 blkno, u32 cpos, u32 len, void *para)
{
	int status;

	status = ocfs2_iterate_xattr_buckets(inode, blkno, len,
					     ocfs2_list_xattr_bucket, para);
	return status;
}
4161 
ocfs2_xattr_tree_list_index_block(struct inode * inode,struct buffer_head * blk_bh,char * buffer,size_t buffer_size)4162 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4163 					     struct buffer_head *blk_bh,
4164 					     char *buffer,
4165 					     size_t buffer_size)
4166 {
4167 	int ret;
4168 	struct ocfs2_xattr_tree_list xl = {
4169 		.buffer = buffer,
4170 		.buffer_size = buffer_size,
4171 		.result = 0,
4172 	};
4173 
4174 	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4175 					      ocfs2_list_xattr_tree_rec, &xl);
4176 	if (ret) {
4177 		mlog_errno(ret);
4178 		goto out;
4179 	}
4180 
4181 	ret = xl.result;
4182 out:
4183 	return ret;
4184 }
4185 
cmp_xe(const void * a,const void * b)4186 static int cmp_xe(const void *a, const void *b)
4187 {
4188 	const struct ocfs2_xattr_entry *l = a, *r = b;
4189 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
4190 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
4191 
4192 	if (l_hash > r_hash)
4193 		return 1;
4194 	if (l_hash < r_hash)
4195 		return -1;
4196 	return 0;
4197 }
4198 
4199 /*
4200  * When the ocfs2_xattr_block is filled up, new bucket will be created
4201  * and all the xattr entries will be moved to the new bucket.
4202  * The header goes at the start of the bucket, and the names+values are
4203  * filled from the end.  This is why *target starts as the last buffer.
4204  * Note: we need to sort the entries since they are not saved in order
4205  * in the ocfs2_xattr_block.
4206  */
ocfs2_cp_xattr_block_to_bucket(struct inode * inode,struct buffer_head * xb_bh,struct ocfs2_xattr_bucket * bucket)4207 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4208 					   struct buffer_head *xb_bh,
4209 					   struct ocfs2_xattr_bucket *bucket)
4210 {
4211 	int i, blocksize = inode->i_sb->s_blocksize;
4212 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4213 	u16 offset, size, off_change;
4214 	struct ocfs2_xattr_entry *xe;
4215 	struct ocfs2_xattr_block *xb =
4216 				(struct ocfs2_xattr_block *)xb_bh->b_data;
4217 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4218 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4219 	u16 count = le16_to_cpu(xb_xh->xh_count);
4220 	char *src = xb_bh->b_data;
4221 	char *target = bucket_block(bucket, blks - 1);
4222 
4223 	trace_ocfs2_cp_xattr_block_to_bucket_begin(
4224 				(unsigned long long)xb_bh->b_blocknr,
4225 				(unsigned long long)bucket_blkno(bucket));
4226 
4227 	for (i = 0; i < blks; i++)
4228 		memset(bucket_block(bucket, i), 0, blocksize);
4229 
4230 	/*
4231 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
4232 	 * there is a offset change corresponding to the change of
4233 	 * ocfs2_xattr_header's position.
4234 	 */
4235 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4236 	xe = &xb_xh->xh_entries[count - 1];
4237 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4238 	size = blocksize - offset;
4239 
4240 	/* copy all the names and values. */
4241 	memcpy(target + offset, src + offset, size);
4242 
4243 	/* Init new header now. */
4244 	xh->xh_count = xb_xh->xh_count;
4245 	xh->xh_num_buckets = cpu_to_le16(1);
4246 	xh->xh_name_value_len = cpu_to_le16(size);
4247 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4248 
4249 	/* copy all the entries. */
4250 	target = bucket_block(bucket, 0);
4251 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4252 	size = count * sizeof(struct ocfs2_xattr_entry);
4253 	memcpy(target + offset, (char *)xb_xh + offset, size);
4254 
4255 	/* Change the xe offset for all the xe because of the move. */
4256 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4257 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4258 	for (i = 0; i < count; i++)
4259 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4260 
4261 	trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
4262 
4263 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4264 	     cmp_xe, NULL);
4265 }
4266 
4267 /*
4268  * After we move xattr from block to index btree, we have to
4269  * update ocfs2_xattr_search to the new xe and base.
4270  *
4271  * When the entry is in xattr block, xattr_bh indicates the storage place.
4272  * While if the entry is in index b-tree, "bucket" indicates the
4273  * real place of the xattr.
4274  */
ocfs2_xattr_update_xattr_search(struct inode * inode,struct ocfs2_xattr_search * xs,struct buffer_head * old_bh)4275 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4276 					    struct ocfs2_xattr_search *xs,
4277 					    struct buffer_head *old_bh)
4278 {
4279 	char *buf = old_bh->b_data;
4280 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4281 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4282 	int i;
4283 
4284 	xs->header = bucket_xh(xs->bucket);
4285 	xs->base = bucket_block(xs->bucket, 0);
4286 	xs->end = xs->base + inode->i_sb->s_blocksize;
4287 
4288 	if (xs->not_found)
4289 		return;
4290 
4291 	i = xs->here - old_xh->xh_entries;
4292 	xs->here = &xs->header->xh_entries[i];
4293 }
4294 
ocfs2_xattr_create_index_block(struct inode * inode,struct ocfs2_xattr_search * xs,struct ocfs2_xattr_set_ctxt * ctxt)4295 static int ocfs2_xattr_create_index_block(struct inode *inode,
4296 					  struct ocfs2_xattr_search *xs,
4297 					  struct ocfs2_xattr_set_ctxt *ctxt)
4298 {
4299 	int ret;
4300 	u32 bit_off, len;
4301 	u64 blkno;
4302 	handle_t *handle = ctxt->handle;
4303 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
4304 	struct buffer_head *xb_bh = xs->xattr_bh;
4305 	struct ocfs2_xattr_block *xb =
4306 			(struct ocfs2_xattr_block *)xb_bh->b_data;
4307 	struct ocfs2_xattr_tree_root *xr;
4308 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
4309 
4310 	trace_ocfs2_xattr_create_index_block_begin(
4311 				(unsigned long long)xb_bh->b_blocknr);
4312 
4313 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4314 	BUG_ON(!xs->bucket);
4315 
4316 	/*
4317 	 * XXX:
4318 	 * We can use this lock for now, and maybe move to a dedicated mutex
4319 	 * if performance becomes a problem later.
4320 	 */
4321 	down_write(&oi->ip_alloc_sem);
4322 
4323 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4324 				      OCFS2_JOURNAL_ACCESS_WRITE);
4325 	if (ret) {
4326 		mlog_errno(ret);
4327 		goto out;
4328 	}
4329 
4330 	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4331 				     1, 1, &bit_off, &len);
4332 	if (ret) {
4333 		mlog_errno(ret);
4334 		goto out;
4335 	}
4336 
4337 	/*
4338 	 * The bucket may spread in many blocks, and
4339 	 * we will only touch the 1st block and the last block
4340 	 * in the whole bucket(one for entry and one for data).
4341 	 */
4342 	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4343 
4344 	trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
4345 
4346 	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
4347 	if (ret) {
4348 		mlog_errno(ret);
4349 		goto out;
4350 	}
4351 
4352 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4353 						OCFS2_JOURNAL_ACCESS_CREATE);
4354 	if (ret) {
4355 		mlog_errno(ret);
4356 		goto out;
4357 	}
4358 
4359 	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4360 	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4361 
4362 	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4363 
4364 	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4365 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4366 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
4367 
4368 	xr = &xb->xb_attrs.xb_root;
4369 	xr->xt_clusters = cpu_to_le32(1);
4370 	xr->xt_last_eb_blk = 0;
4371 	xr->xt_list.l_tree_depth = 0;
4372 	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4373 	xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4374 
4375 	xr->xt_list.l_recs[0].e_cpos = 0;
4376 	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4377 	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4378 
4379 	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4380 
4381 	ocfs2_journal_dirty(handle, xb_bh);
4382 
4383 out:
4384 	up_write(&oi->ip_alloc_sem);
4385 
4386 	return ret;
4387 }
4388 
cmp_xe_offset(const void * a,const void * b)4389 static int cmp_xe_offset(const void *a, const void *b)
4390 {
4391 	const struct ocfs2_xattr_entry *l = a, *r = b;
4392 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4393 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4394 
4395 	if (l_name_offset < r_name_offset)
4396 		return 1;
4397 	if (l_name_offset > r_name_offset)
4398 		return -1;
4399 	return 0;
4400 }
4401 
4402 /*
4403  * defrag a xattr bucket if we find that the bucket has some
4404  * holes between name/value pairs.
4405  * We will move all the name/value pairs to the end of the bucket
4406  * so that we can spare some space for insertion.
4407  */
ocfs2_defrag_xattr_bucket(struct inode * inode,handle_t * handle,struct ocfs2_xattr_bucket * bucket)4408 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4409 				     handle_t *handle,
4410 				     struct ocfs2_xattr_bucket *bucket)
4411 {
4412 	int ret, i;
4413 	size_t end, offset, len;
4414 	struct ocfs2_xattr_header *xh;
4415 	char *entries, *buf, *bucket_buf = NULL;
4416 	u64 blkno = bucket_blkno(bucket);
4417 	u16 xh_free_start;
4418 	size_t blocksize = inode->i_sb->s_blocksize;
4419 	struct ocfs2_xattr_entry *xe;
4420 
4421 	/*
4422 	 * In order to make the operation more efficient and generic,
4423 	 * we copy all the blocks into a contiguous memory and do the
4424 	 * defragment there, so if anything is error, we will not touch
4425 	 * the real block.
4426 	 */
4427 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4428 	if (!bucket_buf) {
4429 		ret = -EIO;
4430 		goto out;
4431 	}
4432 
4433 	buf = bucket_buf;
4434 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4435 		memcpy(buf, bucket_block(bucket, i), blocksize);
4436 
4437 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4438 						OCFS2_JOURNAL_ACCESS_WRITE);
4439 	if (ret < 0) {
4440 		mlog_errno(ret);
4441 		goto out;
4442 	}
4443 
4444 	xh = (struct ocfs2_xattr_header *)bucket_buf;
4445 	entries = (char *)xh->xh_entries;
4446 	xh_free_start = le16_to_cpu(xh->xh_free_start);
4447 
4448 	trace_ocfs2_defrag_xattr_bucket(
4449 	     (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4450 	     xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4451 
4452 	/*
4453 	 * sort all the entries by their offset.
4454 	 * the largest will be the first, so that we can
4455 	 * move them to the end one by one.
4456 	 */
4457 	sort(entries, le16_to_cpu(xh->xh_count),
4458 	     sizeof(struct ocfs2_xattr_entry),
4459 	     cmp_xe_offset, NULL);
4460 
4461 	/* Move all name/values to the end of the bucket. */
4462 	xe = xh->xh_entries;
4463 	end = OCFS2_XATTR_BUCKET_SIZE;
4464 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4465 		offset = le16_to_cpu(xe->xe_name_offset);
4466 		len = namevalue_size_xe(xe);
4467 
4468 		/*
4469 		 * We must make sure that the name/value pair
4470 		 * exist in the same block. So adjust end to
4471 		 * the previous block end if needed.
4472 		 */
4473 		if (((end - len) / blocksize !=
4474 			(end - 1) / blocksize))
4475 			end = end - end % blocksize;
4476 
4477 		if (end > offset + len) {
4478 			memmove(bucket_buf + end - len,
4479 				bucket_buf + offset, len);
4480 			xe->xe_name_offset = cpu_to_le16(end - len);
4481 		}
4482 
4483 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4484 				"bucket %llu\n", (unsigned long long)blkno);
4485 
4486 		end -= len;
4487 	}
4488 
4489 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4490 			"bucket %llu\n", (unsigned long long)blkno);
4491 
4492 	if (xh_free_start == end)
4493 		goto out;
4494 
4495 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4496 	xh->xh_free_start = cpu_to_le16(end);
4497 
4498 	/* sort the entries by their name_hash. */
4499 	sort(entries, le16_to_cpu(xh->xh_count),
4500 	     sizeof(struct ocfs2_xattr_entry),
4501 	     cmp_xe, NULL);
4502 
4503 	buf = bucket_buf;
4504 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4505 		memcpy(bucket_block(bucket, i), buf, blocksize);
4506 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4507 
4508 out:
4509 	kfree(bucket_buf);
4510 	return ret;
4511 }
4512 
4513 /*
4514  * prev_blkno points to the start of an existing extent.  new_blkno
4515  * points to a newly allocated extent.  Because we know each of our
4516  * clusters contains more than bucket, we can easily split one cluster
4517  * at a bucket boundary.  So we take the last cluster of the existing
4518  * extent and split it down the middle.  We move the last half of the
4519  * buckets in the last cluster of the existing extent over to the new
4520  * extent.
4521  *
4522  * first_bh is the buffer at prev_blkno so we can update the existing
4523  * extent's bucket count.  header_bh is the bucket were we were hoping
4524  * to insert our xattr.  If the bucket move places the target in the new
4525  * extent, we'll update first_bh and header_bh after modifying the old
4526  * extent.
4527  *
4528  * first_hash will be set as the 1st xe's name_hash in the new extent.
4529  */
ocfs2_mv_xattr_bucket_cross_cluster(struct inode * inode,handle_t * handle,struct ocfs2_xattr_bucket * first,struct ocfs2_xattr_bucket * target,u64 new_blkno,u32 num_clusters,u32 * first_hash)4530 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4531 					       handle_t *handle,
4532 					       struct ocfs2_xattr_bucket *first,
4533 					       struct ocfs2_xattr_bucket *target,
4534 					       u64 new_blkno,
4535 					       u32 num_clusters,
4536 					       u32 *first_hash)
4537 {
4538 	int ret;
4539 	struct super_block *sb = inode->i_sb;
4540 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4541 	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4542 	int to_move = num_buckets / 2;
4543 	u64 src_blkno;
4544 	u64 last_cluster_blkno = bucket_blkno(first) +
4545 		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4546 
4547 	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4548 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4549 
4550 	trace_ocfs2_mv_xattr_bucket_cross_cluster(
4551 				(unsigned long long)last_cluster_blkno,
4552 				(unsigned long long)new_blkno);
4553 
4554 	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4555 				     last_cluster_blkno, new_blkno,
4556 				     to_move, first_hash);
4557 	if (ret) {
4558 		mlog_errno(ret);
4559 		goto out;
4560 	}
4561 
4562 	/* This is the first bucket that got moved */
4563 	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4564 
4565 	/*
4566 	 * If the target bucket was part of the moved buckets, we need to
4567 	 * update first and target.
4568 	 */
4569 	if (bucket_blkno(target) >= src_blkno) {
4570 		/* Find the block for the new target bucket */
4571 		src_blkno = new_blkno +
4572 			(bucket_blkno(target) - src_blkno);
4573 
4574 		ocfs2_xattr_bucket_relse(first);
4575 		ocfs2_xattr_bucket_relse(target);
4576 
4577 		/*
4578 		 * These shouldn't fail - the buffers are in the
4579 		 * journal from ocfs2_cp_xattr_bucket().
4580 		 */
4581 		ret = ocfs2_read_xattr_bucket(first, new_blkno);
4582 		if (ret) {
4583 			mlog_errno(ret);
4584 			goto out;
4585 		}
4586 		ret = ocfs2_read_xattr_bucket(target, src_blkno);
4587 		if (ret)
4588 			mlog_errno(ret);
4589 
4590 	}
4591 
4592 out:
4593 	return ret;
4594 }
4595 
4596 /*
4597  * Find the suitable pos when we divide a bucket into 2.
4598  * We have to make sure the xattrs with the same hash value exist
4599  * in the same bucket.
4600  *
4601  * If this ocfs2_xattr_header covers more than one hash value, find a
4602  * place where the hash value changes.  Try to find the most even split.
4603  * The most common case is that all entries have different hash values,
4604  * and the first check we make will find a place to split.
4605  */
ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header * xh)4606 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4607 {
4608 	struct ocfs2_xattr_entry *entries = xh->xh_entries;
4609 	int count = le16_to_cpu(xh->xh_count);
4610 	int delta, middle = count / 2;
4611 
4612 	/*
4613 	 * We start at the middle.  Each step gets farther away in both
4614 	 * directions.  We therefore hit the change in hash value
4615 	 * nearest to the middle.  Note that this loop does not execute for
4616 	 * count < 2.
4617 	 */
4618 	for (delta = 0; delta < middle; delta++) {
4619 		/* Let's check delta earlier than middle */
4620 		if (cmp_xe(&entries[middle - delta - 1],
4621 			   &entries[middle - delta]))
4622 			return middle - delta;
4623 
4624 		/* For even counts, don't walk off the end */
4625 		if ((middle + delta + 1) == count)
4626 			continue;
4627 
4628 		/* Now try delta past middle */
4629 		if (cmp_xe(&entries[middle + delta],
4630 			   &entries[middle + delta + 1]))
4631 			return middle + delta + 1;
4632 	}
4633 
4634 	/* Every entry had the same hash */
4635 	return count;
4636 }
4637 
4638 /*
4639  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4640  * first_hash will record the 1st hash of the new bucket.
4641  *
4642  * Normally half of the xattrs will be moved.  But we have to make
4643  * sure that the xattrs with the same hash value are stored in the
4644  * same bucket. If all the xattrs in this bucket have the same hash
4645  * value, the new bucket will be initialized as an empty one and the
4646  * first_hash will be initialized as (hash_value+1).
4647  */
static int ocfs2_divide_xattr_bucket(struct inode *inode,
				    handle_t *handle,
				    u64 blk,
				    u64 new_blk,
				    u32 *first_hash,
				    int new_bucket_head)
{
	struct ocfs2_xattr_bucket *src = NULL, *dst = NULL;
	struct ocfs2_xattr_header *old_xh, *new_xh;
	struct ocfs2_xattr_entry *entry;
	int blocksize = inode->i_sb->s_blocksize;
	int kept_len = 0, kept_low = 0;
	int status, idx, total, split, nbytes, dst_access;

	trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
					      (unsigned long long)new_blk);

	src = ocfs2_xattr_bucket_new(inode);
	dst = ocfs2_xattr_bucket_new(inode);
	if (!src || !dst) {
		status = -ENOMEM;
		mlog_errno(status);
		goto out;
	}

	status = ocfs2_read_xattr_bucket(src, blk);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	status = ocfs2_xattr_bucket_journal_access(handle, src,
						   OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	/*
	 * The destination bucket is fully overwritten below, whether or
	 * not it is a brand new bucket head, so it is only initialized
	 * here and never read from disk.
	 */
	status = ocfs2_init_xattr_bucket(dst, new_blk, new_bucket_head);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	/*
	 * CREATE for a new bucket head, WRITE otherwise.  The reasoning
	 * for the distinction is given at the matching spot in
	 * ocfs2_cp_xattr_bucket().
	 */
	dst_access = new_bucket_head ? OCFS2_JOURNAL_ACCESS_CREATE :
				       OCFS2_JOURNAL_ACCESS_WRITE;
	status = ocfs2_xattr_bucket_journal_access(handle, dst, dst_access);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	old_xh = bucket_xh(src);
	new_xh = bucket_xh(dst);
	total = le16_to_cpu(old_xh->xh_count);
	split = ocfs2_xattr_find_divide_pos(old_xh);

	if (split == total) {
		/*
		 * Nothing moves: the new bucket starts out empty.  Its
		 * first hash is one more than the hash of the last
		 * entry left in the old bucket.
		 */
		entry = &old_xh->xh_entries[split - 1];

		for (idx = 0; idx < dst->bu_blocks; idx++)
			memset(bucket_block(dst, idx), 0, blocksize);

		new_xh->xh_free_start = cpu_to_le16(blocksize);
		new_xh->xh_entries[0].xe_name_hash = entry->xe_name_hash;
		le32_add_cpu(&new_xh->xh_entries[0].xe_name_hash, 1);
	} else {
		u16 free_start = OCFS2_XATTR_BUCKET_SIZE;
		int moved;

		/* Start from a full copy of the old bucket. */
		ocfs2_xattr_bucket_copy_data(dst, src);

		/*
		 * Sum the name/value bytes and find the lowest name
		 * offset of the entries that stay in the old bucket
		 * (indexes below the split point).
		 */
		kept_low = OCFS2_XATTR_BUCKET_SIZE;
		kept_len = 0;
		for (idx = 0; idx < split; idx++) {
			entry = &new_xh->xh_entries[idx];
			kept_len += namevalue_size_xe(entry);
			if (le16_to_cpu(entry->xe_name_offset) < kept_low)
				kept_low = le16_to_cpu(entry->xe_name_offset);
		}

		/*
		 * Rework the new bucket.  Only the entry array is
		 * shifted to the front; names and values stay where
		 * they are.  The holes this leaves are reclaimed when
		 * ocfs2_defrag_xattr_bucket is called.
		 */
		entry = &new_xh->xh_entries[split];
		nbytes = sizeof(struct ocfs2_xattr_entry) * (total - split);
		trace_ocfs2_divide_xattr_bucket_move(nbytes,
				(int)((char *)entry - (char *)new_xh),
				(int)((char *)new_xh->xh_entries -
				      (char *)new_xh));
		memmove((char *)new_xh->xh_entries, (char *)entry, nbytes);

		/* Clear the now-unused tail of the entry array. */
		entry = &new_xh->xh_entries[total - split];
		nbytes = sizeof(struct ocfs2_xattr_entry) * split;
		memset((char *)entry, 0, nbytes);

		le16_add_cpu(&new_xh->xh_count, -split);
		le16_add_cpu(&new_xh->xh_name_value_len, -kept_len);

		/* xh_free_start is the lowest name offset still in use. */
		moved = le16_to_cpu(new_xh->xh_count);
		for (idx = 0; idx < moved; idx++) {
			u16 off = le16_to_cpu(
				new_xh->xh_entries[idx].xe_name_offset);

			if (off < free_start)
				free_start = off;
		}
		new_xh->xh_free_start = cpu_to_le16(free_start);
	}

	/* Only a bucket that heads an extent carries a bucket count. */
	new_xh->xh_num_buckets = cpu_to_le16(new_bucket_head ? 1 : 0);

	ocfs2_xattr_bucket_journal_dirty(handle, dst);

	/* Report the first hash of the new bucket if the caller asked. */
	if (first_hash)
		*first_hash = le32_to_cpu(new_xh->xh_entries[0].xe_name_hash);

	/*
	 * The old bucket only needs its header trimmed, and only when
	 * entries were actually moved out of it.
	 */
	if (split != total) {
		memset(&old_xh->xh_entries[split], 0,
		       sizeof(struct ocfs2_xattr_entry) * (total - split));
		old_xh->xh_count = cpu_to_le16(split);
		old_xh->xh_free_start = cpu_to_le16(kept_low);
		old_xh->xh_name_value_len = cpu_to_le16(kept_len);

		ocfs2_xattr_bucket_journal_dirty(handle, src);
	}

out:
	ocfs2_xattr_bucket_free(src);
	ocfs2_xattr_bucket_free(dst);

	return status;
}
4818 
4819 /*
4820  * Copy xattr from one bucket to another bucket.
4821  *
4822  * The caller must make sure that the journal transaction
4823  * has enough space for journaling.
4824  */
static int ocfs2_cp_xattr_bucket(struct inode *inode,
				 handle_t *handle,
				 u64 s_blkno,
				 u64 t_blkno,
				 int t_is_new)
{
	struct ocfs2_xattr_bucket *src = NULL, *dst = NULL;
	int dst_access;
	int ret;

	BUG_ON(s_blkno == t_blkno);

	trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
				    (unsigned long long)t_blkno,
				    t_is_new);

	src = ocfs2_xattr_bucket_new(inode);
	dst = ocfs2_xattr_bucket_new(inode);
	if (!src || !dst) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(src, s_blkno);

	/*
	 * The target is overwritten wholesale even when !t_is_new, so
	 * it is initialized rather than read.
	 */
	if (!ret)
		ret = ocfs2_init_xattr_bucket(dst, t_blkno, t_is_new);

	/*
	 * Why distinguish ACCESS_CREATE from ACCESS_WRITE if the target
	 * is overwritten anyway?  When a new cluster was allocated to
	 * fill, we got here from ocfs2_mv_xattr_buckets() and the
	 * bucket is really new, so ACCESS_CREATE is required.  But data
	 * may also have been moved out of the target before extending
	 * back into it: ocfs2_add_new_xattr_bucket() can call
	 * ocfs2_add_new_xattr_cluster(), which may create a new extent
	 * and copy out the end of the old one, and then re-extend the
	 * old extent to make room for new xattrs.  In that case the
	 * bucket is not really new.
	 */
	if (!ret) {
		dst_access = t_is_new ? OCFS2_JOURNAL_ACCESS_CREATE :
					OCFS2_JOURNAL_ACCESS_WRITE;
		ret = ocfs2_xattr_bucket_journal_access(handle, dst,
							dst_access);
	}

	if (!ret) {
		ocfs2_xattr_bucket_copy_data(dst, src);
		ocfs2_xattr_bucket_journal_dirty(handle, dst);
	}

out:
	ocfs2_xattr_bucket_free(dst);
	ocfs2_xattr_bucket_free(src);

	return ret;
}
4889 
4890 /*
4891  * src_blk points to the start of an existing extent.  last_blk points to
4892  * last cluster in that extent.  to_blk points to a newly allocated
4893  * extent.  We copy the buckets from the cluster at last_blk to the new
4894  * extent.  If start_bucket is non-zero, we skip that many buckets before
4895  * we start copying.  The new extent's xh_num_buckets gets set to the
4896  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4897  * by the same amount.
4898  */
static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
				  u64 src_blk, u64 last_blk, u64 to_blk,
				  unsigned int start_bucket,
				  u32 *first_hash)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
	struct ocfs2_xattr_bucket *old_first, *new_first;
	u64 from, to;
	int n, ret, credits;

	trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
				     (unsigned long long)to_blk);

	BUG_ON(start_bucket >= num_buckets);

	/* Skip the leading start_bucket buckets (a no-op when it is 0). */
	num_buckets -= start_bucket;
	last_blk += (start_bucket * blks_per_bucket);

	/* Head bucket of the original extent and of the new extent. */
	old_first = ocfs2_xattr_bucket_new(inode);
	new_first = ocfs2_xattr_bucket_new(inode);
	if (!old_first || !new_first) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Journal credits: every bucket headed for the new extent plus
	 * the head bucket of the old extent.
	 */
	credits = ((num_buckets + 1) * blks_per_bucket);
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Copy the buckets one by one; each target is flagged as new. */
	from = last_blk;
	to = to_blk;
	for (n = 0; n < num_buckets; n++) {
		ret = ocfs2_cp_xattr_bucket(inode, handle, from, to, 1);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		from += blks_per_bucket;
		to += blks_per_bucket;
	}

	/*
	 * Prepare the new head bucket before dirtying anything.  (This
	 * should not actually fail, since ocfs2_cp_xattr_bucket()
	 * already dirtied it once.)
	 */
	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Move the bucket count from the old header to the new one. */
	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
	ocfs2_xattr_bucket_journal_dirty(handle, old_first);

	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
	ocfs2_xattr_bucket_journal_dirty(handle, new_first);

	if (first_hash)
		*first_hash = le32_to_cpu(
			bucket_xh(new_first)->xh_entries[0].xe_name_hash);

out:
	ocfs2_xattr_bucket_free(new_first);
	ocfs2_xattr_bucket_free(old_first);
	return ret;
}
4996 
4997 /*
4998  * Move some xattrs in this cluster to the new cluster.
4999  * This function should only be called when bucket size == cluster size.
5000  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
5001  */
static int ocfs2_divide_xattr_cluster(struct inode *inode,
				      handle_t *handle,
				      u64 prev_blk,
				      u64 new_blk,
				      u32 *first_hash)
{
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	/* Two buckets are touched: the one at prev_blk and the new one. */
	int credits = 2 * blk_per_bucket;
	int ret;

	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);

	ret = ocfs2_extend_trans(handle, credits);
	if (!ret) {
		/*
		 * Split the bucket at prev_blk, moving part of its
		 * xattrs into a new bucket head at new_blk.
		 */
		return ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
						 new_blk, first_hash, 1);
	}

	mlog_errno(ret);
	return ret;
}
5023 
5024 /*
5025  * Move some xattrs from the old cluster to the new one since they are not
5026  * contiguous in ocfs2 xattr tree.
5027  *
5028  * new_blk starts a new separate cluster, and we will move some xattrs from
5029  * prev_blk to it. v_start will be set as the first name hash value in this
5030  * new cluster so that it can be used as e_cpos during tree insertion and
5031  * don't collide with our original b-tree operations. first_bh and header_bh
5032  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
5033  * to extend the insert bucket.
5034  *
5035  * The problem is how much xattr should we move to the new one and when should
5036  * we update first_bh and header_bh?
5037  * 1. If cluster size > bucket size, that means the previous cluster has more
5038  *    than 1 bucket, so just move half nums of bucket into the new cluster and
5039  *    update the first_bh and header_bh if the insert bucket has been moved
5040  *    to the new cluster.
5041  * 2. If cluster_size == bucket_size:
5042  *    a) If the previous extent rec has more than one cluster and the insert
5043  *       place isn't in the last cluster, copy the entire last cluster to the
5044  *       new one. This time, we don't need to update the first_bh and header_bh
5045  *       since they will not be moved into the new cluster.
5046  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
5047  *       the new one. And we set the extend flag to zero if the insert place is
5048  *       moved into the new allocated cluster since no extend is needed.
5049  */
static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
					    handle_t *handle,
					    struct ocfs2_xattr_bucket *first,
					    struct ocfs2_xattr_bucket *target,
					    u64 new_blk,
					    u32 prev_clusters,
					    u32 *v_start,
					    int *extend)
{
	u64 last_blk;
	int ret;

	trace_ocfs2_adjust_xattr_cross_cluster(
			(unsigned long long)bucket_blkno(first),
			(unsigned long long)new_blk, prev_clusters);

	/* Case 1: several buckets per cluster. */
	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
							  handle,
							  first, target,
							  new_blk,
							  prev_clusters,
							  v_start);
		if (ret)
			mlog_errno(ret);
		return ret;
	}

	/* First block of the last cluster in the first extent. */
	last_blk = bucket_blkno(first) +
		((prev_clusters - 1) *
		 ocfs2_clusters_to_blocks(inode->i_sb, 1));

	/* Case 2a: the target is not in the last cluster; move it whole. */
	if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
		ret = ocfs2_mv_xattr_buckets(inode, handle,
					     bucket_blkno(first),
					     last_blk, new_blk, 0,
					     v_start);
		if (ret)
			mlog_errno(ret);
		return ret;
	}

	/* Case 2b: split the last cluster with the new one. */
	ret = ocfs2_divide_xattr_cluster(inode, handle,
					 last_blk, new_blk,
					 v_start);
	if (ret)
		mlog_errno(ret);

	/* The target itself was just split, so no extend is needed. */
	if (extend && (bucket_blkno(target) == last_blk))
		*extend = 0;

	return ret;
}
5101 
5102 /*
5103  * Add a new cluster for xattr storage.
5104  *
5105  * If the new cluster is contiguous with the previous one, it will be
5106  * appended to the same extent record, and num_clusters will be updated.
5107  * If not, we will insert a new extent for it and move some xattrs in
5108  * the last cluster into the new allocated one.
5109  * We also need to limit the maximum size of a btree leaf, otherwise we'll
5110  * lose the benefits of hashing because we'll have to search large leaves.
5111  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5112  * if it's bigger).
5113  *
 * 'first' is the first bucket of the previous extent rec and 'target'
 * is the bucket into which we will insert the new xattrs. They will be
 * updated when the target bucket is moved into the new cluster.
5117  */
static int ocfs2_add_new_xattr_cluster(struct inode *inode,
				       struct buffer_head *root_bh,
				       struct ocfs2_xattr_bucket *first,
				       struct ocfs2_xattr_bucket *target,
				       u32 *num_clusters,
				       u32 prev_cpos,
				       int *extend,
				       struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle = ctxt->handle;
	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 prev_clusters = *num_clusters;
	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
	struct ocfs2_extent_tree et;
	int contiguous, fits_leaf;
	u64 block;
	int ret;

	trace_ocfs2_add_new_xattr_cluster_begin(
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
		(unsigned long long)bucket_blkno(first),
		prev_cpos, prev_clusters);

	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
				     clusters_to_add, &bit_off, &num_bits);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		return ret;
	}

	BUG_ON(num_bits > clusters_to_add);

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);

	/* Does the new cluster sit right behind the existing extent? */
	contiguous = bucket_blkno(first) + (prev_clusters * bpc) == block;
	/* Would the grown extent stay within the leaf size limit? */
	fits_leaf = (prev_clusters + num_bits) << osb->s_clustersize_bits <=
		    OCFS2_MAX_XATTR_TREE_LEAF_SIZE;

	if (contiguous && fits_leaf) {
		/*
		 * Contiguous and still within
		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE: simply append the
		 * cluster to the extent.  Its buckets are initialized
		 * and used like the others; the caller takes care of
		 * the initialization.
		 */
		v_start = prev_cpos + prev_clusters;
		*num_clusters = prev_clusters + num_bits;
	} else {
		ret = ocfs2_adjust_xattr_cross_cluster(inode,
						       handle,
						       first,
						       target,
						       block,
						       prev_clusters,
						       &v_start,
						       extend);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}
	}

	trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
						 v_start, num_bits);
	ret = ocfs2_insert_extent(handle, &et, v_start, block,
				  num_bits, 0, ctxt->meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	ocfs2_journal_dirty(handle, root_bh);

	return ret;
}
5206 
5207 /*
5208  * We are given an extent.  'first' is the bucket at the very front of
5209  * the extent.  The extent has space for an additional bucket past
5210  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5211  * of the target bucket.  We wish to shift every bucket past the target
5212  * down one, filling in that additional space.  When we get back to the
5213  * target, we split the target between itself and the now-empty bucket
5214  * at target+1 (aka, target_blkno + blks_per_bucket).
5215  */
static int ocfs2_extend_xattr_bucket(struct inode *inode,
				     handle_t *handle,
				     struct ocfs2_xattr_bucket *first,
				     u64 target_blk,
				     u32 num_clusters)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
	u64 end_blk;
	int ret, credits;

	trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
					(unsigned long long)bucket_blkno(first),
					num_clusters, new_bucket);

	/* There must be space in the extent for one more bucket. */
	BUG_ON(new_bucket >=
	       (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));

	/* Start block of the last bucket currently in use. */
	end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);

	/*
	 * Credits: (end_blk - target_blk) blocks cover the target and
	 * every bucket after it short of the last existing one.  On
	 * top of that come the last existing bucket, the new bucket
	 * and the first bucket, i.e. 3 * blk_per_bucket.
	 */
	credits = (end_blk - target_blk) + (3 * blk_per_bucket);
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	ret = ocfs2_xattr_bucket_journal_access(handle, first,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* Walk backwards, shifting each bucket after the target down one. */
	for (; end_blk != target_blk; end_blk -= blk_per_bucket) {
		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
					    end_blk + blk_per_bucket, 0);
		if (ret)
			return ret;
	}

	/* Split the target with the bucket slot right after it. */
	ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
					target_blk + blk_per_bucket, NULL, 0);

	/*
	 * The bucket count is bumped and the first bucket dirtied
	 * whatever the divide call returned; its status is handed back
	 * to the caller as is.
	 */
	le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
	ocfs2_xattr_bucket_journal_dirty(handle, first);

	return ret;
}
5278 
5279 /*
5280  * Add new xattr bucket in an extent record and adjust the buckets
5281  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5282  * bucket we want to insert into.
5283  *
5284  * In the easy case, we will move all the buckets after target down by
5285  * one. Half of target's xattrs will be moved to the next bucket.
5286  *
5287  * If current cluster is full, we'll allocate a new one.  This may not
5288  * be contiguous.  The underlying calls will make sure that there is
5289  * space for the insert, shifting buckets around if necessary.
5290  * 'target' may be moved by those calls.
5291  */
ocfs2_add_new_xattr_bucket(struct inode * inode,struct buffer_head * xb_bh,struct ocfs2_xattr_bucket * target,struct ocfs2_xattr_set_ctxt * ctxt)5292 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5293 				      struct buffer_head *xb_bh,
5294 				      struct ocfs2_xattr_bucket *target,
5295 				      struct ocfs2_xattr_set_ctxt *ctxt)
5296 {
5297 	struct ocfs2_xattr_block *xb =
5298 			(struct ocfs2_xattr_block *)xb_bh->b_data;
5299 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5300 	struct ocfs2_extent_list *el = &xb_root->xt_list;
5301 	u32 name_hash =
5302 		le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5303 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5304 	int ret, num_buckets, extend = 1;
5305 	u64 p_blkno;
5306 	u32 e_cpos, num_clusters;
5307 	/* The bucket at the front of the extent */
5308 	struct ocfs2_xattr_bucket *first;
5309 
5310 	trace_ocfs2_add_new_xattr_bucket(
5311 				(unsigned long long)bucket_blkno(target));
5312 
5313 	/* The first bucket of the original extent */
5314 	first = ocfs2_xattr_bucket_new(inode);
5315 	if (!first) {
5316 		ret = -ENOMEM;
5317 		mlog_errno(ret);
5318 		goto out;
5319 	}
5320 
5321 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5322 				  &num_clusters, el);
5323 	if (ret) {
5324 		mlog_errno(ret);
5325 		goto out;
5326 	}
5327 
5328 	ret = ocfs2_read_xattr_bucket(first, p_blkno);
5329 	if (ret) {
5330 		mlog_errno(ret);
5331 		goto out;
5332 	}
5333 
5334 	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5335 	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5336 		/*
5337 		 * This can move first+target if the target bucket moves
5338 		 * to the new extent.
5339 		 */
5340 		ret = ocfs2_add_new_xattr_cluster(inode,
5341 						  xb_bh,
5342 						  first,
5343 						  target,
5344 						  &num_clusters,
5345 						  e_cpos,
5346 						  &extend,
5347 						  ctxt);
5348 		if (ret) {
5349 			mlog_errno(ret);
5350 			goto out;
5351 		}
5352 	}
5353 
5354 	if (extend) {
5355 		ret = ocfs2_extend_xattr_bucket(inode,
5356 						ctxt->handle,
5357 						first,
5358 						bucket_blkno(target),
5359 						num_clusters);
5360 		if (ret)
5361 			mlog_errno(ret);
5362 	}
5363 
5364 out:
5365 	ocfs2_xattr_bucket_free(first);
5366 
5367 	return ret;
5368 }
5369 
5370 /*
5371  * Truncate the specified xe_off entry in xattr bucket.
 * The bucket is given by 'bucket' and len is the new length.
5373  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5374  *
5375  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5376  */
ocfs2_xattr_bucket_value_truncate(struct inode * inode,struct ocfs2_xattr_bucket * bucket,int xe_off,int len,struct ocfs2_xattr_set_ctxt * ctxt)5377 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5378 					     struct ocfs2_xattr_bucket *bucket,
5379 					     int xe_off,
5380 					     int len,
5381 					     struct ocfs2_xattr_set_ctxt *ctxt)
5382 {
5383 	int ret, offset;
5384 	u64 value_blk;
5385 	struct ocfs2_xattr_entry *xe;
5386 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5387 	size_t blocksize = inode->i_sb->s_blocksize;
5388 	struct ocfs2_xattr_value_buf vb = {
5389 		.vb_access = ocfs2_journal_access,
5390 	};
5391 
5392 	xe = &xh->xh_entries[xe_off];
5393 
5394 	BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5395 
5396 	offset = le16_to_cpu(xe->xe_name_offset) +
5397 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
5398 
5399 	value_blk = offset / blocksize;
5400 
5401 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
5402 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5403 
5404 	vb.vb_bh = bucket->bu_bhs[value_blk];
5405 	BUG_ON(!vb.vb_bh);
5406 
5407 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
5408 		(vb.vb_bh->b_data + offset % blocksize);
5409 
5410 	/*
5411 	 * From here on out we have to dirty the bucket.  The generic
5412 	 * value calls only modify one of the bucket's bhs, but we need
5413 	 * to send the bucket at once.  So if they error, they *could* have
5414 	 * modified something.  We have to assume they did, and dirty
5415 	 * the whole bucket.  This leaves us in a consistent state.
5416 	 */
5417 	trace_ocfs2_xattr_bucket_value_truncate(
5418 			(unsigned long long)bucket_blkno(bucket), xe_off, len);
5419 	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5420 	if (ret) {
5421 		mlog_errno(ret);
5422 		goto out;
5423 	}
5424 
5425 	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5426 						OCFS2_JOURNAL_ACCESS_WRITE);
5427 	if (ret) {
5428 		mlog_errno(ret);
5429 		goto out;
5430 	}
5431 
5432 	xe->xe_value_size = cpu_to_le64(len);
5433 
5434 	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5435 
5436 out:
5437 	return ret;
5438 }
5439 
/*
 * Remove the extent [cpos, cpos + len) that starts at block blkno from
 * the xattr tree rooted in root_bh.
 *
 * The xattrs in every bucket of the range are deleted first through
 * ocfs2_delete_xattr_in_bucket (para is passed through to it).  Then
 * the extent is removed from the tree, xt_clusters is decreased by len
 * and the freed range is appended to the truncate log.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_rm_xattr_cluster(struct inode *inode,
				  struct buffer_head *root_bh,
				  u64 blkno,
				  u32 cpos,
				  u32 len,
				  void *para)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	handle_t *handle;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)root_bh->b_data;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct ocfs2_extent_tree et;

	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
					  ocfs2_delete_xattr_in_bucket, para);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);

	ocfs2_init_dealloc_ctxt(&dealloc);

	trace_ocfs2_rm_xattr_cluster(
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			(unsigned long long)blkno, cpos, len);

	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
					       len);

	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* The truncate log inode stays locked until the 'out' label. */
	inode_lock(tl_inode);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
	if (IS_ERR(handle)) {
		/*
		 * Hand back the error encoded in the pointer instead of
		 * masking every failure as -ENOMEM.
		 */
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
				  &dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Account for the removed clusters in the xattr tree root. */
	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
	ocfs2_journal_dirty(handle, root_bh);

	/* A failure here is logged and returned; the commit still runs. */
	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
	if (ret)
		mlog_errno(ret);
	ocfs2_update_inode_fsync_trans(handle, inode, 0);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	ocfs2_schedule_truncate_log_flush(osb, 1);

	inode_unlock(tl_inode);

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	ocfs2_run_deallocs(osb, &dealloc);

	return ret;
}
5534 
5535 /*
5536  * check whether the xattr bucket is filled up with the same hash value.
5537  * If we want to insert the xattr with the same hash, return -ENOSPC.
5538  * If we want to insert a xattr with different hash value, go ahead
5539  * and ocfs2_divide_xattr_bucket will handle this.
5540  */
ocfs2_check_xattr_bucket_collision(struct inode * inode,struct ocfs2_xattr_bucket * bucket,const char * name)5541 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5542 					      struct ocfs2_xattr_bucket *bucket,
5543 					      const char *name)
5544 {
5545 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5546 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5547 
5548 	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5549 		return 0;
5550 
5551 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5552 	    xh->xh_entries[0].xe_name_hash) {
5553 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5554 		     "hash = %u\n",
5555 		     (unsigned long long)bucket_blkno(bucket),
5556 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5557 		return -ENOSPC;
5558 	}
5559 
5560 	return 0;
5561 }
5562 
5563 /*
5564  * Try to set the entry in the current bucket.  If we fail, the caller
5565  * will handle getting us another bucket.
5566  */
ocfs2_xattr_set_entry_bucket(struct inode * inode,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xs,struct ocfs2_xattr_set_ctxt * ctxt)5567 static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5568 					struct ocfs2_xattr_info *xi,
5569 					struct ocfs2_xattr_search *xs,
5570 					struct ocfs2_xattr_set_ctxt *ctxt)
5571 {
5572 	int ret;
5573 	struct ocfs2_xa_loc loc;
5574 
5575 	trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
5576 
5577 	ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5578 				       xs->not_found ? NULL : xs->here);
5579 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5580 	if (!ret) {
5581 		xs->here = loc.xl_entry;
5582 		goto out;
5583 	}
5584 	if (ret != -ENOSPC) {
5585 		mlog_errno(ret);
5586 		goto out;
5587 	}
5588 
5589 	/* Ok, we need space.  Let's try defragmenting the bucket. */
5590 	ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5591 					xs->bucket);
5592 	if (ret) {
5593 		mlog_errno(ret);
5594 		goto out;
5595 	}
5596 
5597 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5598 	if (!ret) {
5599 		xs->here = loc.xl_entry;
5600 		goto out;
5601 	}
5602 	if (ret != -ENOSPC)
5603 		mlog_errno(ret);
5604 
5605 
5606 out:
5607 	return ret;
5608 }
5609 
ocfs2_xattr_set_entry_index_block(struct inode * inode,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xs,struct ocfs2_xattr_set_ctxt * ctxt)5610 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5611 					     struct ocfs2_xattr_info *xi,
5612 					     struct ocfs2_xattr_search *xs,
5613 					     struct ocfs2_xattr_set_ctxt *ctxt)
5614 {
5615 	int ret;
5616 
5617 	trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);
5618 
5619 	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5620 	if (!ret)
5621 		goto out;
5622 	if (ret != -ENOSPC) {
5623 		mlog_errno(ret);
5624 		goto out;
5625 	}
5626 
5627 	/* Ack, need more space.  Let's try to get another bucket! */
5628 
5629 	/*
5630 	 * We do not allow for overlapping ranges between buckets. And
5631 	 * the maximum number of collisions we will allow for then is
5632 	 * one bucket's worth, so check it here whether we need to
5633 	 * add a new bucket for the insert.
5634 	 */
5635 	ret = ocfs2_check_xattr_bucket_collision(inode,
5636 						 xs->bucket,
5637 						 xi->xi_name);
5638 	if (ret) {
5639 		mlog_errno(ret);
5640 		goto out;
5641 	}
5642 
5643 	ret = ocfs2_add_new_xattr_bucket(inode,
5644 					 xs->xattr_bh,
5645 					 xs->bucket,
5646 					 ctxt);
5647 	if (ret) {
5648 		mlog_errno(ret);
5649 		goto out;
5650 	}
5651 
5652 	/*
5653 	 * ocfs2_add_new_xattr_bucket() will have updated
5654 	 * xs->bucket if it moved, but it will not have updated
5655 	 * any of the other search fields.  Thus, we drop it and
5656 	 * re-search.  Everything should be cached, so it'll be
5657 	 * quick.
5658 	 */
5659 	ocfs2_xattr_bucket_relse(xs->bucket);
5660 	ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5661 					   xi->xi_name_index,
5662 					   xi->xi_name, xs);
5663 	if (ret && ret != -ENODATA)
5664 		goto out;
5665 	xs->not_found = ret;
5666 
5667 	/* Ok, we have a new bucket, let's try again */
5668 	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5669 	if (ret && (ret != -ENOSPC))
5670 		mlog_errno(ret);
5671 
5672 out:
5673 	return ret;
5674 }
5675 
ocfs2_delete_xattr_in_bucket(struct inode * inode,struct ocfs2_xattr_bucket * bucket,void * para)5676 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5677 					struct ocfs2_xattr_bucket *bucket,
5678 					void *para)
5679 {
5680 	int ret = 0, ref_credits;
5681 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5682 	u16 i;
5683 	struct ocfs2_xattr_entry *xe;
5684 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5685 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5686 	int credits = ocfs2_remove_extent_credits(osb->sb) +
5687 		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5688 	struct ocfs2_xattr_value_root *xv;
5689 	struct ocfs2_rm_xattr_bucket_para *args =
5690 			(struct ocfs2_rm_xattr_bucket_para *)para;
5691 
5692 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5693 
5694 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5695 		xe = &xh->xh_entries[i];
5696 		if (ocfs2_xattr_is_local(xe))
5697 			continue;
5698 
5699 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5700 						      i, &xv, NULL);
5701 		if (ret) {
5702 			mlog_errno(ret);
5703 			break;
5704 		}
5705 
5706 		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5707 							 args->ref_ci,
5708 							 args->ref_root_bh,
5709 							 &ctxt.meta_ac,
5710 							 &ref_credits);
5711 
5712 		ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5713 		if (IS_ERR(ctxt.handle)) {
5714 			ret = PTR_ERR(ctxt.handle);
5715 			mlog_errno(ret);
5716 			break;
5717 		}
5718 
5719 		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5720 							i, 0, &ctxt);
5721 
5722 		ocfs2_commit_trans(osb, ctxt.handle);
5723 		if (ctxt.meta_ac) {
5724 			ocfs2_free_alloc_context(ctxt.meta_ac);
5725 			ctxt.meta_ac = NULL;
5726 		}
5727 		if (ret) {
5728 			mlog_errno(ret);
5729 			break;
5730 		}
5731 	}
5732 
5733 	if (ctxt.meta_ac)
5734 		ocfs2_free_alloc_context(ctxt.meta_ac);
5735 	ocfs2_schedule_truncate_log_flush(osb, 1);
5736 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
5737 	return ret;
5738 }
5739 
5740 /*
5741  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5742  * or change the extent record flag), we need to recalculate
5743  * the metaecc for the whole bucket. So it is done here.
5744  *
5745  * Note:
5746  * We have to give the extra credits for the caller.
5747  */
ocfs2_xattr_bucket_post_refcount(struct inode * inode,handle_t * handle,void * para)5748 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5749 					    handle_t *handle,
5750 					    void *para)
5751 {
5752 	int ret;
5753 	struct ocfs2_xattr_bucket *bucket =
5754 			(struct ocfs2_xattr_bucket *)para;
5755 
5756 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5757 						OCFS2_JOURNAL_ACCESS_WRITE);
5758 	if (ret) {
5759 		mlog_errno(ret);
5760 		return ret;
5761 	}
5762 
5763 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5764 
5765 	return 0;
5766 }
5767 
5768 /*
5769  * Special action we need if the xattr value is refcounted.
5770  *
5771  * 1. If the xattr is refcounted, lock the tree.
5772  * 2. CoW the xattr if we are setting the new value and the value
5773  *    will be stored outside.
5774  * 3. In other case, decrease_refcount will work for us, so just
5775  *    lock the refcount tree, calculate the meta and credits is OK.
5776  *
5777  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5778  * currently CoW is a completed transaction, while this function
5779  * will also lock the allocators and let us deadlock. So we will
5780  * CoW the whole xattr value.
5781  */
ocfs2_prepare_refcount_xattr(struct inode * inode,struct ocfs2_dinode * di,struct ocfs2_xattr_info * xi,struct ocfs2_xattr_search * xis,struct ocfs2_xattr_search * xbs,struct ocfs2_refcount_tree ** ref_tree,int * meta_add,int * credits)5782 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5783 					struct ocfs2_dinode *di,
5784 					struct ocfs2_xattr_info *xi,
5785 					struct ocfs2_xattr_search *xis,
5786 					struct ocfs2_xattr_search *xbs,
5787 					struct ocfs2_refcount_tree **ref_tree,
5788 					int *meta_add,
5789 					int *credits)
5790 {
5791 	int ret = 0;
5792 	struct ocfs2_xattr_block *xb;
5793 	struct ocfs2_xattr_entry *xe;
5794 	char *base;
5795 	u32 p_cluster, num_clusters;
5796 	unsigned int ext_flags;
5797 	int name_offset, name_len;
5798 	struct ocfs2_xattr_value_buf vb;
5799 	struct ocfs2_xattr_bucket *bucket = NULL;
5800 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5801 	struct ocfs2_post_refcount refcount;
5802 	struct ocfs2_post_refcount *p = NULL;
5803 	struct buffer_head *ref_root_bh = NULL;
5804 
5805 	if (!xis->not_found) {
5806 		xe = xis->here;
5807 		name_offset = le16_to_cpu(xe->xe_name_offset);
5808 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5809 		base = xis->base;
5810 		vb.vb_bh = xis->inode_bh;
5811 		vb.vb_access = ocfs2_journal_access_di;
5812 	} else {
5813 		int i, block_off = 0;
5814 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5815 		xe = xbs->here;
5816 		name_offset = le16_to_cpu(xe->xe_name_offset);
5817 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5818 		i = xbs->here - xbs->header->xh_entries;
5819 
5820 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5821 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5822 							bucket_xh(xbs->bucket),
5823 							i, &block_off,
5824 							&name_offset);
5825 			if (ret) {
5826 				mlog_errno(ret);
5827 				goto out;
5828 			}
5829 			base = bucket_block(xbs->bucket, block_off);
5830 			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5831 			vb.vb_access = ocfs2_journal_access;
5832 
5833 			if (ocfs2_meta_ecc(osb)) {
5834 				/*create parameters for ocfs2_post_refcount. */
5835 				bucket = xbs->bucket;
5836 				refcount.credits = bucket->bu_blocks;
5837 				refcount.para = bucket;
5838 				refcount.func =
5839 					ocfs2_xattr_bucket_post_refcount;
5840 				p = &refcount;
5841 			}
5842 		} else {
5843 			base = xbs->base;
5844 			vb.vb_bh = xbs->xattr_bh;
5845 			vb.vb_access = ocfs2_journal_access_xb;
5846 		}
5847 	}
5848 
5849 	if (ocfs2_xattr_is_local(xe))
5850 		goto out;
5851 
5852 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
5853 				(base + name_offset + name_len);
5854 
5855 	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5856 				       &num_clusters, &vb.vb_xv->xr_list,
5857 				       &ext_flags);
5858 	if (ret) {
5859 		mlog_errno(ret);
5860 		goto out;
5861 	}
5862 
5863 	/*
5864 	 * We just need to check the 1st extent record, since we always
5865 	 * CoW the whole xattr. So there shouldn't be a xattr with
5866 	 * some REFCOUNT extent recs after the 1st one.
5867 	 */
5868 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5869 		goto out;
5870 
5871 	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5872 				       1, ref_tree, &ref_root_bh);
5873 	if (ret) {
5874 		mlog_errno(ret);
5875 		goto out;
5876 	}
5877 
5878 	/*
5879 	 * If we are deleting the xattr or the new size will be stored inside,
5880 	 * cool, leave it there, the xattr truncate process will remove them
5881 	 * for us(it still needs the refcount tree lock and the meta, credits).
5882 	 * And the worse case is that every cluster truncate will split the
5883 	 * refcount tree, and make the original extent become 3. So we will need
5884 	 * 2 * cluster more extent recs at most.
5885 	 */
5886 	if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5887 
5888 		ret = ocfs2_refcounted_xattr_delete_need(inode,
5889 							 &(*ref_tree)->rf_ci,
5890 							 ref_root_bh, vb.vb_xv,
5891 							 meta_add, credits);
5892 		if (ret)
5893 			mlog_errno(ret);
5894 		goto out;
5895 	}
5896 
5897 	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5898 				       *ref_tree, ref_root_bh, 0,
5899 				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
5900 	if (ret)
5901 		mlog_errno(ret);
5902 
5903 out:
5904 	brelse(ref_root_bh);
5905 	return ret;
5906 }
5907 
5908 /*
5909  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5910  * The physical clusters will be added to refcount tree.
5911  */
ocfs2_xattr_value_attach_refcount(struct inode * inode,struct ocfs2_xattr_value_root * xv,struct ocfs2_extent_tree * value_et,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh,struct ocfs2_cached_dealloc_ctxt * dealloc,struct ocfs2_post_refcount * refcount)5912 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5913 				struct ocfs2_xattr_value_root *xv,
5914 				struct ocfs2_extent_tree *value_et,
5915 				struct ocfs2_caching_info *ref_ci,
5916 				struct buffer_head *ref_root_bh,
5917 				struct ocfs2_cached_dealloc_ctxt *dealloc,
5918 				struct ocfs2_post_refcount *refcount)
5919 {
5920 	int ret = 0;
5921 	u32 clusters = le32_to_cpu(xv->xr_clusters);
5922 	u32 cpos, p_cluster, num_clusters;
5923 	struct ocfs2_extent_list *el = &xv->xr_list;
5924 	unsigned int ext_flags;
5925 
5926 	cpos = 0;
5927 	while (cpos < clusters) {
5928 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5929 					       &num_clusters, el, &ext_flags);
5930 		if (ret) {
5931 			mlog_errno(ret);
5932 			break;
5933 		}
5934 
5935 		cpos += num_clusters;
5936 		if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5937 			continue;
5938 
5939 		BUG_ON(!p_cluster);
5940 
5941 		ret = ocfs2_add_refcount_flag(inode, value_et,
5942 					      ref_ci, ref_root_bh,
5943 					      cpos - num_clusters,
5944 					      p_cluster, num_clusters,
5945 					      dealloc, refcount);
5946 		if (ret) {
5947 			mlog_errno(ret);
5948 			break;
5949 		}
5950 	}
5951 
5952 	return ret;
5953 }
5954 
5955 /*
5956  * Given a normal ocfs2_xattr_header, refcount all the entries which
5957  * have value stored outside.
5958  * Used for xattrs stored in inode and ocfs2_xattr_block.
5959  */
ocfs2_xattr_attach_refcount_normal(struct inode * inode,struct ocfs2_xattr_value_buf * vb,struct ocfs2_xattr_header * header,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh,struct ocfs2_cached_dealloc_ctxt * dealloc)5960 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5961 				struct ocfs2_xattr_value_buf *vb,
5962 				struct ocfs2_xattr_header *header,
5963 				struct ocfs2_caching_info *ref_ci,
5964 				struct buffer_head *ref_root_bh,
5965 				struct ocfs2_cached_dealloc_ctxt *dealloc)
5966 {
5967 
5968 	struct ocfs2_xattr_entry *xe;
5969 	struct ocfs2_xattr_value_root *xv;
5970 	struct ocfs2_extent_tree et;
5971 	int i, ret = 0;
5972 
5973 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5974 		xe = &header->xh_entries[i];
5975 
5976 		if (ocfs2_xattr_is_local(xe))
5977 			continue;
5978 
5979 		xv = (struct ocfs2_xattr_value_root *)((void *)header +
5980 			le16_to_cpu(xe->xe_name_offset) +
5981 			OCFS2_XATTR_SIZE(xe->xe_name_len));
5982 
5983 		vb->vb_xv = xv;
5984 		ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5985 
5986 		ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5987 							ref_ci, ref_root_bh,
5988 							dealloc, NULL);
5989 		if (ret) {
5990 			mlog_errno(ret);
5991 			break;
5992 		}
5993 	}
5994 
5995 	return ret;
5996 }
5997 
ocfs2_xattr_inline_attach_refcount(struct inode * inode,struct buffer_head * fe_bh,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh,struct ocfs2_cached_dealloc_ctxt * dealloc)5998 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5999 				struct buffer_head *fe_bh,
6000 				struct ocfs2_caching_info *ref_ci,
6001 				struct buffer_head *ref_root_bh,
6002 				struct ocfs2_cached_dealloc_ctxt *dealloc)
6003 {
6004 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6005 	struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
6006 				(fe_bh->b_data + inode->i_sb->s_blocksize -
6007 				le16_to_cpu(di->i_xattr_inline_size));
6008 	struct ocfs2_xattr_value_buf vb = {
6009 		.vb_bh = fe_bh,
6010 		.vb_access = ocfs2_journal_access_di,
6011 	};
6012 
6013 	return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6014 						  ref_ci, ref_root_bh, dealloc);
6015 }
6016 
/*
 * Arguments handed through the xattr tree iteration to
 * ocfs2_xattr_bucket_value_refcount(), which forwards each member
 * unchanged to ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
	/* Caching info passed on as the ref_ci argument */
	struct ocfs2_caching_info *ref_ci;
	/* Buffer passed on as the ref_root_bh argument */
	struct buffer_head *ref_root_bh;
	/* Dealloc context passed on as the dealloc argument */
	struct ocfs2_cached_dealloc_ctxt *dealloc;
};
6022 
ocfs2_get_xattr_tree_value_root(struct super_block * sb,struct ocfs2_xattr_bucket * bucket,int offset,struct ocfs2_xattr_value_root ** xv,struct buffer_head ** bh)6023 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
6024 					   struct ocfs2_xattr_bucket *bucket,
6025 					   int offset,
6026 					   struct ocfs2_xattr_value_root **xv,
6027 					   struct buffer_head **bh)
6028 {
6029 	int ret, block_off, name_offset;
6030 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
6031 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6032 	void *base;
6033 
6034 	ret = ocfs2_xattr_bucket_get_name_value(sb,
6035 						bucket_xh(bucket),
6036 						offset,
6037 						&block_off,
6038 						&name_offset);
6039 	if (ret) {
6040 		mlog_errno(ret);
6041 		goto out;
6042 	}
6043 
6044 	base = bucket_block(bucket, block_off);
6045 
6046 	*xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6047 			 OCFS2_XATTR_SIZE(xe->xe_name_len));
6048 
6049 	if (bh)
6050 		*bh = bucket->bu_bhs[block_off];
6051 out:
6052 	return ret;
6053 }
6054 
6055 /*
6056  * For a given xattr bucket, refcount all the entries which
6057  * have value stored outside.
6058  */
ocfs2_xattr_bucket_value_refcount(struct inode * inode,struct ocfs2_xattr_bucket * bucket,void * para)6059 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6060 					     struct ocfs2_xattr_bucket *bucket,
6061 					     void *para)
6062 {
6063 	int i, ret = 0;
6064 	struct ocfs2_extent_tree et;
6065 	struct ocfs2_xattr_tree_value_refcount_para *ref =
6066 			(struct ocfs2_xattr_tree_value_refcount_para *)para;
6067 	struct ocfs2_xattr_header *xh =
6068 			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6069 	struct ocfs2_xattr_entry *xe;
6070 	struct ocfs2_xattr_value_buf vb = {
6071 		.vb_access = ocfs2_journal_access,
6072 	};
6073 	struct ocfs2_post_refcount refcount = {
6074 		.credits = bucket->bu_blocks,
6075 		.para = bucket,
6076 		.func = ocfs2_xattr_bucket_post_refcount,
6077 	};
6078 	struct ocfs2_post_refcount *p = NULL;
6079 
6080 	/* We only need post_refcount if we support metaecc. */
6081 	if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6082 		p = &refcount;
6083 
6084 	trace_ocfs2_xattr_bucket_value_refcount(
6085 				(unsigned long long)bucket_blkno(bucket),
6086 				le16_to_cpu(xh->xh_count));
6087 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6088 		xe = &xh->xh_entries[i];
6089 
6090 		if (ocfs2_xattr_is_local(xe))
6091 			continue;
6092 
6093 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6094 						      &vb.vb_xv, &vb.vb_bh);
6095 		if (ret) {
6096 			mlog_errno(ret);
6097 			break;
6098 		}
6099 
6100 		ocfs2_init_xattr_value_extent_tree(&et,
6101 						   INODE_CACHE(inode), &vb);
6102 
6103 		ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6104 							&et, ref->ref_ci,
6105 							ref->ref_root_bh,
6106 							ref->dealloc, p);
6107 		if (ret) {
6108 			mlog_errno(ret);
6109 			break;
6110 		}
6111 	}
6112 
6113 	return ret;
6114 
6115 }
6116 
/*
 * Per-record callback for the xattr index block iteration: refcount the
 * values of all buckets in the range starting at @blkno covering @len.
 * @root_bh and @cpos are not used.
 */
static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
				     struct buffer_head *root_bh,
				     u64 blkno, u32 cpos, u32 len, void *para)
{
	int status;

	status = ocfs2_iterate_xattr_buckets(inode, blkno, len,
					     ocfs2_xattr_bucket_value_refcount,
					     para);
	return status;
}
6125 
ocfs2_xattr_block_attach_refcount(struct inode * inode,struct buffer_head * blk_bh,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh,struct ocfs2_cached_dealloc_ctxt * dealloc)6126 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6127 				struct buffer_head *blk_bh,
6128 				struct ocfs2_caching_info *ref_ci,
6129 				struct buffer_head *ref_root_bh,
6130 				struct ocfs2_cached_dealloc_ctxt *dealloc)
6131 {
6132 	int ret = 0;
6133 	struct ocfs2_xattr_block *xb =
6134 				(struct ocfs2_xattr_block *)blk_bh->b_data;
6135 
6136 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6137 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6138 		struct ocfs2_xattr_value_buf vb = {
6139 			.vb_bh = blk_bh,
6140 			.vb_access = ocfs2_journal_access_xb,
6141 		};
6142 
6143 		ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6144 							 ref_ci, ref_root_bh,
6145 							 dealloc);
6146 	} else {
6147 		struct ocfs2_xattr_tree_value_refcount_para para = {
6148 			.ref_ci = ref_ci,
6149 			.ref_root_bh = ref_root_bh,
6150 			.dealloc = dealloc,
6151 		};
6152 
6153 		ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6154 						ocfs2_refcount_xattr_tree_rec,
6155 						&para);
6156 	}
6157 
6158 	return ret;
6159 }
6160 
ocfs2_xattr_attach_refcount_tree(struct inode * inode,struct buffer_head * fe_bh,struct ocfs2_caching_info * ref_ci,struct buffer_head * ref_root_bh,struct ocfs2_cached_dealloc_ctxt * dealloc)6161 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6162 				     struct buffer_head *fe_bh,
6163 				     struct ocfs2_caching_info *ref_ci,
6164 				     struct buffer_head *ref_root_bh,
6165 				     struct ocfs2_cached_dealloc_ctxt *dealloc)
6166 {
6167 	int ret = 0;
6168 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
6169 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6170 	struct buffer_head *blk_bh = NULL;
6171 
6172 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6173 		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6174 							 ref_ci, ref_root_bh,
6175 							 dealloc);
6176 		if (ret) {
6177 			mlog_errno(ret);
6178 			goto out;
6179 		}
6180 	}
6181 
6182 	if (!di->i_xattr_loc)
6183 		goto out;
6184 
6185 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6186 				     &blk_bh);
6187 	if (ret < 0) {
6188 		mlog_errno(ret);
6189 		goto out;
6190 	}
6191 
6192 	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6193 						ref_root_bh, dealloc);
6194 	if (ret)
6195 		mlog_errno(ret);
6196 
6197 	brelse(blk_bh);
6198 out:
6199 
6200 	return ret;
6201 }
6202 
/*
 * Filter deciding whether an xattr entry is carried over by reflink.
 * ocfs2_reflink_xattr_header() drops the entry from the new header when
 * this returns zero.
 */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
/*
 * Store the information we need in xattr reflink.
 * old_bh and new_bh are inode bh for the old and new inode.
 */
struct ocfs2_xattr_reflink {
	/* Inode whose xattrs are being read */
	struct inode *old_inode;
	/* Inode that receives the reflinked xattrs */
	struct inode *new_inode;
	struct buffer_head *old_bh;
	struct buffer_head *new_bh;
	/* Handed to ocfs2_increase_refcount() together with ref_root_bh */
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
	/* Dealloc context handed to ocfs2_increase_refcount() */
	struct ocfs2_cached_dealloc_ctxt *dealloc;
	/* Optional filter; when NULL every entry is reflinked */
	should_xattr_reflinked *xattr_reflinked;
};
6218 
/*
 * Given a xattr header and xe offset,
 * return the proper xv and the corresponding bh.
 * xattr in inode, block and xattr tree have different implementations.
 *
 * @offset is the index of the entry in xh->xh_entries.  The value root is
 * returned through @xv.  @ret_bh may be NULL when the caller does not
 * need the buffer.  @para is passed through untouched for the
 * implementation's own use.  Returns 0 on success, an error otherwise.
 */
typedef int (get_xattr_value_root)(struct super_block *sb,
				   struct buffer_head *bh,
				   struct ocfs2_xattr_header *xh,
				   int offset,
				   struct ocfs2_xattr_value_root **xv,
				   struct buffer_head **ret_bh,
				   void *para);
6231 
6232 /*
6233  * Calculate all the xattr value root metadata stored in this xattr header and
6234  * credits we need if we create them from the scratch.
6235  * We use get_xattr_value_root so that all types of xattr container can use it.
6236  */
ocfs2_value_metas_in_xattr_header(struct super_block * sb,struct buffer_head * bh,struct ocfs2_xattr_header * xh,int * metas,int * credits,int * num_recs,get_xattr_value_root * func,void * para)6237 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6238 					     struct buffer_head *bh,
6239 					     struct ocfs2_xattr_header *xh,
6240 					     int *metas, int *credits,
6241 					     int *num_recs,
6242 					     get_xattr_value_root *func,
6243 					     void *para)
6244 {
6245 	int i, ret = 0;
6246 	struct ocfs2_xattr_value_root *xv;
6247 	struct ocfs2_xattr_entry *xe;
6248 
6249 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6250 		xe = &xh->xh_entries[i];
6251 		if (ocfs2_xattr_is_local(xe))
6252 			continue;
6253 
6254 		ret = func(sb, bh, xh, i, &xv, NULL, para);
6255 		if (ret) {
6256 			mlog_errno(ret);
6257 			break;
6258 		}
6259 
6260 		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6261 			  le16_to_cpu(xv->xr_list.l_next_free_rec);
6262 
6263 		*credits += ocfs2_calc_extend_credits(sb,
6264 						&def_xv.xv.xr_list);
6265 
6266 		/*
6267 		 * If the value is a tree with depth > 1, We don't go deep
6268 		 * to the extent block, so just calculate a maximum record num.
6269 		 */
6270 		if (!xv->xr_list.l_tree_depth)
6271 			*num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6272 		else
6273 			*num_recs += ocfs2_clusters_for_bytes(sb,
6274 							      XATTR_SIZE_MAX);
6275 	}
6276 
6277 	return ret;
6278 }
6279 
6280 /* Used by xattr inode and block to return the right xv and buffer_head. */
ocfs2_get_xattr_value_root(struct super_block * sb,struct buffer_head * bh,struct ocfs2_xattr_header * xh,int offset,struct ocfs2_xattr_value_root ** xv,struct buffer_head ** ret_bh,void * para)6281 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6282 				      struct buffer_head *bh,
6283 				      struct ocfs2_xattr_header *xh,
6284 				      int offset,
6285 				      struct ocfs2_xattr_value_root **xv,
6286 				      struct buffer_head **ret_bh,
6287 				      void *para)
6288 {
6289 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6290 
6291 	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6292 		le16_to_cpu(xe->xe_name_offset) +
6293 		OCFS2_XATTR_SIZE(xe->xe_name_len));
6294 
6295 	if (ret_bh)
6296 		*ret_bh = bh;
6297 
6298 	return 0;
6299 }
6300 
6301 /*
6302  * Lock the meta_ac and calculate how much credits we need for reflink xattrs.
6303  * It is only used for inline xattr and xattr block.
6304  */
ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super * osb,struct ocfs2_xattr_header * xh,struct buffer_head * ref_root_bh,int * credits,struct ocfs2_alloc_context ** meta_ac)6305 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6306 					struct ocfs2_xattr_header *xh,
6307 					struct buffer_head *ref_root_bh,
6308 					int *credits,
6309 					struct ocfs2_alloc_context **meta_ac)
6310 {
6311 	int ret, meta_add = 0, num_recs = 0;
6312 	struct ocfs2_refcount_block *rb =
6313 			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
6314 
6315 	*credits = 0;
6316 
6317 	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6318 						&meta_add, credits, &num_recs,
6319 						ocfs2_get_xattr_value_root,
6320 						NULL);
6321 	if (ret) {
6322 		mlog_errno(ret);
6323 		goto out;
6324 	}
6325 
6326 	/*
6327 	 * We need to add/modify num_recs in refcount tree, so just calculate
6328 	 * an approximate number we need for refcount tree change.
6329 	 * Sometimes we need to split the tree, and after split,  half recs
6330 	 * will be moved to the new block, and a new block can only provide
6331 	 * half number of recs. So we multiple new blocks by 2.
6332 	 */
6333 	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6334 	meta_add += num_recs;
6335 	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6336 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6337 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6338 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6339 	else
6340 		*credits += 1;
6341 
6342 	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6343 	if (ret)
6344 		mlog_errno(ret);
6345 
6346 out:
6347 	return ret;
6348 }
6349 
6350 /*
6351  * Given a xattr header, reflink all the xattrs in this container.
6352  * It can be used for inode, block and bucket.
6353  *
6354  * NOTE:
6355  * Before we call this function, the caller has memcpy the xattr in
6356  * old_xh to the new_xh.
6357  *
6358  * If args.xattr_reflinked is set, call it to decide whether the xe should
6359  * be reflinked or not. If not, remove it from the new xattr header.
6360  */
ocfs2_reflink_xattr_header(handle_t * handle,struct ocfs2_xattr_reflink * args,struct buffer_head * old_bh,struct ocfs2_xattr_header * xh,struct buffer_head * new_bh,struct ocfs2_xattr_header * new_xh,struct ocfs2_xattr_value_buf * vb,struct ocfs2_alloc_context * meta_ac,get_xattr_value_root * func,void * para)6361 static int ocfs2_reflink_xattr_header(handle_t *handle,
6362 				      struct ocfs2_xattr_reflink *args,
6363 				      struct buffer_head *old_bh,
6364 				      struct ocfs2_xattr_header *xh,
6365 				      struct buffer_head *new_bh,
6366 				      struct ocfs2_xattr_header *new_xh,
6367 				      struct ocfs2_xattr_value_buf *vb,
6368 				      struct ocfs2_alloc_context *meta_ac,
6369 				      get_xattr_value_root *func,
6370 				      void *para)
6371 {
6372 	int ret = 0, i, j;
6373 	struct super_block *sb = args->old_inode->i_sb;
6374 	struct buffer_head *value_bh;
6375 	struct ocfs2_xattr_entry *xe, *last;
6376 	struct ocfs2_xattr_value_root *xv, *new_xv;
6377 	struct ocfs2_extent_tree data_et;
6378 	u32 clusters, cpos, p_cluster, num_clusters;
6379 	unsigned int ext_flags = 0;
6380 
6381 	trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
6382 					 le16_to_cpu(xh->xh_count));
6383 
6384 	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1;
6385 	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6386 		xe = &xh->xh_entries[i];
6387 
6388 		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6389 			xe = &new_xh->xh_entries[j];
6390 
6391 			le16_add_cpu(&new_xh->xh_count, -1);
6392 			if (new_xh->xh_count) {
6393 				memmove(xe, xe + 1,
6394 					(void *)last - (void *)xe);
6395 				memset(last, 0,
6396 				       sizeof(struct ocfs2_xattr_entry));
6397 			}
6398 
6399 			/*
6400 			 * We don't want j to increase in the next round since
6401 			 * it is already moved ahead.
6402 			 */
6403 			j--;
6404 			continue;
6405 		}
6406 
6407 		if (ocfs2_xattr_is_local(xe))
6408 			continue;
6409 
6410 		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6411 		if (ret) {
6412 			mlog_errno(ret);
6413 			break;
6414 		}
6415 
6416 		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6417 		if (ret) {
6418 			mlog_errno(ret);
6419 			break;
6420 		}
6421 
6422 		/*
6423 		 * For the xattr which has l_tree_depth = 0, all the extent
6424 		 * recs have already be copied to the new xh with the
6425 		 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6426 		 * increase the refount count int the refcount tree.
6427 		 *
6428 		 * For the xattr which has l_tree_depth > 0, we need
6429 		 * to initialize it to the empty default value root,
6430 		 * and then insert the extents one by one.
6431 		 */
6432 		if (xv->xr_list.l_tree_depth) {
6433 			memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE);
6434 			vb->vb_xv = new_xv;
6435 			vb->vb_bh = value_bh;
6436 			ocfs2_init_xattr_value_extent_tree(&data_et,
6437 					INODE_CACHE(args->new_inode), vb);
6438 		}
6439 
6440 		clusters = le32_to_cpu(xv->xr_clusters);
6441 		cpos = 0;
6442 		while (cpos < clusters) {
6443 			ret = ocfs2_xattr_get_clusters(args->old_inode,
6444 						       cpos,
6445 						       &p_cluster,
6446 						       &num_clusters,
6447 						       &xv->xr_list,
6448 						       &ext_flags);
6449 			if (ret) {
6450 				mlog_errno(ret);
6451 				goto out;
6452 			}
6453 
6454 			BUG_ON(!p_cluster);
6455 
6456 			if (xv->xr_list.l_tree_depth) {
6457 				ret = ocfs2_insert_extent(handle,
6458 						&data_et, cpos,
6459 						ocfs2_clusters_to_blocks(
6460 							args->old_inode->i_sb,
6461 							p_cluster),
6462 						num_clusters, ext_flags,
6463 						meta_ac);
6464 				if (ret) {
6465 					mlog_errno(ret);
6466 					goto out;
6467 				}
6468 			}
6469 
6470 			ret = ocfs2_increase_refcount(handle, args->ref_ci,
6471 						      args->ref_root_bh,
6472 						      p_cluster, num_clusters,
6473 						      meta_ac, args->dealloc);
6474 			if (ret) {
6475 				mlog_errno(ret);
6476 				goto out;
6477 			}
6478 
6479 			cpos += num_clusters;
6480 		}
6481 	}
6482 
6483 out:
6484 	return ret;
6485 }
6486 
ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink * args)6487 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6488 {
6489 	int ret = 0, credits = 0;
6490 	handle_t *handle;
6491 	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6492 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6493 	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6494 	int header_off = osb->sb->s_blocksize - inline_size;
6495 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6496 					(args->old_bh->b_data + header_off);
6497 	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6498 					(args->new_bh->b_data + header_off);
6499 	struct ocfs2_alloc_context *meta_ac = NULL;
6500 	struct ocfs2_inode_info *new_oi;
6501 	struct ocfs2_dinode *new_di;
6502 	struct ocfs2_xattr_value_buf vb = {
6503 		.vb_bh = args->new_bh,
6504 		.vb_access = ocfs2_journal_access_di,
6505 	};
6506 
6507 	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6508 						  &credits, &meta_ac);
6509 	if (ret) {
6510 		mlog_errno(ret);
6511 		goto out;
6512 	}
6513 
6514 	handle = ocfs2_start_trans(osb, credits);
6515 	if (IS_ERR(handle)) {
6516 		ret = PTR_ERR(handle);
6517 		mlog_errno(ret);
6518 		goto out;
6519 	}
6520 
6521 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6522 				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6523 	if (ret) {
6524 		mlog_errno(ret);
6525 		goto out_commit;
6526 	}
6527 
6528 	memcpy(args->new_bh->b_data + header_off,
6529 	       args->old_bh->b_data + header_off, inline_size);
6530 
6531 	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6532 	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6533 
6534 	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6535 					 args->new_bh, new_xh, &vb, meta_ac,
6536 					 ocfs2_get_xattr_value_root, NULL);
6537 	if (ret) {
6538 		mlog_errno(ret);
6539 		goto out_commit;
6540 	}
6541 
6542 	new_oi = OCFS2_I(args->new_inode);
6543 
6544 	spin_lock(&new_oi->ip_lock);
6545 	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6546 	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6547 	spin_unlock(&new_oi->ip_lock);
6548 
6549 	ocfs2_journal_dirty(handle, args->new_bh);
6550 
6551 out_commit:
6552 	ocfs2_commit_trans(osb, handle);
6553 
6554 out:
6555 	if (meta_ac)
6556 		ocfs2_free_alloc_context(meta_ac);
6557 	return ret;
6558 }
6559 
ocfs2_create_empty_xattr_block(struct inode * inode,struct buffer_head * fe_bh,struct buffer_head ** ret_bh,int indexed)6560 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6561 					  struct buffer_head *fe_bh,
6562 					  struct buffer_head **ret_bh,
6563 					  int indexed)
6564 {
6565 	int ret;
6566 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6567 	struct ocfs2_xattr_set_ctxt ctxt;
6568 
6569 	memset(&ctxt, 0, sizeof(ctxt));
6570 	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6571 	if (ret < 0) {
6572 		mlog_errno(ret);
6573 		return ret;
6574 	}
6575 
6576 	ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6577 	if (IS_ERR(ctxt.handle)) {
6578 		ret = PTR_ERR(ctxt.handle);
6579 		mlog_errno(ret);
6580 		goto out;
6581 	}
6582 
6583 	trace_ocfs2_create_empty_xattr_block(
6584 				(unsigned long long)fe_bh->b_blocknr, indexed);
6585 	ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6586 				       ret_bh);
6587 	if (ret)
6588 		mlog_errno(ret);
6589 
6590 	ocfs2_commit_trans(osb, ctxt.handle);
6591 out:
6592 	ocfs2_free_alloc_context(ctxt.meta_ac);
6593 	return ret;
6594 }
6595 
ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink * args,struct buffer_head * blk_bh,struct buffer_head * new_blk_bh)6596 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6597 				     struct buffer_head *blk_bh,
6598 				     struct buffer_head *new_blk_bh)
6599 {
6600 	int ret = 0, credits = 0;
6601 	handle_t *handle;
6602 	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6603 	struct ocfs2_dinode *new_di;
6604 	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6605 	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6606 	struct ocfs2_xattr_block *xb =
6607 			(struct ocfs2_xattr_block *)blk_bh->b_data;
6608 	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6609 	struct ocfs2_xattr_block *new_xb =
6610 			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
6611 	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6612 	struct ocfs2_alloc_context *meta_ac;
6613 	struct ocfs2_xattr_value_buf vb = {
6614 		.vb_bh = new_blk_bh,
6615 		.vb_access = ocfs2_journal_access_xb,
6616 	};
6617 
6618 	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6619 						  &credits, &meta_ac);
6620 	if (ret) {
6621 		mlog_errno(ret);
6622 		return ret;
6623 	}
6624 
6625 	/* One more credits in case we need to add xattr flags in new inode. */
6626 	handle = ocfs2_start_trans(osb, credits + 1);
6627 	if (IS_ERR(handle)) {
6628 		ret = PTR_ERR(handle);
6629 		mlog_errno(ret);
6630 		goto out;
6631 	}
6632 
6633 	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6634 		ret = ocfs2_journal_access_di(handle,
6635 					      INODE_CACHE(args->new_inode),
6636 					      args->new_bh,
6637 					      OCFS2_JOURNAL_ACCESS_WRITE);
6638 		if (ret) {
6639 			mlog_errno(ret);
6640 			goto out_commit;
6641 		}
6642 	}
6643 
6644 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6645 				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6646 	if (ret) {
6647 		mlog_errno(ret);
6648 		goto out_commit;
6649 	}
6650 
6651 	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6652 	       osb->sb->s_blocksize - header_off);
6653 
6654 	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6655 					 new_blk_bh, new_xh, &vb, meta_ac,
6656 					 ocfs2_get_xattr_value_root, NULL);
6657 	if (ret) {
6658 		mlog_errno(ret);
6659 		goto out_commit;
6660 	}
6661 
6662 	ocfs2_journal_dirty(handle, new_blk_bh);
6663 
6664 	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6665 		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6666 		spin_lock(&new_oi->ip_lock);
6667 		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6668 		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6669 		spin_unlock(&new_oi->ip_lock);
6670 
6671 		ocfs2_journal_dirty(handle, args->new_bh);
6672 	}
6673 
6674 out_commit:
6675 	ocfs2_commit_trans(osb, handle);
6676 
6677 out:
6678 	ocfs2_free_alloc_context(meta_ac);
6679 	return ret;
6680 }
6681 
/*
 * Context handed (as the opaque 'para' pointer) to the callbacks used while
 * reflinking an indexed xattr tree.  It is filled in by
 * ocfs2_reflink_xattr_tree().
 */
struct ocfs2_reflink_xattr_tree_args {
	/* The overall reflink arguments (old/new inode, refcount root, ...). */
	struct ocfs2_xattr_reflink *reflink;
	/* Buffer head of the xattr block being reflinked from. */
	struct buffer_head *old_blk_bh;
	/* Buffer head of the xattr block being reflinked to. */
	struct buffer_head *new_blk_bh;
	/* Scratch bucket allocated for the old inode. */
	struct ocfs2_xattr_bucket *old_bucket;
	/* Scratch bucket allocated for the new inode. */
	struct ocfs2_xattr_bucket *new_bucket;
};
6689 
6690 /*
6691  * NOTE:
6692  * We have to handle the case that both old bucket and new bucket
6693  * will call this function to get the right ret_bh.
6694  * So The caller must give us the right bh.
6695  */
ocfs2_get_reflink_xattr_value_root(struct super_block * sb,struct buffer_head * bh,struct ocfs2_xattr_header * xh,int offset,struct ocfs2_xattr_value_root ** xv,struct buffer_head ** ret_bh,void * para)6696 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6697 					struct buffer_head *bh,
6698 					struct ocfs2_xattr_header *xh,
6699 					int offset,
6700 					struct ocfs2_xattr_value_root **xv,
6701 					struct buffer_head **ret_bh,
6702 					void *para)
6703 {
6704 	struct ocfs2_reflink_xattr_tree_args *args =
6705 			(struct ocfs2_reflink_xattr_tree_args *)para;
6706 	struct ocfs2_xattr_bucket *bucket;
6707 
6708 	if (bh == args->old_bucket->bu_bhs[0])
6709 		bucket = args->old_bucket;
6710 	else
6711 		bucket = args->new_bucket;
6712 
6713 	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6714 					       xv, ret_bh);
6715 }
6716 
/*
 * Totals accumulated over the buckets of an xattr extent record by
 * ocfs2_calc_value_tree_metas().
 */
struct ocfs2_value_tree_metas {
	/* Count later passed to ocfs2_reserve_new_metadata_blocks(). */
	int num_metas;
	/* Journal credits; includes bu_blocks for every bucket visited. */
	int credits;
	/* Record count used to estimate the refcount tree change. */
	int num_recs;
};
6722 
/*
 * Value-root lookup callback used when counting metadata for a bucket.
 * @para is the bucket itself; @bh and @xh are not used here.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *bkt = para;

	return ocfs2_get_xattr_tree_value_root(sb, bkt, offset, xv, ret_bh);
}
6737 
ocfs2_calc_value_tree_metas(struct inode * inode,struct ocfs2_xattr_bucket * bucket,void * para)6738 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6739 				      struct ocfs2_xattr_bucket *bucket,
6740 				      void *para)
6741 {
6742 	struct ocfs2_value_tree_metas *metas =
6743 			(struct ocfs2_value_tree_metas *)para;
6744 	struct ocfs2_xattr_header *xh =
6745 			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6746 
6747 	/* Add the credits for this bucket first. */
6748 	metas->credits += bucket->bu_blocks;
6749 	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6750 					xh, &metas->num_metas,
6751 					&metas->credits, &metas->num_recs,
6752 					ocfs2_value_tree_metas_in_bucket,
6753 					bucket);
6754 }
6755 
6756 /*
6757  * Given a xattr extent rec starting from blkno and having len clusters,
6758  * iterate all the buckets calculate how much metadata we need for reflinking
6759  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6760  */
ocfs2_lock_reflink_xattr_rec_allocators(struct ocfs2_reflink_xattr_tree_args * args,struct ocfs2_extent_tree * xt_et,u64 blkno,u32 len,int * credits,struct ocfs2_alloc_context ** meta_ac,struct ocfs2_alloc_context ** data_ac)6761 static int ocfs2_lock_reflink_xattr_rec_allocators(
6762 				struct ocfs2_reflink_xattr_tree_args *args,
6763 				struct ocfs2_extent_tree *xt_et,
6764 				u64 blkno, u32 len, int *credits,
6765 				struct ocfs2_alloc_context **meta_ac,
6766 				struct ocfs2_alloc_context **data_ac)
6767 {
6768 	int ret, num_free_extents;
6769 	struct ocfs2_value_tree_metas metas;
6770 	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6771 	struct ocfs2_refcount_block *rb;
6772 
6773 	memset(&metas, 0, sizeof(metas));
6774 
6775 	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6776 					  ocfs2_calc_value_tree_metas, &metas);
6777 	if (ret) {
6778 		mlog_errno(ret);
6779 		goto out;
6780 	}
6781 
6782 	*credits = metas.credits;
6783 
6784 	/*
6785 	 * Calculate we need for refcount tree change.
6786 	 *
6787 	 * We need to add/modify num_recs in refcount tree, so just calculate
6788 	 * an approximate number we need for refcount tree change.
6789 	 * Sometimes we need to split the tree, and after split,  half recs
6790 	 * will be moved to the new block, and a new block can only provide
6791 	 * half number of recs. So we multiple new blocks by 2.
6792 	 * In the end, we have to add credits for modifying the already
6793 	 * existed refcount block.
6794 	 */
6795 	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6796 	metas.num_recs =
6797 		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6798 		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6799 	metas.num_metas += metas.num_recs;
6800 	*credits += metas.num_recs +
6801 		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6802 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6803 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6804 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6805 	else
6806 		*credits += 1;
6807 
6808 	/* count in the xattr tree change. */
6809 	num_free_extents = ocfs2_num_free_extents(xt_et);
6810 	if (num_free_extents < 0) {
6811 		ret = num_free_extents;
6812 		mlog_errno(ret);
6813 		goto out;
6814 	}
6815 
6816 	if (num_free_extents < len)
6817 		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6818 
6819 	*credits += ocfs2_calc_extend_credits(osb->sb,
6820 					      xt_et->et_root_el);
6821 
6822 	if (metas.num_metas) {
6823 		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6824 							meta_ac);
6825 		if (ret) {
6826 			mlog_errno(ret);
6827 			goto out;
6828 		}
6829 	}
6830 
6831 	if (len) {
6832 		ret = ocfs2_reserve_clusters(osb, len, data_ac);
6833 		if (ret)
6834 			mlog_errno(ret);
6835 	}
6836 out:
6837 	if (ret) {
6838 		if (*meta_ac) {
6839 			ocfs2_free_alloc_context(*meta_ac);
6840 			*meta_ac = NULL;
6841 		}
6842 	}
6843 
6844 	return ret;
6845 }
6846 
/*
 * Reflink @num_buckets consecutive buckets starting at @blkno into the
 * freshly allocated blocks starting at @new_blkno.
 *
 * Each old bucket is read and copied block by block into a newly
 * initialised bucket, which is then passed to ocfs2_reflink_xattr_header().
 * The name hash of the first entry of the first bucket is returned through
 * @cpos, and that bucket's xh_num_buckets is set to @num_buckets.
 *
 * Both scratch buckets in @args are released before returning.
 * Returns 0 on success, a negative error code otherwise.
 */
static int ocfs2_reflink_xattr_bucket(handle_t *handle,
				u64 blkno, u64 new_blkno, u32 clusters,
				u32 *cpos, int num_buckets,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				struct ocfs2_reflink_xattr_tree_args *args)
{
	struct super_block *sb = args->reflink->old_inode->i_sb;
	struct ocfs2_xattr_bucket *old_bkt = args->old_bucket;
	struct ocfs2_xattr_bucket *new_bkt = args->new_bucket;
	int blks_per_bucket = old_bkt->bu_blocks;
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};
	int idx, blk;
	int status = 0;

	for (idx = 0; idx < num_buckets; idx++) {
		status = ocfs2_read_xattr_bucket(old_bkt, blkno);
		if (status) {
			mlog_errno(status);
			break;
		}

		status = ocfs2_init_xattr_bucket(new_bkt, new_blkno, 1);
		if (status) {
			mlog_errno(status);
			break;
		}

		status = ocfs2_xattr_bucket_journal_access(handle, new_bkt,
						OCFS2_JOURNAL_ACCESS_CREATE);
		if (status) {
			mlog_errno(status);
			break;
		}

		for (blk = 0; blk < blks_per_bucket; blk++)
			memcpy(bucket_block(new_bkt, blk),
			       bucket_block(old_bkt, blk),
			       sb->s_blocksize);

		/*
		 * Remember the starting cpos so the caller can use it to
		 * initialise the new xattr tree, and stamp xh_num_buckets
		 * on the first new bucket.
		 */
		if (idx == 0) {
			*cpos = le32_to_cpu(bucket_xh(new_bkt)->
					    xh_entries[0].xe_name_hash);
			bucket_xh(new_bkt)->xh_num_buckets =
				cpu_to_le16(num_buckets);
		}

		ocfs2_xattr_bucket_journal_dirty(handle, new_bkt);

		status = ocfs2_reflink_xattr_header(handle, args->reflink,
					old_bkt->bu_bhs[0],
					bucket_xh(old_bkt),
					new_bkt->bu_bhs[0],
					bucket_xh(new_bkt),
					&vb, meta_ac,
					ocfs2_get_reflink_xattr_value_root,
					args);
		if (status) {
			mlog_errno(status);
			break;
		}

		/*
		 * Access and dirty the bucket once more so metaecc gets
		 * calculated: ocfs2_reflink_xattr_header() may have extended
		 * the transaction, which drops the earlier access.
		 */
		status = ocfs2_xattr_bucket_journal_access(handle, new_bkt,
						OCFS2_JOURNAL_ACCESS_WRITE);
		if (status) {
			mlog_errno(status);
			break;
		}

		ocfs2_xattr_bucket_journal_dirty(handle, new_bkt);

		ocfs2_xattr_bucket_relse(old_bkt);
		ocfs2_xattr_bucket_relse(new_bkt);

		blkno += blks_per_bucket;
		new_blkno += blks_per_bucket;
	}

	/* Covers the early-exit paths that still hold bucket buffers. */
	ocfs2_xattr_bucket_relse(old_bkt);
	ocfs2_xattr_bucket_relse(new_bkt);
	return status;
}
6937 
/*
 * Reflink the buckets of one xattr extent record into newly claimed
 * clusters and insert the resulting extents into the new xattr tree @et.
 *
 * @blkno, @cpos and @len describe the old extent record.  The number of
 * buckets to copy is read from xh_num_buckets of the first bucket at
 * @blkno.  Clusters are claimed from @data_ac at least one at a time; for
 * each claimed run, as many buckets as fit are reflinked and one extent is
 * inserted into @et.
 *
 * Returns 0 on success, a negative error code otherwise.  Any failure,
 * including a failed ocfs2_insert_extent(), stops the loop immediately so
 * that a later successful iteration cannot mask the error.
 */
static int ocfs2_reflink_xattr_buckets(handle_t *handle,
				struct inode *inode,
				struct ocfs2_reflink_xattr_tree_args *args,
				struct ocfs2_extent_tree *et,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				u64 blkno, u32 cpos, u32 len)
{
	int ret, first_inserted = 0;
	u32 p_cluster, num_clusters, reflink_cpos = 0;
	u64 new_blkno;
	unsigned int num_buckets, reflink_buckets;
	unsigned int bpc =
		ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));

	ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
	ocfs2_xattr_bucket_relse(args->old_bucket);

	while (len && num_buckets) {
		ret = ocfs2_claim_clusters(handle, data_ac,
					   1, &p_cluster, &num_clusters);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		reflink_buckets = min(num_buckets, bpc * num_clusters);

		ret = ocfs2_reflink_xattr_bucket(handle, blkno,
						 new_blkno, num_clusters,
						 &reflink_cpos, reflink_buckets,
						 meta_ac, data_ac, args);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * For the 1st allocated cluster, we make it use the same cpos
		 * so that the xattr tree looks the same as the original one
		 * in the most case.
		 */
		if (!first_inserted) {
			reflink_cpos = cpos;
			first_inserted = 1;
		}
		ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
					  num_clusters, 0, meta_ac);
		if (ret) {
			/*
			 * Stop here: continuing would let the next iteration
			 * overwrite ret and report success for a tree that
			 * is missing this extent.
			 */
			mlog_errno(ret);
			goto out;
		}

		trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
						  num_clusters, reflink_cpos);

		len -= num_clusters;
		blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		num_buckets -= reflink_buckets;
	}
out:
	return ret;
}
7005 
7006 /*
7007  * Create the same xattr extent record in the new inode's xattr tree.
7008  */
ocfs2_reflink_xattr_rec(struct inode * inode,struct buffer_head * root_bh,u64 blkno,u32 cpos,u32 len,void * para)7009 static int ocfs2_reflink_xattr_rec(struct inode *inode,
7010 				   struct buffer_head *root_bh,
7011 				   u64 blkno,
7012 				   u32 cpos,
7013 				   u32 len,
7014 				   void *para)
7015 {
7016 	int ret, credits = 0;
7017 	handle_t *handle;
7018 	struct ocfs2_reflink_xattr_tree_args *args =
7019 			(struct ocfs2_reflink_xattr_tree_args *)para;
7020 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7021 	struct ocfs2_alloc_context *meta_ac = NULL;
7022 	struct ocfs2_alloc_context *data_ac = NULL;
7023 	struct ocfs2_extent_tree et;
7024 
7025 	trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
7026 
7027 	ocfs2_init_xattr_tree_extent_tree(&et,
7028 					  INODE_CACHE(args->reflink->new_inode),
7029 					  args->new_blk_bh);
7030 
7031 	ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
7032 						      len, &credits,
7033 						      &meta_ac, &data_ac);
7034 	if (ret) {
7035 		mlog_errno(ret);
7036 		goto out;
7037 	}
7038 
7039 	handle = ocfs2_start_trans(osb, credits);
7040 	if (IS_ERR(handle)) {
7041 		ret = PTR_ERR(handle);
7042 		mlog_errno(ret);
7043 		goto out;
7044 	}
7045 
7046 	ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
7047 					  meta_ac, data_ac,
7048 					  blkno, cpos, len);
7049 	if (ret)
7050 		mlog_errno(ret);
7051 
7052 	ocfs2_commit_trans(osb, handle);
7053 
7054 out:
7055 	if (meta_ac)
7056 		ocfs2_free_alloc_context(meta_ac);
7057 	if (data_ac)
7058 		ocfs2_free_alloc_context(data_ac);
7059 	return ret;
7060 }
7061 
7062 /*
7063  * Create reflinked xattr buckets.
7064  * We will add bucket one by one, and refcount all the xattrs in the bucket
7065  * if they are stored outside.
7066  */
ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink * args,struct buffer_head * blk_bh,struct buffer_head * new_blk_bh)7067 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7068 				    struct buffer_head *blk_bh,
7069 				    struct buffer_head *new_blk_bh)
7070 {
7071 	int ret;
7072 	struct ocfs2_reflink_xattr_tree_args para;
7073 
7074 	memset(&para, 0, sizeof(para));
7075 	para.reflink = args;
7076 	para.old_blk_bh = blk_bh;
7077 	para.new_blk_bh = new_blk_bh;
7078 
7079 	para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7080 	if (!para.old_bucket) {
7081 		mlog_errno(-ENOMEM);
7082 		return -ENOMEM;
7083 	}
7084 
7085 	para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7086 	if (!para.new_bucket) {
7087 		ret = -ENOMEM;
7088 		mlog_errno(ret);
7089 		goto out;
7090 	}
7091 
7092 	ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7093 					      ocfs2_reflink_xattr_rec,
7094 					      &para);
7095 	if (ret)
7096 		mlog_errno(ret);
7097 
7098 out:
7099 	ocfs2_xattr_bucket_free(para.old_bucket);
7100 	ocfs2_xattr_bucket_free(para.new_bucket);
7101 	return ret;
7102 }
7103 
ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink * args,struct buffer_head * blk_bh)7104 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7105 					struct buffer_head *blk_bh)
7106 {
7107 	int ret, indexed = 0;
7108 	struct buffer_head *new_blk_bh = NULL;
7109 	struct ocfs2_xattr_block *xb =
7110 			(struct ocfs2_xattr_block *)blk_bh->b_data;
7111 
7112 
7113 	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7114 		indexed = 1;
7115 
7116 	ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7117 					     &new_blk_bh, indexed);
7118 	if (ret) {
7119 		mlog_errno(ret);
7120 		goto out;
7121 	}
7122 
7123 	if (!indexed)
7124 		ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7125 	else
7126 		ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7127 	if (ret)
7128 		mlog_errno(ret);
7129 
7130 out:
7131 	brelse(new_blk_bh);
7132 	return ret;
7133 }
7134 
ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry * xe)7135 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7136 {
7137 	int type = ocfs2_xattr_get_type(xe);
7138 
7139 	return type != OCFS2_XATTR_INDEX_SECURITY &&
7140 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7141 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7142 }
7143 
/*
 * Reflink all xattrs of @old_inode into @new_inode.
 *
 * The refcount tree named by the old dinode is locked for the duration.
 * Inline xattrs are handled first (if the old inode has any), then the
 * external xattr block (if i_xattr_loc is set).  When @preserve_security is
 * false, security and ACL entries are filtered out through
 * ocfs2_reflink_xattr_no_security().
 *
 * Returns 0 on success, a negative error code otherwise.
 */
int ocfs2_reflink_xattrs(struct inode *old_inode,
			 struct buffer_head *old_bh,
			 struct inode *new_inode,
			 struct buffer_head *new_bh,
			 bool preserve_security)
{
	struct ocfs2_super *osb = OCFS2_SB(old_inode->i_sb);
	struct ocfs2_inode_info *old_oi = OCFS2_I(old_inode);
	struct ocfs2_dinode *old_di = (struct ocfs2_dinode *)old_bh->b_data;
	struct ocfs2_refcount_tree *ref_tree;
	struct buffer_head *ref_root_bh = NULL;
	struct buffer_head *xb_bh = NULL;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct ocfs2_xattr_reflink args;
	int status;

	status = ocfs2_lock_refcount_tree(osb,
					  le64_to_cpu(old_di->i_refcount_loc),
					  1, &ref_tree, &ref_root_bh);
	if (status) {
		mlog_errno(status);
		return status;
	}

	ocfs2_init_dealloc_ctxt(&dealloc);

	args.old_inode = old_inode;
	args.new_inode = new_inode;
	args.old_bh = old_bh;
	args.new_bh = new_bh;
	args.ref_ci = &ref_tree->rf_ci;
	args.ref_root_bh = ref_root_bh;
	args.dealloc = &dealloc;
	/* No filter when security xattrs are to be carried over. */
	args.xattr_reflinked = preserve_security ?
				NULL : ocfs2_reflink_xattr_no_security;

	if (old_oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		status = ocfs2_reflink_xattr_inline(&args);
		if (status) {
			mlog_errno(status);
			goto unlock;
		}
	}

	/* Nothing more to do without an external xattr block. */
	if (!old_di->i_xattr_loc)
		goto unlock;

	status = ocfs2_read_xattr_block(old_inode,
					le64_to_cpu(old_di->i_xattr_loc),
					&xb_bh);
	if (status < 0) {
		mlog_errno(status);
		goto unlock;
	}

	status = ocfs2_reflink_xattr_in_block(&args, xb_bh);
	if (status)
		mlog_errno(status);

	brelse(xb_bh);

unlock:
	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	brelse(ref_root_bh);

	if (ocfs2_dealloc_has_cluster(&dealloc)) {
		ocfs2_schedule_truncate_log_flush(osb, 1);
		ocfs2_run_deallocs(osb, &dealloc);
	}

	return status;
}
7218 
7219 /*
7220  * Initialize security and acl for a already created inode.
7221  * Used for reflink a non-preserve-security file.
7222  *
7223  * It uses common api like ocfs2_xattr_set, so the caller
7224  * must not hold any lock expect i_rwsem.
7225  */
ocfs2_init_security_and_acl(struct inode * dir,struct inode * inode,const struct qstr * qstr)7226 int ocfs2_init_security_and_acl(struct inode *dir,
7227 				struct inode *inode,
7228 				const struct qstr *qstr)
7229 {
7230 	int ret = 0;
7231 	struct buffer_head *dir_bh = NULL;
7232 
7233 	ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7234 	if (ret) {
7235 		mlog_errno(ret);
7236 		goto leave;
7237 	}
7238 
7239 	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7240 	if (ret) {
7241 		mlog_errno(ret);
7242 		goto leave;
7243 	}
7244 	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7245 	if (ret)
7246 		mlog_errno(ret);
7247 
7248 	ocfs2_inode_unlock(dir, 0);
7249 	brelse(dir_bh);
7250 leave:
7251 	return ret;
7252 }
7253 
7254 /*
7255  * 'security' attributes support
7256  */
ocfs2_xattr_security_get(const struct xattr_handler * handler,struct dentry * unused,struct inode * inode,const char * name,void * buffer,size_t size)7257 static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
7258 				    struct dentry *unused, struct inode *inode,
7259 				    const char *name, void *buffer, size_t size)
7260 {
7261 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY,
7262 			       name, buffer, size);
7263 }
7264 
/* .set handler: write @name into the security xattr index of @inode. */
static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
				    struct mnt_idmap *idmap,
				    struct dentry *unused, struct inode *inode,
				    const char *name, const void *value,
				    size_t size, int flags)
{
	const int name_index = OCFS2_XATTR_INDEX_SECURITY;

	return ocfs2_xattr_set(inode, name_index, name, value, size, flags);
}
7274 
ocfs2_initxattrs(struct inode * inode,const struct xattr * xattr_array,void * fs_info)7275 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
7276 		     void *fs_info)
7277 {
7278 	struct ocfs2_security_xattr_info *si = fs_info;
7279 	const struct xattr *xattr;
7280 	int err = 0;
7281 
7282 	if (si) {
7283 		si->value = kmemdup(xattr_array->value, xattr_array->value_len,
7284 				    GFP_KERNEL);
7285 		if (!si->value)
7286 			return -ENOMEM;
7287 
7288 		si->name = xattr_array->name;
7289 		si->value_len = xattr_array->value_len;
7290 		return 0;
7291 	}
7292 
7293 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
7294 		err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7295 				      xattr->name, xattr->value,
7296 				      xattr->value_len, XATTR_CREATE);
7297 		if (err)
7298 			break;
7299 	}
7300 	return err;
7301 }
7302 
ocfs2_init_security_get(struct inode * inode,struct inode * dir,const struct qstr * qstr,struct ocfs2_security_xattr_info * si)7303 int ocfs2_init_security_get(struct inode *inode,
7304 			    struct inode *dir,
7305 			    const struct qstr *qstr,
7306 			    struct ocfs2_security_xattr_info *si)
7307 {
7308 	int ret;
7309 
7310 	/* check whether ocfs2 support feature xattr */
7311 	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7312 		return -EOPNOTSUPP;
7313 	if (si) {
7314 		ret = security_inode_init_security(inode, dir, qstr,
7315 						   &ocfs2_initxattrs, si);
7316 		/*
7317 		 * security_inode_init_security() does not return -EOPNOTSUPP,
7318 		 * we have to check the xattr ourselves.
7319 		 */
7320 		if (!ret && !si->name)
7321 			si->enable = 0;
7322 
7323 		return ret;
7324 	}
7325 
7326 	return security_inode_init_security(inode, dir, qstr,
7327 					    &ocfs2_initxattrs, NULL);
7328 }
7329 
/*
 * Store the security xattr described by @si on @inode inside the running
 * transaction @handle, using the supplied allocators.  Flags are 0.
 */
int ocfs2_init_security_set(handle_t *handle,
			    struct inode *inode,
			    struct buffer_head *di_bh,
			    struct ocfs2_security_xattr_info *si,
			    struct ocfs2_alloc_context *xattr_ac,
			    struct ocfs2_alloc_context *data_ac)
{
	const int name_index = OCFS2_XATTR_INDEX_SECURITY;

	return ocfs2_xattr_set_handle(handle, inode, di_bh, name_index,
				      si->name, si->value, si->value_len, 0,
				      xattr_ac, data_ac);
}
7342 
/* Binds XATTR_SECURITY_PREFIX to the security get/set wrappers above. */
const struct xattr_handler ocfs2_xattr_security_handler = {
	.prefix	= XATTR_SECURITY_PREFIX,
	.get	= ocfs2_xattr_security_get,
	.set	= ocfs2_xattr_security_set,
};
7348 
7349 /*
7350  * 'trusted' attributes support
7351  */
ocfs2_xattr_trusted_get(const struct xattr_handler * handler,struct dentry * unused,struct inode * inode,const char * name,void * buffer,size_t size)7352 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
7353 				   struct dentry *unused, struct inode *inode,
7354 				   const char *name, void *buffer, size_t size)
7355 {
7356 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED,
7357 			       name, buffer, size);
7358 }
7359 
/* .set handler: write @name into the trusted xattr index of @inode. */
static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
				   struct mnt_idmap *idmap,
				   struct dentry *unused, struct inode *inode,
				   const char *name, const void *value,
				   size_t size, int flags)
{
	const int name_index = OCFS2_XATTR_INDEX_TRUSTED;

	return ocfs2_xattr_set(inode, name_index, name, value, size, flags);
}
7369 
/* Binds XATTR_TRUSTED_PREFIX to the trusted get/set wrappers above. */
const struct xattr_handler ocfs2_xattr_trusted_handler = {
	.prefix	= XATTR_TRUSTED_PREFIX,
	.get	= ocfs2_xattr_trusted_get,
	.set	= ocfs2_xattr_trusted_set,
};
7375 
7376 /*
7377  * 'user' attributes support
7378  */
ocfs2_xattr_user_get(const struct xattr_handler * handler,struct dentry * unused,struct inode * inode,const char * name,void * buffer,size_t size)7379 static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
7380 				struct dentry *unused, struct inode *inode,
7381 				const char *name, void *buffer, size_t size)
7382 {
7383 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7384 
7385 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7386 		return -EOPNOTSUPP;
7387 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
7388 			       buffer, size);
7389 }
7390 
/*
 * .set handler: write @name into the user xattr index of @inode.
 * Returns -EOPNOTSUPP when the mount has OCFS2_MOUNT_NOUSERXATTR set.
 */
static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
				struct mnt_idmap *idmap,
				struct dentry *unused, struct inode *inode,
				const char *name, const void *value,
				size_t size, int flags)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	const int name_index = OCFS2_XATTR_INDEX_USER;

	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
		return -EOPNOTSUPP;

	return ocfs2_xattr_set(inode, name_index, name, value, size, flags);
}
7405 
/* Binds XATTR_USER_PREFIX to the user get/set wrappers above. */
const struct xattr_handler ocfs2_xattr_user_handler = {
	.prefix	= XATTR_USER_PREFIX,
	.get	= ocfs2_xattr_user_get,
	.set	= ocfs2_xattr_user_set,
};
7411