1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * xattr.c 4 * 5 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 6 * 7 * CREDITS: 8 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 10 */ 11 12 #include <linux/capability.h> 13 #include <linux/fs.h> 14 #include <linux/types.h> 15 #include <linux/slab.h> 16 #include <linux/highmem.h> 17 #include <linux/pagemap.h> 18 #include <linux/uio.h> 19 #include <linux/sched.h> 20 #include <linux/splice.h> 21 #include <linux/mount.h> 22 #include <linux/writeback.h> 23 #include <linux/falloc.h> 24 #include <linux/sort.h> 25 #include <linux/init.h> 26 #include <linux/module.h> 27 #include <linux/string.h> 28 #include <linux/security.h> 29 30 #include <cluster/masklog.h> 31 32 #include "ocfs2.h" 33 #include "alloc.h" 34 #include "blockcheck.h" 35 #include "dlmglue.h" 36 #include "file.h" 37 #include "symlink.h" 38 #include "sysfile.h" 39 #include "inode.h" 40 #include "journal.h" 41 #include "ocfs2_fs.h" 42 #include "suballoc.h" 43 #include "uptodate.h" 44 #include "buffer_head_io.h" 45 #include "super.h" 46 #include "xattr.h" 47 #include "refcounttree.h" 48 #include "acl.h" 49 #include "ocfs2_trace.h" 50 51 struct ocfs2_xattr_def_value_root { 52 /* Must be last as it ends in a flexible-array member. 
*/ 53 TRAILING_OVERLAP(struct ocfs2_xattr_value_root, xv, xr_list.l_recs, 54 struct ocfs2_extent_rec er; 55 ); 56 }; 57 static_assert(offsetof(struct ocfs2_xattr_def_value_root, xv.xr_list.l_recs) == 58 offsetof(struct ocfs2_xattr_def_value_root, er)); 59 60 struct ocfs2_xattr_bucket { 61 /* The inode these xattrs are associated with */ 62 struct inode *bu_inode; 63 64 /* The actual buffers that make up the bucket */ 65 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 66 67 /* How many blocks make up one bucket for this filesystem */ 68 int bu_blocks; 69 }; 70 71 struct ocfs2_xattr_set_ctxt { 72 handle_t *handle; 73 struct ocfs2_alloc_context *meta_ac; 74 struct ocfs2_alloc_context *data_ac; 75 struct ocfs2_cached_dealloc_ctxt dealloc; 76 int set_abort; 77 }; 78 79 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 80 #define OCFS2_XATTR_INLINE_SIZE 80 81 #define OCFS2_XATTR_HEADER_GAP 4 82 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 83 - sizeof(struct ocfs2_xattr_header) \ 84 - OCFS2_XATTR_HEADER_GAP) 85 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 86 - sizeof(struct ocfs2_xattr_block) \ 87 - sizeof(struct ocfs2_xattr_header) \ 88 - OCFS2_XATTR_HEADER_GAP) 89 90 static struct ocfs2_xattr_def_value_root def_xv = { 91 .xv.xr_list.l_count = cpu_to_le16(1), 92 }; 93 94 const struct xattr_handler * const ocfs2_xattr_handlers[] = { 95 &ocfs2_xattr_user_handler, 96 &ocfs2_xattr_trusted_handler, 97 &ocfs2_xattr_security_handler, 98 NULL 99 }; 100 101 static const struct xattr_handler * const ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 102 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 103 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, 104 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default, 105 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 106 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 107 }; 108 109 struct ocfs2_xattr_info { 
110 int xi_name_index; 111 const char *xi_name; 112 int xi_name_len; 113 const void *xi_value; 114 size_t xi_value_len; 115 }; 116 117 struct ocfs2_xattr_search { 118 struct buffer_head *inode_bh; 119 /* 120 * xattr_bh point to the block buffer head which has extended attribute 121 * when extended attribute in inode, xattr_bh is equal to inode_bh. 122 */ 123 struct buffer_head *xattr_bh; 124 struct ocfs2_xattr_header *header; 125 struct ocfs2_xattr_bucket *bucket; 126 void *base; 127 void *end; 128 struct ocfs2_xattr_entry *here; 129 int not_found; 130 }; 131 132 /* Operations on struct ocfs2_xa_entry */ 133 struct ocfs2_xa_loc; 134 struct ocfs2_xa_loc_operations { 135 /* 136 * Journal functions 137 */ 138 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 139 int type); 140 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 141 142 /* 143 * Return a pointer to the appropriate buffer in loc->xl_storage 144 * at the given offset from loc->xl_header. 145 */ 146 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 147 148 /* Can we reuse the existing entry for the new value? */ 149 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 150 struct ocfs2_xattr_info *xi); 151 152 /* How much space is needed for the new value? */ 153 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 154 struct ocfs2_xattr_info *xi); 155 156 /* 157 * Return the offset of the first name+value pair. This is 158 * the start of our downward-filling free space. 159 */ 160 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 161 162 /* 163 * Remove the name+value at this location. Do whatever is 164 * appropriate with the remaining name+value pairs. 
165 */ 166 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 167 168 /* Fill xl_entry with a new entry */ 169 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 170 171 /* Add name+value storage to an entry */ 172 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 173 174 /* 175 * Initialize the value buf's access and bh fields for this entry. 176 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 177 */ 178 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 179 struct ocfs2_xattr_value_buf *vb); 180 }; 181 182 /* 183 * Describes an xattr entry location. This is a memory structure 184 * tracking the on-disk structure. 185 */ 186 struct ocfs2_xa_loc { 187 /* This xattr belongs to this inode */ 188 struct inode *xl_inode; 189 190 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 191 struct ocfs2_xattr_header *xl_header; 192 193 /* Bytes from xl_header to the end of the storage */ 194 int xl_size; 195 196 /* 197 * The ocfs2_xattr_entry this location describes. If this is 198 * NULL, this location describes the on-disk structure where it 199 * would have been. 
200 */ 201 struct ocfs2_xattr_entry *xl_entry; 202 203 /* 204 * Internal housekeeping 205 */ 206 207 /* Buffer(s) containing this entry */ 208 void *xl_storage; 209 210 /* Operations on the storage backing this location */ 211 const struct ocfs2_xa_loc_operations *xl_ops; 212 }; 213 214 /* 215 * Convenience functions to calculate how much space is needed for a 216 * given name+value pair 217 */ 218 static int namevalue_size(int name_len, uint64_t value_len) 219 { 220 if (value_len > OCFS2_XATTR_INLINE_SIZE) 221 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 222 else 223 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 224 } 225 226 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 227 { 228 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 229 } 230 231 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 232 { 233 u64 value_len = le64_to_cpu(xe->xe_value_size); 234 235 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 236 ocfs2_xattr_is_local(xe)); 237 return namevalue_size(xe->xe_name_len, value_len); 238 } 239 240 241 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 242 struct ocfs2_xattr_header *xh, 243 int index, 244 int *block_off, 245 int *new_offset); 246 247 static int ocfs2_xattr_block_find(struct inode *inode, 248 int name_index, 249 const char *name, 250 struct ocfs2_xattr_search *xs); 251 static int ocfs2_xattr_index_block_find(struct inode *inode, 252 struct buffer_head *root_bh, 253 int name_index, 254 const char *name, 255 struct ocfs2_xattr_search *xs); 256 257 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 258 struct buffer_head *blk_bh, 259 char *buffer, 260 size_t buffer_size); 261 262 static int ocfs2_xattr_create_index_block(struct inode *inode, 263 struct ocfs2_xattr_search *xs, 264 struct ocfs2_xattr_set_ctxt *ctxt); 265 266 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 267 struct ocfs2_xattr_info *xi, 268 struct ocfs2_xattr_search *xs, 
269 struct ocfs2_xattr_set_ctxt *ctxt); 270 271 typedef int (xattr_tree_rec_func)(struct inode *inode, 272 struct buffer_head *root_bh, 273 u64 blkno, u32 cpos, u32 len, void *para); 274 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 275 struct buffer_head *root_bh, 276 xattr_tree_rec_func *rec_func, 277 void *para); 278 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 279 struct ocfs2_xattr_bucket *bucket, 280 void *para); 281 static int ocfs2_rm_xattr_cluster(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, 284 u32 cpos, 285 u32 len, 286 void *para); 287 288 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 289 u64 src_blk, u64 last_blk, u64 to_blk, 290 unsigned int start_bucket, 291 u32 *first_hash); 292 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 293 struct ocfs2_dinode *di, 294 struct ocfs2_xattr_info *xi, 295 struct ocfs2_xattr_search *xis, 296 struct ocfs2_xattr_search *xbs, 297 struct ocfs2_refcount_tree **ref_tree, 298 int *meta_need, 299 int *credits); 300 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 301 struct ocfs2_xattr_bucket *bucket, 302 int offset, 303 struct ocfs2_xattr_value_root **xv, 304 struct buffer_head **bh); 305 306 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 307 { 308 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 309 } 310 311 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 312 { 313 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 314 } 315 316 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 317 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 318 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 319 320 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 321 { 322 struct ocfs2_xattr_bucket *bucket; 323 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 324 325 BUG_ON(blks > 
OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 326 327 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 328 if (bucket) { 329 bucket->bu_inode = inode; 330 bucket->bu_blocks = blks; 331 } 332 333 return bucket; 334 } 335 336 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 337 { 338 int i; 339 340 for (i = 0; i < bucket->bu_blocks; i++) { 341 brelse(bucket->bu_bhs[i]); 342 bucket->bu_bhs[i] = NULL; 343 } 344 } 345 346 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 347 { 348 if (bucket) { 349 ocfs2_xattr_bucket_relse(bucket); 350 bucket->bu_inode = NULL; 351 kfree(bucket); 352 } 353 } 354 355 /* 356 * A bucket that has never been written to disk doesn't need to be 357 * read. We just need the buffer_heads. Don't call this for 358 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 359 * them fully. 360 */ 361 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 362 u64 xb_blkno, int new) 363 { 364 int i, rc = 0; 365 366 for (i = 0; i < bucket->bu_blocks; i++) { 367 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 368 xb_blkno + i); 369 if (!bucket->bu_bhs[i]) { 370 rc = -ENOMEM; 371 mlog_errno(rc); 372 break; 373 } 374 375 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 376 bucket->bu_bhs[i])) { 377 if (new) 378 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 379 bucket->bu_bhs[i]); 380 else { 381 set_buffer_uptodate(bucket->bu_bhs[i]); 382 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 383 bucket->bu_bhs[i]); 384 } 385 } 386 } 387 388 if (rc) 389 ocfs2_xattr_bucket_relse(bucket); 390 return rc; 391 } 392 393 /* Read the xattr bucket at xb_blkno */ 394 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 395 u64 xb_blkno) 396 { 397 int rc; 398 399 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 400 bucket->bu_blocks, bucket->bu_bhs, 0, 401 NULL); 402 if (!rc) { 403 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 404 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 405 bucket->bu_bhs, 406 bucket->bu_blocks, 407 &bucket_xh(bucket)->xh_check); 408 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 409 if (rc) 410 mlog_errno(rc); 411 } 412 413 if (rc) 414 ocfs2_xattr_bucket_relse(bucket); 415 return rc; 416 } 417 418 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 419 struct ocfs2_xattr_bucket *bucket, 420 int type) 421 { 422 int i, rc = 0; 423 424 for (i = 0; i < bucket->bu_blocks; i++) { 425 rc = ocfs2_journal_access(handle, 426 INODE_CACHE(bucket->bu_inode), 427 bucket->bu_bhs[i], type); 428 if (rc) { 429 mlog_errno(rc); 430 break; 431 } 432 } 433 434 return rc; 435 } 436 437 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 438 struct ocfs2_xattr_bucket *bucket) 439 { 440 int i; 441 442 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 443 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 444 bucket->bu_bhs, bucket->bu_blocks, 445 &bucket_xh(bucket)->xh_check); 446 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 447 448 for (i = 0; i < bucket->bu_blocks; i++) 449 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 450 } 451 452 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 453 struct ocfs2_xattr_bucket *src) 454 { 455 int i; 456 int blocksize = src->bu_inode->i_sb->s_blocksize; 457 458 BUG_ON(dest->bu_blocks != src->bu_blocks); 459 BUG_ON(dest->bu_inode != src->bu_inode); 460 461 for (i = 0; i < src->bu_blocks; i++) { 462 memcpy(bucket_block(dest, i), bucket_block(src, i), 463 blocksize); 464 } 465 } 466 467 static int ocfs2_validate_xattr_block(struct super_block *sb, 468 struct buffer_head *bh) 469 { 470 int rc; 471 struct ocfs2_xattr_block *xb = 472 (struct ocfs2_xattr_block *)bh->b_data; 473 474 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 475 476 BUG_ON(!buffer_uptodate(bh)); 477 478 
/* 479 * If the ecc fails, we return the error but otherwise 480 * leave the filesystem running. We know any error is 481 * local to this block. 482 */ 483 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 484 if (rc) 485 return rc; 486 487 /* 488 * Errors after here are fatal 489 */ 490 491 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 492 return ocfs2_error(sb, 493 "Extended attribute block #%llu has bad signature %.*s\n", 494 (unsigned long long)bh->b_blocknr, 7, 495 xb->xb_signature); 496 } 497 498 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 499 return ocfs2_error(sb, 500 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 501 (unsigned long long)bh->b_blocknr, 502 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 503 } 504 505 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 506 return ocfs2_error(sb, 507 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 508 (unsigned long long)bh->b_blocknr, 509 le32_to_cpu(xb->xb_fs_generation)); 510 } 511 512 return 0; 513 } 514 515 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 516 struct buffer_head **bh) 517 { 518 int rc; 519 struct buffer_head *tmp = *bh; 520 521 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 522 ocfs2_validate_xattr_block); 523 524 /* If ocfs2_read_block() got us a new bh, pass it up. */ 525 if (!rc && !*bh) 526 *bh = tmp; 527 528 return rc; 529 } 530 531 static inline const char *ocfs2_xattr_prefix(int name_index) 532 { 533 const struct xattr_handler *handler = NULL; 534 535 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 536 handler = ocfs2_xattr_handler_map[name_index]; 537 return handler ? 
xattr_prefix(handler) : NULL; 538 } 539 540 static u32 ocfs2_xattr_name_hash(struct inode *inode, 541 const char *name, 542 int name_len) 543 { 544 /* Get hash value of uuid from super block */ 545 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 546 int i; 547 548 /* hash extended attribute name */ 549 for (i = 0; i < name_len; i++) { 550 hash = (hash << OCFS2_HASH_SHIFT) ^ 551 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 552 *name++; 553 } 554 555 return hash; 556 } 557 558 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 559 { 560 return namevalue_size(name_len, value_len) + 561 sizeof(struct ocfs2_xattr_entry); 562 } 563 564 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 565 { 566 return namevalue_size_xi(xi) + 567 sizeof(struct ocfs2_xattr_entry); 568 } 569 570 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 571 { 572 return namevalue_size_xe(xe) + 573 sizeof(struct ocfs2_xattr_entry); 574 } 575 576 int ocfs2_calc_security_init(struct inode *dir, 577 struct ocfs2_security_xattr_info *si, 578 int *want_clusters, 579 int *xattr_credits, 580 struct ocfs2_alloc_context **xattr_ac) 581 { 582 int ret = 0; 583 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 584 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 585 si->value_len); 586 587 /* 588 * The max space of security xattr taken inline is 589 * 256(name) + 80(value) + 16(entry) = 352 bytes, 590 * So reserve one metadata block for it is ok. 
/*
 * Work out how much metadata, how many clusters and how many journal
 * credits must be reserved up front so that a new inode's initial
 * xattrs (security label and/or inherited POSIX ACLs) can be written.
 *
 * @dir:           parent directory (source of inherited default ACLs)
 * @dir_bh:        buffer holding @dir's dinode
 * @mode:          mode of the inode being created
 * @si:            security xattr to be applied, if si->enable
 * @want_clusters: incremented by the clusters needed for large values
 * @xattr_credits: incremented by the journal credits needed
 * @want_meta:     incremented if an external xattr block is needed
 *
 * Returns 0 on success or a negative errno from the ACL lookup.
 */
int ocfs2_calc_xattr_init(struct inode *dir,
			  struct buffer_head *dir_bh,
			  umode_t mode,
			  struct ocfs2_security_xattr_info *si,
			  int *want_clusters,
			  int *xattr_credits,
			  int *want_meta)
{
	int ret = 0;
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	int s_size = 0, a_size = 0, acl_len = 0, new_clusters;

	/* Space the security xattr entry would consume, if enabled. */
	if (si->enable)
		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
						     si->value_len);

	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
		/* Probe the parent's default ACL size (value not copied). */
		down_read(&OCFS2_I(dir)->ip_xattr_sem);
		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
						OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
						"", NULL, 0);
		up_read(&OCFS2_I(dir)->ip_xattr_sem);
		if (acl_len > 0) {
			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
			/* Directories inherit both DEFAULT and ACCESS ACLs. */
			if (S_ISDIR(mode))
				a_size <<= 1;
		} else if (acl_len != 0 && acl_len != -ENODATA) {
			ret = acl_len;
			mlog_errno(ret);
			return ret;
		}
	}

	/* Nothing to store -- no reservation needed. */
	if (!(s_size + a_size))
		return ret;

	/*
	 * The max space of security xattr taken inline is
	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
	 * The max space of acl xattr taken inline is
	 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
	 * when blocksize = 512, may reserve one more cluster for
	 * xattr bucket, otherwise reserve one metadata block
	 * for them is ok.
	 * If this is a new directory with inline data,
	 * we choose to reserve the entire inline area for
	 * directory contents and force an external xattr block.
	 */
	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
		*want_meta = *want_meta + 1;
		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
	}

	/* Too big even for an external block: a bucket cluster is needed. */
	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
		*want_clusters += 1;
		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
	}

	/*
	 * reserve credits and clusters for xattrs which has large value
	 * and have to be set outside
	 */
	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
							si->value_len);
		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
							   new_clusters);
		*want_clusters += new_clusters;
	}
	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
		/* for directory, it has DEFAULT and ACCESS two types of acls */
		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
							   new_clusters);
		*want_clusters += new_clusters;
	}

	return ret;
}
/*
 * Remove one physically-contiguous run of clusters [cpos, cpos+len)
 * from an xattr value's extent tree and release the underlying space.
 *
 * The extent is first cut out of the tree, the value root's cluster
 * count is decremented, and only then is the physical space handed to
 * either the refcount machinery (for reflinked extents) or the dealloc
 * context -- that ordering keeps the tree consistent if a later step
 * fails.  Runs inside the caller's transaction (ctxt->handle).
 */
static int __ocfs2_remove_xattr_range(struct inode *inode,
				      struct ocfs2_xattr_value_buf *vb,
				      u32 cpos, u32 phys_cpos, u32 len,
				      unsigned int ext_flags,
				      struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	handle_t *handle = ctxt->handle;
	struct ocfs2_extent_tree et;

	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

	/* Journal the buffer holding the value root before modifying it. */
	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
			    OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
				  &ctxt->dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
	ocfs2_journal_dirty(handle, vb->vb_bh);

	/* Refcounted extents must drop a reference instead of freeing. */
	if (ext_flags & OCFS2_EXT_REFCOUNTED)
		ret = ocfs2_decrease_refcount(inode, handle,
					ocfs2_blocks_to_clusters(inode->i_sb,
								 phys_blkno),
					len, ctxt->meta_ac, &ctxt->dealloc, 1);
	else
		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
						  phys_blkno, len);
	if (ret)
		mlog_errno(ret);

out:
	return ret;
}
&phys_cpos, 826 &alloc_size, 827 &vb->vb_xv->xr_list, &ext_flags); 828 if (ret) { 829 mlog_errno(ret); 830 goto out; 831 } 832 833 if (alloc_size > trunc_len) 834 alloc_size = trunc_len; 835 836 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 837 phys_cpos, alloc_size, 838 ext_flags, ctxt); 839 if (ret) { 840 mlog_errno(ret); 841 goto out; 842 } 843 844 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 845 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 846 block, alloc_size); 847 cpos += alloc_size; 848 trunc_len -= alloc_size; 849 } 850 851 out: 852 return ret; 853 } 854 855 static int ocfs2_xattr_value_truncate(struct inode *inode, 856 struct ocfs2_xattr_value_buf *vb, 857 int len, 858 struct ocfs2_xattr_set_ctxt *ctxt) 859 { 860 int ret; 861 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 862 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 863 864 if (new_clusters == old_clusters) 865 return 0; 866 867 if (new_clusters > old_clusters) 868 ret = ocfs2_xattr_extend_allocation(inode, 869 new_clusters - old_clusters, 870 vb, ctxt); 871 else 872 ret = ocfs2_xattr_shrink_size(inode, 873 old_clusters, new_clusters, 874 vb, ctxt); 875 876 return ret; 877 } 878 879 static int ocfs2_xattr_list_entry(struct super_block *sb, 880 char *buffer, size_t size, 881 size_t *result, int type, 882 const char *name, int name_len) 883 { 884 char *p = buffer + *result; 885 const char *prefix; 886 int prefix_len; 887 int total_len; 888 889 switch(type) { 890 case OCFS2_XATTR_INDEX_USER: 891 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 892 return 0; 893 break; 894 895 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 896 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 897 if (!(sb->s_flags & SB_POSIXACL)) 898 return 0; 899 break; 900 901 case OCFS2_XATTR_INDEX_TRUSTED: 902 if (!capable(CAP_SYS_ADMIN)) 903 return 0; 904 break; 905 } 906 907 prefix = ocfs2_xattr_prefix(type); 908 if (!prefix) 909 return 0; 910 prefix_len = strlen(prefix); 911 
total_len = prefix_len + name_len + 1; 912 *result += total_len; 913 914 /* we are just looking for how big our buffer needs to be */ 915 if (!size) 916 return 0; 917 918 if (*result > size) 919 return -ERANGE; 920 921 memcpy(p, prefix, prefix_len); 922 memcpy(p + prefix_len, name, name_len); 923 p[prefix_len + name_len] = '\0'; 924 925 return 0; 926 } 927 928 static int ocfs2_xattr_list_entries(struct inode *inode, 929 struct ocfs2_xattr_header *header, 930 char *buffer, size_t buffer_size) 931 { 932 size_t result = 0; 933 int i, type, ret; 934 const char *name; 935 936 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 937 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 938 type = ocfs2_xattr_get_type(entry); 939 name = (const char *)header + 940 le16_to_cpu(entry->xe_name_offset); 941 942 ret = ocfs2_xattr_list_entry(inode->i_sb, 943 buffer, buffer_size, 944 &result, type, name, 945 entry->xe_name_len); 946 if (ret) 947 return ret; 948 } 949 950 return result; 951 } 952 953 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 954 struct ocfs2_dinode *di) 955 { 956 struct ocfs2_xattr_header *xh; 957 int i; 958 959 xh = (struct ocfs2_xattr_header *) 960 ((void *)di + inode->i_sb->s_blocksize - 961 le16_to_cpu(di->i_xattr_inline_size)); 962 963 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 964 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 965 return 1; 966 967 return 0; 968 } 969 970 static int ocfs2_xattr_ibody_list(struct inode *inode, 971 struct ocfs2_dinode *di, 972 char *buffer, 973 size_t buffer_size) 974 { 975 struct ocfs2_xattr_header *header = NULL; 976 struct ocfs2_inode_info *oi = OCFS2_I(inode); 977 int ret = 0; 978 u16 xattr_count; 979 size_t max_entries; 980 u16 inline_size; 981 982 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 983 return ret; 984 985 inline_size = le16_to_cpu(di->i_xattr_inline_size); 986 987 /* Validate inline size is reasonable */ 988 if (inline_size > inode->i_sb->s_blocksize || 989 inline_size < 
sizeof(struct ocfs2_xattr_header)) { 990 ocfs2_error(inode->i_sb, 991 "Invalid xattr inline size %u in inode %llu\n", 992 inline_size, 993 (unsigned long long)OCFS2_I(inode)->ip_blkno); 994 return -EFSCORRUPTED; 995 } 996 997 header = (struct ocfs2_xattr_header *) 998 ((void *)di + inode->i_sb->s_blocksize - inline_size); 999 1000 xattr_count = le16_to_cpu(header->xh_count); 1001 max_entries = (inline_size - sizeof(struct ocfs2_xattr_header)) / 1002 sizeof(struct ocfs2_xattr_entry); 1003 1004 if (xattr_count > max_entries) { 1005 ocfs2_error(inode->i_sb, 1006 "xattr entry count %u exceeds maximum %zu in inode %llu\n", 1007 xattr_count, max_entries, 1008 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1009 return -EFSCORRUPTED; 1010 } 1011 1012 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 1013 1014 return ret; 1015 } 1016 1017 static int ocfs2_xattr_block_list(struct inode *inode, 1018 struct ocfs2_dinode *di, 1019 char *buffer, 1020 size_t buffer_size) 1021 { 1022 struct buffer_head *blk_bh = NULL; 1023 struct ocfs2_xattr_block *xb; 1024 int ret = 0; 1025 1026 if (!di->i_xattr_loc) 1027 return ret; 1028 1029 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1030 &blk_bh); 1031 if (ret < 0) { 1032 mlog_errno(ret); 1033 return ret; 1034 } 1035 1036 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1037 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1038 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1039 ret = ocfs2_xattr_list_entries(inode, header, 1040 buffer, buffer_size); 1041 } else 1042 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1043 buffer, buffer_size); 1044 1045 brelse(blk_bh); 1046 1047 return ret; 1048 } 1049 1050 ssize_t ocfs2_listxattr(struct dentry *dentry, 1051 char *buffer, 1052 size_t size) 1053 { 1054 int ret = 0, i_ret = 0, b_ret = 0; 1055 struct buffer_head *di_bh = NULL; 1056 struct ocfs2_dinode *di = NULL; 1057 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1058 
/*
 * Scan the entry table at xs->here for an xattr matching
 * (name_index, name).  On a match, xs->here is left pointing at the
 * entry and 0 is returned; otherwise xs->here points past the table
 * and -ENODATA is returned.  Entries and name offsets are bounds-
 * checked against xs->end so a corrupted on-disk table cannot walk us
 * out of the buffer.
 */
static int ocfs2_xattr_find_entry(struct inode *inode, int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *entry;
	size_t name_len;
	int i, name_offset, cmp = 1;

	if (name == NULL)
		return -EINVAL;

	name_len = strlen(name);
	entry = xs->here;
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		/* xh_count may lie on a corrupted fs -- stay inside bounds. */
		if ((void *)entry >= xs->end) {
			ocfs2_error(inode->i_sb, "corrupted xattr entries");
			return -EFSCORRUPTED;
		}
		/* Cheap comparisons first: type, then length, then bytes. */
		cmp = name_index - ocfs2_xattr_get_type(entry);
		if (!cmp)
			cmp = name_len - entry->xe_name_len;
		if (!cmp) {
			name_offset = le16_to_cpu(entry->xe_name_offset);
			/* Validate the name lies fully inside the buffer. */
			if ((xs->base + name_offset + name_len) > xs->end) {
				ocfs2_error(inode->i_sb,
					    "corrupted xattr entries");
				return -EFSCORRUPTED;
			}
			cmp = memcmp(name, (xs->base + name_offset), name_len);
		}
		if (cmp == 0)
			break;
		entry += 1;
	}
	xs->here = entry;

	return cmp ? -ENODATA : 0;
}
				blocksize : len;
			memcpy(buffer, bh->b_data, cplen);
			len -= cplen;
			buffer += cplen;

			brelse(bh);
			bh = NULL;
			/* Done once the requested length is exhausted */
			if (len == 0)
				break;
		}
		cpos += num_clusters;
	}
out:
	return ret;
}

/*
 * Look up an xattr stored in the inode body.  If @buffer is non-NULL,
 * copy the value into it.  Returns the value size on success,
 * -ENODATA if the attribute is not present inline, or -ERANGE if
 * @buffer_size is too small.
 */
static int ocfs2_xattr_ibody_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return -ENODATA;

	/* The inline xattr region lives at the tail of the inode block */
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	xs->header = (struct ocfs2_xattr_header *)
		(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
	if (ret)
		return ret;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		if (size > buffer_size)
			return -ERANGE;
		if (ocfs2_xattr_is_local(xs->here)) {
			/* Local value: stored right after the padded name */
			memcpy(buffer, (void *)xs->base +
			       le16_to_cpu(xs->here->xe_name_offset) +
			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
		} else {
			/* External value: read from its extent tree */
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + le16_to_cpu(
				 xs->here->xe_name_offset) +
				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				return ret;
			}
		}
	}

	return size;
}

/*
 * Look up an xattr stored in the inode's external xattr block, which
 * may be a plain block or an indexed (bucketed) tree.  Same return
 * convention as ocfs2_xattr_ibody_get().
 */
static int ocfs2_xattr_block_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = -ENODATA, name_offset, name_len, i;
	int block_off;

	xs->bucket = ocfs2_xattr_bucket_new(inode);
	if (!xs->bucket) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto cleanup;
	}

	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
	if (ret) {
		mlog_errno(ret);
		goto cleanup;
	}

	if (xs->not_found) {
		ret = -ENODATA;
		goto cleanup;
	}

	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		ret = -ERANGE;
		if (size > buffer_size)
			goto cleanup;

		name_offset = le16_to_cpu(xs->here->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
		i = xs->here - xs->header->xh_entries;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/* Indexed block: translate entry to bucket offsets */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
								bucket_xh(xs->bucket),
								i,
								&block_off,
								&name_offset);
			if (ret) {
				mlog_errno(ret);
				goto cleanup;
			}
			xs->base = bucket_block(xs->bucket, block_off);
		}
		if (ocfs2_xattr_is_local(xs->here)) {
			memcpy(buffer, (void *)xs->base +
			       name_offset + name_len, size);
		} else {
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + name_offset + name_len);
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				goto cleanup;
			}
		}
	}
	ret = size;
cleanup:
	ocfs2_xattr_bucket_free(xs->bucket);

	brelse(xs->xattr_bh);
	xs->xattr_bh = NULL;
	return ret;
}

/*
 * Like ocfs2_xattr_get(), but the caller already holds the inode
 * cluster lock and supplies the inode's buffer_head.  ip_xattr_sem
 * must also be held by the caller.
 */
int ocfs2_xattr_get_nolock(struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	int ret;
	struct ocfs2_dinode *di = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};
	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return -ENODATA;

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	/* Search the inode body first, then any external xattr block */
	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
				    buffer_size, &xis);
	if (ret == -ENODATA && di->i_xattr_loc)
		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
					    buffer_size, &xbs);

	return ret;
}

/* ocfs2_xattr_get()
 *
 * Copy an extended attribute into the buffer provided.
 * Buffer is NULL to compute the size of buffer required.
 */
static int ocfs2_xattr_get(struct inode *inode,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	int ret, had_lock;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_lock_holder oh;

	/* Tracker variant tolerates the lock already being held */
	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
	if (had_lock < 0) {
		mlog_errno(had_lock);
		return had_lock;
	}
	down_read(&OCFS2_I(inode)->ip_xattr_sem);
	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
				     name, buffer, buffer_size);
	up_read(&OCFS2_I(inode)->ip_xattr_sem);

	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);

	brelse(di_bh);

	return ret;
}

/*
 * Write @value_len bytes of @value into the clusters already allocated
 * to the value's extent tree (vb->vb_xv), journaling each block and
 * zeroing the tail of the last block written.
 */
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   handle_t *handle,
					   struct ocfs2_xattr_value_buf *vb,
					   const void *value,
					   int value_len)
{
	int ret = 0, i, cp_len;
	u16 blocksize = inode->i_sb->s_blocksize;
	u32 p_cluster, num_clusters;
	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
	u64 blkno;
	struct buffer_head *bh = NULL;
	unsigned int ext_flags;
	struct ocfs2_xattr_value_root *xv = vb->vb_xv;

	/* Caller must have allocated enough clusters for the value */
	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));

	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, &xv->xr_list,
					       &ext_flags);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Refcounted (shared) extents must have been CoWed already */
		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
					       &bh, NULL);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_journal_access(handle,
						   INODE_CACHE(inode),
						   bh,
						   OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret < 0) {
				mlog_errno(ret);
				goto out;
			}

			cp_len = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, cp_len);
			value_len -= cp_len;
			value += cp_len;
			/* Zero the remainder of a partially-filled block */
			if (cp_len < blocksize)
				memset(bh->b_data + cp_len, 0,
				       blocksize - cp_len);

			ocfs2_journal_dirty(handle, bh);
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: do we need to empty all the following
			 * blocks in this cluster?
			 */
			if (!value_len)
				break;
		}
		cpos += num_clusters;
	}
out:
	brelse(bh);

	return ret;
}

/*
 * Check whether @needed_space bytes fit between the upward-growing
 * entry array (@num_entries entries plus header and mandatory gap) and
 * the downward-growing name+value region starting at @free_start.
 * Returns 0 if it fits, -ENOSPC if not, -EIO if the layout is already
 * inconsistent (negative free space).
 */
static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
				       int num_entries)
{
	int free_space;

	if (!needed_space)
		return 0;

	free_space = free_start -
		sizeof(struct ocfs2_xattr_header) -
		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
		OCFS2_XATTR_HEADER_GAP;
	if (free_space < 0)
		return -EIO;
	if (free_space < needed_space)
		return -ENOSPC;

	return 0;
}

/* Dispatch journal access through the storage-specific operations */
static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
				   int type)
{
	return loc->xl_ops->xlo_journal_access(handle, loc, type);
}

static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
{
	loc->xl_ops->xlo_journal_dirty(handle, loc);
}

/* Give a pointer into the storage for the given offset */
static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
{
	BUG_ON(offset >= loc->xl_size);
	return loc->xl_ops->xlo_offset_pointer(loc, offset);
}

/*
 * Wipe the name+value pair and allow the storage to reclaim it. This
 * must be followed by either removal of the entry or a call to
 * ocfs2_xa_add_namevalue().
 */
static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	loc->xl_ops->xlo_wipe_namevalue(loc);
}

/*
 * Find lowest offset to a name+value pair. This is the start of our
 * downward-growing free space.
 */
static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
{
	return loc->xl_ops->xlo_get_free_start(loc);
}

/* Can we reuse loc->xl_entry for xi? */
static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_info *xi)
{
	return loc->xl_ops->xlo_can_reuse(loc, xi);
}

/* How much free space is needed to set the new value */
static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi)
{
	return loc->xl_ops->xlo_check_space(loc, xi);
}

/* Allocate a fresh entry slot and stamp it with @name_hash */
static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	loc->xl_ops->xlo_add_entry(loc, name_hash);
	loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
	/*
	 * We can't leave the new entry's xe_name_offset at zero or
	 * add_namevalue() will go nuts. We set it to the size of our
	 * storage so that it can never be less than any other entry.
	 */
	loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
}

/*
 * Reserve the name+value region for loc->xl_entry, fill in the entry
 * metadata (size, name length, type, local flag) and copy the name in.
 * The value itself is written later by ocfs2_xa_store_value().
 */
static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
				   struct ocfs2_xattr_info *xi)
{
	int size = namevalue_size_xi(xi);
	int nameval_offset;
	char *nameval_buf;

	loc->xl_ops->xlo_add_namevalue(loc, size);
	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	loc->xl_entry->xe_name_len = xi->xi_name_len;
	ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
	ocfs2_xattr_set_local(loc->xl_entry,
			      xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);

	nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
	memset(nameval_buf, 0, size);
	memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
}

/* Describe the entry's external value tree in @vb for truncate/extend */
static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_value_buf *vb)
{
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);

	/* Value bufs are for value trees */
	BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
	BUG_ON(namevalue_size_xe(loc->xl_entry) !=
	       (name_size + OCFS2_XATTR_ROOT_SIZE));

	loc->xl_ops->xlo_fill_value_buf(loc, vb);
	vb->vb_xv =
		(struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
							nameval_offset +
							name_size);
}

static int ocfs2_xa_block_journal_access(handle_t *handle,
					 struct ocfs2_xa_loc *loc, int type)
{
	struct buffer_head *bh = loc->xl_storage;
	ocfs2_journal_access_func access;

	/*
	 * A loc spanning a whole xattr block is journaled as an xattr
	 * block; a smaller (inline) loc lives in the dinode.
	 */
	if (loc->xl_size == (bh->b_size -
			     offsetof(struct ocfs2_xattr_block,
				      xb_attrs.xb_header)))
		access = ocfs2_journal_access_xb;
	else
		access = ocfs2_journal_access_di;
	return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
}

static void ocfs2_xa_block_journal_dirty(handle_t *handle,
					 struct ocfs2_xa_loc *loc)
{
	struct buffer_head *bh = loc->xl_storage;

	ocfs2_journal_dirty(handle, bh);
}

/* Block storage is contiguous, so offsets are relative to the header */
static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
					   int offset)
{
	return (char *)loc->xl_header + offset;
}

static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_info *xi)
{
	/*
	 * Block storage is strict. If the sizes aren't exact, we will
	 * remove the old one and reinsert the new.
	 */
	return namevalue_size_xe(loc->xl_entry) ==
		namevalue_size_xi(xi);
}

/*
 * The lowest xe_name_offset of any entry marks the bottom of the
 * downward-growing name+value region (blocks keep no xh_free_start).
 */
static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int i, count = le16_to_cpu(xh->xh_count);
	int offset, free_start = loc->xl_size;

	for (i = 0; i < count; i++) {
		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
		if (offset < free_start)
			free_start = offset;
	}

	return free_start;
}

static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
				      struct ocfs2_xattr_info *xi)
{
	int count = le16_to_cpu(loc->xl_header->xh_count);
	int free_start = ocfs2_xa_get_free_start(loc);
	int needed_space = ocfs2_xi_entry_usage(xi);

	/*
	 * Block storage will reclaim the original entry before inserting
	 * the new value, so we only need the difference. If the new
	 * entry is smaller than the old one, we don't need anything.
	 */
	if (loc->xl_entry) {
		/* Don't need space if we're reusing! */
		if (ocfs2_xa_can_reuse_entry(loc, xi))
			needed_space = 0;
		else
			needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
	}
	if (needed_space < 0)
		needed_space = 0;
	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
}

/*
 * Block storage for xattrs keeps the name+value pairs compacted. When
 * we remove one, we have to shift any that preceded it towards the end.
 */
static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	int i, offset;
	int namevalue_offset, first_namevalue_offset, namevalue_size;
	struct ocfs2_xattr_entry *entry = loc->xl_entry;
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int count = le16_to_cpu(xh->xh_count);

	namevalue_offset = le16_to_cpu(entry->xe_name_offset);
	namevalue_size = namevalue_size_xe(entry);
	first_namevalue_offset = ocfs2_xa_get_free_start(loc);

	/* Shift the name+value pairs */
	memmove((char *)xh + first_namevalue_offset + namevalue_size,
		(char *)xh + first_namevalue_offset,
		namevalue_offset - first_namevalue_offset);
	memset((char *)xh + first_namevalue_offset, 0, namevalue_size);

	/* Now tell xh->xh_entries about it */
	for (i = 0; i < count; i++) {
		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
		/* Pairs at or below the wiped one moved up by its size */
		if (offset <= namevalue_offset)
			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
				     namevalue_size);
	}

	/*
	 * Note that we don't update xh_free_start or xh_name_value_len
	 * because they're not used in block-stored xattrs.
	 */
}

/* Append a zeroed entry at the end of the (unsorted) entry array */
static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	int count = le16_to_cpu(loc->xl_header->xh_count);
	loc->xl_entry = &(loc->xl_header->xh_entries[count]);
	le16_add_cpu(&loc->xl_header->xh_count, 1);
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}

/* Carve @size bytes off the bottom of the free region for the pair */
static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
{
	int free_start = ocfs2_xa_get_free_start(loc);

	loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
}

static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
					  struct ocfs2_xattr_value_buf *vb)
{
	struct buffer_head *bh = loc->xl_storage;

	/* Same block-vs-dinode test as ocfs2_xa_block_journal_access() */
	if (loc->xl_size == (bh->b_size -
			     offsetof(struct ocfs2_xattr_block,
				      xb_attrs.xb_header)))
		vb->vb_access = ocfs2_journal_access_xb;
	else
		vb->vb_access = ocfs2_journal_access_di;
	vb->vb_bh = bh;
}

/*
 * Operations for xattrs stored in blocks. This includes inline inode
 * storage and unindexed ocfs2_xattr_blocks.
 */
static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
	.xlo_journal_access = ocfs2_xa_block_journal_access,
	.xlo_journal_dirty = ocfs2_xa_block_journal_dirty,
	.xlo_offset_pointer = ocfs2_xa_block_offset_pointer,
	.xlo_check_space = ocfs2_xa_block_check_space,
	.xlo_can_reuse = ocfs2_xa_block_can_reuse,
	.xlo_get_free_start = ocfs2_xa_block_get_free_start,
	.xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue,
	.xlo_add_entry = ocfs2_xa_block_add_entry,
	.xlo_add_namevalue = ocfs2_xa_block_add_namevalue,
	.xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf,
};

static int ocfs2_xa_bucket_journal_access(handle_t *handle,
					  struct ocfs2_xa_loc *loc, int type)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;

	return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
}

static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
					  struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;

	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
}

/* Buckets span several blocks; map a bucket offset to block + offset */
static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
					    int offset)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	int block, block_offset;

	/* The header is at the front of the bucket */
	block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
	block_offset = offset % loc->xl_inode->i_sb->s_blocksize;

	return bucket_block(bucket, block) + block_offset;
}

/* Buckets can reuse any existing pair that is at least large enough */
static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
				     struct ocfs2_xattr_info *xi)
{
	return namevalue_size_xe(loc->xl_entry) >=
		namevalue_size_xi(xi);
}

/* Buckets track the free-space boundary explicitly in xh_free_start */
static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
}

static int ocfs2_bucket_align_free_start(struct super_block *sb,
					 int free_start, int size)
{
	/*
	 * We need to make sure that the name+value pair fits within
	 * one block.
	 */
	if (((free_start - size) >> sb->s_blocksize_bits) !=
	    ((free_start - 1) >> sb->s_blocksize_bits))
		free_start -= free_start % sb->s_blocksize;

	return free_start;
}

static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
				       struct ocfs2_xattr_info *xi)
{
	int rc;
	int count = le16_to_cpu(loc->xl_header->xh_count);
	int free_start = ocfs2_xa_get_free_start(loc);
	int needed_space = ocfs2_xi_entry_usage(xi);
	int size = namevalue_size_xi(xi);
	struct super_block *sb = loc->xl_inode->i_sb;

	/*
	 * Bucket storage does not reclaim name+value pairs it cannot
	 * reuse. They live as holes until the bucket fills, and then
	 * the bucket is defragmented. However, the bucket can reclaim
	 * the ocfs2_xattr_entry.
	 */
	if (loc->xl_entry) {
		/* Don't need space if we're reusing! */
		if (ocfs2_xa_can_reuse_entry(loc, xi))
			needed_space = 0;
		else
			needed_space -= sizeof(struct ocfs2_xattr_entry);
	}
	BUG_ON(needed_space < 0);

	if (free_start < size) {
		if (needed_space)
			return -ENOSPC;
	} else {
		/*
		 * First we check if it would fit in the first place.
		 * Below, we align the free start to a block. This may
		 * slide us below the minimum gap. By checking unaligned
		 * first, we avoid that error.
		 */
		rc = ocfs2_xa_check_space_helper(needed_space, free_start,
						 count);
		if (rc)
			return rc;
		free_start = ocfs2_bucket_align_free_start(sb, free_start,
							   size);
	}
	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
}

/* Buckets leave the pair as a hole; only the accounted length shrinks */
static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	le16_add_cpu(&loc->xl_header->xh_name_value_len,
		     -namevalue_size_xe(loc->xl_entry));
}

static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int count = le16_to_cpu(xh->xh_count);
	int low = 0, high = count - 1, tmp;
	struct ocfs2_xattr_entry *tmp_xe;

	/*
	 * We keep buckets sorted by name_hash, so we need to find
	 * our insert place.
	 */
	while (low <= high && count) {
		tmp = (low + high) / 2;
		tmp_xe = &xh->xh_entries[tmp];

		if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
			low = tmp + 1;
		else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
			high = tmp - 1;
		else {
			low = tmp;
			break;
		}
	}

	/* Shift later entries up to open a slot at the insert position */
	if (low != count)
		memmove(&xh->xh_entries[low + 1],
			&xh->xh_entries[low],
			((count - low) * sizeof(struct ocfs2_xattr_entry)));

	le16_add_cpu(&xh->xh_count, 1);
	loc->xl_entry = &xh->xh_entries[low];
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}

static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
{
	int free_start = ocfs2_xa_get_free_start(loc);
	struct ocfs2_xattr_header *xh = loc->xl_header;
	struct super_block *sb = loc->xl_inode->i_sb;
	int nameval_offset;

	/* Keep the pair inside a single block, then update bookkeeping */
	free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
	nameval_offset = free_start - size;
	loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
	xh->xh_free_start = cpu_to_le16(nameval_offset);
	le16_add_cpu(&xh->xh_name_value_len, size);

}

static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
					   struct ocfs2_xattr_value_buf *vb)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	struct super_block *sb = loc->xl_inode->i_sb;
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int size = namevalue_size_xe(loc->xl_entry);
	int block_offset = nameval_offset >> sb->s_blocksize_bits;

	/* Values are not allowed to straddle block boundaries */
	BUG_ON(block_offset !=
	       ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
	/* We expect the bucket to be filled in */
	BUG_ON(!bucket->bu_bhs[block_offset]);

	vb->vb_access = ocfs2_journal_access;
	vb->vb_bh = bucket->bu_bhs[block_offset];
}

/* Operations for xattrs stored in buckets. */
static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
	.xlo_journal_access = ocfs2_xa_bucket_journal_access,
	.xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty,
	.xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer,
	.xlo_check_space = ocfs2_xa_bucket_check_space,
	.xlo_can_reuse = ocfs2_xa_bucket_can_reuse,
	.xlo_get_free_start = ocfs2_xa_bucket_get_free_start,
	.xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue,
	.xlo_add_entry = ocfs2_xa_bucket_add_entry,
	.xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue,
	.xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf,
};

/* Clusters currently allocated to the entry's external value (0 if local) */
static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_value_buf vb;

	if (ocfs2_xattr_is_local(loc->xl_entry))
		return 0;

	ocfs2_xa_fill_value_buf(loc, &vb);
	return le32_to_cpu(vb.vb_xv->xr_clusters);
}

static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
				   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int trunc_rc, access_rc;
	struct
		ocfs2_xattr_value_buf vb;

	ocfs2_xa_fill_value_buf(loc, &vb);
	trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
					      ctxt);

	/*
	 * The caller of ocfs2_xa_value_truncate() has already called
	 * ocfs2_xa_journal_access on the loc. However, The truncate code
	 * calls ocfs2_extend_trans(). This may commit the previous
	 * transaction and open a new one. If this is a bucket, truncate
	 * could leave only vb->vb_bh set up for journaling. Meanwhile,
	 * the caller is expecting to dirty the entire bucket. So we must
	 * reset the journal work. We do this even if truncate has failed,
	 * as it could have failed after committing the extend.
	 */
	access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
					    OCFS2_JOURNAL_ACCESS_WRITE);

	/* Errors in truncate take precedence */
	return trunc_rc ? trunc_rc : access_rc;
}

/* Remove loc->xl_entry, compacting the entry array above it */
static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
{
	int index, count;
	struct ocfs2_xattr_header *xh = loc->xl_header;
	struct ocfs2_xattr_entry *entry = loc->xl_entry;

	ocfs2_xa_wipe_namevalue(loc);
	loc->xl_entry = NULL;

	count = le16_to_cpu(xh->xh_count) - 1;

	/*
	 * Only zero out the entry if there are more remaining. This is
	 * important for an empty bucket, as it keeps track of the
	 * bucket's hash value. It doesn't hurt empty block storage.
	 */
	if (count) {
		index = ((char *)entry - (char *)&xh->xh_entries) /
			sizeof(struct ocfs2_xattr_entry);
		memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
			(count - index) * sizeof(struct ocfs2_xattr_entry));
		memset(&xh->xh_entries[count], 0,
		       sizeof(struct ocfs2_xattr_entry));
	}

	xh->xh_count = cpu_to_le16(count);
}

/*
 * If we have a problem adjusting the size of an external value during
 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
 * in an intermediate state. For example, the value may be partially
 * truncated.
 *
 * If the value tree hasn't changed, the extend/truncate went nowhere.
 * We have nothing to do. The caller can treat it as a straight error.
 *
 * If the value tree got partially truncated, we now have a corrupted
 * extended attribute. We're going to wipe its entry and leak the
 * clusters. Better to leak some storage than leave a corrupt entry.
 *
 * If the value tree grew, it obviously didn't grow enough for the
 * new entry. We're not going to try and reclaim those clusters either.
 * If there was already an external value there (orig_clusters != 0),
 * the new clusters are attached safely and we can just leave the old
 * value in place. If there was no external value there, we remove
 * the entry.
 *
 * This way, the xattr block we store in the journal will be consistent.
 * If the size change broke because of the journal, no changes will hit
 * disk anyway.
 */
static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
					    const char *what,
					    unsigned int orig_clusters)
{
	unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
	char *nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));

	if (new_clusters < orig_clusters) {
		mlog(ML_ERROR,
		     "Partial truncate while %s xattr %.*s. Leaking "
		     "%u clusters and removing the entry\n",
		     what, loc->xl_entry->xe_name_len, nameval_buf,
		     orig_clusters - new_clusters);
		ocfs2_xa_remove_entry(loc);
	} else if (!orig_clusters) {
		mlog(ML_ERROR,
		     "Unable to allocate an external value for xattr "
		     "%.*s safely. Leaking %u clusters and removing the "
		     "entry\n",
		     loc->xl_entry->xe_name_len, nameval_buf,
		     new_clusters - orig_clusters);
		ocfs2_xa_remove_entry(loc);
	} else if (new_clusters > orig_clusters)
		mlog(ML_ERROR,
		     "Unable to grow xattr %.*s safely. %u new clusters "
		     "have been added, but the value will not be "
		     "modified\n",
		     loc->xl_entry->xe_name_len, nameval_buf,
		     new_clusters - orig_clusters);
}

static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
			   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;

	/* External values must be truncated before dropping the entry */
	if (!ocfs2_xattr_is_local(loc->xl_entry)) {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
		if (rc) {
			mlog_errno(rc);
			/*
			 * Since this is remove, we can return 0 if
			 * ocfs2_xa_cleanup_value_truncate() is going to
			 * wipe the entry anyway. So we check the
			 * cluster count as well.
			 */
			if (orig_clusters != ocfs2_xa_value_clusters(loc))
				rc = 0;
			ocfs2_xa_cleanup_value_truncate(loc, "removing",
							orig_clusters);
			goto out;
		}
	}

	ocfs2_xa_remove_entry(loc);

out:
	return rc;
}

/* Copy the default (empty) value tree root in after the entry's name */
static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
{
	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
	char *nameval_buf;

	nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));
	memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
}

/*
 * Take an existing entry and make it ready for the new value. This
 * won't allocate space, but it may free space. It should be ready for
 * ocfs2_xa_prepare_entry() to finish the work.
 */
static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	unsigned int orig_clusters;
	char *nameval_buf;
	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;

	/* Reuse requires identical (padded) name sizes */
	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
	       name_size);

	nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));
	if (xe_local) {
		memset(nameval_buf + name_size, 0,
		       namevalue_size_xe(loc->xl_entry) - name_size);
		if (!xi_local)
			ocfs2_xa_install_value_root(loc);
	} else {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		if (xi_local) {
			/* external -> local: drop the whole value tree */
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc < 0)
				mlog_errno(rc);
			else
				memset(nameval_buf + name_size, 0,
				       namevalue_size_xe(loc->xl_entry) -
				       name_size);
		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
			   xi->xi_value_len) {
			/* external -> smaller external: shrink the tree */
			rc = ocfs2_xa_value_truncate(loc,
						     xi->xi_value_len,
						     ctxt);
			if (rc < 0)
				mlog_errno(rc);
		}

		if (rc) {
			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
							orig_clusters);
			goto out;
		}
	}

	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	ocfs2_xattr_set_local(loc->xl_entry, xi_local);

out:
	return rc;
}

/*
 * Prepares loc->xl_entry to receive the new xattr. This includes
 * properly setting up the name+value pair region. If loc->xl_entry
 * already exists, it will take care of modifying it appropriately.
 *
 * Note that this modifies the data. You did journal_access already,
 * right?
 */
static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
				  struct ocfs2_xattr_info *xi,
				  u32 name_hash,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;
	__le64 orig_value_size = 0;

	rc = ocfs2_xa_check_space(loc, xi);
	if (rc)
		goto out;

	if (loc->xl_entry) {
		/* Fast path: the existing pair region can be reused */
		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
			orig_value_size = loc->xl_entry->xe_value_size;
			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
			if (rc)
				goto out;
			goto alloc_value;
		}

		/* Otherwise drop the old value and pair region entirely */
		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
			orig_clusters = ocfs2_xa_value_clusters(loc);
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc) {
				mlog_errno(rc);
				ocfs2_xa_cleanup_value_truncate(loc,
								"overwriting",
								orig_clusters);
				goto out;
			}
		}
		ocfs2_xa_wipe_namevalue(loc);
	} else
		ocfs2_xa_add_entry(loc, name_hash);

	/*
	 * If we get here, we have a blank entry. Fill it. We grow our
	 * name+value pair back from the end.
	 */
	ocfs2_xa_add_namevalue(loc, xi);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xa_install_value_root(loc);

alloc_value:
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* Grow (or shrink) the external value tree to fit */
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
		if (rc < 0) {
			ctxt->set_abort = 1;
			ocfs2_xa_cleanup_value_truncate(loc, "growing",
							orig_clusters);
			/*
			 * If we were growing an existing value,
			 * ocfs2_xa_cleanup_value_truncate() won't remove
			 * the entry. We need to restore the original value
			 * size.
			 */
			if (loc->xl_entry) {
				BUG_ON(!orig_value_size);
				loc->xl_entry->xe_value_size = orig_value_size;
			}
			mlog_errno(rc);
		}
	}

out:
	return rc;
}

/*
 * Store the value portion of the name+value pair. This will skip
 * values that are stored externally. Their tree roots were set up
 * by ocfs2_xa_prepare_entry().
2231 */ 2232 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2233 struct ocfs2_xattr_info *xi, 2234 struct ocfs2_xattr_set_ctxt *ctxt) 2235 { 2236 int rc = 0; 2237 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2238 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2239 char *nameval_buf; 2240 struct ocfs2_xattr_value_buf vb; 2241 2242 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2243 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2244 ocfs2_xa_fill_value_buf(loc, &vb); 2245 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2246 ctxt->handle, &vb, 2247 xi->xi_value, 2248 xi->xi_value_len); 2249 } else 2250 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2251 2252 return rc; 2253 } 2254 2255 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2256 struct ocfs2_xattr_info *xi, 2257 struct ocfs2_xattr_set_ctxt *ctxt) 2258 { 2259 int ret; 2260 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2261 xi->xi_name_len); 2262 2263 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2264 OCFS2_JOURNAL_ACCESS_WRITE); 2265 if (ret) { 2266 mlog_errno(ret); 2267 goto out; 2268 } 2269 2270 /* 2271 * From here on out, everything is going to modify the buffer a 2272 * little. Errors are going to leave the xattr header in a 2273 * sane state. Thus, even with errors we dirty the sucker. 
2274 */ 2275 2276 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2277 if (!xi->xi_value) { 2278 ret = ocfs2_xa_remove(loc, ctxt); 2279 goto out_dirty; 2280 } 2281 2282 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2283 if (ret) { 2284 if (ret != -ENOSPC) 2285 mlog_errno(ret); 2286 goto out_dirty; 2287 } 2288 2289 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2290 if (ret) 2291 mlog_errno(ret); 2292 2293 out_dirty: 2294 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2295 2296 out: 2297 return ret; 2298 } 2299 2300 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2301 struct inode *inode, 2302 struct buffer_head *bh, 2303 struct ocfs2_xattr_entry *entry) 2304 { 2305 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2306 2307 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2308 2309 loc->xl_inode = inode; 2310 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2311 loc->xl_storage = bh; 2312 loc->xl_entry = entry; 2313 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2314 loc->xl_header = 2315 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2316 loc->xl_size); 2317 } 2318 2319 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2320 struct inode *inode, 2321 struct buffer_head *bh, 2322 struct ocfs2_xattr_entry *entry) 2323 { 2324 struct ocfs2_xattr_block *xb = 2325 (struct ocfs2_xattr_block *)bh->b_data; 2326 2327 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2328 2329 loc->xl_inode = inode; 2330 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2331 loc->xl_storage = bh; 2332 loc->xl_header = &(xb->xb_attrs.xb_header); 2333 loc->xl_entry = entry; 2334 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2335 xb_attrs.xb_header); 2336 } 2337 2338 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2339 struct ocfs2_xattr_bucket *bucket, 2340 struct ocfs2_xattr_entry *entry) 2341 { 2342 loc->xl_inode = bucket->bu_inode; 2343 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2344 loc->xl_storage = bucket; 2345 loc->xl_header = bucket_xh(bucket); 2346 loc->xl_entry = entry; 2347 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2348 } 2349 2350 /* 2351 * In xattr remove, if it is stored outside and refcounted, we may have 2352 * the chance to split the refcount tree. So need the allocators. 2353 */ 2354 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2355 struct ocfs2_xattr_value_root *xv, 2356 struct ocfs2_caching_info *ref_ci, 2357 struct buffer_head *ref_root_bh, 2358 struct ocfs2_alloc_context **meta_ac, 2359 int *ref_credits) 2360 { 2361 int ret, meta_add = 0; 2362 u32 p_cluster, num_clusters; 2363 unsigned int ext_flags; 2364 2365 *ref_credits = 0; 2366 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2367 &num_clusters, 2368 &xv->xr_list, 2369 &ext_flags); 2370 if (ret) { 2371 mlog_errno(ret); 2372 goto out; 2373 } 2374 2375 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2376 goto out; 2377 2378 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2379 ref_root_bh, xv, 2380 &meta_add, ref_credits); 2381 if (ret) { 2382 mlog_errno(ret); 2383 goto out; 2384 } 2385 2386 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2387 meta_add, meta_ac); 2388 if (ret) 2389 mlog_errno(ret); 2390 2391 out: 2392 return ret; 2393 } 2394 2395 static int ocfs2_remove_value_outside(struct inode*inode, 2396 struct ocfs2_xattr_value_buf *vb, 2397 struct ocfs2_xattr_header *header, 2398 struct ocfs2_caching_info *ref_ci, 2399 struct buffer_head *ref_root_bh) 2400 { 2401 int ret = 0, i, ref_credits; 2402 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2403 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2404 void *val; 2405 2406 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2407 2408 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2409 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2410 2411 if (ocfs2_xattr_is_local(entry)) 2412 continue; 2413 2414 val = (void *)header + 2415 
le16_to_cpu(entry->xe_name_offset); 2416 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2417 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2418 2419 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2420 ref_ci, ref_root_bh, 2421 &ctxt.meta_ac, 2422 &ref_credits); 2423 2424 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2425 ocfs2_remove_extent_credits(osb->sb)); 2426 if (IS_ERR(ctxt.handle)) { 2427 ret = PTR_ERR(ctxt.handle); 2428 mlog_errno(ret); 2429 break; 2430 } 2431 2432 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2433 2434 ocfs2_commit_trans(osb, ctxt.handle); 2435 if (ctxt.meta_ac) { 2436 ocfs2_free_alloc_context(ctxt.meta_ac); 2437 ctxt.meta_ac = NULL; 2438 } 2439 2440 if (ret < 0) { 2441 mlog_errno(ret); 2442 break; 2443 } 2444 2445 } 2446 2447 if (ctxt.meta_ac) 2448 ocfs2_free_alloc_context(ctxt.meta_ac); 2449 ocfs2_schedule_truncate_log_flush(osb, 1); 2450 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2451 return ret; 2452 } 2453 2454 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2455 struct buffer_head *di_bh, 2456 struct ocfs2_caching_info *ref_ci, 2457 struct buffer_head *ref_root_bh) 2458 { 2459 2460 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2461 struct ocfs2_xattr_header *header; 2462 int ret; 2463 struct ocfs2_xattr_value_buf vb = { 2464 .vb_bh = di_bh, 2465 .vb_access = ocfs2_journal_access_di, 2466 }; 2467 2468 header = (struct ocfs2_xattr_header *) 2469 ((void *)di + inode->i_sb->s_blocksize - 2470 le16_to_cpu(di->i_xattr_inline_size)); 2471 2472 ret = ocfs2_remove_value_outside(inode, &vb, header, 2473 ref_ci, ref_root_bh); 2474 2475 return ret; 2476 } 2477 2478 struct ocfs2_rm_xattr_bucket_para { 2479 struct ocfs2_caching_info *ref_ci; 2480 struct buffer_head *ref_root_bh; 2481 }; 2482 2483 static int ocfs2_xattr_block_remove(struct inode *inode, 2484 struct buffer_head *blk_bh, 2485 struct ocfs2_caching_info *ref_ci, 2486 struct buffer_head *ref_root_bh) 2487 { 2488 struct ocfs2_xattr_block 
*xb;
	int ret = 0;
	struct ocfs2_xattr_value_buf vb = {
		.vb_bh = blk_bh,
		.vb_access = ocfs2_journal_access_xb,
	};
	struct ocfs2_rm_xattr_bucket_para args = {
		.ref_ci = ref_ci,
		.ref_root_bh = ref_root_bh,
	};

	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		/* Flat block: walk its header directly. */
		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
		ret = ocfs2_remove_value_outside(inode, &vb, header,
						 ref_ci, ref_root_bh);
	} else
		/* Indexed block: iterate buckets via the xattr tree. */
		ret = ocfs2_iterate_xattr_index_block(inode,
						blk_bh,
						ocfs2_rm_xattr_cluster,
						&args);

	return ret;
}

/*
 * Remove the external xattr block at @block: truncate all values it
 * references, then return the block itself to its suballocator.
 */
static int ocfs2_xattr_free_block(struct inode *inode,
				  u64 block,
				  struct ocfs2_caching_info *ref_ci,
				  struct buffer_head *ref_root_bh)
{
	struct inode *xb_alloc_inode;
	struct buffer_head *xb_alloc_bh = NULL;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	int ret = 0;
	u64 blk, bg_blkno;
	u16 bit;

	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Work out which suballocator group the block came from. */
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	blk = le64_to_cpu(xb->xb_blkno);
	bit = le16_to_cpu(xb->xb_suballoc_bit);
	if (xb->xb_suballoc_loc)
		bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
	else
		bg_blkno = ocfs2_which_suballoc_group(blk, bit);

	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
				EXTENT_ALLOC_SYSTEM_INODE,
				le16_to_cpu(xb->xb_suballoc_slot));
	if (!xb_alloc_inode) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}
	inode_lock(xb_alloc_inode);

	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
				       bit, bg_blkno, 1);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);
out_unlock:
	ocfs2_inode_unlock(xb_alloc_inode, 1);
	brelse(xb_alloc_bh);
out_mutex:
	inode_unlock(xb_alloc_inode);
	iput(xb_alloc_inode);
out:
	brelse(blk_bh);
	return ret;
}

/*
 * ocfs2_xattr_remove()
 *
 * Free extended attribute resources associated with this inode.
 */
int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_refcount_tree *ref_tree = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_caching_info *ref_ci = NULL;
	handle_t *handle;
	int ret;

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return 0;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return 0;

	/* Refcounted inodes may need the refcount tree locked for CoW. */
	if (ocfs2_is_refcount_inode(inode)) {
		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
					       le64_to_cpu(di->i_refcount_loc),
					       1, &ref_tree, &ref_root_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		ref_ci = &ref_tree->rf_ci;
	}

	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
					       ref_ci, ref_root_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (di->i_xattr_loc) {
		ret = ocfs2_xattr_free_block(inode,
					     le64_to_cpu(di->i_xattr_loc),
					     ref_ci, ref_root_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* All xattr storage is gone; clear the inode's xattr state. */
	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	di->i_xattr_loc = 0;

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);
	ocfs2_update_inode_fsync_trans(handle, inode, 0);

	ocfs2_journal_dirty(handle, di_bh);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	if (ref_tree)
		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
	brelse(ref_root_bh);
	return ret;
}

/*
 * Return 1 if the inode block has room for an inline xattr area of
 * s_xattr_inline_size bytes, 0 otherwise.  The free space depends on
 * how the inode body is currently used (inline data, fast symlink,
 * or extent list).
 */
static int ocfs2_xattr_has_space_inline(struct inode *inode,
					struct ocfs2_dinode *di)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
	int free;

	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
		return 0;

	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		struct ocfs2_inline_data *idata = &di->id2.i_data;
		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
	} else if (ocfs2_inode_is_fast_symlink(inode)) {
		free = ocfs2_fast_symlink_chars(inode->i_sb) -
			le64_to_cpu(di->i_size);
	} else {
		struct ocfs2_extent_list *el = &di->id2.i_list;
		free = (le16_to_cpu(el->l_count) -
			le16_to_cpu(el->l_next_free_rec)) *
			sizeof(struct ocfs2_extent_rec);
	}
	if (free >= xattrsize)
		return 1;

	return 0;
}

/*
 * ocfs2_xattr_ibody_find()
 *
 * Find extended attribute in inode block and
 * fill search info into struct ocfs2_xattr_search.
 */
static int ocfs2_xattr_ibody_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	int ret;
	int has_space = 0;

	/* Minimum-sized blocks have no room for inline xattrs. */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
		return 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
		down_read(&oi->ip_alloc_sem);
		has_space = ocfs2_xattr_has_space_inline(inode, di);
		up_read(&oi->ip_alloc_sem);
		if (!has_space)
			return 0;
	}

	xs->xattr_bh = xs->inode_bh;
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	else
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	/* Find the named attribute. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
		if (ret && ret != -ENODATA)
			return ret;
		xs->not_found = ret;
	}

	return 0;
}

/*
 * Carve out the inline xattr area at the tail of the inode block by
 * shrinking whatever currently owns that space (inline data or the
 * extent list), then mark the inode as having inline xattrs.
 */
static int ocfs2_xattr_ibody_init(struct inode *inode,
				  struct buffer_head *di_bh,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	unsigned int xattrsize = osb->s_xattr_inline_size;

	if (!ocfs2_xattr_has_space_inline(inode, di)) {
		ret = -ENOSPC;
		goto out;
	}

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Adjust extent record count or inline data size
	 * to reserve space for extended attribute.
	 */
	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		struct ocfs2_inline_data *idata = &di->id2.i_data;
		le16_add_cpu(&idata->id_count, -xattrsize);
	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
		struct ocfs2_extent_list *el = &di->id2.i_list;
		le16_add_cpu(&el->l_count, -(xattrsize /
					     sizeof(struct ocfs2_extent_rec)));
	}
	di->i_xattr_inline_size = cpu_to_le16(xattrsize);

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, di_bh);

out:
	return ret;
}

/*
 * ocfs2_xattr_ibody_set()
 *
 * Set, replace or remove an extended attribute into inode block.
 *
 */
static int ocfs2_xattr_ibody_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_xa_loc loc;

	/* Minimum-sized blocks cannot hold inline xattrs. */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
		return -ENOSPC;

	down_write(&oi->ip_alloc_sem);
	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
		/* First inline xattr; carve out the area first. */
		ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
		if (ret) {
			if (ret != -ENOSPC)
				mlog_errno(ret);
			goto out;
		}
	}

	ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
				 xs->not_found ? NULL : xs->here);
	ret = ocfs2_xa_set(&loc, xi, ctxt);
	if (ret) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out;
	}
	xs->here = loc.xl_entry;

out:
	up_write(&oi->ip_alloc_sem);

	return ret;
}

/*
 * ocfs2_xattr_block_find()
 *
 * Find extended attribute in external block and
 * fill search info into struct ocfs2_xattr_search.
 */
static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	xs->xattr_bh = blk_bh;
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		/* Flat block: search the header in place. */
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
		xs->here = xs->header->xh_entries;

		ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
	} else
		/* Indexed block: search via the bucket tree. */
		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
						   name_index,
						   name, xs);

	if (ret && ret != -ENODATA) {
		xs->xattr_bh = NULL;
		goto cleanup;
	}
	xs->not_found = ret;
	return 0;
cleanup:
	brelse(blk_bh);

	return ret;
}

/*
 * Allocate and initialize a fresh external xattr block, link it into
 * the inode (di->i_xattr_loc), and return its buffer in *ret_bh.
 * When @indexed is set the block starts life as an empty xattr tree
 * root instead of a flat header.
 */
static int ocfs2_create_xattr_block(struct inode *inode,
				    struct buffer_head *inode_bh,
				    struct ocfs2_xattr_set_ctxt *ctxt,
				    int indexed,
				    struct buffer_head **ret_bh)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 suballoc_loc, first_blkno;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data;
	struct buffer_head *new_bh = NULL;
	struct ocfs2_xattr_block *xblk;

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
				   &suballoc_loc, &suballoc_bit_start,
				   &num_got, &first_blkno);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	new_bh = sb_getblk(inode->i_sb, first_blkno);
	if (!new_bh) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto end;
	}

	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);

	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
				      new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	/* Initialize ocfs2_xattr_block */
	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
	memset(xblk, 0, inode->i_sb->s_blocksize);
	strscpy(xblk->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE);
	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	xblk->xb_fs_generation =
		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
	xblk->xb_blkno = cpu_to_le64(first_blkno);
	if (indexed) {
		/* Set up an empty bucket-index tree root. */
		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
		xr->xt_clusters = cpu_to_le32(1);
		xr->xt_last_eb_blk = 0;
		xr->xt_list.l_tree_depth = 0;
		xr->xt_list.l_count = cpu_to_le16(
					ocfs2_xattr_recs_per_xb(inode->i_sb));
		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
	}
	ocfs2_journal_dirty(ctxt->handle, new_bh);

	/* Add it to the inode */
	di->i_xattr_loc = cpu_to_le64(first_blkno);

	spin_lock(&OCFS2_I(inode)->ip_lock);
	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, inode_bh);

	*ret_bh = new_bh;
	new_bh = NULL;

end:
	brelse(new_bh);
	return ret;
}

/*
 * ocfs2_xattr_block_set()
 *
 * Set, replace or remove an extended attribute into external block.
 *
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct buffer_head *new_bh = NULL;
	struct ocfs2_xattr_block *xblk = NULL;
	int ret;
	struct ocfs2_xa_loc loc;

	if (!xs->xattr_bh) {
		/* No external block yet; create a flat one. */
		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
					       0, &new_bh);
		if (ret) {
			mlog_errno(ret);
			goto end;
		}

		xs->xattr_bh = new_bh;
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xblk->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	} else
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;

	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		ret = ocfs2_xa_set(&loc, xi, ctxt);
		if (!ret)
			xs->here = loc.xl_entry;
		else if ((ret != -ENOSPC) || ctxt->set_abort)
			goto end;
		else {
			/* Flat block is full; convert it to an index. */
			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
			if (ret)
				goto end;
		}
	}

	/* Retry in the indexed tree (either pre-existing or just built). */
	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);

end:
	return ret;
}

/* Check whether the new xattr can be inserted into the inode.
 */
static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
				       struct ocfs2_xattr_info *xi,
				       struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *last;
	int free, i;
	size_t min_offs = xs->end - xs->base;

	if (!xs->header)
		return 0;

	last = xs->header->xh_entries;

	/* Find the lowest name+value offset; entries grow from the end. */
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		size_t offs = le16_to_cpu(last->xe_name_offset);
		if (offs < min_offs)
			min_offs = offs;
		last += 1;
	}

	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
	if (free < 0)
		return 0;

	BUG_ON(!xs->not_found);

	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
		return 1;

	return 0;
}

/*
 * Estimate the clusters, metadata blocks and journal credits a set
 * operation will need, based on where the xattr currently lives (inode,
 * block, or bucket) and where the new value will be stored.  Any of the
 * three out-parameters may be NULL if the caller doesn't need it.
 */
static int ocfs2_calc_xattr_set_need(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     int *clusters_need,
				     int *meta_need,
				     int *credits_need)
{
	int ret = 0, old_in_xb = 0;
	int clusters_add = 0, meta_add = 0, credits = 0;
	struct buffer_head *bh = NULL;
	struct ocfs2_xattr_block *xb = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	struct ocfs2_xattr_value_root *xv = NULL;
	char *base = NULL;
	int name_offset, name_len = 0;
	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
						    xi->xi_value_len);
	u64 value_size;

	/*
	 * Calculate the clusters we need to write.
	 * No matter whether we replace an old one or add a new one,
	 * we need this for writing.
	 */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		credits += new_clusters *
			   ocfs2_clusters_to_blocks(inode->i_sb, 1);

	if (xis->not_found && xbs->not_found) {
		/* Brand new xattr: no old storage to account for. */
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);

		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			clusters_add += new_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							&def_xv.xv.xr_list);
		}

		goto meta_guess;
	}

	if (!xis->not_found) {
		/* Old xattr lives in the inode body. */
		xe = xis->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		base = xis->base;
		credits += OCFS2_INODE_UPDATE_CREDITS;
	} else {
		/* Old xattr lives in the external block (or a bucket). */
		int i, block_off = 0;
		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
		xe = xbs->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		i = xbs->here - xbs->header->xh_entries;
		old_in_xb = 1;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xbs->bucket),
							i, &block_off,
							&name_offset);
			base = bucket_block(xbs->bucket, block_off);
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		} else {
			base = xbs->base;
			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
		}
	}

	/*
	 * delete a xattr doesn't need metadata and cluster allocation.
	 * so just calculate the credits and return.
	 *
	 * The credits for removing the value tree will be extended
	 * by ocfs2_remove_extent itself.
	 */
	if (!xi->xi_value) {
		if (!ocfs2_xattr_is_local(xe))
			credits += ocfs2_remove_extent_credits(inode->i_sb);

		goto out;
	}

	/* do cluster allocation guess first. */
	value_size = le64_to_cpu(xe->xe_value_size);

	if (old_in_xb) {
		/*
		 * In xattr set, we always try to set the xe in inode first,
		 * so if it can be inserted into inode successfully, the old
		 * one will be removed from the xattr block, and this xattr
		 * will be inserted into inode as a new xattr in inode.
		 */
		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
			clusters_add += new_clusters;
			credits += ocfs2_remove_extent_credits(inode->i_sb) +
				   OCFS2_INODE_UPDATE_CREDITS;
			if (!ocfs2_xattr_is_local(xe))
				credits += ocfs2_calc_extend_credits(
							inode->i_sb,
							&def_xv.xv.xr_list);
			goto out;
		}
	}

	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* the new values will be stored outside. */
		u32 old_clusters = 0;

		if (!ocfs2_xattr_is_local(xe)) {
			old_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
								value_size);
			xv = (struct ocfs2_xattr_value_root *)
			     (base + name_offset + name_len);
			value_size = OCFS2_XATTR_ROOT_SIZE;
		} else
			xv = &def_xv.xv;

		if (old_clusters >= new_clusters) {
			/* Shrinking: only removal credits are needed. */
			credits += ocfs2_remove_extent_credits(inode->i_sb);
			goto out;
		} else {
			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
			clusters_add += new_clusters - old_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     &xv->xr_list);
			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
				goto out;
		}
	} else {
		/*
		 * Now the new value will be stored inside. So if the new
		 * value is smaller than the size of value root or the old
		 * value, we don't need any allocation, otherwise we have
		 * to guess metadata allocation.
		 */
		if ((ocfs2_xattr_is_local(xe) &&
		     (value_size >= xi->xi_value_len)) ||
		    (!ocfs2_xattr_is_local(xe) &&
		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
			goto out;
	}

meta_guess:
	/* calculate metadata allocation. */
	if (di->i_xattr_loc) {
		if (!xbs->xattr_bh) {
			ret = ocfs2_read_xattr_block(inode,
						le64_to_cpu(di->i_xattr_loc),
						&bh);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			xb = (struct ocfs2_xattr_block *)bh->b_data;
		} else
			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;

		/*
		 * If there is already an xattr tree, good, we can calculate
		 * like other b-trees. Otherwise we may have the chance of
		 * create a tree, the credit calculation is borrowed from
		 * ocfs2_calc_extend_credits with root_el = NULL. And the
		 * new tree will be cluster based, so no meta is needed.
		 */
		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			struct ocfs2_extent_list *el =
				 &xb->xb_attrs.xb_root.xt_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el);
		} else
			credits += OCFS2_SUBALLOC_ALLOC + 1;

		/*
		 * This cluster will be used either for new bucket or for
		 * new xattr block.
		 * If the cluster size is the same as the bucket size, one
		 * more is needed since we may need to extend the bucket
		 * also.
		 */
		clusters_add += 1;
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		if (OCFS2_XATTR_BUCKET_SIZE ==
			OCFS2_SB(inode->i_sb)->s_clustersize) {
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
			clusters_add += 1;
		}
	} else {
		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el);
		} else {
			meta_add += 1;
		}
	}
out:
	if (clusters_need)
		*clusters_need = clusters_add;
	if (meta_need)
		*meta_need = meta_add;
	if (credits_need)
		*credits_need = credits;
	brelse(bh);
	return ret;
}

/*
 * Reserve the metadata blocks and clusters a set operation will need
 * (as estimated by ocfs2_calc_xattr_set_need()) and fill in ctxt.
 * On error the metadata reservation is released before returning.
 */
static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     struct ocfs2_xattr_set_ctxt *ctxt,
				     int extra_meta,
				     int *credits)
{
	int clusters_add, meta_add, ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));

	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);

	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
					&clusters_add, &meta_add, credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	meta_add += extra_meta;
	trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
					clusters_add, *credits);

	if (meta_add) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
							&ctxt->meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (clusters_add) {
		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
		if (ret)
			mlog_errno(ret);
	}
out:
	if (ret) {
		if (ctxt->meta_ac) {
			ocfs2_free_alloc_context(ctxt->meta_ac);
			ctxt->meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null ctxt->data_ac.
		 */
	}

	return ret;
}

static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/* Remove existing extended attribute */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to set extended attribute into inode first*/
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * If succeed and that extended attribute existing in
			 * external block, then we will remove it.
			 */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * Temporarily mark the ibody search "not found" so
			 * the credit calculation is done for a block removal.
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * If succeed and that extended attribute
				 * existing in inode, we will remove it.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode_set_ctime_current(inode);
		di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
		di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}

/*
 * This function is only called during inode creation,
 * for init security/acl xattrs of the new inode.
 * All transaction credits have been reserved in mknod.
 */
int ocfs2_xattr_set_handle(handle_t *handle,
			   struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   const void *value,
			   size_t value_len,
			   int flags,
			   struct ocfs2_alloc_context *meta_ac,
			   struct ocfs2_alloc_context *data_ac)
{
	struct ocfs2_dinode *di;
	int ret;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_set_ctxt ctxt = {
		.handle = handle,
		.meta_ac = meta_ac,
		.data_ac = data_ac,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	/*
	 * In extreme situation, may need xattr bucket when
	 * block size is too small. And we have already reserved
	 * the credits for bucket in mknod.
	 */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
		xbs.bucket = ocfs2_xattr_bucket_new(inode);
		if (!xbs.bucket) {
			mlog_errno(-ENOMEM);
			return -ENOMEM;
		}
	}

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);

	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);

cleanup:
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}

/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
 */
int ocfs2_xattr_set(struct inode *inode,
		    int name_index,
		    const char *name,
		    const void *value,
		    size_t value_len,
		    int flags)
{
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
	struct ocfs2_refcount_tree *ref_tree = NULL;
	struct ocfs2_lock_holder oh;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(osb))
		return -EOPNOTSUPP;

	/*
	 * Only xbs will be used on indexed trees. xis doesn't need a
	 * bucket.
	 */
	xbs.bucket = ocfs2_xattr_bucket_new(inode);
	if (!xbs.bucket) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
	if (had_lock < 0) {
		ret = had_lock;
		mlog_errno(ret);
		goto cleanup_nolock;
	}
	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);
	/*
	 * Scan inode and external block to find the same name
	 * extended attribute and collect search information.
	 */
	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	/* Honor XATTR_REPLACE / XATTR_CREATE semantics. */
	if (xis.not_found && xbs.not_found) {
		ret = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		ret = 0;
		if (!value)
			goto cleanup;
	} else {
		ret = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}

	/* Check whether the value is refcounted and do some preparation. */
	if (ocfs2_is_refcount_inode(inode) &&
	    (!xis.not_found || !xbs.not_found)) {
		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
						   &xis, &xbs, &ref_tree,
						   &ref_meta, &ref_credits);
		if (ret) {
			mlog_errno(ret);
			goto cleanup;
		}
	}

	inode_lock(tl_inode);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			inode_unlock(tl_inode);
			mlog_errno(ret);
			goto cleanup;
		}
	}
	inode_unlock(tl_inode);

	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
					&xbs, &ctxt, ref_meta, &credits);
	if (ret) {
		mlog_errno(ret);
		goto cleanup;
	}

	/* we need to update inode's ctime field, so add credit for it. */
	credits += OCFS2_INODE_UPDATE_CREDITS;
	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
	if (IS_ERR(ctxt.handle)) {
		ret = PTR_ERR(ctxt.handle);
		mlog_errno(ret);
		goto out_free_ac;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
	ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);

	ocfs2_commit_trans(osb, ctxt.handle);

out_free_ac:
	if (ctxt.data_ac)
		ocfs2_free_alloc_context(ctxt.data_ac);
	if (ctxt.meta_ac)
		ocfs2_free_alloc_context(ctxt.meta_ac);
	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
		ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &ctxt.dealloc);

cleanup:
	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	if (!value && !ret) {
		/* A successful removal may leave an empty refcount tree. */
		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
		if (ret)
			mlog_errno(ret);
	}
	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
cleanup_nolock:
	brelse(di_bh);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}

/*
 * Find the xattr extent rec which may contain name_hash.
 * e_cpos will be the first name hash of the xattr rec.
 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3717 */ 3718 static int ocfs2_xattr_get_rec(struct inode *inode, 3719 u32 name_hash, 3720 u64 *p_blkno, 3721 u32 *e_cpos, 3722 u32 *num_clusters, 3723 struct ocfs2_extent_list *el) 3724 { 3725 int ret = 0, i; 3726 struct buffer_head *eb_bh = NULL; 3727 struct ocfs2_extent_block *eb; 3728 struct ocfs2_extent_rec *rec = NULL; 3729 u64 e_blkno = 0; 3730 3731 if (el->l_tree_depth) { 3732 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3733 &eb_bh); 3734 if (ret) { 3735 mlog_errno(ret); 3736 goto out; 3737 } 3738 3739 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3740 el = &eb->h_list; 3741 3742 if (el->l_tree_depth) { 3743 ret = ocfs2_error(inode->i_sb, 3744 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3745 inode->i_ino, 3746 (unsigned long long)eb_bh->b_blocknr); 3747 goto out; 3748 } 3749 } 3750 3751 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3752 rec = &el->l_recs[i]; 3753 3754 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3755 e_blkno = le64_to_cpu(rec->e_blkno); 3756 break; 3757 } 3758 } 3759 3760 if (!e_blkno) { 3761 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3762 inode->i_ino, 3763 le32_to_cpu(rec->e_cpos), 3764 ocfs2_rec_clusters(el, rec)); 3765 goto out; 3766 } 3767 3768 *p_blkno = le64_to_cpu(rec->e_blkno); 3769 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3770 if (e_cpos) 3771 *e_cpos = le32_to_cpu(rec->e_cpos); 3772 out: 3773 brelse(eb_bh); 3774 return ret; 3775 } 3776 3777 typedef int (xattr_bucket_func)(struct inode *inode, 3778 struct ocfs2_xattr_bucket *bucket, 3779 void *para); 3780 3781 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3782 struct ocfs2_xattr_bucket *bucket, 3783 int name_index, 3784 const char *name, 3785 u32 name_hash, 3786 u16 *xe_index, 3787 int *found) 3788 { 3789 int i, ret = 0, cmp = 1, block_off, new_offset; 3790 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3791 size_t name_len = strlen(name); 3792 struct 
ocfs2_xattr_entry *xe = NULL; 3793 char *xe_name; 3794 3795 /* 3796 * We don't use binary search in the bucket because there 3797 * may be multiple entries with the same name hash. 3798 */ 3799 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3800 xe = &xh->xh_entries[i]; 3801 3802 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3803 continue; 3804 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3805 break; 3806 3807 cmp = name_index - ocfs2_xattr_get_type(xe); 3808 if (!cmp) 3809 cmp = name_len - xe->xe_name_len; 3810 if (cmp) 3811 continue; 3812 3813 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3814 xh, 3815 i, 3816 &block_off, 3817 &new_offset); 3818 if (ret) { 3819 mlog_errno(ret); 3820 break; 3821 } 3822 3823 3824 xe_name = bucket_block(bucket, block_off) + new_offset; 3825 if (!memcmp(name, xe_name, name_len)) { 3826 *xe_index = i; 3827 *found = 1; 3828 ret = 0; 3829 break; 3830 } 3831 } 3832 3833 return ret; 3834 } 3835 3836 /* 3837 * Find the specified xattr entry in a series of buckets. 3838 * This series start from p_blkno and last for num_clusters. 3839 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3840 * the num of the valid buckets. 3841 * 3842 * Return the buffer_head this xattr should reside in. And if the xattr's 3843 * hash is in the gap of 2 buckets, return the lower bucket. 
 */
static int ocfs2_xattr_bucket_find(struct inode *inode,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u64 p_blkno,
				   u32 first_hash,
				   u32 num_clusters,
				   struct ocfs2_xattr_search *xs)
{
	int ret, found = 0;
	struct ocfs2_xattr_header *xh = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	u16 index = 0;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int low_bucket = 0, bucket, high_bucket;
	struct ocfs2_xattr_bucket *search;
	u64 blkno, lower_blkno = 0;

	search = ocfs2_xattr_bucket_new(inode);
	if (!search) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(search, p_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = bucket_xh(search);
	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
	/* Binary search over the buckets by each bucket's first hash. */
	while (low_bucket <= high_bucket) {
		ocfs2_xattr_bucket_relse(search);

		bucket = (low_bucket + high_bucket) / 2;
		blkno = p_blkno + bucket * blk_per_bucket;
		ret = ocfs2_read_xattr_bucket(search, blkno);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		xh = bucket_xh(search);
		xe = &xh->xh_entries[0];
		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
			high_bucket = bucket - 1;
			continue;
		}

		/*
		 * Check whether the hash of the last entry in our
		 * bucket is larger than the search one. for an empty
		 * bucket, the last one is also the first one.
		 */
		if (xh->xh_count)
			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];

		/* record lower_blkno which may be the insert place. */
		lower_blkno = blkno;

		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
			low_bucket = bucket + 1;
			continue;
		}

		/* the searched xattr should reside in this bucket if exists. */
		ret = ocfs2_find_xe_in_bucket(inode, search,
					      name_index, name, name_hash,
					      &index, &found);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		break;
	}

	/*
	 * Record the bucket we have found.
	 * When the xattr's hash value is in the gap of 2 buckets, we will
	 * always set it to the previous bucket.
	 */
	if (!lower_blkno)
		lower_blkno = p_blkno;

	/* This should be in cache - we just read it during the search */
	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xs->header = bucket_xh(xs->bucket);
	xs->base = bucket_block(xs->bucket, 0);
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (found) {
		xs->here = &xs->header->xh_entries[index];
		trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
			name, name_index, name_hash,
			(unsigned long long)bucket_blkno(xs->bucket),
			index);
	} else
		ret = -ENODATA;

out:
	ocfs2_xattr_bucket_free(search);
	return ret;
}

/*
 * Look up @name in an indexed (bucketed) xattr tree whose root block is
 * @root_bh, filling in @xs with the bucket/entry that was found.
 */
static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs)
{
	int ret;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)root_bh->b_data;
	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
	struct ocfs2_extent_list *el = &xb_root->xt_list;
	u64 p_blkno = 0;
	u32 first_hash, num_clusters = 0;
	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));

	if (le16_to_cpu(el->l_next_free_rec) == 0)
		return -ENODATA;

	trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
					name, name_index, name_hash,
					(unsigned long long)root_bh->b_blocknr,
					-1);

	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
				  &num_clusters, el);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);

	trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
					name, name_index, first_hash,
					(unsigned long long)p_blkno,
					num_clusters);

	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
				      p_blkno, first_hash, num_clusters, xs);

out:
	return ret;
}

/*
 * Walk every valid bucket in the extent starting at @blkno (@clusters
 * clusters long) and invoke @func on each.  The real bucket count is
 * read from the first bucket's xh_num_buckets.
 */
static int ocfs2_iterate_xattr_buckets(struct inode *inode,
				       u64 blkno,
				       u32 clusters,
				       xattr_bucket_func *func,
				       void *para)
{
	int i, ret = 0;
	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
	u32 num_buckets = clusters * bpc;
	struct ocfs2_xattr_bucket *bucket;

	bucket = ocfs2_xattr_bucket_new(inode);
	if (!bucket) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	trace_ocfs2_iterate_xattr_buckets(
				(unsigned long long)OCFS2_I(inode)->ip_blkno,
				(unsigned long long)blkno, clusters);

	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
		ret = ocfs2_read_xattr_bucket(bucket, blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * The real bucket num in this series of blocks is stored
		 * in the 1st bucket.
		 */
		if (i == 0)
			num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);

		trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
		if (func) {
			ret = func(inode, bucket, para);
			if (ret && ret != -ERANGE)
				mlog_errno(ret);
			/* Fall through to bucket_relse() */
		}

		ocfs2_xattr_bucket_relse(bucket);
		if (ret)
			break;
	}

	ocfs2_xattr_bucket_free(bucket);
	return ret;
}

/* Accumulator used while listing all names in an indexed xattr tree. */
struct ocfs2_xattr_tree_list {
	char *buffer;
	size_t buffer_size;
	size_t result;
};

/*
 * Translate entry @index's xe_name_offset into a (block, offset) pair
 * within its bucket.  Returns -EINVAL for an out-of-range index.
 */
static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset)
{
	u16 name_offset;

	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
		return -EINVAL;

	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);

	*block_off = name_offset >> sb->s_blocksize_bits;
	*new_offset = name_offset % sb->s_blocksize;

	return 0;
}

/* xattr_bucket_func: emit every entry of @bucket into the name list. */
static int ocfs2_list_xattr_bucket(struct inode *inode,
				   struct ocfs2_xattr_bucket *bucket,
				   void *para)
{
	int ret = 0, type;
	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
	int i, block_off, new_offset;
	const char *name;

	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
		type = ocfs2_xattr_get_type(entry);

		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(bucket),
							i,
							&block_off,
							&new_offset);
		if (ret)
			break;

		name = (const char *)bucket_block(bucket, block_off) +
			new_offset;
		ret = ocfs2_xattr_list_entry(inode->i_sb,
					     xl->buffer,
					     xl->buffer_size,
					     &xl->result,
					     type, name,
					     entry->xe_name_len);
		if (ret)
			break;
	}

	return ret;
}

/*
 * Walk the xattr extent records from the highest name hash downwards,
 * calling @rec_func on each record.  A callback return of -ERANGE stops
 * the walk without being logged as an error.
 */
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
					   struct buffer_head *blk_bh,
					   xattr_tree_rec_func *rec_func,
					   void *para)
{
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
	int ret = 0;
	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
	u64 p_blkno = 0;

	if (!el->l_next_free_rec || !rec_func)
		return 0;

	while (name_hash > 0) {
		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
					  &e_cpos, &num_clusters, el);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
			       num_clusters, para);
		if (ret) {
			if (ret != -ERANGE)
				mlog_errno(ret);
			break;
		}

		if (e_cpos == 0)
			break;

		/* Continue with the record just below this one. */
		name_hash = e_cpos - 1;
	}

	return ret;

}

/* xattr_tree_rec_func: list the buckets covered by one extent record. */
static int ocfs2_list_xattr_tree_rec(struct inode *inode,
				     struct buffer_head *root_bh,
				     u64 blkno, u32 cpos, u32 len, void *para)
{
	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
					   ocfs2_list_xattr_bucket, para);
}

/* List every xattr name stored in an indexed tree into @buffer. */
static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					     struct buffer_head *blk_bh,
					     char *buffer,
					     size_t buffer_size)
{
	int ret;
	struct ocfs2_xattr_tree_list xl = {
		.buffer = buffer,
		.buffer_size = buffer_size,
		.result = 0,
	};

	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
					ocfs2_list_xattr_tree_rec, &xl);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = xl.result;
out:
	return ret;
}

/* sort() comparator: ascending xe_name_hash order. */
static int cmp_xe(const void *a, const void *b)
{
	const struct ocfs2_xattr_entry *l = a, *r = b;
	u32 l_hash = le32_to_cpu(l->xe_name_hash);
	u32 r_hash = le32_to_cpu(r->xe_name_hash);

	if (l_hash > r_hash)
		return 1;
	if (l_hash < r_hash)
		return -1;
	return 0;
}

/*
 * When the ocfs2_xattr_block is filled up, new bucket will be created
 * and all the xattr entries will be moved to the new bucket.
 * The header goes at the start of the bucket, and the names+values are
 * filled from the end. This is why *target starts as the last buffer.
 * Note: we need to sort the entries since they are not saved in order
 * in the ocfs2_xattr_block.
 */
static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
					   struct buffer_head *xb_bh,
					   struct ocfs2_xattr_bucket *bucket)
{
	int i, blocksize = inode->i_sb->s_blocksize;
	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	u16 offset, size, off_change;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
	u16 count = le16_to_cpu(xb_xh->xh_count);
	char *src = xb_bh->b_data;
	char *target = bucket_block(bucket, blks - 1);

	trace_ocfs2_cp_xattr_block_to_bucket_begin(
				(unsigned long long)xb_bh->b_blocknr,
				(unsigned long long)bucket_blkno(bucket));

	for (i = 0; i < blks; i++)
		memset(bucket_block(bucket, i), 0, blocksize);

	/*
	 * Since the xe_name_offset is based on ocfs2_xattr_header,
	 * there is a offset change corresponding to the change of
	 * ocfs2_xattr_header's position.
	 */
	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	xe = &xb_xh->xh_entries[count - 1];
	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
	size = blocksize - offset;

	/* copy all the names and values. */
	memcpy(target + offset, src + offset, size);

	/* Init new header now. */
	xh->xh_count = xb_xh->xh_count;
	xh->xh_num_buckets = cpu_to_le16(1);
	xh->xh_name_value_len = cpu_to_le16(size);
	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);

	/* copy all the entries. */
	target = bucket_block(bucket, 0);
	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
	size = count * sizeof(struct ocfs2_xattr_entry);
	memcpy(target + offset, (char *)xb_xh + offset, size);

	/* Change the xe offset for all the xe because of the move. */
	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	for (i = 0; i < count; i++)
		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);

	trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);

	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
	     cmp_xe, NULL);
}

/*
 * After we move xattr from block to index btree, we have to
 * update ocfs2_xattr_search to the new xe and base.
 *
 * When the entry is in xattr block, xattr_bh indicates the storage place.
 * While if the entry is in index b-tree, "bucket" indicates the
 * real place of the xattr.
 */
static void ocfs2_xattr_update_xattr_search(struct inode *inode,
					    struct ocfs2_xattr_search *xs,
					    struct buffer_head *old_bh)
{
	char *buf = old_bh->b_data;
	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
	int i;

	xs->header = bucket_xh(xs->bucket);
	xs->base = bucket_block(xs->bucket, 0);
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (xs->not_found)
		return;

	/* Re-point "here" at the same entry index inside the new bucket. */
	i = xs->here - old_xh->xh_entries;
	xs->here = &xs->header->xh_entries[i];
}

/*
 * Convert an unindexed xattr block into an indexed tree: claim one
 * cluster, copy the block's entries into the first bucket there, then
 * reinitialize the block's payload as an extent-list root pointing at
 * that cluster and set OCFS2_XATTR_INDEXED.
 */
static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs,
					  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 bit_off, len;
	u64 blkno;
	handle_t *handle = ctxt->handle;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct buffer_head *xb_bh = xs->xattr_bh;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_tree_root *xr;
	u16 xb_flags = le16_to_cpu(xb->xb_flags);

	trace_ocfs2_xattr_create_index_block_begin(
				(unsigned long long)xb_bh->b_blocknr);

	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
	BUG_ON(!xs->bucket);

	/*
	 * XXX:
	 * We can use this lock for now, and maybe move to a dedicated mutex
	 * if performance becomes a problem later.
	 */
	down_write(&oi->ip_alloc_sem);

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
				     1, 1, &bit_off, &len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * The bucket may spread in many blocks, and
	 * we will only touch the 1st block and the last block
	 * in the whole bucket(one for entry and one for data).
	 */
	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

	trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);

	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
						OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);

	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);

	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
	       offsetof(struct ocfs2_xattr_block, xb_attrs));

	xr = &xb->xb_attrs.xb_root;
	xr->xt_clusters = cpu_to_le32(1);
	xr->xt_last_eb_blk = 0;
	xr->xt_list.l_tree_depth = 0;
	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
	xr->xt_list.l_next_free_rec = cpu_to_le16(1);

	xr->xt_list.l_recs[0].e_cpos = 0;
	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);

	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);

	ocfs2_journal_dirty(handle, xb_bh);

out:
	up_write(&oi->ip_alloc_sem);

	return ret;
}

static int
cmp_xe_offset(const void *a, const void *b) 4391 { 4392 const struct ocfs2_xattr_entry *l = a, *r = b; 4393 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4394 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4395 4396 if (l_name_offset < r_name_offset) 4397 return 1; 4398 if (l_name_offset > r_name_offset) 4399 return -1; 4400 return 0; 4401 } 4402 4403 /* 4404 * defrag a xattr bucket if we find that the bucket has some 4405 * holes between name/value pairs. 4406 * We will move all the name/value pairs to the end of the bucket 4407 * so that we can spare some space for insertion. 4408 */ 4409 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4410 handle_t *handle, 4411 struct ocfs2_xattr_bucket *bucket) 4412 { 4413 int ret, i; 4414 size_t end, offset, len; 4415 struct ocfs2_xattr_header *xh; 4416 char *entries, *buf, *bucket_buf = NULL; 4417 u64 blkno = bucket_blkno(bucket); 4418 u16 xh_free_start; 4419 size_t blocksize = inode->i_sb->s_blocksize; 4420 struct ocfs2_xattr_entry *xe; 4421 4422 /* 4423 * In order to make the operation more efficient and generic, 4424 * we copy all the blocks into a contiguous memory and do the 4425 * defragment there, so if anything is error, we will not touch 4426 * the real block. 
4427 */ 4428 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4429 if (!bucket_buf) { 4430 ret = -EIO; 4431 goto out; 4432 } 4433 4434 buf = bucket_buf; 4435 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4436 memcpy(buf, bucket_block(bucket, i), blocksize); 4437 4438 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4439 OCFS2_JOURNAL_ACCESS_WRITE); 4440 if (ret < 0) { 4441 mlog_errno(ret); 4442 goto out; 4443 } 4444 4445 xh = (struct ocfs2_xattr_header *)bucket_buf; 4446 entries = (char *)xh->xh_entries; 4447 xh_free_start = le16_to_cpu(xh->xh_free_start); 4448 4449 trace_ocfs2_defrag_xattr_bucket( 4450 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4451 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4452 4453 /* 4454 * sort all the entries by their offset. 4455 * the largest will be the first, so that we can 4456 * move them to the end one by one. 4457 */ 4458 sort(entries, le16_to_cpu(xh->xh_count), 4459 sizeof(struct ocfs2_xattr_entry), 4460 cmp_xe_offset, NULL); 4461 4462 /* Move all name/values to the end of the bucket. */ 4463 xe = xh->xh_entries; 4464 end = OCFS2_XATTR_BUCKET_SIZE; 4465 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4466 offset = le16_to_cpu(xe->xe_name_offset); 4467 len = namevalue_size_xe(xe); 4468 4469 /* 4470 * We must make sure that the name/value pair 4471 * exist in the same block. So adjust end to 4472 * the previous block end if needed. 
4473 */ 4474 if (((end - len) / blocksize != 4475 (end - 1) / blocksize)) 4476 end = end - end % blocksize; 4477 4478 if (end > offset + len) { 4479 memmove(bucket_buf + end - len, 4480 bucket_buf + offset, len); 4481 xe->xe_name_offset = cpu_to_le16(end - len); 4482 } 4483 4484 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4485 "bucket %llu\n", (unsigned long long)blkno); 4486 4487 end -= len; 4488 } 4489 4490 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4491 "bucket %llu\n", (unsigned long long)blkno); 4492 4493 if (xh_free_start == end) 4494 goto out; 4495 4496 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4497 xh->xh_free_start = cpu_to_le16(end); 4498 4499 /* sort the entries by their name_hash. */ 4500 sort(entries, le16_to_cpu(xh->xh_count), 4501 sizeof(struct ocfs2_xattr_entry), 4502 cmp_xe, NULL); 4503 4504 buf = bucket_buf; 4505 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4506 memcpy(bucket_block(bucket, i), buf, blocksize); 4507 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4508 4509 out: 4510 kfree(bucket_buf); 4511 return ret; 4512 } 4513 4514 /* 4515 * prev_blkno points to the start of an existing extent. new_blkno 4516 * points to a newly allocated extent. Because we know each of our 4517 * clusters contains more than bucket, we can easily split one cluster 4518 * at a bucket boundary. So we take the last cluster of the existing 4519 * extent and split it down the middle. We move the last half of the 4520 * buckets in the last cluster of the existing extent over to the new 4521 * extent. 4522 * 4523 * first_bh is the buffer at prev_blkno so we can update the existing 4524 * extent's bucket count. header_bh is the bucket were we were hoping 4525 * to insert our xattr. If the bucket move places the target in the new 4526 * extent, we'll update first_bh and header_bh after modifying the old 4527 * extent. 4528 * 4529 * first_hash will be set as the 1st xe's name_hash in the new extent. 
 */
static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
					       handle_t *handle,
					       struct ocfs2_xattr_bucket *first,
					       struct ocfs2_xattr_bucket *target,
					       u64 new_blkno,
					       u32 num_clusters,
					       u32 *first_hash)
{
	int ret;
	struct super_block *sb = inode->i_sb;
	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
	/* Move the second half of the last cluster's buckets. */
	int to_move = num_buckets / 2;
	u64 src_blkno;
	u64 last_cluster_blkno = bucket_blkno(first) +
		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));

	/* Splitting mid-cluster only makes sense if a cluster holds >1 bucket. */
	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);

	trace_ocfs2_mv_xattr_bucket_cross_cluster(
				(unsigned long long)last_cluster_blkno,
				(unsigned long long)new_blkno);

	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
				     last_cluster_blkno, new_blkno,
				     to_move, first_hash);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* This is the first bucket that got moved */
	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);

	/*
	 * If the target bucket was part of the moved buckets, we need to
	 * update first and target.
	 */
	if (bucket_blkno(target) >= src_blkno) {
		/* Find the block for the new target bucket */
		src_blkno = new_blkno +
			(bucket_blkno(target) - src_blkno);

		/* Drop the stale buffers before re-reading at the new location. */
		ocfs2_xattr_bucket_relse(first);
		ocfs2_xattr_bucket_relse(target);

		/*
		 * These shouldn't fail - the buffers are in the
		 * journal from ocfs2_cp_xattr_bucket().
		 */
		ret = ocfs2_read_xattr_bucket(first, new_blkno);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		ret = ocfs2_read_xattr_bucket(target, src_blkno);
		if (ret)
			mlog_errno(ret);

	}

out:
	return ret;
}

/*
 * Find the suitable pos when we divide a bucket into 2.
 * We have to make sure the xattrs with the same hash value exist
 * in the same bucket.
 *
 * If this ocfs2_xattr_header covers more than one hash value, find a
 * place where the hash value changes.  Try to find the most even split.
 * The most common case is that all entries have different hash values,
 * and the first check we make will find a place to split.
 */
static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
{
	struct ocfs2_xattr_entry *entries = xh->xh_entries;
	int count = le16_to_cpu(xh->xh_count);
	int delta, middle = count / 2;

	/*
	 * We start at the middle.  Each step gets farther away in both
	 * directions.  We therefore hit the change in hash value
	 * nearest to the middle.  Note that this loop does not execute for
	 * count < 2.
	 */
	for (delta = 0; delta < middle; delta++) {
		/* Let's check delta earlier than middle */
		if (cmp_xe(&entries[middle - delta - 1],
			   &entries[middle - delta]))
			return middle - delta;

		/* For even counts, don't walk off the end */
		if ((middle + delta + 1) == count)
			continue;

		/* Now try delta past middle */
		if (cmp_xe(&entries[middle + delta],
			   &entries[middle + delta + 1]))
			return middle + delta + 1;
	}

	/* Every entry had the same hash */
	return count;
}

/*
 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
 * first_hash will record the 1st hash of the new bucket.
 *
 * Normally half of the xattrs will be moved.
But we have to make
 * sure that the xattrs with the same hash value are stored in the
 * same bucket.  If all the xattrs in this bucket have the same hash
 * value, the new bucket will be initialized as an empty one and the
 * first_hash will be initialized as (hash_value+1).
 */
static int ocfs2_divide_xattr_bucket(struct inode *inode,
				     handle_t *handle,
				     u64 blk,
				     u64 new_blk,
				     u32 *first_hash,
				     int new_bucket_head)
{
	int ret, i;
	int count, start, len, name_value_len = 0, name_offset = 0;
	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
	struct ocfs2_xattr_header *xh;
	struct ocfs2_xattr_entry *xe;
	int blocksize = inode->i_sb->s_blocksize;

	trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
					      (unsigned long long)new_blk);

	s_bucket = ocfs2_xattr_bucket_new(inode);
	t_bucket = ocfs2_xattr_bucket_new(inode);
	if (!s_bucket || !t_bucket) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(s_bucket, blk);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
	 * there's no need to read it.
	 */
	ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Hey, if we're overwriting t_bucket, what difference does
	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
	 * same part of ocfs2_cp_xattr_bucket().
	 */
	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
						new_bucket_head ?
						OCFS2_JOURNAL_ACCESS_CREATE :
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = bucket_xh(s_bucket);
	count = le16_to_cpu(xh->xh_count);
	start = ocfs2_xattr_find_divide_pos(xh);

	if (start == count) {
		/* All entries share one hash value; the last entry. */
		xe = &xh->xh_entries[start-1];

		/*
		 * initialized a new empty bucket here.
		 * The hash value is set as one larger than
		 * that of the last entry in the previous bucket.
		 */
		for (i = 0; i < t_bucket->bu_blocks; i++)
			memset(bucket_block(t_bucket, i), 0, blocksize);

		xh = bucket_xh(t_bucket);
		xh->xh_free_start = cpu_to_le16(blocksize);
		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);

		goto set_num_buckets;
	}

	/* copy the whole bucket to the new first. */
	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);

	/* update the new bucket. */
	xh = bucket_xh(t_bucket);

	/*
	 * Calculate the total name/value len and xh_free_start for
	 * the old bucket first.
	 */
	name_offset = OCFS2_XATTR_BUCKET_SIZE;
	name_value_len = 0;
	for (i = 0; i < start; i++) {
		xe = &xh->xh_entries[i];
		name_value_len += namevalue_size_xe(xe);
		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
			name_offset = le16_to_cpu(xe->xe_name_offset);
	}

	/*
	 * Now begin the modification to the new bucket.
	 *
	 * In the new bucket, We just move the xattr entry to the beginning
	 * and don't touch the name/value.  So there will be some holes in the
	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
	 * called.
	 */
	/* t_bucket holds a full copy; keep only entries [start, count). */
	xe = &xh->xh_entries[start];
	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
	trace_ocfs2_divide_xattr_bucket_move(len,
			(int)((char *)xe - (char *)xh),
			(int)((char *)xh->xh_entries - (char *)xh));
	memmove((char *)xh->xh_entries, (char *)xe, len);
	xe = &xh->xh_entries[count - start];
	len = sizeof(struct ocfs2_xattr_entry) * start;
	memset((char *)xe, 0, len);

	le16_add_cpu(&xh->xh_count, -start);
	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);

	/* Calculate xh_free_start for the new bucket. */
	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
		xe = &xh->xh_entries[i];
		if (le16_to_cpu(xe->xe_name_offset) <
		    le16_to_cpu(xh->xh_free_start))
			xh->xh_free_start = xe->xe_name_offset;
	}

set_num_buckets:
	/* set xh->xh_num_buckets for the new xh. */
	if (new_bucket_head)
		xh->xh_num_buckets = cpu_to_le16(1);
	else
		xh->xh_num_buckets = 0;

	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);

	/* store the first_hash of the new bucket. */
	if (first_hash)
		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);

	/*
	 * Now only update the 1st block of the old bucket.  If we
	 * just added a new empty bucket, there is no need to modify
	 * it.
	 */
	if (start == count)
		goto out;

	xh = bucket_xh(s_bucket);
	memset(&xh->xh_entries[start], 0,
	       sizeof(struct ocfs2_xattr_entry) * (count - start));
	xh->xh_count = cpu_to_le16(start);
	xh->xh_free_start = cpu_to_le16(name_offset);
	xh->xh_name_value_len = cpu_to_le16(name_value_len);

	ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);

out:
	ocfs2_xattr_bucket_free(s_bucket);
	ocfs2_xattr_bucket_free(t_bucket);

	return ret;
}

/*
 * Copy xattr from one bucket to another bucket.
 *
 * The caller must make sure that the journal transaction
 * has enough space for journaling.
 */
static int ocfs2_cp_xattr_bucket(struct inode *inode,
				 handle_t *handle,
				 u64 s_blkno,
				 u64 t_blkno,
				 int t_is_new)
{
	int ret;
	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;

	/* Copying a bucket onto itself makes no sense. */
	BUG_ON(s_blkno == t_blkno);

	trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
				    (unsigned long long)t_blkno,
				    t_is_new);

	s_bucket = ocfs2_xattr_bucket_new(inode);
	t_bucket = ocfs2_xattr_bucket_new(inode);
	if (!s_bucket || !t_bucket) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
	if (ret)
		goto out;

	/*
	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
	 * there's no need to read it.
	 */
	ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
	if (ret)
		goto out;

	/*
	 * Hey, if we're overwriting t_bucket, what difference does
	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
	 * cluster to fill, we came here from
	 * ocfs2_mv_xattr_buckets(), and it is really new -
	 * ACCESS_CREATE is required.  But we also might have moved data
	 * out of t_bucket before extending back into it.
	 * ocfs2_add_new_xattr_bucket() can do this - its call to
	 * ocfs2_add_new_xattr_cluster() may have created a new extent
	 * and copied out the end of the old extent.  Then it re-extends
	 * the old extent back to create space for new xattrs.  That's
	 * how we get here, and the bucket isn't really new.
	 */
	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
						t_is_new ?
						OCFS2_JOURNAL_ACCESS_CREATE :
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret)
		goto out;

	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);

out:
	ocfs2_xattr_bucket_free(t_bucket);
	ocfs2_xattr_bucket_free(s_bucket);

	return ret;
}

/*
 * src_blk points to the start of an existing extent.  last_blk points to
 * last cluster in that extent.  to_blk points to a newly allocated
 * extent.  We copy the buckets from the cluster at last_blk to the new
 * extent.  If start_bucket is non-zero, we skip that many buckets before
 * we start copying.  The new extent's xh_num_buckets gets set to the
 * number of buckets we copied.  The old extent's xh_num_buckets shrinks
 * by the same amount.
 */
static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
				  u64 src_blk, u64 last_blk, u64 to_blk,
				  unsigned int start_bucket,
				  u32 *first_hash)
{
	int i, ret, credits;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
	struct ocfs2_xattr_bucket *old_first, *new_first;

	trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
				     (unsigned long long)to_blk);

	BUG_ON(start_bucket >= num_buckets);
	if (start_bucket) {
		num_buckets -= start_bucket;
		last_blk += (start_bucket * blks_per_bucket);
	}

	/* The first bucket of the original extent */
	old_first = ocfs2_xattr_bucket_new(inode);
	/* The first bucket of the new extent */
	new_first = ocfs2_xattr_bucket_new(inode);
	if (!old_first || !new_first) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We need to update the first bucket of the old extent and all
	 * the buckets going to the new extent.
	 */
	credits = ((num_buckets + 1) * blks_per_bucket);
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	for (i = 0; i < num_buckets; i++) {
		ret = ocfs2_cp_xattr_bucket(inode, handle,
					    last_blk + (i * blks_per_bucket),
					    to_blk + (i * blks_per_bucket),
					    1);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * Get the new bucket ready before we dirty anything
	 * (This actually shouldn't fail, because we already dirtied
	 * it once in ocfs2_cp_xattr_bucket()).
	 */
	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Now update the headers */
	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
	ocfs2_xattr_bucket_journal_dirty(handle, old_first);

	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
	ocfs2_xattr_bucket_journal_dirty(handle, new_first);

	if (first_hash)
		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);

out:
	ocfs2_xattr_bucket_free(new_first);
	ocfs2_xattr_bucket_free(old_first);
	return ret;
}

/*
 * Move some xattrs in this cluster to the new cluster.
 * This function should only be called when bucket size == cluster size.
 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
 */
static int ocfs2_divide_xattr_cluster(struct inode *inode,
				      handle_t *handle,
				      u64 prev_blk,
				      u64 new_blk,
				      u32 *first_hash)
{
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	/* One source bucket plus one target bucket to journal. */
	int ret, credits = 2 * blk_per_bucket;

	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);

	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* Move half of the xattr in start_blk to the next bucket. */
	return ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
					 new_blk, first_hash, 1);
}

/*
 * Move some xattrs from the old cluster to the new one since they are not
 * contiguous in ocfs2 xattr tree.
 *
 * new_blk starts a new separate cluster, and we will move some xattrs from
 * prev_blk to it.  v_start will be set as the first name hash value in this
 * new cluster so that it can be used as e_cpos during tree insertion and
 * don't collide with our original b-tree operations.  first_bh and header_bh
 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
 * to extend the insert bucket.
 *
 * The problem is how much xattr should we move to the new one and when should
 * we update first_bh and header_bh?
 * 1. If cluster size > bucket size, that means the previous cluster has more
 *    than 1 bucket, so just move half nums of bucket into the new cluster and
 *    update the first_bh and header_bh if the insert bucket has been moved
 *    to the new cluster.
 * 2. If cluster_size == bucket_size:
 *    a) If the previous extent rec has more than one cluster and the insert
 *       place isn't in the last cluster, copy the entire last cluster to the
 *       new one.  This time, we don't need to update the first_bh and header_bh
 *       since they will not be moved into the new cluster.
 *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
 *       the new one.  And we set the extend flag to zero if the insert place is
 *       moved into the new allocated cluster since no extend is needed.
 */
static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
					    handle_t *handle,
					    struct ocfs2_xattr_bucket *first,
					    struct ocfs2_xattr_bucket *target,
					    u64 new_blk,
					    u32 prev_clusters,
					    u32 *v_start,
					    int *extend)
{
	int ret;

	trace_ocfs2_adjust_xattr_cross_cluster(
			(unsigned long long)bucket_blkno(first),
			(unsigned long long)new_blk, prev_clusters);

	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
		/* Case 1 above: split the last cluster at a bucket boundary. */
		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
							  handle,
							  first, target,
							  new_blk,
							  prev_clusters,
							  v_start);
		if (ret)
			mlog_errno(ret);
	} else {
		/* The start of the last cluster in the first extent */
		u64 last_blk = bucket_blkno(first) +
			((prev_clusters - 1) *
			 ocfs2_clusters_to_blocks(inode->i_sb, 1));

		if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
			/* Case 2a: copy the entire last cluster. */
			ret = ocfs2_mv_xattr_buckets(inode, handle,
						     bucket_blkno(first),
						     last_blk, new_blk, 0,
						     v_start);
			if (ret)
				mlog_errno(ret);
		} else {
			/* Case 2b: split the single last bucket/cluster. */
			ret = ocfs2_divide_xattr_cluster(inode, handle,
							 last_blk, new_blk,
							 v_start);
			if (ret)
				mlog_errno(ret);

			if ((bucket_blkno(target) == last_blk) && extend)
				*extend = 0;
		}
	}

	return ret;
}

/*
 * Add a new cluster for xattr storage.
 *
 * If the new cluster is contiguous with the previous one, it will be
 * appended to the same extent record, and num_clusters will be updated.
 * If not, we will insert a new extent for it and move some xattrs in
 * the last cluster into the new allocated one.
 * We also need to limit the maximum size of a btree leaf, otherwise we'll
 * lose the benefits of hashing because we'll have to search large leaves.
 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
 * if it's bigger).
 *
 * first_bh is the first block of the previous extent rec and header_bh
 * indicates the bucket we will insert the new xattrs.  They will be updated
 * when the header_bh is moved into the new cluster.
 */
static int ocfs2_add_new_xattr_cluster(struct inode *inode,
				       struct buffer_head *root_bh,
				       struct ocfs2_xattr_bucket *first,
				       struct ocfs2_xattr_bucket *target,
				       u32 *num_clusters,
				       u32 prev_cpos,
				       int *extend,
				       struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 prev_clusters = *num_clusters;
	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
	u64 block;
	handle_t *handle = ctxt->handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_tree et;

	trace_ocfs2_add_new_xattr_cluster_begin(
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
		(unsigned long long)bucket_blkno(first),
		prev_cpos, prev_clusters);

	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	/* Claim one new cluster from the pre-reserved data allocator. */
	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
				     clusters_to_add, &bit_off, &num_bits);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);

	/* Contiguous with the old extent and still within the leaf limit? */
	if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
		/*
		 * If this cluster is contiguous with the old one and
		 * adding this new cluster, we don't surpass the limit of
		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool.  We will let it be
		 * initialized and used like other buckets in the previous
		 * cluster.
		 * So add it as a contiguous one.  The caller will handle
		 * its init process.
		 */
		v_start = prev_cpos + prev_clusters;
		*num_clusters = prev_clusters + num_bits;
	} else {
		ret = ocfs2_adjust_xattr_cross_cluster(inode,
						       handle,
						       first,
						       target,
						       block,
						       prev_clusters,
						       &v_start,
						       extend);
		if (ret) {
			mlog_errno(ret);
			goto leave;
		}
	}

	trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
						 v_start, num_bits);
	ret = ocfs2_insert_extent(handle, &et, v_start, block,
				  num_bits, 0, ctxt->meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ocfs2_journal_dirty(handle, root_bh);

leave:
	return ret;
}

/*
 * We are given an extent.  'first' is the bucket at the very front of
 * the extent.  The extent has space for an additional bucket past
 * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
 * of the target bucket.  We wish to shift every bucket past the target
 * down one, filling in that additional space.  When we get back to the
 * target, we split the target between itself and the now-empty bucket
 * at target+1 (aka, target_blkno + blks_per_bucket).
5216 */ 5217 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5218 handle_t *handle, 5219 struct ocfs2_xattr_bucket *first, 5220 u64 target_blk, 5221 u32 num_clusters) 5222 { 5223 int ret, credits; 5224 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5225 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5226 u64 end_blk; 5227 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5228 5229 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5230 (unsigned long long)bucket_blkno(first), 5231 num_clusters, new_bucket); 5232 5233 /* The extent must have room for an additional bucket */ 5234 BUG_ON(new_bucket >= 5235 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5236 5237 /* end_blk points to the last existing bucket */ 5238 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5239 5240 /* 5241 * end_blk is the start of the last existing bucket. 5242 * Thus, (end_blk - target_blk) covers the target bucket and 5243 * every bucket after it up to, but not including, the last 5244 * existing bucket. Then we add the last existing bucket, the 5245 * new bucket, and the first bucket (3 * blk_per_bucket). 5246 */ 5247 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5248 ret = ocfs2_extend_trans(handle, credits); 5249 if (ret) { 5250 mlog_errno(ret); 5251 goto out; 5252 } 5253 5254 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5255 OCFS2_JOURNAL_ACCESS_WRITE); 5256 if (ret) { 5257 mlog_errno(ret); 5258 goto out; 5259 } 5260 5261 while (end_blk != target_blk) { 5262 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5263 end_blk + blk_per_bucket, 0); 5264 if (ret) 5265 goto out; 5266 end_blk -= blk_per_bucket; 5267 } 5268 5269 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5270 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5271 target_blk + blk_per_bucket, NULL, 0); 5272 5273 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5274 ocfs2_xattr_bucket_journal_dirty(handle, first); 5275 5276 out: 5277 return ret; 5278 } 5279 5280 /* 5281 * Add new xattr bucket in an extent record and adjust the buckets 5282 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5283 * bucket we want to insert into. 5284 * 5285 * In the easy case, we will move all the buckets after target down by 5286 * one. Half of target's xattrs will be moved to the next bucket. 5287 * 5288 * If current cluster is full, we'll allocate a new one. This may not 5289 * be contiguous. The underlying calls will make sure that there is 5290 * space for the insert, shifting buckets around if necessary. 5291 * 'target' may be moved by those calls. 5292 */ 5293 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5294 struct buffer_head *xb_bh, 5295 struct ocfs2_xattr_bucket *target, 5296 struct ocfs2_xattr_set_ctxt *ctxt) 5297 { 5298 struct ocfs2_xattr_block *xb = 5299 (struct ocfs2_xattr_block *)xb_bh->b_data; 5300 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5301 struct ocfs2_extent_list *el = &xb_root->xt_list; 5302 u32 name_hash = 5303 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5304 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5305 int ret, num_buckets, extend = 1; 5306 u64 p_blkno; 5307 u32 e_cpos, num_clusters; 5308 /* The bucket at the front of the extent */ 5309 struct ocfs2_xattr_bucket *first; 5310 5311 trace_ocfs2_add_new_xattr_bucket( 5312 (unsigned long long)bucket_blkno(target)); 5313 5314 /* The first bucket of the original extent */ 5315 first = ocfs2_xattr_bucket_new(inode); 5316 if (!first) { 5317 ret = -ENOMEM; 5318 mlog_errno(ret); 5319 goto out; 5320 } 5321 5322 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5323 &num_clusters, el); 5324 if (ret) { 5325 mlog_errno(ret); 
5326 goto out; 5327 } 5328 5329 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5330 if (ret) { 5331 mlog_errno(ret); 5332 goto out; 5333 } 5334 5335 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5336 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5337 /* 5338 * This can move first+target if the target bucket moves 5339 * to the new extent. 5340 */ 5341 ret = ocfs2_add_new_xattr_cluster(inode, 5342 xb_bh, 5343 first, 5344 target, 5345 &num_clusters, 5346 e_cpos, 5347 &extend, 5348 ctxt); 5349 if (ret) { 5350 mlog_errno(ret); 5351 goto out; 5352 } 5353 } 5354 5355 if (extend) { 5356 ret = ocfs2_extend_xattr_bucket(inode, 5357 ctxt->handle, 5358 first, 5359 bucket_blkno(target), 5360 num_clusters); 5361 if (ret) 5362 mlog_errno(ret); 5363 } 5364 5365 out: 5366 ocfs2_xattr_bucket_free(first); 5367 5368 return ret; 5369 } 5370 5371 /* 5372 * Truncate the specified xe_off entry in xattr bucket. 5373 * bucket is indicated by header_bh and len is the new length. 5374 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5375 * 5376 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5377 */ 5378 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5379 struct ocfs2_xattr_bucket *bucket, 5380 int xe_off, 5381 int len, 5382 struct ocfs2_xattr_set_ctxt *ctxt) 5383 { 5384 int ret, offset; 5385 u64 value_blk; 5386 struct ocfs2_xattr_entry *xe; 5387 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5388 size_t blocksize = inode->i_sb->s_blocksize; 5389 struct ocfs2_xattr_value_buf vb = { 5390 .vb_access = ocfs2_journal_access, 5391 }; 5392 5393 xe = &xh->xh_entries[xe_off]; 5394 5395 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5396 5397 offset = le16_to_cpu(xe->xe_name_offset) + 5398 OCFS2_XATTR_SIZE(xe->xe_name_len); 5399 5400 value_blk = offset / blocksize; 5401 5402 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5403 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5404 5405 vb.vb_bh = bucket->bu_bhs[value_blk]; 5406 BUG_ON(!vb.vb_bh); 5407 5408 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5409 (vb.vb_bh->b_data + offset % blocksize); 5410 5411 /* 5412 * From here on out we have to dirty the bucket. The generic 5413 * value calls only modify one of the bucket's bhs, but we need 5414 * to send the bucket at once. So if they error, they *could* have 5415 * modified something. We have to assume they did, and dirty 5416 * the whole bucket. This leaves us in a consistent state. 5417 */ 5418 trace_ocfs2_xattr_bucket_value_truncate( 5419 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5420 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5421 if (ret) { 5422 mlog_errno(ret); 5423 goto out; 5424 } 5425 5426 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5427 OCFS2_JOURNAL_ACCESS_WRITE); 5428 if (ret) { 5429 mlog_errno(ret); 5430 goto out; 5431 } 5432 5433 xe->xe_value_size = cpu_to_le64(len); 5434 5435 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5436 5437 out: 5438 return ret; 5439 } 5440 5441 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5442 struct buffer_head *root_bh, 5443 u64 blkno, 5444 u32 cpos, 5445 u32 len, 5446 void *para) 5447 { 5448 int ret; 5449 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5450 struct inode *tl_inode = osb->osb_tl_inode; 5451 handle_t *handle; 5452 struct ocfs2_xattr_block *xb = 5453 (struct ocfs2_xattr_block *)root_bh->b_data; 5454 struct ocfs2_alloc_context *meta_ac = NULL; 5455 struct ocfs2_cached_dealloc_ctxt dealloc; 5456 struct ocfs2_extent_tree et; 5457 5458 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5459 ocfs2_delete_xattr_in_bucket, para); 5460 if (ret) { 5461 mlog_errno(ret); 5462 return ret; 5463 } 5464 5465 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5466 5467 ocfs2_init_dealloc_ctxt(&dealloc); 5468 5469 
trace_ocfs2_rm_xattr_cluster( 5470 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5471 (unsigned long long)blkno, cpos, len); 5472 5473 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5474 len); 5475 5476 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5477 if (ret) { 5478 mlog_errno(ret); 5479 return ret; 5480 } 5481 5482 inode_lock(tl_inode); 5483 5484 if (ocfs2_truncate_log_needs_flush(osb)) { 5485 ret = __ocfs2_flush_truncate_log(osb); 5486 if (ret < 0) { 5487 mlog_errno(ret); 5488 goto out; 5489 } 5490 } 5491 5492 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5493 if (IS_ERR(handle)) { 5494 ret = -ENOMEM; 5495 mlog_errno(ret); 5496 goto out; 5497 } 5498 5499 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5500 OCFS2_JOURNAL_ACCESS_WRITE); 5501 if (ret) { 5502 mlog_errno(ret); 5503 goto out_commit; 5504 } 5505 5506 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5507 &dealloc); 5508 if (ret) { 5509 mlog_errno(ret); 5510 goto out_commit; 5511 } 5512 5513 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5514 ocfs2_journal_dirty(handle, root_bh); 5515 5516 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5517 if (ret) 5518 mlog_errno(ret); 5519 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5520 5521 out_commit: 5522 ocfs2_commit_trans(osb, handle); 5523 out: 5524 ocfs2_schedule_truncate_log_flush(osb, 1); 5525 5526 inode_unlock(tl_inode); 5527 5528 if (meta_ac) 5529 ocfs2_free_alloc_context(meta_ac); 5530 5531 ocfs2_run_deallocs(osb, &dealloc); 5532 5533 return ret; 5534 } 5535 5536 /* 5537 * check whether the xattr bucket is filled up with the same hash value. 5538 * If we want to insert the xattr with the same hash, return -ENOSPC. 5539 * If we want to insert a xattr with different hash value, go ahead 5540 * and ocfs2_divide_xattr_bucket will handle this. 
 */
static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
					      struct ocfs2_xattr_bucket *bucket,
					      const char *name)
{
	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));

	/* New name hashes differently from the first entry: no conflict. */
	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
		return 0;

	/*
	 * If the first and last entries carry the same hash, the whole
	 * bucket is one collision chain with no split point (entries are
	 * presumably hash-ordered — the divide path relies on it).  Both
	 * sides are raw little-endian values, so they compare directly.
	 */
	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
	    xh->xh_entries[0].xe_name_hash) {
		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
		     "hash = %u\n",
		     (unsigned long long)bucket_blkno(bucket),
		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
		return -ENOSPC;
	}

	return 0;
}

/*
 * Try to set the entry in the current bucket.  If we fail, the caller
 * will handle getting us another bucket.
 */
static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xs,
					struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	struct ocfs2_xa_loc loc;

	trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);

	/* Point the xa_loc at the found entry, or NULL for a fresh insert. */
	ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
				       xs->not_found ? NULL : xs->here);
	ret = ocfs2_xa_set(&loc, xi, ctxt);
	if (!ret) {
		xs->here = loc.xl_entry;
		goto out;
	}
	if (ret != -ENOSPC) {
		mlog_errno(ret);
		goto out;
	}

	/* Ok, we need space.  Let's try defragmenting the bucket.
*/ 5591 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5592 xs->bucket); 5593 if (ret) { 5594 mlog_errno(ret); 5595 goto out; 5596 } 5597 5598 ret = ocfs2_xa_set(&loc, xi, ctxt); 5599 if (!ret) { 5600 xs->here = loc.xl_entry; 5601 goto out; 5602 } 5603 if (ret != -ENOSPC) 5604 mlog_errno(ret); 5605 5606 5607 out: 5608 return ret; 5609 } 5610 5611 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5612 struct ocfs2_xattr_info *xi, 5613 struct ocfs2_xattr_search *xs, 5614 struct ocfs2_xattr_set_ctxt *ctxt) 5615 { 5616 int ret; 5617 5618 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5619 5620 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5621 if (!ret) 5622 goto out; 5623 if (ret != -ENOSPC) { 5624 mlog_errno(ret); 5625 goto out; 5626 } 5627 5628 /* Ack, need more space. Let's try to get another bucket! */ 5629 5630 /* 5631 * We do not allow for overlapping ranges between buckets. And 5632 * the maximum number of collisions we will allow for then is 5633 * one bucket's worth, so check it here whether we need to 5634 * add a new bucket for the insert. 5635 */ 5636 ret = ocfs2_check_xattr_bucket_collision(inode, 5637 xs->bucket, 5638 xi->xi_name); 5639 if (ret) { 5640 mlog_errno(ret); 5641 goto out; 5642 } 5643 5644 ret = ocfs2_add_new_xattr_bucket(inode, 5645 xs->xattr_bh, 5646 xs->bucket, 5647 ctxt); 5648 if (ret) { 5649 mlog_errno(ret); 5650 goto out; 5651 } 5652 5653 /* 5654 * ocfs2_add_new_xattr_bucket() will have updated 5655 * xs->bucket if it moved, but it will not have updated 5656 * any of the other search fields. Thus, we drop it and 5657 * re-search. Everything should be cached, so it'll be 5658 * quick. 
5659 */ 5660 ocfs2_xattr_bucket_relse(xs->bucket); 5661 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5662 xi->xi_name_index, 5663 xi->xi_name, xs); 5664 if (ret && ret != -ENODATA) 5665 goto out; 5666 xs->not_found = ret; 5667 5668 /* Ok, we have a new bucket, let's try again */ 5669 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5670 if (ret && (ret != -ENOSPC)) 5671 mlog_errno(ret); 5672 5673 out: 5674 return ret; 5675 } 5676 5677 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5678 struct ocfs2_xattr_bucket *bucket, 5679 void *para) 5680 { 5681 int ret = 0, ref_credits; 5682 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5683 u16 i; 5684 struct ocfs2_xattr_entry *xe; 5685 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5686 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5687 int credits = ocfs2_remove_extent_credits(osb->sb) + 5688 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5689 struct ocfs2_xattr_value_root *xv; 5690 struct ocfs2_rm_xattr_bucket_para *args = 5691 (struct ocfs2_rm_xattr_bucket_para *)para; 5692 5693 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5694 5695 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5696 xe = &xh->xh_entries[i]; 5697 if (ocfs2_xattr_is_local(xe)) 5698 continue; 5699 5700 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5701 i, &xv, NULL); 5702 if (ret) { 5703 mlog_errno(ret); 5704 break; 5705 } 5706 5707 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5708 args->ref_ci, 5709 args->ref_root_bh, 5710 &ctxt.meta_ac, 5711 &ref_credits); 5712 5713 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5714 if (IS_ERR(ctxt.handle)) { 5715 ret = PTR_ERR(ctxt.handle); 5716 mlog_errno(ret); 5717 break; 5718 } 5719 5720 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5721 i, 0, &ctxt); 5722 5723 ocfs2_commit_trans(osb, ctxt.handle); 5724 if (ctxt.meta_ac) { 5725 ocfs2_free_alloc_context(ctxt.meta_ac); 5726 ctxt.meta_ac = NULL; 5727 } 5728 if (ret) { 5729 mlog_errno(ret); 
			break;
		}
	}

	/* Free any allocator context left over from a failed iteration. */
	if (ctxt.meta_ac)
		ocfs2_free_alloc_context(ctxt.meta_ac);
	ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &ctxt.dealloc);
	return ret;
}

/*
 * Whenever we modify a xattr value root in the bucket (e.g. CoW
 * or changing an extent record flag), we need to recalculate
 * the metaecc for the whole bucket.  So it is done here.
 *
 * Note:
 * We have to give the extra credits for the caller.
 */
static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
					    handle_t *handle,
					    void *para)
{
	int ret;
	struct ocfs2_xattr_bucket *bucket =
			(struct ocfs2_xattr_bucket *)para;

	/*
	 * Journal every block of the bucket so its checksum is
	 * recomputed as a unit when the transaction commits.
	 */
	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	ocfs2_xattr_bucket_journal_dirty(handle, bucket);

	return 0;
}

/*
 * Special action we need if the xattr value is refcounted.
 *
 * 1. If the xattr is refcounted, lock the tree.
 * 2. CoW the xattr if we are setting the new value and the value
 *    will be stored outside.
 * 3. In other cases, decrease_refcount will work for us, so just
 *    locking the refcount tree and calculating the meta and credits is OK.
 *
 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
 * currently CoW is a completed transaction, while this function
 * will also lock the allocators and let us deadlock.  So we will
 * CoW the whole xattr value.
5782 */ 5783 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5784 struct ocfs2_dinode *di, 5785 struct ocfs2_xattr_info *xi, 5786 struct ocfs2_xattr_search *xis, 5787 struct ocfs2_xattr_search *xbs, 5788 struct ocfs2_refcount_tree **ref_tree, 5789 int *meta_add, 5790 int *credits) 5791 { 5792 int ret = 0; 5793 struct ocfs2_xattr_block *xb; 5794 struct ocfs2_xattr_entry *xe; 5795 char *base; 5796 u32 p_cluster, num_clusters; 5797 unsigned int ext_flags; 5798 int name_offset, name_len; 5799 struct ocfs2_xattr_value_buf vb; 5800 struct ocfs2_xattr_bucket *bucket = NULL; 5801 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5802 struct ocfs2_post_refcount refcount; 5803 struct ocfs2_post_refcount *p = NULL; 5804 struct buffer_head *ref_root_bh = NULL; 5805 5806 if (!xis->not_found) { 5807 xe = xis->here; 5808 name_offset = le16_to_cpu(xe->xe_name_offset); 5809 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5810 base = xis->base; 5811 vb.vb_bh = xis->inode_bh; 5812 vb.vb_access = ocfs2_journal_access_di; 5813 } else { 5814 int i, block_off = 0; 5815 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5816 xe = xbs->here; 5817 name_offset = le16_to_cpu(xe->xe_name_offset); 5818 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5819 i = xbs->here - xbs->header->xh_entries; 5820 5821 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5822 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5823 bucket_xh(xbs->bucket), 5824 i, &block_off, 5825 &name_offset); 5826 if (ret) { 5827 mlog_errno(ret); 5828 goto out; 5829 } 5830 base = bucket_block(xbs->bucket, block_off); 5831 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5832 vb.vb_access = ocfs2_journal_access; 5833 5834 if (ocfs2_meta_ecc(osb)) { 5835 /*create parameters for ocfs2_post_refcount. 
*/ 5836 bucket = xbs->bucket; 5837 refcount.credits = bucket->bu_blocks; 5838 refcount.para = bucket; 5839 refcount.func = 5840 ocfs2_xattr_bucket_post_refcount; 5841 p = &refcount; 5842 } 5843 } else { 5844 base = xbs->base; 5845 vb.vb_bh = xbs->xattr_bh; 5846 vb.vb_access = ocfs2_journal_access_xb; 5847 } 5848 } 5849 5850 if (ocfs2_xattr_is_local(xe)) 5851 goto out; 5852 5853 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5854 (base + name_offset + name_len); 5855 5856 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5857 &num_clusters, &vb.vb_xv->xr_list, 5858 &ext_flags); 5859 if (ret) { 5860 mlog_errno(ret); 5861 goto out; 5862 } 5863 5864 /* 5865 * We just need to check the 1st extent record, since we always 5866 * CoW the whole xattr. So there shouldn't be a xattr with 5867 * some REFCOUNT extent recs after the 1st one. 5868 */ 5869 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5870 goto out; 5871 5872 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5873 1, ref_tree, &ref_root_bh); 5874 if (ret) { 5875 mlog_errno(ret); 5876 goto out; 5877 } 5878 5879 /* 5880 * If we are deleting the xattr or the new size will be stored inside, 5881 * cool, leave it there, the xattr truncate process will remove them 5882 * for us(it still needs the refcount tree lock and the meta, credits). 5883 * And the worse case is that every cluster truncate will split the 5884 * refcount tree, and make the original extent become 3. So we will need 5885 * 2 * cluster more extent recs at most. 
5886 */ 5887 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5888 5889 ret = ocfs2_refcounted_xattr_delete_need(inode, 5890 &(*ref_tree)->rf_ci, 5891 ref_root_bh, vb.vb_xv, 5892 meta_add, credits); 5893 if (ret) 5894 mlog_errno(ret); 5895 goto out; 5896 } 5897 5898 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5899 *ref_tree, ref_root_bh, 0, 5900 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5901 if (ret) 5902 mlog_errno(ret); 5903 5904 out: 5905 brelse(ref_root_bh); 5906 return ret; 5907 } 5908 5909 /* 5910 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5911 * The physical clusters will be added to refcount tree. 5912 */ 5913 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5914 struct ocfs2_xattr_value_root *xv, 5915 struct ocfs2_extent_tree *value_et, 5916 struct ocfs2_caching_info *ref_ci, 5917 struct buffer_head *ref_root_bh, 5918 struct ocfs2_cached_dealloc_ctxt *dealloc, 5919 struct ocfs2_post_refcount *refcount) 5920 { 5921 int ret = 0; 5922 u32 clusters = le32_to_cpu(xv->xr_clusters); 5923 u32 cpos, p_cluster, num_clusters; 5924 struct ocfs2_extent_list *el = &xv->xr_list; 5925 unsigned int ext_flags; 5926 5927 cpos = 0; 5928 while (cpos < clusters) { 5929 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5930 &num_clusters, el, &ext_flags); 5931 if (ret) { 5932 mlog_errno(ret); 5933 break; 5934 } 5935 5936 cpos += num_clusters; 5937 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5938 continue; 5939 5940 BUG_ON(!p_cluster); 5941 5942 ret = ocfs2_add_refcount_flag(inode, value_et, 5943 ref_ci, ref_root_bh, 5944 cpos - num_clusters, 5945 p_cluster, num_clusters, 5946 dealloc, refcount); 5947 if (ret) { 5948 mlog_errno(ret); 5949 break; 5950 } 5951 } 5952 5953 return ret; 5954 } 5955 5956 /* 5957 * Given a normal ocfs2_xattr_header, refcount all the entries which 5958 * have value stored outside. 5959 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5960 */ 5961 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5962 struct ocfs2_xattr_value_buf *vb, 5963 struct ocfs2_xattr_header *header, 5964 struct ocfs2_caching_info *ref_ci, 5965 struct buffer_head *ref_root_bh, 5966 struct ocfs2_cached_dealloc_ctxt *dealloc) 5967 { 5968 5969 struct ocfs2_xattr_entry *xe; 5970 struct ocfs2_xattr_value_root *xv; 5971 struct ocfs2_extent_tree et; 5972 int i, ret = 0; 5973 5974 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5975 xe = &header->xh_entries[i]; 5976 5977 if (ocfs2_xattr_is_local(xe)) 5978 continue; 5979 5980 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5981 le16_to_cpu(xe->xe_name_offset) + 5982 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5983 5984 vb->vb_xv = xv; 5985 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5986 5987 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5988 ref_ci, ref_root_bh, 5989 dealloc, NULL); 5990 if (ret) { 5991 mlog_errno(ret); 5992 break; 5993 } 5994 } 5995 5996 return ret; 5997 } 5998 5999 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 6000 struct buffer_head *fe_bh, 6001 struct ocfs2_caching_info *ref_ci, 6002 struct buffer_head *ref_root_bh, 6003 struct ocfs2_cached_dealloc_ctxt *dealloc) 6004 { 6005 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6006 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 6007 (fe_bh->b_data + inode->i_sb->s_blocksize - 6008 le16_to_cpu(di->i_xattr_inline_size)); 6009 struct ocfs2_xattr_value_buf vb = { 6010 .vb_bh = fe_bh, 6011 .vb_access = ocfs2_journal_access_di, 6012 }; 6013 6014 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6015 ref_ci, ref_root_bh, dealloc); 6016 } 6017 6018 struct ocfs2_xattr_tree_value_refcount_para { 6019 struct ocfs2_caching_info *ref_ci; 6020 struct buffer_head *ref_root_bh; 6021 struct ocfs2_cached_dealloc_ctxt *dealloc; 6022 }; 6023 6024 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
6025 struct ocfs2_xattr_bucket *bucket, 6026 int offset, 6027 struct ocfs2_xattr_value_root **xv, 6028 struct buffer_head **bh) 6029 { 6030 int ret, block_off, name_offset; 6031 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6032 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6033 void *base; 6034 6035 ret = ocfs2_xattr_bucket_get_name_value(sb, 6036 bucket_xh(bucket), 6037 offset, 6038 &block_off, 6039 &name_offset); 6040 if (ret) { 6041 mlog_errno(ret); 6042 goto out; 6043 } 6044 6045 base = bucket_block(bucket, block_off); 6046 6047 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6048 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6049 6050 if (bh) 6051 *bh = bucket->bu_bhs[block_off]; 6052 out: 6053 return ret; 6054 } 6055 6056 /* 6057 * For a given xattr bucket, refcount all the entries which 6058 * have value stored outside. 6059 */ 6060 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6061 struct ocfs2_xattr_bucket *bucket, 6062 void *para) 6063 { 6064 int i, ret = 0; 6065 struct ocfs2_extent_tree et; 6066 struct ocfs2_xattr_tree_value_refcount_para *ref = 6067 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6068 struct ocfs2_xattr_header *xh = 6069 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6070 struct ocfs2_xattr_entry *xe; 6071 struct ocfs2_xattr_value_buf vb = { 6072 .vb_access = ocfs2_journal_access, 6073 }; 6074 struct ocfs2_post_refcount refcount = { 6075 .credits = bucket->bu_blocks, 6076 .para = bucket, 6077 .func = ocfs2_xattr_bucket_post_refcount, 6078 }; 6079 struct ocfs2_post_refcount *p = NULL; 6080 6081 /* We only need post_refcount if we support metaecc. 
*/ 6082 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6083 p = &refcount; 6084 6085 trace_ocfs2_xattr_bucket_value_refcount( 6086 (unsigned long long)bucket_blkno(bucket), 6087 le16_to_cpu(xh->xh_count)); 6088 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6089 xe = &xh->xh_entries[i]; 6090 6091 if (ocfs2_xattr_is_local(xe)) 6092 continue; 6093 6094 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6095 &vb.vb_xv, &vb.vb_bh); 6096 if (ret) { 6097 mlog_errno(ret); 6098 break; 6099 } 6100 6101 ocfs2_init_xattr_value_extent_tree(&et, 6102 INODE_CACHE(inode), &vb); 6103 6104 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6105 &et, ref->ref_ci, 6106 ref->ref_root_bh, 6107 ref->dealloc, p); 6108 if (ret) { 6109 mlog_errno(ret); 6110 break; 6111 } 6112 } 6113 6114 return ret; 6115 6116 } 6117 6118 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6119 struct buffer_head *root_bh, 6120 u64 blkno, u32 cpos, u32 len, void *para) 6121 { 6122 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6123 ocfs2_xattr_bucket_value_refcount, 6124 para); 6125 } 6126 6127 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6128 struct buffer_head *blk_bh, 6129 struct ocfs2_caching_info *ref_ci, 6130 struct buffer_head *ref_root_bh, 6131 struct ocfs2_cached_dealloc_ctxt *dealloc) 6132 { 6133 int ret = 0; 6134 struct ocfs2_xattr_block *xb = 6135 (struct ocfs2_xattr_block *)blk_bh->b_data; 6136 6137 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6138 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6139 struct ocfs2_xattr_value_buf vb = { 6140 .vb_bh = blk_bh, 6141 .vb_access = ocfs2_journal_access_xb, 6142 }; 6143 6144 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6145 ref_ci, ref_root_bh, 6146 dealloc); 6147 } else { 6148 struct ocfs2_xattr_tree_value_refcount_para para = { 6149 .ref_ci = ref_ci, 6150 .ref_root_bh = ref_root_bh, 6151 .dealloc = dealloc, 6152 }; 6153 6154 ret = 
		ocfs2_iterate_xattr_index_block(inode, blk_bh,
						ocfs2_refcount_xattr_tree_rec,
						&para);
	}

	return ret;
}

/*
 * Attach refcount information for every externally stored xattr value
 * of @inode: first the xattrs stored inline in the inode (if the
 * OCFS2_INLINE_XATTR_FL feature is set), then those in the external
 * xattr block pointed to by di->i_xattr_loc (if any).
 */
int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
				     struct buffer_head *fe_bh,
				     struct ocfs2_caching_info *ref_ci,
				     struct buffer_head *ref_root_bh,
				     struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
	struct buffer_head *blk_bh = NULL;

	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
							 ref_ci, ref_root_bh,
							 dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* No external xattr block: nothing more to do. */
	if (!di->i_xattr_loc)
		goto out;

	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
						ref_root_bh, dealloc);
	if (ret)
		mlog_errno(ret);

	brelse(blk_bh);
out:

	return ret;
}

/* Predicate deciding whether a given entry should be reflinked. */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
/*
 * Store the information we need in xattr reflink.
 * old_bh and new_bh are inode bh for the old and new inode.
 */
struct ocfs2_xattr_reflink {
	struct inode *old_inode;
	struct inode *new_inode;
	struct buffer_head *old_bh;
	struct buffer_head *new_bh;
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
	struct ocfs2_cached_dealloc_ctxt *dealloc;
	should_xattr_reflinked *xattr_reflinked;
};

/*
 * Given a xattr header and xe offset,
 * return the proper xv and the corresponding bh.
 * xattr in inode, block and xattr tree have different implementations.
 */
typedef int (get_xattr_value_root)(struct super_block *sb,
				   struct buffer_head *bh,
				   struct ocfs2_xattr_header *xh,
				   int offset,
				   struct ocfs2_xattr_value_root **xv,
				   struct buffer_head **ret_bh,
				   void *para);

/*
 * Calculate all the xattr value root metadata stored in this xattr header and
 * the credits we need if we create them from scratch.
 * We use get_xattr_value_root so that all types of xattr container can use it.
 *
 * On success, *metas, *credits and *num_recs have been incremented by the
 * totals for every non-local entry in @xh.
 */
static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
					     struct buffer_head *bh,
					     struct ocfs2_xattr_header *xh,
					     int *metas, int *credits,
					     int *num_recs,
					     get_xattr_value_root *func,
					     void *para)
{
	int i, ret = 0;
	struct ocfs2_xattr_value_root *xv;
	struct ocfs2_xattr_entry *xe;

	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
		xe = &xh->xh_entries[i];
		/* Inline values carry no extent metadata — skip them. */
		if (ocfs2_xattr_is_local(xe))
			continue;

		ret = func(sb, bh, xh, i, &xv, NULL, para);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/* Worst case: one block per tree level per populated rec. */
		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
			  le16_to_cpu(xv->xr_list.l_next_free_rec);

		*credits += ocfs2_calc_extend_credits(sb,
						      &def_xv.xv.xr_list);

		/*
		 * If the value is a tree with depth > 1, we don't go deep
		 * to the extent block, so just calculate a maximum record num.
		 */
		if (!xv->xr_list.l_tree_depth)
			*num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
		else
			*num_recs += ocfs2_clusters_for_bytes(sb,
							      XATTR_SIZE_MAX);
	}

	return ret;
}

/* Used by xattr inode and block to return the right xv and buffer_head.
 */
static int ocfs2_get_xattr_value_root(struct super_block *sb,
				      struct buffer_head *bh,
				      struct ocfs2_xattr_header *xh,
				      int offset,
				      struct ocfs2_xattr_value_root **xv,
				      struct buffer_head **ret_bh,
				      void *para)
{
	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];

	/* The value root immediately follows the (padded) name in @xh. */
	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
		le16_to_cpu(xe->xe_name_offset) +
		OCFS2_XATTR_SIZE(xe->xe_name_len));

	if (ret_bh)
		*ret_bh = bh;

	return 0;
}

/*
 * Lock the meta_ac and calculate how many credits we need for reflink xattrs.
 * It is only used for inline xattr and xattr block.
 */
static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
					       struct ocfs2_xattr_header *xh,
					       struct buffer_head *ref_root_bh,
					       int *credits,
					       struct ocfs2_alloc_context **meta_ac)
{
	int ret, meta_add = 0, num_recs = 0;
	struct ocfs2_refcount_block *rb =
			(struct ocfs2_refcount_block *)ref_root_bh->b_data;

	*credits = 0;

	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
						&meta_add, credits, &num_recs,
						ocfs2_get_xattr_value_root,
						NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We need to add/modify num_recs in the refcount tree, so just
	 * calculate an approximate number we need for the refcount tree
	 * change.  Sometimes we need to split the tree, and after a split,
	 * half the recs will be moved to the new block, and a new block can
	 * only provide half the number of recs.  So we multiply new blocks
	 * by 2.
6333 */ 6334 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6335 meta_add += num_recs; 6336 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6337 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6338 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6339 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6340 else 6341 *credits += 1; 6342 6343 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6344 if (ret) 6345 mlog_errno(ret); 6346 6347 out: 6348 return ret; 6349 } 6350 6351 /* 6352 * Given a xattr header, reflink all the xattrs in this container. 6353 * It can be used for inode, block and bucket. 6354 * 6355 * NOTE: 6356 * Before we call this function, the caller has memcpy the xattr in 6357 * old_xh to the new_xh. 6358 * 6359 * If args.xattr_reflinked is set, call it to decide whether the xe should 6360 * be reflinked or not. If not, remove it from the new xattr header. 6361 */ 6362 static int ocfs2_reflink_xattr_header(handle_t *handle, 6363 struct ocfs2_xattr_reflink *args, 6364 struct buffer_head *old_bh, 6365 struct ocfs2_xattr_header *xh, 6366 struct buffer_head *new_bh, 6367 struct ocfs2_xattr_header *new_xh, 6368 struct ocfs2_xattr_value_buf *vb, 6369 struct ocfs2_alloc_context *meta_ac, 6370 get_xattr_value_root *func, 6371 void *para) 6372 { 6373 int ret = 0, i, j; 6374 struct super_block *sb = args->old_inode->i_sb; 6375 struct buffer_head *value_bh; 6376 struct ocfs2_xattr_entry *xe, *last; 6377 struct ocfs2_xattr_value_root *xv, *new_xv; 6378 struct ocfs2_extent_tree data_et; 6379 u32 clusters, cpos, p_cluster, num_clusters; 6380 unsigned int ext_flags = 0; 6381 6382 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6383 le16_to_cpu(xh->xh_count)); 6384 6385 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1; 6386 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6387 xe = &xh->xh_entries[i]; 6388 6389 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6390 xe = &new_xh->xh_entries[j]; 6391 6392 le16_add_cpu(&new_xh->xh_count, -1); 6393 if (new_xh->xh_count) { 6394 memmove(xe, xe + 1, 6395 (void *)last - (void *)xe); 6396 memset(last, 0, 6397 sizeof(struct ocfs2_xattr_entry)); 6398 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1; 6399 } else { 6400 memset(xe, 0, sizeof(struct ocfs2_xattr_entry)); 6401 last = NULL; 6402 } 6403 6404 /* 6405 * We don't want j to increase in the next round since 6406 * it is already moved ahead. 6407 */ 6408 j--; 6409 continue; 6410 } 6411 6412 if (ocfs2_xattr_is_local(xe)) 6413 continue; 6414 6415 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6416 if (ret) { 6417 mlog_errno(ret); 6418 break; 6419 } 6420 6421 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6422 if (ret) { 6423 mlog_errno(ret); 6424 break; 6425 } 6426 6427 /* 6428 * For the xattr which has l_tree_depth = 0, all the extent 6429 * recs have already be copied to the new xh with the 6430 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6431 * increase the refount count int the refcount tree. 6432 * 6433 * For the xattr which has l_tree_depth > 0, we need 6434 * to initialize it to the empty default value root, 6435 * and then insert the extents one by one. 
6436 */ 6437 if (xv->xr_list.l_tree_depth) { 6438 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); 6439 vb->vb_xv = new_xv; 6440 vb->vb_bh = value_bh; 6441 ocfs2_init_xattr_value_extent_tree(&data_et, 6442 INODE_CACHE(args->new_inode), vb); 6443 } 6444 6445 clusters = le32_to_cpu(xv->xr_clusters); 6446 cpos = 0; 6447 while (cpos < clusters) { 6448 ret = ocfs2_xattr_get_clusters(args->old_inode, 6449 cpos, 6450 &p_cluster, 6451 &num_clusters, 6452 &xv->xr_list, 6453 &ext_flags); 6454 if (ret) { 6455 mlog_errno(ret); 6456 goto out; 6457 } 6458 6459 BUG_ON(!p_cluster); 6460 6461 if (xv->xr_list.l_tree_depth) { 6462 ret = ocfs2_insert_extent(handle, 6463 &data_et, cpos, 6464 ocfs2_clusters_to_blocks( 6465 args->old_inode->i_sb, 6466 p_cluster), 6467 num_clusters, ext_flags, 6468 meta_ac); 6469 if (ret) { 6470 mlog_errno(ret); 6471 goto out; 6472 } 6473 } 6474 6475 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6476 args->ref_root_bh, 6477 p_cluster, num_clusters, 6478 meta_ac, args->dealloc); 6479 if (ret) { 6480 mlog_errno(ret); 6481 goto out; 6482 } 6483 6484 cpos += num_clusters; 6485 } 6486 } 6487 6488 out: 6489 return ret; 6490 } 6491 6492 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6493 { 6494 int ret = 0, credits = 0; 6495 handle_t *handle; 6496 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6497 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6498 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6499 int header_off = osb->sb->s_blocksize - inline_size; 6500 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6501 (args->old_bh->b_data + header_off); 6502 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6503 (args->new_bh->b_data + header_off); 6504 struct ocfs2_alloc_context *meta_ac = NULL; 6505 struct ocfs2_inode_info *new_oi; 6506 struct ocfs2_dinode *new_di; 6507 struct ocfs2_xattr_value_buf vb = { 6508 .vb_bh = args->new_bh, 6509 .vb_access = 
ocfs2_journal_access_di, 6510 }; 6511 6512 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6513 &credits, &meta_ac); 6514 if (ret) { 6515 mlog_errno(ret); 6516 goto out; 6517 } 6518 6519 handle = ocfs2_start_trans(osb, credits); 6520 if (IS_ERR(handle)) { 6521 ret = PTR_ERR(handle); 6522 mlog_errno(ret); 6523 goto out; 6524 } 6525 6526 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6527 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6528 if (ret) { 6529 mlog_errno(ret); 6530 goto out_commit; 6531 } 6532 6533 memcpy(args->new_bh->b_data + header_off, 6534 args->old_bh->b_data + header_off, inline_size); 6535 6536 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6537 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6538 6539 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6540 args->new_bh, new_xh, &vb, meta_ac, 6541 ocfs2_get_xattr_value_root, NULL); 6542 if (ret) { 6543 mlog_errno(ret); 6544 goto out_commit; 6545 } 6546 6547 new_oi = OCFS2_I(args->new_inode); 6548 6549 spin_lock(&new_oi->ip_lock); 6550 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6551 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6552 spin_unlock(&new_oi->ip_lock); 6553 6554 ocfs2_journal_dirty(handle, args->new_bh); 6555 6556 out_commit: 6557 ocfs2_commit_trans(osb, handle); 6558 6559 out: 6560 if (meta_ac) 6561 ocfs2_free_alloc_context(meta_ac); 6562 return ret; 6563 } 6564 6565 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6566 struct buffer_head *fe_bh, 6567 struct buffer_head **ret_bh, 6568 int indexed) 6569 { 6570 int ret; 6571 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6572 struct ocfs2_xattr_set_ctxt ctxt; 6573 6574 memset(&ctxt, 0, sizeof(ctxt)); 6575 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6576 if (ret < 0) { 6577 mlog_errno(ret); 6578 return ret; 6579 } 6580 6581 ctxt.handle = ocfs2_start_trans(osb, 
/*
 * Reflink an unindexed xattr block: copy the header area of @blk_bh into
 * the already-created @new_blk_bh, then refcount externally stored values
 * via ocfs2_reflink_xattr_header().  Sets OCFS2_HAS_XATTR_FL on the new
 * inode if it was not set yet.
 */
static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
				     struct buffer_head *blk_bh,
				     struct buffer_head *new_blk_bh)
{
	int ret = 0, credits = 0;
	handle_t *handle;
	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
	struct ocfs2_dinode *new_di;
	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
	struct ocfs2_xattr_block *new_xb =
			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
	struct ocfs2_alloc_context *meta_ac;
	struct ocfs2_xattr_value_buf vb = {
		.vb_bh = new_blk_bh,
		.vb_access = ocfs2_journal_access_xb,
	};

	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
						  &credits, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* One more credits in case we need to add xattr flags in new inode. */
	handle = ocfs2_start_trans(osb, credits + 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* Only journal the dinode if we will flip OCFS2_HAS_XATTR_FL below. */
	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
		ret = ocfs2_journal_access_di(handle,
					      INODE_CACHE(args->new_inode),
					      args->new_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Copy everything from the xattr header to the end of the block. */
	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
	       osb->sb->s_blocksize - header_off);

	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
					 new_blk_bh, new_xh, &vb, meta_ac,
					 ocfs2_get_xattr_value_root, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_journal_dirty(handle, new_blk_bh);

	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
		spin_lock(&new_oi->ip_lock);
		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
		spin_unlock(&new_oi->ip_lock);

		ocfs2_journal_dirty(handle, args->new_bh);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	ocfs2_free_alloc_context(meta_ac);
	return ret;
}

/* State shared by the callbacks used while reflinking an indexed tree. */
struct ocfs2_reflink_xattr_tree_args {
	struct ocfs2_xattr_reflink *reflink;
	struct buffer_head *old_blk_bh;
	struct buffer_head *new_blk_bh;
	struct ocfs2_xattr_bucket *old_bucket;
	struct ocfs2_xattr_bucket *new_bucket;
};

/*
 * NOTE:
 * We have to handle the case that both old bucket and new bucket
 * will call this function to get the right ret_bh.
 * So the caller must give us the right bh.
 */
/*
 * Callback for ocfs2_reflink_xattr_header(): map @bh to the old or new
 * bucket (compared against the old bucket's first block) and fetch the
 * value root at @offset from that bucket.
 */
static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
					      struct buffer_head *bh,
					      struct ocfs2_xattr_header *xh,
					      int offset,
					      struct ocfs2_xattr_value_root **xv,
					      struct buffer_head **ret_bh,
					      void *para)
{
	struct ocfs2_reflink_xattr_tree_args *args =
			(struct ocfs2_reflink_xattr_tree_args *)para;
	struct ocfs2_xattr_bucket *bucket;

	if (bh == args->old_bucket->bu_bhs[0])
		bucket = args->old_bucket;
	else
		bucket = args->new_bucket;

	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
					       xv, ret_bh);
}

/* Accumulator for metadata blocks / credits / refcount recs needed. */
struct ocfs2_value_tree_metas {
	int num_metas;
	int credits;
	int num_recs;
};

/*
 * Callback adapter: @para is the bucket holding the header, so resolve
 * the value root through the bucket rather than through @bh.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					    struct buffer_head *bh,
					    struct ocfs2_xattr_header *xh,
					    int offset,
					    struct ocfs2_xattr_value_root **xv,
					    struct buffer_head **ret_bh,
					    void *para)
{
	struct ocfs2_xattr_bucket *bucket =
				(struct ocfs2_xattr_bucket *)para;

	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
					       xv, ret_bh);
}

/*
 * Per-bucket iterator: accumulate into @para the metadata and journal
 * credits needed to reflink every xattr value rooted in this bucket.
 */
static int ocfs2_calc_value_tree_metas(struct inode *inode,
				       struct ocfs2_xattr_bucket *bucket,
				       void *para)
{
	struct ocfs2_value_tree_metas *metas =
			(struct ocfs2_value_tree_metas *)para;
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;

	/* Add the credits for this bucket first. */
	metas->credits += bucket->bu_blocks;
	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
					xh, &metas->num_metas,
					&metas->credits, &metas->num_recs,
					ocfs2_value_tree_metas_in_bucket,
					bucket);
}

/*
 * Given a xattr extent rec starting from blkno and having len clusters,
 * iterate all the buckets calculate how much metadata we need for reflinking
 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
 */
static int ocfs2_lock_reflink_xattr_rec_allocators(
				struct ocfs2_reflink_xattr_tree_args *args,
				struct ocfs2_extent_tree *xt_et,
				u64 blkno, u32 len, int *credits,
				struct ocfs2_alloc_context **meta_ac,
				struct ocfs2_alloc_context **data_ac)
{
	int ret, num_free_extents;
	struct ocfs2_value_tree_metas metas;
	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
	struct ocfs2_refcount_block *rb;

	memset(&metas, 0, sizeof(metas));

	/* Sum up value-tree metadata needs over every bucket in the rec. */
	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
					  ocfs2_calc_value_tree_metas, &metas);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	*credits = metas.credits;

	/*
	 * Calculate we need for refcount tree change.
	 *
	 * We need to add/modify num_recs in refcount tree, so just calculate
	 * an approximate number we need for refcount tree change.
	 * Sometimes we need to split the tree, and after split, half recs
	 * will be moved to the new block, and a new block can only provide
	 * half number of recs. So we multiple new blocks by 2.
	 * In the end, we have to add credits for modifying the already
	 * existed refcount block.
	 */
	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
	metas.num_recs =
		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
	metas.num_metas += metas.num_recs;
	*credits += metas.num_recs +
		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
	else
		*credits += 1;

	/* count in the xattr tree change. */
	num_free_extents = ocfs2_num_free_extents(xt_et);
	if (num_free_extents < 0) {
		ret = num_free_extents;
		mlog_errno(ret);
		goto out;
	}

	if (num_free_extents < len)
		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);

	*credits += ocfs2_calc_extend_credits(osb->sb,
					      xt_et->et_root_el);

	if (metas.num_metas) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
							meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (len) {
		ret = ocfs2_reserve_clusters(osb, len, data_ac);
		if (ret)
			mlog_errno(ret);
	}
out:
	/* On failure release the metadata reservation; data_ac is reserved
	 * last, so it can only be set on the success path. */
	if (ret) {
		if (*meta_ac) {
			ocfs2_free_alloc_context(*meta_ac);
			*meta_ac = NULL;
		}
	}

	return ret;
}
/*
 * Copy @num_buckets xattr buckets from @blkno to @new_blkno inside the
 * running transaction, refcounting externally stored values as we go.
 * On the first bucket, *cpos is set to its first entry's name hash and
 * xh_num_buckets is stamped into the new first bucket.
 */
static int ocfs2_reflink_xattr_bucket(handle_t *handle,
				u64 blkno, u64 new_blkno, u32 clusters,
				u32 *cpos, int num_buckets,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				struct ocfs2_reflink_xattr_tree_args *args)
{
	int i, j, ret = 0;
	struct super_block *sb = args->reflink->old_inode->i_sb;
	int bpb = args->old_bucket->bu_blocks;
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};

	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_xattr_bucket_journal_access(handle,
						args->new_bucket,
						OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/* Raw block-by-block copy of the whole bucket. */
		for (j = 0; j < bpb; j++)
			memcpy(bucket_block(args->new_bucket, j),
			       bucket_block(args->old_bucket, j),
			       sb->s_blocksize);

		/*
		 * Record the start cpos so that we can use it to initialize
		 * our xattr tree we also set the xh_num_bucket for the new
		 * bucket.
		 */
		if (i == 0) {
			*cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
					    xh_entries[0].xe_name_hash);
			bucket_xh(args->new_bucket)->xh_num_buckets =
				cpu_to_le16(num_buckets);
		}

		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);

		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
						 args->old_bucket->bu_bhs[0],
						 bucket_xh(args->old_bucket),
						 args->new_bucket->bu_bhs[0],
						 bucket_xh(args->new_bucket),
						 &vb, meta_ac,
						 ocfs2_get_reflink_xattr_value_root,
						 args);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * Re-access and dirty the bucket to calculate metaecc.
		 * Because we may extend the transaction in reflink_xattr_header
		 * which will let the already accessed block gone.
		 */
		ret = ocfs2_xattr_bucket_journal_access(handle,
						args->new_bucket,
						OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);

		ocfs2_xattr_bucket_relse(args->old_bucket);
		ocfs2_xattr_bucket_relse(args->new_bucket);
	}

	/* Release again in case we broke out of the loop mid-bucket. */
	ocfs2_xattr_bucket_relse(args->old_bucket);
	ocfs2_xattr_bucket_relse(args->new_bucket);
	return ret;
}

/*
 * Reflink all buckets of one xattr extent rec (@blkno/@cpos/@len):
 * claim clusters for the new inode, copy buckets into them, and insert
 * the new extents into the new inode's xattr tree @et.
 */
static int ocfs2_reflink_xattr_buckets(handle_t *handle,
				struct inode *inode,
				struct ocfs2_reflink_xattr_tree_args *args,
				struct ocfs2_extent_tree *et,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				u64 blkno, u32 cpos, u32 len)
{
	int ret, first_inserted = 0;
	u32 p_cluster, num_clusters, reflink_cpos = 0;
	u64 new_blkno;
	unsigned int num_buckets, reflink_buckets;
	unsigned int bpc =
		ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));

	/* The first bucket's header holds the total bucket count. */
	ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
	ocfs2_xattr_bucket_relse(args->old_bucket);

	while (len && num_buckets) {
		ret = ocfs2_claim_clusters(handle, data_ac,
					   1, &p_cluster, &num_clusters);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		reflink_buckets = min(num_buckets, bpc * num_clusters);

		ret = ocfs2_reflink_xattr_bucket(handle, blkno,
						 new_blkno, num_clusters,
						 &reflink_cpos, reflink_buckets,
						 meta_ac, data_ac, args);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * For the 1st allocated cluster, we make it use the same cpos
		 * so that the xattr tree looks the same as the original one
		 * in the most case.
		 */
		if (!first_inserted) {
			reflink_cpos = cpos;
			first_inserted = 1;
		}
		ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
					  num_clusters, 0, meta_ac);
		if (ret)
			mlog_errno(ret);

		trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
						  num_clusters, reflink_cpos);

		len -= num_clusters;
		blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		num_buckets -= reflink_buckets;
	}
out:
	return ret;
}
6990 */ 6991 if (!first_inserted) { 6992 reflink_cpos = cpos; 6993 first_inserted = 1; 6994 } 6995 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6996 num_clusters, 0, meta_ac); 6997 if (ret) 6998 mlog_errno(ret); 6999 7000 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 7001 num_clusters, reflink_cpos); 7002 7003 len -= num_clusters; 7004 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 7005 num_buckets -= reflink_buckets; 7006 } 7007 out: 7008 return ret; 7009 } 7010 7011 /* 7012 * Create the same xattr extent record in the new inode's xattr tree. 7013 */ 7014 static int ocfs2_reflink_xattr_rec(struct inode *inode, 7015 struct buffer_head *root_bh, 7016 u64 blkno, 7017 u32 cpos, 7018 u32 len, 7019 void *para) 7020 { 7021 int ret, credits = 0; 7022 handle_t *handle; 7023 struct ocfs2_reflink_xattr_tree_args *args = 7024 (struct ocfs2_reflink_xattr_tree_args *)para; 7025 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7026 struct ocfs2_alloc_context *meta_ac = NULL; 7027 struct ocfs2_alloc_context *data_ac = NULL; 7028 struct ocfs2_extent_tree et; 7029 7030 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7031 7032 ocfs2_init_xattr_tree_extent_tree(&et, 7033 INODE_CACHE(args->reflink->new_inode), 7034 args->new_blk_bh); 7035 7036 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7037 len, &credits, 7038 &meta_ac, &data_ac); 7039 if (ret) { 7040 mlog_errno(ret); 7041 goto out; 7042 } 7043 7044 handle = ocfs2_start_trans(osb, credits); 7045 if (IS_ERR(handle)) { 7046 ret = PTR_ERR(handle); 7047 mlog_errno(ret); 7048 goto out; 7049 } 7050 7051 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7052 meta_ac, data_ac, 7053 blkno, cpos, len); 7054 if (ret) 7055 mlog_errno(ret); 7056 7057 ocfs2_commit_trans(osb, handle); 7058 7059 out: 7060 if (meta_ac) 7061 ocfs2_free_alloc_context(meta_ac); 7062 if (data_ac) 7063 ocfs2_free_alloc_context(data_ac); 7064 return ret; 7065 } 7066 7067 /* 
7068 * Create reflinked xattr buckets. 7069 * We will add bucket one by one, and refcount all the xattrs in the bucket 7070 * if they are stored outside. 7071 */ 7072 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7073 struct buffer_head *blk_bh, 7074 struct buffer_head *new_blk_bh) 7075 { 7076 int ret; 7077 struct ocfs2_reflink_xattr_tree_args para; 7078 7079 memset(¶, 0, sizeof(para)); 7080 para.reflink = args; 7081 para.old_blk_bh = blk_bh; 7082 para.new_blk_bh = new_blk_bh; 7083 7084 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7085 if (!para.old_bucket) { 7086 mlog_errno(-ENOMEM); 7087 return -ENOMEM; 7088 } 7089 7090 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7091 if (!para.new_bucket) { 7092 ret = -ENOMEM; 7093 mlog_errno(ret); 7094 goto out; 7095 } 7096 7097 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7098 ocfs2_reflink_xattr_rec, 7099 ¶); 7100 if (ret) 7101 mlog_errno(ret); 7102 7103 out: 7104 ocfs2_xattr_bucket_free(para.old_bucket); 7105 ocfs2_xattr_bucket_free(para.new_bucket); 7106 return ret; 7107 } 7108 7109 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7110 struct buffer_head *blk_bh) 7111 { 7112 int ret, indexed = 0; 7113 struct buffer_head *new_blk_bh = NULL; 7114 struct ocfs2_xattr_block *xb = 7115 (struct ocfs2_xattr_block *)blk_bh->b_data; 7116 7117 7118 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7119 indexed = 1; 7120 7121 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7122 &new_blk_bh, indexed); 7123 if (ret) { 7124 mlog_errno(ret); 7125 goto out; 7126 } 7127 7128 if (!indexed) 7129 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7130 else 7131 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7132 if (ret) 7133 mlog_errno(ret); 7134 7135 out: 7136 brelse(new_blk_bh); 7137 return ret; 7138 } 7139 7140 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7141 { 7142 int type = 
/*
 * Reflink all xattrs of @old_inode into @new_inode under the refcount
 * tree lock: first the inline xattr area (if any), then the external
 * xattr block.  When @preserve_security is false, security and ACL
 * xattrs are filtered out via ocfs2_reflink_xattr_no_security().
 */
int ocfs2_reflink_xattrs(struct inode *old_inode,
			 struct buffer_head *old_bh,
			 struct inode *new_inode,
			 struct buffer_head *new_bh,
			 bool preserve_security)
{
	int ret;
	struct ocfs2_xattr_reflink args;
	struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct ocfs2_refcount_tree *ref_tree;
	struct buffer_head *ref_root_bh = NULL;

	ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
				       le64_to_cpu(di->i_refcount_loc),
				       1, &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_init_dealloc_ctxt(&dealloc);

	args.old_inode = old_inode;
	args.new_inode = new_inode;
	args.old_bh = old_bh;
	args.new_bh = new_bh;
	args.ref_ci = &ref_tree->rf_ci;
	args.ref_root_bh = ref_root_bh;
	args.dealloc = &dealloc;
	/* NULL means "reflink everything"; otherwise filter security/ACLs. */
	if (preserve_security)
		args.xattr_reflinked = NULL;
	else
		args.xattr_reflinked = ocfs2_reflink_xattr_no_security;

	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_reflink_xattr_inline(&args);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}
	}

	if (!di->i_xattr_loc)
		goto out_unlock;

	ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
	if (ret)
		mlog_errno(ret);

	brelse(blk_bh);

out_unlock:
	ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
				   ref_tree, 1);
	brelse(ref_root_bh);

	/* Flush any clusters queued for deallocation during the reflink. */
	if (ocfs2_dealloc_has_cluster(&dealloc)) {
		ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
		ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
	}

out:
	return ret;
}

/*
 * Initialize security and acl for an already created inode.
 * Used for reflink a non-preserve-security file.
 *
 * It uses common api like ocfs2_xattr_set, so the caller
 * must not hold any lock except i_rwsem.
 */
int ocfs2_init_security_and_acl(struct inode *dir,
				struct inode *inode,
				const struct qstr *qstr)
{
	int ret = 0;
	struct buffer_head *dir_bh = NULL;

	ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
	if (ret) {
		mlog_errno(ret);
		goto leave;
	}

	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto leave;
	}
	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
	if (ret)
		mlog_errno(ret);

	ocfs2_inode_unlock(dir, 0);
	brelse(dir_bh);
leave:
	return ret;
}

/*
 * 'security' attributes support
 */
static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
				    struct dentry *unused, struct inode *inode,
				    const char *name, void *buffer, size_t size)
{
	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY,
			       name, buffer, size);
}

static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
				    struct mnt_idmap *idmap,
				    struct dentry *unused, struct inode *inode,
				    const char *name, const void *value,
				    size_t size, int flags)
{
	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
			       name, value, size, flags);
}

/*
 * LSM initxattrs callback.  With @fs_info set, only capture the first
 * xattr's name/value into the ocfs2_security_xattr_info for the caller
 * to set later; otherwise set every xattr in the array immediately.
 */
static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
			    void *fs_info)
{
	struct ocfs2_security_xattr_info *si = fs_info;
	const struct xattr *xattr;
	int err = 0;

	if (si) {
		/* Duplicate the value: the LSM frees its copy after return. */
		si->value = kmemdup(xattr_array->value, xattr_array->value_len,
				    GFP_KERNEL);
		if (!si->value)
			return -ENOMEM;

		si->name = xattr_array->name;
		si->value_len = xattr_array->value_len;
		return 0;
	}

	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
		err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
				      xattr->name, xattr->value,
				      xattr->value_len, XATTR_CREATE);
		if (err)
			break;
	}
	return err;
}
/*
 * Ask the security layer for the new inode's security xattr.  With @si,
 * the xattr is captured into @si (via ocfs2_initxattrs) for the caller
 * to set later; si->enable is cleared if the LSM supplied nothing.
 * Without @si, the xattr is set on the inode immediately.
 */
int ocfs2_init_security_get(struct inode *inode,
			    struct inode *dir,
			    const struct qstr *qstr,
			    struct ocfs2_security_xattr_info *si)
{
	int ret;

	/* check whether ocfs2 support feature xattr */
	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
		return -EOPNOTSUPP;
	if (si) {
		ret = security_inode_init_security(inode, dir, qstr,
						   &ocfs2_initxattrs, si);
		/*
		 * security_inode_init_security() does not return -EOPNOTSUPP,
		 * we have to check the xattr ourselves.
		 */
		if (!ret && !si->name)
			si->enable = 0;

		return ret;
	}

	return security_inode_init_security(inode, dir, qstr,
					    &ocfs2_initxattrs, NULL);
}

/*
 * Set the previously captured security xattr (@si) on @inode inside an
 * already running transaction @handle.
 */
int ocfs2_init_security_set(handle_t *handle,
			    struct inode *inode,
			    struct buffer_head *di_bh,
			    struct ocfs2_security_xattr_info *si,
			    struct ocfs2_alloc_context *xattr_ac,
			    struct ocfs2_alloc_context *data_ac)
{
	return ocfs2_xattr_set_handle(handle, inode, di_bh,
				      OCFS2_XATTR_INDEX_SECURITY,
				      si->name, si->value, si->value_len, 0,
				      xattr_ac, data_ac);
}

const struct xattr_handler ocfs2_xattr_security_handler = {
	.prefix	= XATTR_SECURITY_PREFIX,
	.get	= ocfs2_xattr_security_get,
	.set	= ocfs2_xattr_security_set,
};

/*
 * 'trusted' attributes support
 */
static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
				   struct dentry *unused, struct inode *inode,
				   const char *name, void *buffer, size_t size)
{
	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED,
			       name, buffer, size);
}

static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
				   struct mnt_idmap *idmap,
				   struct dentry *unused, struct inode *inode,
				   const char *name, const void *value,
				   size_t size, int flags)
{
	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED,
			       name, value, size, flags);
}

const struct xattr_handler ocfs2_xattr_trusted_handler = {
	.prefix	= XATTR_TRUSTED_PREFIX,
	.get	= ocfs2_xattr_trusted_get,
	.set	= ocfs2_xattr_trusted_set,
};

/*
 * 'user' attributes support
 */
static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
				struct dentry *unused, struct inode *inode,
				const char *name, void *buffer, size_t size)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	/* user xattrs can be disabled by the nouser_xattr mount option. */
	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
		return -EOPNOTSUPP;
	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
			       buffer, size);
}
OCFS2_MOUNT_NOUSERXATTR) 7391 return -EOPNOTSUPP; 7392 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7393 buffer, size); 7394 } 7395 7396 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7397 struct mnt_idmap *idmap, 7398 struct dentry *unused, struct inode *inode, 7399 const char *name, const void *value, 7400 size_t size, int flags) 7401 { 7402 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7403 7404 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7405 return -EOPNOTSUPP; 7406 7407 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7408 name, value, size, flags); 7409 } 7410 7411 const struct xattr_handler ocfs2_xattr_user_handler = { 7412 .prefix = XATTR_USER_PREFIX, 7413 .get = ocfs2_xattr_user_get, 7414 .set = ocfs2_xattr_user_set, 7415 }; 7416