1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * xattr.c 4 * 5 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 6 * 7 * CREDITS: 8 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 10 */ 11 12 #include <linux/capability.h> 13 #include <linux/fs.h> 14 #include <linux/types.h> 15 #include <linux/slab.h> 16 #include <linux/highmem.h> 17 #include <linux/pagemap.h> 18 #include <linux/uio.h> 19 #include <linux/sched.h> 20 #include <linux/splice.h> 21 #include <linux/mount.h> 22 #include <linux/writeback.h> 23 #include <linux/falloc.h> 24 #include <linux/sort.h> 25 #include <linux/init.h> 26 #include <linux/module.h> 27 #include <linux/string.h> 28 #include <linux/security.h> 29 30 #include <cluster/masklog.h> 31 32 #include "ocfs2.h" 33 #include "alloc.h" 34 #include "blockcheck.h" 35 #include "dlmglue.h" 36 #include "file.h" 37 #include "symlink.h" 38 #include "sysfile.h" 39 #include "inode.h" 40 #include "journal.h" 41 #include "ocfs2_fs.h" 42 #include "suballoc.h" 43 #include "uptodate.h" 44 #include "buffer_head_io.h" 45 #include "super.h" 46 #include "xattr.h" 47 #include "refcounttree.h" 48 #include "acl.h" 49 #include "ocfs2_trace.h" 50 51 struct ocfs2_xattr_def_value_root { 52 struct ocfs2_xattr_value_root xv; 53 struct ocfs2_extent_rec er; 54 }; 55 56 struct ocfs2_xattr_bucket { 57 /* The inode these xattrs are associated with */ 58 struct inode *bu_inode; 59 60 /* The actual buffers that make up the bucket */ 61 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 62 63 /* How many blocks make up one bucket for this filesystem */ 64 int bu_blocks; 65 }; 66 67 struct ocfs2_xattr_set_ctxt { 68 handle_t *handle; 69 struct ocfs2_alloc_context *meta_ac; 70 struct ocfs2_alloc_context *data_ac; 71 struct ocfs2_cached_dealloc_ctxt dealloc; 72 int set_abort; 73 }; 74 75 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 76 #define 
OCFS2_XATTR_INLINE_SIZE 80 77 #define OCFS2_XATTR_HEADER_GAP 4 78 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 79 - sizeof(struct ocfs2_xattr_header) \ 80 - OCFS2_XATTR_HEADER_GAP) 81 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 82 - sizeof(struct ocfs2_xattr_block) \ 83 - sizeof(struct ocfs2_xattr_header) \ 84 - OCFS2_XATTR_HEADER_GAP) 85 86 static struct ocfs2_xattr_def_value_root def_xv = { 87 .xv.xr_list.l_count = cpu_to_le16(1), 88 }; 89 90 const struct xattr_handler *ocfs2_xattr_handlers[] = { 91 &ocfs2_xattr_user_handler, 92 &ocfs2_xattr_trusted_handler, 93 &ocfs2_xattr_security_handler, 94 NULL 95 }; 96 97 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 98 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 99 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, 100 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default, 101 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 102 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 103 }; 104 105 struct ocfs2_xattr_info { 106 int xi_name_index; 107 const char *xi_name; 108 int xi_name_len; 109 const void *xi_value; 110 size_t xi_value_len; 111 }; 112 113 struct ocfs2_xattr_search { 114 struct buffer_head *inode_bh; 115 /* 116 * xattr_bh point to the block buffer head which has extended attribute 117 * when extended attribute in inode, xattr_bh is equal to inode_bh. 
118 */ 119 struct buffer_head *xattr_bh; 120 struct ocfs2_xattr_header *header; 121 struct ocfs2_xattr_bucket *bucket; 122 void *base; 123 void *end; 124 struct ocfs2_xattr_entry *here; 125 int not_found; 126 }; 127 128 /* Operations on struct ocfs2_xa_entry */ 129 struct ocfs2_xa_loc; 130 struct ocfs2_xa_loc_operations { 131 /* 132 * Journal functions 133 */ 134 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 135 int type); 136 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 137 138 /* 139 * Return a pointer to the appropriate buffer in loc->xl_storage 140 * at the given offset from loc->xl_header. 141 */ 142 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 143 144 /* Can we reuse the existing entry for the new value? */ 145 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 146 struct ocfs2_xattr_info *xi); 147 148 /* How much space is needed for the new value? */ 149 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 150 struct ocfs2_xattr_info *xi); 151 152 /* 153 * Return the offset of the first name+value pair. This is 154 * the start of our downward-filling free space. 155 */ 156 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 157 158 /* 159 * Remove the name+value at this location. Do whatever is 160 * appropriate with the remaining name+value pairs. 161 */ 162 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 163 164 /* Fill xl_entry with a new entry */ 165 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 166 167 /* Add name+value storage to an entry */ 168 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 169 170 /* 171 * Initialize the value buf's access and bh fields for this entry. 172 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 173 */ 174 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 175 struct ocfs2_xattr_value_buf *vb); 176 }; 177 178 /* 179 * Describes an xattr entry location. 
This is a memory structure 180 * tracking the on-disk structure. 181 */ 182 struct ocfs2_xa_loc { 183 /* This xattr belongs to this inode */ 184 struct inode *xl_inode; 185 186 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 187 struct ocfs2_xattr_header *xl_header; 188 189 /* Bytes from xl_header to the end of the storage */ 190 int xl_size; 191 192 /* 193 * The ocfs2_xattr_entry this location describes. If this is 194 * NULL, this location describes the on-disk structure where it 195 * would have been. 196 */ 197 struct ocfs2_xattr_entry *xl_entry; 198 199 /* 200 * Internal housekeeping 201 */ 202 203 /* Buffer(s) containing this entry */ 204 void *xl_storage; 205 206 /* Operations on the storage backing this location */ 207 const struct ocfs2_xa_loc_operations *xl_ops; 208 }; 209 210 /* 211 * Convenience functions to calculate how much space is needed for a 212 * given name+value pair 213 */ 214 static int namevalue_size(int name_len, uint64_t value_len) 215 { 216 if (value_len > OCFS2_XATTR_INLINE_SIZE) 217 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 218 else 219 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 220 } 221 222 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 223 { 224 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 225 } 226 227 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 228 { 229 u64 value_len = le64_to_cpu(xe->xe_value_size); 230 231 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 232 ocfs2_xattr_is_local(xe)); 233 return namevalue_size(xe->xe_name_len, value_len); 234 } 235 236 237 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 238 struct ocfs2_xattr_header *xh, 239 int index, 240 int *block_off, 241 int *new_offset); 242 243 static int ocfs2_xattr_block_find(struct inode *inode, 244 int name_index, 245 const char *name, 246 struct ocfs2_xattr_search *xs); 247 static int ocfs2_xattr_index_block_find(struct inode *inode, 248 struct 
buffer_head *root_bh, 249 int name_index, 250 const char *name, 251 struct ocfs2_xattr_search *xs); 252 253 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 254 struct buffer_head *blk_bh, 255 char *buffer, 256 size_t buffer_size); 257 258 static int ocfs2_xattr_create_index_block(struct inode *inode, 259 struct ocfs2_xattr_search *xs, 260 struct ocfs2_xattr_set_ctxt *ctxt); 261 262 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 263 struct ocfs2_xattr_info *xi, 264 struct ocfs2_xattr_search *xs, 265 struct ocfs2_xattr_set_ctxt *ctxt); 266 267 typedef int (xattr_tree_rec_func)(struct inode *inode, 268 struct buffer_head *root_bh, 269 u64 blkno, u32 cpos, u32 len, void *para); 270 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 271 struct buffer_head *root_bh, 272 xattr_tree_rec_func *rec_func, 273 void *para); 274 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 275 struct ocfs2_xattr_bucket *bucket, 276 void *para); 277 static int ocfs2_rm_xattr_cluster(struct inode *inode, 278 struct buffer_head *root_bh, 279 u64 blkno, 280 u32 cpos, 281 u32 len, 282 void *para); 283 284 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 285 u64 src_blk, u64 last_blk, u64 to_blk, 286 unsigned int start_bucket, 287 u32 *first_hash); 288 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 289 struct ocfs2_dinode *di, 290 struct ocfs2_xattr_info *xi, 291 struct ocfs2_xattr_search *xis, 292 struct ocfs2_xattr_search *xbs, 293 struct ocfs2_refcount_tree **ref_tree, 294 int *meta_need, 295 int *credits); 296 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 297 struct ocfs2_xattr_bucket *bucket, 298 int offset, 299 struct ocfs2_xattr_value_root **xv, 300 struct buffer_head **bh); 301 302 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 303 { 304 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 305 } 306 307 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 308 { 309 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 310 } 311 312 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 313 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 314 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 315 316 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 317 { 318 struct ocfs2_xattr_bucket *bucket; 319 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 320 321 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 322 323 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 324 if (bucket) { 325 bucket->bu_inode = inode; 326 bucket->bu_blocks = blks; 327 } 328 329 return bucket; 330 } 331 332 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 333 { 334 int i; 335 336 for (i = 0; i < bucket->bu_blocks; i++) { 337 brelse(bucket->bu_bhs[i]); 338 bucket->bu_bhs[i] = NULL; 339 } 340 } 341 342 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 343 { 344 if (bucket) { 345 ocfs2_xattr_bucket_relse(bucket); 346 bucket->bu_inode = NULL; 347 kfree(bucket); 348 } 349 } 350 351 /* 352 * A bucket that has never been written to disk doesn't need to be 353 * read. We just need the buffer_heads. Don't call this for 354 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 355 * them fully. 
356 */ 357 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 358 u64 xb_blkno, int new) 359 { 360 int i, rc = 0; 361 362 for (i = 0; i < bucket->bu_blocks; i++) { 363 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 364 xb_blkno + i); 365 if (!bucket->bu_bhs[i]) { 366 rc = -ENOMEM; 367 mlog_errno(rc); 368 break; 369 } 370 371 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 372 bucket->bu_bhs[i])) { 373 if (new) 374 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 375 bucket->bu_bhs[i]); 376 else { 377 set_buffer_uptodate(bucket->bu_bhs[i]); 378 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 379 bucket->bu_bhs[i]); 380 } 381 } 382 } 383 384 if (rc) 385 ocfs2_xattr_bucket_relse(bucket); 386 return rc; 387 } 388 389 /* Read the xattr bucket at xb_blkno */ 390 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 391 u64 xb_blkno) 392 { 393 int rc; 394 395 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 396 bucket->bu_blocks, bucket->bu_bhs, 0, 397 NULL); 398 if (!rc) { 399 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 400 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 401 bucket->bu_bhs, 402 bucket->bu_blocks, 403 &bucket_xh(bucket)->xh_check); 404 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 405 if (rc) 406 mlog_errno(rc); 407 } 408 409 if (rc) 410 ocfs2_xattr_bucket_relse(bucket); 411 return rc; 412 } 413 414 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 415 struct ocfs2_xattr_bucket *bucket, 416 int type) 417 { 418 int i, rc = 0; 419 420 for (i = 0; i < bucket->bu_blocks; i++) { 421 rc = ocfs2_journal_access(handle, 422 INODE_CACHE(bucket->bu_inode), 423 bucket->bu_bhs[i], type); 424 if (rc) { 425 mlog_errno(rc); 426 break; 427 } 428 } 429 430 return rc; 431 } 432 433 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 434 struct ocfs2_xattr_bucket *bucket) 435 { 436 int i; 437 438 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 439 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 440 bucket->bu_bhs, bucket->bu_blocks, 441 &bucket_xh(bucket)->xh_check); 442 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 443 444 for (i = 0; i < bucket->bu_blocks; i++) 445 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 446 } 447 448 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 449 struct ocfs2_xattr_bucket *src) 450 { 451 int i; 452 int blocksize = src->bu_inode->i_sb->s_blocksize; 453 454 BUG_ON(dest->bu_blocks != src->bu_blocks); 455 BUG_ON(dest->bu_inode != src->bu_inode); 456 457 for (i = 0; i < src->bu_blocks; i++) { 458 memcpy(bucket_block(dest, i), bucket_block(src, i), 459 blocksize); 460 } 461 } 462 463 static int ocfs2_validate_xattr_block(struct super_block *sb, 464 struct buffer_head *bh) 465 { 466 int rc; 467 struct ocfs2_xattr_block *xb = 468 (struct ocfs2_xattr_block *)bh->b_data; 469 470 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 471 472 BUG_ON(!buffer_uptodate(bh)); 473 474 /* 475 * If the ecc fails, we return the error but otherwise 476 * leave the filesystem running. We know any error is 477 * local to this block. 
478 */ 479 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 480 if (rc) 481 return rc; 482 483 /* 484 * Errors after here are fatal 485 */ 486 487 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 488 return ocfs2_error(sb, 489 "Extended attribute block #%llu has bad signature %.*s\n", 490 (unsigned long long)bh->b_blocknr, 7, 491 xb->xb_signature); 492 } 493 494 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 495 return ocfs2_error(sb, 496 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 497 (unsigned long long)bh->b_blocknr, 498 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 499 } 500 501 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 502 return ocfs2_error(sb, 503 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 504 (unsigned long long)bh->b_blocknr, 505 le32_to_cpu(xb->xb_fs_generation)); 506 } 507 508 return 0; 509 } 510 511 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 512 struct buffer_head **bh) 513 { 514 int rc; 515 struct buffer_head *tmp = *bh; 516 517 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 518 ocfs2_validate_xattr_block); 519 520 /* If ocfs2_read_block() got us a new bh, pass it up. */ 521 if (!rc && !*bh) 522 *bh = tmp; 523 524 return rc; 525 } 526 527 static inline const char *ocfs2_xattr_prefix(int name_index) 528 { 529 const struct xattr_handler *handler = NULL; 530 531 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 532 handler = ocfs2_xattr_handler_map[name_index]; 533 return handler ? 
xattr_prefix(handler) : NULL; 534 } 535 536 static u32 ocfs2_xattr_name_hash(struct inode *inode, 537 const char *name, 538 int name_len) 539 { 540 /* Get hash value of uuid from super block */ 541 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 542 int i; 543 544 /* hash extended attribute name */ 545 for (i = 0; i < name_len; i++) { 546 hash = (hash << OCFS2_HASH_SHIFT) ^ 547 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 548 *name++; 549 } 550 551 return hash; 552 } 553 554 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 555 { 556 return namevalue_size(name_len, value_len) + 557 sizeof(struct ocfs2_xattr_entry); 558 } 559 560 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 561 { 562 return namevalue_size_xi(xi) + 563 sizeof(struct ocfs2_xattr_entry); 564 } 565 566 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 567 { 568 return namevalue_size_xe(xe) + 569 sizeof(struct ocfs2_xattr_entry); 570 } 571 572 int ocfs2_calc_security_init(struct inode *dir, 573 struct ocfs2_security_xattr_info *si, 574 int *want_clusters, 575 int *xattr_credits, 576 struct ocfs2_alloc_context **xattr_ac) 577 { 578 int ret = 0; 579 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 580 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 581 si->value_len); 582 583 /* 584 * The max space of security xattr taken inline is 585 * 256(name) + 80(value) + 16(entry) = 352 bytes, 586 * So reserve one metadata block for it is ok. 
587 */ 588 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 589 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 590 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 591 if (ret) { 592 mlog_errno(ret); 593 return ret; 594 } 595 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 596 } 597 598 /* reserve clusters for xattr value which will be set in B tree*/ 599 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 600 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 601 si->value_len); 602 603 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 604 new_clusters); 605 *want_clusters += new_clusters; 606 } 607 return ret; 608 } 609 610 int ocfs2_calc_xattr_init(struct inode *dir, 611 struct buffer_head *dir_bh, 612 umode_t mode, 613 struct ocfs2_security_xattr_info *si, 614 int *want_clusters, 615 int *xattr_credits, 616 int *want_meta) 617 { 618 int ret = 0; 619 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 620 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 621 622 if (si->enable) 623 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 624 si->value_len); 625 626 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 627 down_read(&OCFS2_I(dir)->ip_xattr_sem); 628 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 629 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 630 "", NULL, 0); 631 up_read(&OCFS2_I(dir)->ip_xattr_sem); 632 if (acl_len > 0) { 633 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 634 if (S_ISDIR(mode)) 635 a_size <<= 1; 636 } else if (acl_len != 0 && acl_len != -ENODATA) { 637 ret = acl_len; 638 mlog_errno(ret); 639 return ret; 640 } 641 } 642 643 if (!(s_size + a_size)) 644 return ret; 645 646 /* 647 * The max space of security xattr taken inline is 648 * 256(name) + 80(value) + 16(entry) = 352 bytes, 649 * The max space of acl xattr taken inline is 650 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 651 * when blocksize = 512, may reserve one more cluser for 652 * xattr bucket, otherwise reserve one metadata block 653 * for them is ok. 
654 * If this is a new directory with inline data, 655 * we choose to reserve the entire inline area for 656 * directory contents and force an external xattr block. 657 */ 658 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 659 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 660 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 661 *want_meta = *want_meta + 1; 662 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 663 } 664 665 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 666 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 667 *want_clusters += 1; 668 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 669 } 670 671 /* 672 * reserve credits and clusters for xattrs which has large value 673 * and have to be set outside 674 */ 675 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 676 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 677 si->value_len); 678 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 679 new_clusters); 680 *want_clusters += new_clusters; 681 } 682 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 683 acl_len > OCFS2_XATTR_INLINE_SIZE) { 684 /* for directory, it has DEFAULT and ACCESS two types of acls */ 685 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 686 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 687 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 688 new_clusters); 689 *want_clusters += new_clusters; 690 } 691 692 return ret; 693 } 694 695 static int ocfs2_xattr_extend_allocation(struct inode *inode, 696 u32 clusters_to_add, 697 struct ocfs2_xattr_value_buf *vb, 698 struct ocfs2_xattr_set_ctxt *ctxt) 699 { 700 int status = 0, credits; 701 handle_t *handle = ctxt->handle; 702 enum ocfs2_alloc_restarted why; 703 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 704 struct ocfs2_extent_tree et; 705 706 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 707 708 while (clusters_to_add) { 709 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 710 711 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 712 OCFS2_JOURNAL_ACCESS_WRITE); 713 if (status < 0) { 714 mlog_errno(status); 715 break; 716 } 717 718 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 719 status = ocfs2_add_clusters_in_btree(handle, 720 &et, 721 &logical_start, 722 clusters_to_add, 723 0, 724 ctxt->data_ac, 725 ctxt->meta_ac, 726 &why); 727 if ((status < 0) && (status != -EAGAIN)) { 728 if (status != -ENOSPC) 729 mlog_errno(status); 730 break; 731 } 732 733 ocfs2_journal_dirty(handle, vb->vb_bh); 734 735 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 736 prev_clusters; 737 738 if (why != RESTART_NONE && clusters_to_add) { 739 /* 740 * We can only fail in case the alloc file doesn't give 741 * up enough clusters. 
742 */ 743 BUG_ON(why == RESTART_META); 744 745 credits = ocfs2_calc_extend_credits(inode->i_sb, 746 &vb->vb_xv->xr_list); 747 status = ocfs2_extend_trans(handle, credits); 748 if (status < 0) { 749 status = -ENOMEM; 750 mlog_errno(status); 751 break; 752 } 753 } 754 } 755 756 return status; 757 } 758 759 static int __ocfs2_remove_xattr_range(struct inode *inode, 760 struct ocfs2_xattr_value_buf *vb, 761 u32 cpos, u32 phys_cpos, u32 len, 762 unsigned int ext_flags, 763 struct ocfs2_xattr_set_ctxt *ctxt) 764 { 765 int ret; 766 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 767 handle_t *handle = ctxt->handle; 768 struct ocfs2_extent_tree et; 769 770 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 771 772 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 773 OCFS2_JOURNAL_ACCESS_WRITE); 774 if (ret) { 775 mlog_errno(ret); 776 goto out; 777 } 778 779 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 780 &ctxt->dealloc); 781 if (ret) { 782 mlog_errno(ret); 783 goto out; 784 } 785 786 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 787 ocfs2_journal_dirty(handle, vb->vb_bh); 788 789 if (ext_flags & OCFS2_EXT_REFCOUNTED) 790 ret = ocfs2_decrease_refcount(inode, handle, 791 ocfs2_blocks_to_clusters(inode->i_sb, 792 phys_blkno), 793 len, ctxt->meta_ac, &ctxt->dealloc, 1); 794 else 795 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 796 phys_blkno, len); 797 if (ret) 798 mlog_errno(ret); 799 800 out: 801 return ret; 802 } 803 804 static int ocfs2_xattr_shrink_size(struct inode *inode, 805 u32 old_clusters, 806 u32 new_clusters, 807 struct ocfs2_xattr_value_buf *vb, 808 struct ocfs2_xattr_set_ctxt *ctxt) 809 { 810 int ret = 0; 811 unsigned int ext_flags; 812 u32 trunc_len, cpos, phys_cpos, alloc_size; 813 u64 block; 814 815 if (old_clusters <= new_clusters) 816 return 0; 817 818 cpos = new_clusters; 819 trunc_len = old_clusters - new_clusters; 820 while (trunc_len) { 821 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 822 &alloc_size, 823 &vb->vb_xv->xr_list, &ext_flags); 824 if (ret) { 825 mlog_errno(ret); 826 goto out; 827 } 828 829 if (alloc_size > trunc_len) 830 alloc_size = trunc_len; 831 832 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 833 phys_cpos, alloc_size, 834 ext_flags, ctxt); 835 if (ret) { 836 mlog_errno(ret); 837 goto out; 838 } 839 840 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 841 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 842 block, alloc_size); 843 cpos += alloc_size; 844 trunc_len -= alloc_size; 845 } 846 847 out: 848 return ret; 849 } 850 851 static int ocfs2_xattr_value_truncate(struct inode *inode, 852 struct ocfs2_xattr_value_buf *vb, 853 int len, 854 struct ocfs2_xattr_set_ctxt *ctxt) 855 { 856 int ret; 857 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 858 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 859 860 if (new_clusters == old_clusters) 861 return 0; 862 863 if (new_clusters > old_clusters) 864 ret = ocfs2_xattr_extend_allocation(inode, 865 new_clusters - old_clusters, 866 vb, ctxt); 867 else 868 ret = ocfs2_xattr_shrink_size(inode, 869 old_clusters, new_clusters, 870 vb, ctxt); 871 872 return ret; 873 } 874 875 static int ocfs2_xattr_list_entry(struct super_block *sb, 876 char *buffer, size_t size, 877 size_t *result, int type, 878 const char *name, int name_len) 879 { 880 char *p = buffer + *result; 881 const char *prefix; 882 int prefix_len; 883 int total_len; 884 885 switch(type) { 886 case OCFS2_XATTR_INDEX_USER: 887 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 888 return 0; 889 break; 890 891 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 892 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 893 if (!(sb->s_flags & SB_POSIXACL)) 894 return 0; 895 break; 896 897 case OCFS2_XATTR_INDEX_TRUSTED: 898 if (!capable(CAP_SYS_ADMIN)) 899 return 0; 900 break; 901 } 902 903 prefix = ocfs2_xattr_prefix(type); 904 if (!prefix) 905 return 0; 906 prefix_len = strlen(prefix); 907 
total_len = prefix_len + name_len + 1; 908 *result += total_len; 909 910 /* we are just looking for how big our buffer needs to be */ 911 if (!size) 912 return 0; 913 914 if (*result > size) 915 return -ERANGE; 916 917 memcpy(p, prefix, prefix_len); 918 memcpy(p + prefix_len, name, name_len); 919 p[prefix_len + name_len] = '\0'; 920 921 return 0; 922 } 923 924 static int ocfs2_xattr_list_entries(struct inode *inode, 925 struct ocfs2_xattr_header *header, 926 char *buffer, size_t buffer_size) 927 { 928 size_t result = 0; 929 int i, type, ret; 930 const char *name; 931 932 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 933 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 934 type = ocfs2_xattr_get_type(entry); 935 name = (const char *)header + 936 le16_to_cpu(entry->xe_name_offset); 937 938 ret = ocfs2_xattr_list_entry(inode->i_sb, 939 buffer, buffer_size, 940 &result, type, name, 941 entry->xe_name_len); 942 if (ret) 943 return ret; 944 } 945 946 return result; 947 } 948 949 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 950 struct ocfs2_dinode *di) 951 { 952 struct ocfs2_xattr_header *xh; 953 int i; 954 955 xh = (struct ocfs2_xattr_header *) 956 ((void *)di + inode->i_sb->s_blocksize - 957 le16_to_cpu(di->i_xattr_inline_size)); 958 959 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 960 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 961 return 1; 962 963 return 0; 964 } 965 966 static int ocfs2_xattr_ibody_list(struct inode *inode, 967 struct ocfs2_dinode *di, 968 char *buffer, 969 size_t buffer_size) 970 { 971 struct ocfs2_xattr_header *header = NULL; 972 struct ocfs2_inode_info *oi = OCFS2_I(inode); 973 int ret = 0; 974 975 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 976 return ret; 977 978 header = (struct ocfs2_xattr_header *) 979 ((void *)di + inode->i_sb->s_blocksize - 980 le16_to_cpu(di->i_xattr_inline_size)); 981 982 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 983 984 return ret; 985 } 986 987 
static int ocfs2_xattr_block_list(struct inode *inode, 988 struct ocfs2_dinode *di, 989 char *buffer, 990 size_t buffer_size) 991 { 992 struct buffer_head *blk_bh = NULL; 993 struct ocfs2_xattr_block *xb; 994 int ret = 0; 995 996 if (!di->i_xattr_loc) 997 return ret; 998 999 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1000 &blk_bh); 1001 if (ret < 0) { 1002 mlog_errno(ret); 1003 return ret; 1004 } 1005 1006 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1007 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1008 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1009 ret = ocfs2_xattr_list_entries(inode, header, 1010 buffer, buffer_size); 1011 } else 1012 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1013 buffer, buffer_size); 1014 1015 brelse(blk_bh); 1016 1017 return ret; 1018 } 1019 1020 ssize_t ocfs2_listxattr(struct dentry *dentry, 1021 char *buffer, 1022 size_t size) 1023 { 1024 int ret = 0, i_ret = 0, b_ret = 0; 1025 struct buffer_head *di_bh = NULL; 1026 struct ocfs2_dinode *di = NULL; 1027 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1028 1029 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1030 return -EOPNOTSUPP; 1031 1032 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1033 return ret; 1034 1035 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1036 if (ret < 0) { 1037 mlog_errno(ret); 1038 return ret; 1039 } 1040 1041 di = (struct ocfs2_dinode *)di_bh->b_data; 1042 1043 down_read(&oi->ip_xattr_sem); 1044 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1045 if (i_ret < 0) 1046 b_ret = 0; 1047 else { 1048 if (buffer) { 1049 buffer += i_ret; 1050 size -= i_ret; 1051 } 1052 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1053 buffer, size); 1054 if (b_ret < 0) 1055 i_ret = 0; 1056 } 1057 up_read(&oi->ip_xattr_sem); 1058 ocfs2_inode_unlock(d_inode(dentry), 0); 1059 1060 brelse(di_bh); 1061 1062 return i_ret + b_ret; 1063 } 1064 1065 static int 
ocfs2_xattr_find_entry(int name_index,
		       const char *name,
		       struct ocfs2_xattr_search *xs)
{
	/*
	 * Scan the entries counted by xs->header->xh_count, starting at
	 * xs->here, for one whose type index, name length and name bytes
	 * all match.  Returns 0 with xs->here on the match, -ENODATA when
	 * nothing matched (xs->here is then past the entries examined),
	 * or -EINVAL when name is NULL.
	 *
	 * NOTE(review): neither the entry pointer nor the name offset is
	 * checked against xs->end here; presumably the header has been
	 * validated elsewhere -- confirm.
	 */
	struct ocfs2_xattr_entry *entry;
	size_t name_len;
	int i, cmp = 1;

	if (name == NULL)
		return -EINVAL;

	name_len = strlen(name);
	entry = xs->here;
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		/* Cheapest comparisons first: type, then length, then bytes. */
		cmp = name_index - ocfs2_xattr_get_type(entry);
		if (!cmp)
			cmp = name_len - entry->xe_name_len;
		if (!cmp)
			cmp = memcmp(name, (xs->base +
				     le16_to_cpu(entry->xe_name_offset)),
				     name_len);
		if (cmp == 0)
			break;
		entry += 1;
	}
	xs->here = entry;

	return cmp ? -ENODATA : 0;
}

/*
 * Copy up to @len bytes of an externally stored value into @buffer.
 *
 * The value's clusters are looked up through the extent list in @xv and
 * read one block at a time; at most one block's worth of data is copied
 * from each block.  Returns 0 on success or the error reported by the
 * cluster lookup or the block read.
 */
static int ocfs2_xattr_get_value_outside(struct inode *inode,
					 struct ocfs2_xattr_value_root *xv,
					 void *buffer,
					 size_t len)
{
	u32 cpos, p_cluster, num_clusters, bpc, clusters;
	u64 blkno;
	int i, ret = 0;
	size_t cplen, blocksize;
	struct buffer_head *bh = NULL;
	struct ocfs2_extent_list *el;

	el = &xv->xr_list;
	clusters = le32_to_cpu(xv->xr_clusters);
	/* bpc: number of blocks in one cluster */
	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	blocksize = inode->i_sb->s_blocksize;

	cpos = 0;
	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, el, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		/* Copy ocfs2_xattr_value */
		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
					       &bh, NULL);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			/* Copy a full block, or whatever is still wanted. */
			cplen = len >= blocksize ? blocksize : len;
			memcpy(buffer, bh->b_data, cplen);
			len -= cplen;
			buffer += cplen;

			brelse(bh);
			bh = NULL;
			/* Only leaves the inner loop; the outer loop goes on. */
			if (len == 0)
				break;
		}
		cpos += num_clusters;
	}
out:
	return ret;
}

/*
 * Look up @name in the xattr area stored inside the inode block.
 *
 * Sets up xs->end/header/base/here from the dinode in xs->inode_bh and
 * searches it.  Returns the value size on success; when @buffer is
 * non-NULL the value is also copied into it.  Returns -ENODATA when the
 * inode has no inline xattrs or no entry matches, -ERANGE when the value
 * is larger than @buffer_size, or another negative error from the
 * lookup or the external read.
 */
static int ocfs2_xattr_ibody_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return -ENODATA;

	/* The inline area is the last i_xattr_inline_size bytes of the block. */
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	ret = ocfs2_xattr_find_entry(name_index, name, xs);
	if (ret)
		return ret;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		if (size > buffer_size)
			return -ERANGE;
		if (ocfs2_xattr_is_local(xs->here)) {
			/* Local value: it follows the padded name. */
			memcpy(buffer, (void *)xs->base +
			       le16_to_cpu(xs->here->xe_name_offset) +
			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
		} else {
			/* External value: a value root follows the padded name. */
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + le16_to_cpu(
				 xs->here->xe_name_offset) +
				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				return ret;
			}
		}
	}

	return size;
}

/*
 * Look up @name in the inode's external xattr block.
 *
 * Allocates xs->bucket, runs ocfs2_xattr_block_find() and, when @buffer
 * is non-NULL, copies the value out.  Returns the value size on success,
 * -ENODATA when xs->not_found is set, -ERANGE when the value is larger
 * than @buffer_size, -ENOMEM when the bucket cannot be allocated, or
 * another negative error.  On every path xs->bucket is freed and
 * xs->xattr_bh is released and cleared.
 */
static int ocfs2_xattr_block_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = -ENODATA, name_offset, name_len, i;
	int block_off;

	xs->bucket = ocfs2_xattr_bucket_new(inode);
	if (!xs->bucket) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto cleanup;
	}

	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
	if (ret) {
		mlog_errno(ret);
		goto cleanup;
	}

	if (xs->not_found) {
		ret = -ENODATA;
		goto cleanup;
	}

	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		ret = -ERANGE;
		if (size > buffer_size)
			goto cleanup;

		name_offset = le16_to_cpu(xs->here->xe_name_offset);
		/* name_len is the padded name size, not xe_name_len itself. */
		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
		i = xs->here - xs->header->xh_entries;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * Indexed block: ask the bucket which of its blocks
			 * holds entry i and re-base the search on that block.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
								bucket_xh(xs->bucket),
								i,
								&block_off,
								&name_offset);
			if (ret) {
				mlog_errno(ret);
				goto cleanup;
			}
			xs->base = bucket_block(xs->bucket, block_off);
		}
		if (ocfs2_xattr_is_local(xs->here)) {
			memcpy(buffer, (void *)xs->base +
			       name_offset + name_len, size);
		} else {
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + name_offset + name_len);
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				goto cleanup;
			}
		}
	}
	ret = size;
cleanup:
	ocfs2_xattr_bucket_free(xs->bucket);

	brelse(xs->xattr_bh);
	xs->xattr_bh = NULL;
	return ret;
}

/*
 * Read an xattr using an inode buffer supplied by the caller.
 *
 * This function takes no locks itself; ocfs2_xattr_get() takes the inode
 * lock and ip_xattr_sem around it.  The inode body is searched first and
 * the external block only when that search returned -ENODATA and
 * di->i_xattr_loc is non-zero.  Returns the value size, -EOPNOTSUPP when
 * the filesystem does not support xattrs, -ENODATA when the inode has no
 * xattrs or the name is not found, or another negative error.
 */
int ocfs2_xattr_get_nolock(struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	int ret;
	struct ocfs2_dinode *di = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};
	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return -ENODATA;

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
				    buffer_size, &xis);
	if (ret == -ENODATA && di->i_xattr_loc)
		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
					    buffer_size, &xbs);

	return ret;
}

/* ocfs2_xattr_get()
 *
 * Copy an extended attribute into the buffer provided.
 * Buffer is NULL to compute the size of buffer required.
 *
 * Takes the inode lock through the lock tracker and ip_xattr_sem for
 * reading around ocfs2_xattr_get_nolock(), then drops both and releases
 * the inode buffer.  Returns what ocfs2_xattr_get_nolock() returned, or
 * the negative error from taking the inode lock.
 */
static int ocfs2_xattr_get(struct inode *inode,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	int ret, had_lock;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_lock_holder oh;

	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
	if (had_lock < 0) {
		mlog_errno(had_lock);
		return had_lock;
	}
	down_read(&OCFS2_I(inode)->ip_xattr_sem);
	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
				     name, buffer, buffer_size);
	up_read(&OCFS2_I(inode)->ip_xattr_sem);

	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);

	brelse(di_bh);

	return ret;
}

/*
 * Write @value_len bytes of @value into the clusters already attached
 * to vb->vb_xv.
 *
 * Each block is read, opened for journaled write, filled, and dirtied in
 * @handle; the unused tail of a partially filled block is zeroed.
 * BUG()s if the value needs more clusters than xr_clusters records or if
 * an extent is flagged refcounted.  Returns 0 or a negative error from
 * the cluster lookup, block read or journal access.
 */
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   handle_t *handle,
					   struct ocfs2_xattr_value_buf *vb,
					   const void *value,
					   int value_len)
{
	int ret = 0, i, cp_len;
	u16 blocksize = inode->i_sb->s_blocksize;
	u32 p_cluster, num_clusters;
	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
	u64 blkno;
	struct buffer_head *bh = NULL;
	unsigned int ext_flags;
	struct ocfs2_xattr_value_root *xv = vb->vb_xv;

	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));

	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, &xv->xr_list,
					       &ext_flags);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
					       &bh, NULL);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_journal_access(handle,
						   INODE_CACHE(inode),
						   bh,
						   OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret < 0) {
				mlog_errno(ret);
				/* bh is still held here; out: releases it. */
				goto out;
			}

			cp_len = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, cp_len);
			value_len -= cp_len;
			value += cp_len;
			if (cp_len < blocksize)
				memset(bh->b_data + cp_len, 0,
				       blocksize - cp_len);

			ocfs2_journal_dirty(handle, bh);
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: do we need to empty all the following
			 * blocks in this cluster?
			 */
			if (!value_len)
				break;
		}
		cpos += num_clusters;
	}
out:
	brelse(bh);

	return ret;
}

/*
 * Decide whether @needed_space bytes fit below @free_start once the
 * header, @num_entries entry slots and the header gap are accounted for.
 * Returns 0 when nothing is needed or it fits, -EIO when the computed
 * free space is negative, and -ENOSPC when it is too small.
 */
static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
				       int num_entries)
{
	int free_space;

	if (!needed_space)
		return 0;

	free_space = free_start -
		sizeof(struct ocfs2_xattr_header) -
		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
		OCFS2_XATTR_HEADER_GAP;
	if (free_space < 0)
		return -EIO;
	if (free_space < needed_space)
		return -ENOSPC;

	return 0;
}

/* Dispatch journal access for the storage behind @loc. */
static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
				   int type)
{
	return loc->xl_ops->xlo_journal_access(handle, loc, type);
}

/* Dispatch journal dirtying for the storage behind @loc. */
static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
{
	loc->xl_ops->xlo_journal_dirty(handle, loc);
}

/* Give a pointer into the storage for the given offset */
static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
{
	/* An offset at or beyond the storage size is a programming error. */
	BUG_ON(offset >= loc->xl_size);
	return loc->xl_ops->xlo_offset_pointer(loc, offset);
}

/*
 * Wipe the name+value pair and allow the storage to reclaim it. This
 * must be followed by either removal of the entry or a call to
 * ocfs2_xa_add_namevalue().
 */
static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	loc->xl_ops->xlo_wipe_namevalue(loc);
}

/*
 * Find lowest offset to a name+value pair. This is the start of our
 * downward-growing free space.
 */
static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
{
	return loc->xl_ops->xlo_get_free_start(loc);
}

/* Can we reuse loc->xl_entry for xi?
*/ 1474 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1475 struct ocfs2_xattr_info *xi) 1476 { 1477 return loc->xl_ops->xlo_can_reuse(loc, xi); 1478 } 1479 1480 /* How much free space is needed to set the new value */ 1481 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1482 struct ocfs2_xattr_info *xi) 1483 { 1484 return loc->xl_ops->xlo_check_space(loc, xi); 1485 } 1486 1487 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1488 { 1489 loc->xl_ops->xlo_add_entry(loc, name_hash); 1490 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1491 /* 1492 * We can't leave the new entry's xe_name_offset at zero or 1493 * add_namevalue() will go nuts. We set it to the size of our 1494 * storage so that it can never be less than any other entry. 1495 */ 1496 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1497 } 1498 1499 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1500 struct ocfs2_xattr_info *xi) 1501 { 1502 int size = namevalue_size_xi(xi); 1503 int nameval_offset; 1504 char *nameval_buf; 1505 1506 loc->xl_ops->xlo_add_namevalue(loc, size); 1507 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1508 loc->xl_entry->xe_name_len = xi->xi_name_len; 1509 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1510 ocfs2_xattr_set_local(loc->xl_entry, 1511 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1512 1513 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1514 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1515 memset(nameval_buf, 0, size); 1516 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1517 } 1518 1519 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1520 struct ocfs2_xattr_value_buf *vb) 1521 { 1522 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1523 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1524 1525 /* Value bufs are for value trees */ 1526 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1527 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1528 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1529 1530 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1531 vb->vb_xv = 1532 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1533 nameval_offset + 1534 name_size); 1535 } 1536 1537 static int ocfs2_xa_block_journal_access(handle_t *handle, 1538 struct ocfs2_xa_loc *loc, int type) 1539 { 1540 struct buffer_head *bh = loc->xl_storage; 1541 ocfs2_journal_access_func access; 1542 1543 if (loc->xl_size == (bh->b_size - 1544 offsetof(struct ocfs2_xattr_block, 1545 xb_attrs.xb_header))) 1546 access = ocfs2_journal_access_xb; 1547 else 1548 access = ocfs2_journal_access_di; 1549 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1550 } 1551 1552 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1553 struct ocfs2_xa_loc *loc) 1554 { 1555 struct buffer_head *bh = loc->xl_storage; 1556 1557 ocfs2_journal_dirty(handle, bh); 1558 } 1559 1560 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1561 int offset) 1562 { 1563 return (char *)loc->xl_header + offset; 1564 } 1565 1566 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1567 struct ocfs2_xattr_info *xi) 1568 { 1569 /* 1570 * Block storage is strict. If the sizes aren't exact, we will 1571 * remove the old one and reinsert the new. 
1572 */ 1573 return namevalue_size_xe(loc->xl_entry) == 1574 namevalue_size_xi(xi); 1575 } 1576 1577 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1578 { 1579 struct ocfs2_xattr_header *xh = loc->xl_header; 1580 int i, count = le16_to_cpu(xh->xh_count); 1581 int offset, free_start = loc->xl_size; 1582 1583 for (i = 0; i < count; i++) { 1584 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1585 if (offset < free_start) 1586 free_start = offset; 1587 } 1588 1589 return free_start; 1590 } 1591 1592 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1593 struct ocfs2_xattr_info *xi) 1594 { 1595 int count = le16_to_cpu(loc->xl_header->xh_count); 1596 int free_start = ocfs2_xa_get_free_start(loc); 1597 int needed_space = ocfs2_xi_entry_usage(xi); 1598 1599 /* 1600 * Block storage will reclaim the original entry before inserting 1601 * the new value, so we only need the difference. If the new 1602 * entry is smaller than the old one, we don't need anything. 1603 */ 1604 if (loc->xl_entry) { 1605 /* Don't need space if we're reusing! */ 1606 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1607 needed_space = 0; 1608 else 1609 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1610 } 1611 if (needed_space < 0) 1612 needed_space = 0; 1613 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1614 } 1615 1616 /* 1617 * Block storage for xattrs keeps the name+value pairs compacted. When 1618 * we remove one, we have to shift any that preceded it towards the end. 
1619 */ 1620 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1621 { 1622 int i, offset; 1623 int namevalue_offset, first_namevalue_offset, namevalue_size; 1624 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1625 struct ocfs2_xattr_header *xh = loc->xl_header; 1626 int count = le16_to_cpu(xh->xh_count); 1627 1628 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1629 namevalue_size = namevalue_size_xe(entry); 1630 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1631 1632 /* Shift the name+value pairs */ 1633 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1634 (char *)xh + first_namevalue_offset, 1635 namevalue_offset - first_namevalue_offset); 1636 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1637 1638 /* Now tell xh->xh_entries about it */ 1639 for (i = 0; i < count; i++) { 1640 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1641 if (offset <= namevalue_offset) 1642 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1643 namevalue_size); 1644 } 1645 1646 /* 1647 * Note that we don't update xh_free_start or xh_name_value_len 1648 * because they're not used in block-stored xattrs. 
1649 */ 1650 } 1651 1652 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1653 { 1654 int count = le16_to_cpu(loc->xl_header->xh_count); 1655 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1656 le16_add_cpu(&loc->xl_header->xh_count, 1); 1657 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1658 } 1659 1660 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1661 { 1662 int free_start = ocfs2_xa_get_free_start(loc); 1663 1664 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1665 } 1666 1667 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1668 struct ocfs2_xattr_value_buf *vb) 1669 { 1670 struct buffer_head *bh = loc->xl_storage; 1671 1672 if (loc->xl_size == (bh->b_size - 1673 offsetof(struct ocfs2_xattr_block, 1674 xb_attrs.xb_header))) 1675 vb->vb_access = ocfs2_journal_access_xb; 1676 else 1677 vb->vb_access = ocfs2_journal_access_di; 1678 vb->vb_bh = bh; 1679 } 1680 1681 /* 1682 * Operations for xattrs stored in blocks. This includes inline inode 1683 * storage and unindexed ocfs2_xattr_blocks. 
1684 */ 1685 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1686 .xlo_journal_access = ocfs2_xa_block_journal_access, 1687 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1688 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1689 .xlo_check_space = ocfs2_xa_block_check_space, 1690 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1691 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1692 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1693 .xlo_add_entry = ocfs2_xa_block_add_entry, 1694 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1695 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1696 }; 1697 1698 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1699 struct ocfs2_xa_loc *loc, int type) 1700 { 1701 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1702 1703 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1704 } 1705 1706 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1707 struct ocfs2_xa_loc *loc) 1708 { 1709 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1710 1711 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1712 } 1713 1714 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1715 int offset) 1716 { 1717 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1718 int block, block_offset; 1719 1720 /* The header is at the front of the bucket */ 1721 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1722 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1723 1724 return bucket_block(bucket, block) + block_offset; 1725 } 1726 1727 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1728 struct ocfs2_xattr_info *xi) 1729 { 1730 return namevalue_size_xe(loc->xl_entry) >= 1731 namevalue_size_xi(xi); 1732 } 1733 1734 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1735 { 1736 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1737 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1738 } 1739 1740 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1741 int free_start, int size) 1742 { 1743 /* 1744 * We need to make sure that the name+value pair fits within 1745 * one block. 1746 */ 1747 if (((free_start - size) >> sb->s_blocksize_bits) != 1748 ((free_start - 1) >> sb->s_blocksize_bits)) 1749 free_start -= free_start % sb->s_blocksize; 1750 1751 return free_start; 1752 } 1753 1754 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1755 struct ocfs2_xattr_info *xi) 1756 { 1757 int rc; 1758 int count = le16_to_cpu(loc->xl_header->xh_count); 1759 int free_start = ocfs2_xa_get_free_start(loc); 1760 int needed_space = ocfs2_xi_entry_usage(xi); 1761 int size = namevalue_size_xi(xi); 1762 struct super_block *sb = loc->xl_inode->i_sb; 1763 1764 /* 1765 * Bucket storage does not reclaim name+value pairs it cannot 1766 * reuse. They live as holes until the bucket fills, and then 1767 * the bucket is defragmented. However, the bucket can reclaim 1768 * the ocfs2_xattr_entry. 1769 */ 1770 if (loc->xl_entry) { 1771 /* Don't need space if we're reusing! */ 1772 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1773 needed_space = 0; 1774 else 1775 needed_space -= sizeof(struct ocfs2_xattr_entry); 1776 } 1777 BUG_ON(needed_space < 0); 1778 1779 if (free_start < size) { 1780 if (needed_space) 1781 return -ENOSPC; 1782 } else { 1783 /* 1784 * First we check if it would fit in the first place. 1785 * Below, we align the free start to a block. This may 1786 * slide us below the minimum gap. By checking unaligned 1787 * first, we avoid that error. 
1788 */ 1789 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1790 count); 1791 if (rc) 1792 return rc; 1793 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1794 size); 1795 } 1796 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1797 } 1798 1799 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1800 { 1801 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1802 -namevalue_size_xe(loc->xl_entry)); 1803 } 1804 1805 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1806 { 1807 struct ocfs2_xattr_header *xh = loc->xl_header; 1808 int count = le16_to_cpu(xh->xh_count); 1809 int low = 0, high = count - 1, tmp; 1810 struct ocfs2_xattr_entry *tmp_xe; 1811 1812 /* 1813 * We keep buckets sorted by name_hash, so we need to find 1814 * our insert place. 1815 */ 1816 while (low <= high && count) { 1817 tmp = (low + high) / 2; 1818 tmp_xe = &xh->xh_entries[tmp]; 1819 1820 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1821 low = tmp + 1; 1822 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1823 high = tmp - 1; 1824 else { 1825 low = tmp; 1826 break; 1827 } 1828 } 1829 1830 if (low != count) 1831 memmove(&xh->xh_entries[low + 1], 1832 &xh->xh_entries[low], 1833 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1834 1835 le16_add_cpu(&xh->xh_count, 1); 1836 loc->xl_entry = &xh->xh_entries[low]; 1837 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1838 } 1839 1840 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1841 { 1842 int free_start = ocfs2_xa_get_free_start(loc); 1843 struct ocfs2_xattr_header *xh = loc->xl_header; 1844 struct super_block *sb = loc->xl_inode->i_sb; 1845 int nameval_offset; 1846 1847 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1848 nameval_offset = free_start - size; 1849 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1850 xh->xh_free_start = cpu_to_le16(nameval_offset); 1851 
le16_add_cpu(&xh->xh_name_value_len, size); 1852 1853 } 1854 1855 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1856 struct ocfs2_xattr_value_buf *vb) 1857 { 1858 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1859 struct super_block *sb = loc->xl_inode->i_sb; 1860 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1861 int size = namevalue_size_xe(loc->xl_entry); 1862 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1863 1864 /* Values are not allowed to straddle block boundaries */ 1865 BUG_ON(block_offset != 1866 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1867 /* We expect the bucket to be filled in */ 1868 BUG_ON(!bucket->bu_bhs[block_offset]); 1869 1870 vb->vb_access = ocfs2_journal_access; 1871 vb->vb_bh = bucket->bu_bhs[block_offset]; 1872 } 1873 1874 /* Operations for xattrs stored in buckets. */ 1875 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1876 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1877 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1878 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1879 .xlo_check_space = ocfs2_xa_bucket_check_space, 1880 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1881 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1882 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1883 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1884 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1885 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1886 }; 1887 1888 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1889 { 1890 struct ocfs2_xattr_value_buf vb; 1891 1892 if (ocfs2_xattr_is_local(loc->xl_entry)) 1893 return 0; 1894 1895 ocfs2_xa_fill_value_buf(loc, &vb); 1896 return le32_to_cpu(vb.vb_xv->xr_clusters); 1897 } 1898 1899 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1900 struct ocfs2_xattr_set_ctxt *ctxt) 1901 { 1902 int trunc_rc, access_rc; 1903 struct 
ocfs2_xattr_value_buf vb; 1904 1905 ocfs2_xa_fill_value_buf(loc, &vb); 1906 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1907 ctxt); 1908 1909 /* 1910 * The caller of ocfs2_xa_value_truncate() has already called 1911 * ocfs2_xa_journal_access on the loc. However, The truncate code 1912 * calls ocfs2_extend_trans(). This may commit the previous 1913 * transaction and open a new one. If this is a bucket, truncate 1914 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1915 * the caller is expecting to dirty the entire bucket. So we must 1916 * reset the journal work. We do this even if truncate has failed, 1917 * as it could have failed after committing the extend. 1918 */ 1919 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1920 OCFS2_JOURNAL_ACCESS_WRITE); 1921 1922 /* Errors in truncate take precedence */ 1923 return trunc_rc ? trunc_rc : access_rc; 1924 } 1925 1926 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1927 { 1928 int index, count; 1929 struct ocfs2_xattr_header *xh = loc->xl_header; 1930 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1931 1932 ocfs2_xa_wipe_namevalue(loc); 1933 loc->xl_entry = NULL; 1934 1935 le16_add_cpu(&xh->xh_count, -1); 1936 count = le16_to_cpu(xh->xh_count); 1937 1938 /* 1939 * Only zero out the entry if there are more remaining. This is 1940 * important for an empty bucket, as it keeps track of the 1941 * bucket's hash value. It doesn't hurt empty block storage. 
1942 */ 1943 if (count) { 1944 index = ((char *)entry - (char *)&xh->xh_entries) / 1945 sizeof(struct ocfs2_xattr_entry); 1946 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1947 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1948 memset(&xh->xh_entries[count], 0, 1949 sizeof(struct ocfs2_xattr_entry)); 1950 } 1951 } 1952 1953 /* 1954 * If we have a problem adjusting the size of an external value during 1955 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1956 * in an intermediate state. For example, the value may be partially 1957 * truncated. 1958 * 1959 * If the value tree hasn't changed, the extend/truncate went nowhere. 1960 * We have nothing to do. The caller can treat it as a straight error. 1961 * 1962 * If the value tree got partially truncated, we now have a corrupted 1963 * extended attribute. We're going to wipe its entry and leak the 1964 * clusters. Better to leak some storage than leave a corrupt entry. 1965 * 1966 * If the value tree grew, it obviously didn't grow enough for the 1967 * new entry. We're not going to try and reclaim those clusters either. 1968 * If there was already an external value there (orig_clusters != 0), 1969 * the new clusters are attached safely and we can just leave the old 1970 * value in place. If there was no external value there, we remove 1971 * the entry. 1972 * 1973 * This way, the xattr block we store in the journal will be consistent. 1974 * If the size change broke because of the journal, no changes will hit 1975 * disk anyway. 1976 */ 1977 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1978 const char *what, 1979 unsigned int orig_clusters) 1980 { 1981 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1982 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1983 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1984 1985 if (new_clusters < orig_clusters) { 1986 mlog(ML_ERROR, 1987 "Partial truncate while %s xattr %.*s. 
Leaking " 1988 "%u clusters and removing the entry\n", 1989 what, loc->xl_entry->xe_name_len, nameval_buf, 1990 orig_clusters - new_clusters); 1991 ocfs2_xa_remove_entry(loc); 1992 } else if (!orig_clusters) { 1993 mlog(ML_ERROR, 1994 "Unable to allocate an external value for xattr " 1995 "%.*s safely. Leaking %u clusters and removing the " 1996 "entry\n", 1997 loc->xl_entry->xe_name_len, nameval_buf, 1998 new_clusters - orig_clusters); 1999 ocfs2_xa_remove_entry(loc); 2000 } else if (new_clusters > orig_clusters) 2001 mlog(ML_ERROR, 2002 "Unable to grow xattr %.*s safely. %u new clusters " 2003 "have been added, but the value will not be " 2004 "modified\n", 2005 loc->xl_entry->xe_name_len, nameval_buf, 2006 new_clusters - orig_clusters); 2007 } 2008 2009 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2010 struct ocfs2_xattr_set_ctxt *ctxt) 2011 { 2012 int rc = 0; 2013 unsigned int orig_clusters; 2014 2015 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2016 orig_clusters = ocfs2_xa_value_clusters(loc); 2017 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2018 if (rc) { 2019 mlog_errno(rc); 2020 /* 2021 * Since this is remove, we can return 0 if 2022 * ocfs2_xa_cleanup_value_truncate() is going to 2023 * wipe the entry anyway. So we check the 2024 * cluster count as well. 2025 */ 2026 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2027 rc = 0; 2028 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2029 orig_clusters); 2030 if (rc) 2031 goto out; 2032 } 2033 } 2034 2035 ocfs2_xa_remove_entry(loc); 2036 2037 out: 2038 return rc; 2039 } 2040 2041 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2042 { 2043 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2044 char *nameval_buf; 2045 2046 nameval_buf = ocfs2_xa_offset_pointer(loc, 2047 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2048 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2049 } 2050 2051 /* 2052 * Take an existing entry and make it ready for the new value. 
This won't allocate space, but it may free space. It should be ready for
 * ocfs2_xa_prepare_entry() to finish the work.
 *
 * Returns 0 on success.  On a truncate failure the error is returned
 * after ocfs2_xa_cleanup_value_truncate() has run, which may have
 * removed the entry and left loc->xl_entry NULL.
 */
static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	unsigned int orig_clusters;
	char *nameval_buf;
	/* xe_local: how the old value is stored; xi_local: how the new one will be */
	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;

	/* Reuse keeps the name in place, so the padded name sizes must agree. */
	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
	       name_size);

	nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));
	if (xe_local) {
		/* Old value was local: clear it, add a root if going external. */
		memset(nameval_buf + name_size, 0,
		       namevalue_size_xe(loc->xl_entry) - name_size);
		if (!xi_local)
			ocfs2_xa_install_value_root(loc);
	} else {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		if (xi_local) {
			/* External -> local: free the tree, then clear the root. */
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc < 0)
				mlog_errno(rc);
			else
				memset(nameval_buf + name_size, 0,
				       namevalue_size_xe(loc->xl_entry) -
				       name_size);
		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
			   xi->xi_value_len) {
			/* External -> smaller external: shrink only; growth is left to the caller. */
			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
						     ctxt);
			if (rc < 0)
				mlog_errno(rc);
		}

		if (rc) {
			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
							orig_clusters);
			goto out;
		}
	}

	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	ocfs2_xattr_set_local(loc->xl_entry, xi_local);

out:
	return rc;
}

/*
 * Prepares loc->xl_entry to receive the new xattr. This includes
 * properly setting up the name+value pair region. If loc->xl_entry
 * already exists, it will take care of modifying it appropriately.
 *
 * Note that this modifies the data. You did journal_access already,
 * right?
*/
static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
				  struct ocfs2_xattr_info *xi,
				  u32 name_hash,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	/*
	 * Returns 0 when the entry is ready for ocfs2_xa_store_value(),
	 * or a negative error from the space check or from resizing an
	 * external value.  When growing the external value fails,
	 * ctxt->set_abort is set as well.
	 */
	int rc = 0;
	unsigned int orig_clusters;
	__le64 orig_value_size = 0;

	rc = ocfs2_xa_check_space(loc, xi);
	if (rc)
		goto out;

	if (loc->xl_entry) {
		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
			/* Remembered so a failed grow below can restore it. */
			orig_value_size = loc->xl_entry->xe_value_size;
			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
			if (rc)
				goto out;
			goto alloc_value;
		}

		/* Not reusable: free any external value, then wipe the pair. */
		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
			orig_clusters = ocfs2_xa_value_clusters(loc);
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc) {
				mlog_errno(rc);
				ocfs2_xa_cleanup_value_truncate(loc,
								"overwriting",
								orig_clusters);
				goto out;
			}
		}
		ocfs2_xa_wipe_namevalue(loc);
	} else
		ocfs2_xa_add_entry(loc, name_hash);

	/*
	 * If we get here, we have a blank entry. Fill it. We grow our
	 * name+value pair back from the end.
	 */
	ocfs2_xa_add_namevalue(loc, xi);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xa_install_value_root(loc);

alloc_value:
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* Size the external value tree for the new value length. */
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
		if (rc < 0) {
			ctxt->set_abort = 1;
			ocfs2_xa_cleanup_value_truncate(loc, "growing",
							orig_clusters);
			/*
			 * If we were growing an existing value,
			 * ocfs2_xa_cleanup_value_truncate() won't remove
			 * the entry. We need to restore the original value
			 * size.
			 */
			if (loc->xl_entry) {
				BUG_ON(!orig_value_size);
				loc->xl_entry->xe_value_size = orig_value_size;
			}
			mlog_errno(rc);
		}
	}

out:
	return rc;
}

/*
 * Store the value portion of the name+value pair.
This will skip values that are stored externally. Their tree roots were
 * set up by ocfs2_xa_prepare_entry().
 *
 * Values larger than OCFS2_XATTR_INLINE_SIZE are written into their
 * clusters through __ocfs2_xattr_set_value_outside(); smaller ones are
 * copied in place after the padded name.  Returns 0 or the error from
 * the external write.
 */
static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	char *nameval_buf;
	struct ocfs2_xattr_value_buf vb;

	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		ocfs2_xa_fill_value_buf(loc, &vb);
		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
						     ctxt->handle, &vb,
						     xi->xi_value,
						     xi->xi_value_len);
	} else
		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);

	return rc;
}

/*
 * Set or remove the xattr described by @xi at @loc within ctxt->handle.
 *
 * A NULL xi->xi_value means removal.  Once journal access has been
 * taken, the storage is dirtied on every path, including failures.
 * Returns 0 or a negative error; -ENOSPC from the prepare step is
 * returned without being logged.
 */
static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
			struct ocfs2_xattr_info *xi,
			struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
					      xi->xi_name_len);

	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * From here on out, everything is going to modify the buffer a
	 * little. Errors are going to leave the xattr header in a
	 * sane state. Thus, even with errors we dirty the sucker.
	 */

	/* Don't worry, we are never called with !xi_value and !xl_entry */
	if (!xi->xi_value) {
		ret = ocfs2_xa_remove(loc, ctxt);
		goto out_dirty;
	}

	ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
	if (ret) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out_dirty;
	}

	ret = ocfs2_xa_store_value(loc, xi, ctxt);
	if (ret)
		mlog_errno(ret);

out_dirty:
	ocfs2_xa_journal_dirty(ctxt->handle, loc);

out:
	return ret;
}

/*
 * Describe the inline xattr area of the inode in @bh as a block-style
 * location: the last i_xattr_inline_size bytes of the buffer.  BUG()s
 * if the inode does not have the inline xattr feature flag set.
 */
static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
				     struct inode *inode,
				     struct buffer_head *bh,
				     struct ocfs2_xattr_entry *entry)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;

	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));

	loc->xl_inode = inode;
	loc->xl_ops = &ocfs2_xa_block_loc_ops;
	loc->xl_storage = bh;
	loc->xl_entry = entry;
	loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
	loc->xl_header =
		(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
					      loc->xl_size);
}

/*
 * Describe an unindexed xattr block in @bh as a block-style location
 * that runs from xb_header to the end of the buffer.  BUG()s if the
 * block is flagged as indexed.
 */
static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
					  struct inode *inode,
					  struct buffer_head *bh,
					  struct ocfs2_xattr_entry *entry)
{
	struct ocfs2_xattr_block *xb =
		(struct ocfs2_xattr_block *)bh->b_data;

	BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);

	loc->xl_inode = inode;
	loc->xl_ops = &ocfs2_xa_block_loc_ops;
	loc->xl_storage = bh;
	loc->xl_header = &(xb->xb_attrs.xb_header);
	loc->xl_entry = entry;
	/* This size is how the block ops tell an xattr block from a dinode. */
	loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
					     xb_attrs.xb_header);
}

/* Describe @bucket as a bucket-style location of OCFS2_XATTR_BUCKET_SIZE. */
static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
					   struct ocfs2_xattr_bucket *bucket,
					   struct ocfs2_xattr_entry *entry)
{
	loc->xl_inode = bucket->bu_inode;
	loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
	loc->xl_storage = bucket;
	loc->xl_header = bucket_xh(bucket);
	loc->xl_entry = entry;
	loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
}

/*
 * In xattr remove, if it is stored outside and refcounted, we may have
 * the chance to split the refcount tree. So need the allocators.
 *
 * *ref_credits is reset to 0 first.  Only the extent at cluster offset
 * 0 is examined: when it is not refcounted nothing is reserved and 0 is
 * returned.  Otherwise the needed metadata blocks are reserved into
 * *meta_ac.  Returns 0 or a negative error.
 */
static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
					struct ocfs2_xattr_value_root *xv,
					struct ocfs2_caching_info *ref_ci,
					struct buffer_head *ref_root_bh,
					struct ocfs2_alloc_context **meta_ac,
					int *ref_credits)
{
	int ret, meta_add = 0;
	u32 p_cluster, num_clusters;
	unsigned int ext_flags;

	*ref_credits = 0;
	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
				       &num_clusters,
				       &xv->xr_list,
				       &ext_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
		goto out;

	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
						 ref_root_bh, xv,
						 &meta_add, ref_credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
						meta_add, meta_ac);
	if (ret)
		mlog_errno(ret);

out:
	return ret;
}

static int ocfs2_remove_value_outside(struct inode*inode,
				      struct ocfs2_xattr_value_buf *vb,
				      struct ocfs2_xattr_header *header,
				      struct ocfs2_caching_info *ref_ci,
				      struct buffer_head *ref_root_bh)
{
	int ret = 0, i, ref_credits;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
	void *val;

	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);

	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];

		/* Locally stored values have nothing outside to remove. */
		if (ocfs2_xattr_is_local(entry))
			continue;

		val = (void *)header +
le16_to_cpu(entry->xe_name_offset); 2377 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2378 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2379 2380 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2381 ref_ci, ref_root_bh, 2382 &ctxt.meta_ac, 2383 &ref_credits); 2384 2385 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2386 ocfs2_remove_extent_credits(osb->sb)); 2387 if (IS_ERR(ctxt.handle)) { 2388 ret = PTR_ERR(ctxt.handle); 2389 mlog_errno(ret); 2390 break; 2391 } 2392 2393 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2394 2395 ocfs2_commit_trans(osb, ctxt.handle); 2396 if (ctxt.meta_ac) { 2397 ocfs2_free_alloc_context(ctxt.meta_ac); 2398 ctxt.meta_ac = NULL; 2399 } 2400 2401 if (ret < 0) { 2402 mlog_errno(ret); 2403 break; 2404 } 2405 2406 } 2407 2408 if (ctxt.meta_ac) 2409 ocfs2_free_alloc_context(ctxt.meta_ac); 2410 ocfs2_schedule_truncate_log_flush(osb, 1); 2411 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2412 return ret; 2413 } 2414 2415 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2416 struct buffer_head *di_bh, 2417 struct ocfs2_caching_info *ref_ci, 2418 struct buffer_head *ref_root_bh) 2419 { 2420 2421 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2422 struct ocfs2_xattr_header *header; 2423 int ret; 2424 struct ocfs2_xattr_value_buf vb = { 2425 .vb_bh = di_bh, 2426 .vb_access = ocfs2_journal_access_di, 2427 }; 2428 2429 header = (struct ocfs2_xattr_header *) 2430 ((void *)di + inode->i_sb->s_blocksize - 2431 le16_to_cpu(di->i_xattr_inline_size)); 2432 2433 ret = ocfs2_remove_value_outside(inode, &vb, header, 2434 ref_ci, ref_root_bh); 2435 2436 return ret; 2437 } 2438 2439 struct ocfs2_rm_xattr_bucket_para { 2440 struct ocfs2_caching_info *ref_ci; 2441 struct buffer_head *ref_root_bh; 2442 }; 2443 2444 static int ocfs2_xattr_block_remove(struct inode *inode, 2445 struct buffer_head *blk_bh, 2446 struct ocfs2_caching_info *ref_ci, 2447 struct buffer_head *ref_root_bh) 2448 { 2449 struct ocfs2_xattr_block 
*xb; 2450 int ret = 0; 2451 struct ocfs2_xattr_value_buf vb = { 2452 .vb_bh = blk_bh, 2453 .vb_access = ocfs2_journal_access_xb, 2454 }; 2455 struct ocfs2_rm_xattr_bucket_para args = { 2456 .ref_ci = ref_ci, 2457 .ref_root_bh = ref_root_bh, 2458 }; 2459 2460 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2461 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2462 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2463 ret = ocfs2_remove_value_outside(inode, &vb, header, 2464 ref_ci, ref_root_bh); 2465 } else 2466 ret = ocfs2_iterate_xattr_index_block(inode, 2467 blk_bh, 2468 ocfs2_rm_xattr_cluster, 2469 &args); 2470 2471 return ret; 2472 } 2473 2474 static int ocfs2_xattr_free_block(struct inode *inode, 2475 u64 block, 2476 struct ocfs2_caching_info *ref_ci, 2477 struct buffer_head *ref_root_bh) 2478 { 2479 struct inode *xb_alloc_inode; 2480 struct buffer_head *xb_alloc_bh = NULL; 2481 struct buffer_head *blk_bh = NULL; 2482 struct ocfs2_xattr_block *xb; 2483 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2484 handle_t *handle; 2485 int ret = 0; 2486 u64 blk, bg_blkno; 2487 u16 bit; 2488 2489 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2490 if (ret < 0) { 2491 mlog_errno(ret); 2492 goto out; 2493 } 2494 2495 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2496 if (ret < 0) { 2497 mlog_errno(ret); 2498 goto out; 2499 } 2500 2501 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2502 blk = le64_to_cpu(xb->xb_blkno); 2503 bit = le16_to_cpu(xb->xb_suballoc_bit); 2504 if (xb->xb_suballoc_loc) 2505 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2506 else 2507 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2508 2509 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2510 EXTENT_ALLOC_SYSTEM_INODE, 2511 le16_to_cpu(xb->xb_suballoc_slot)); 2512 if (!xb_alloc_inode) { 2513 ret = -ENOMEM; 2514 mlog_errno(ret); 2515 goto out; 2516 } 2517 inode_lock(xb_alloc_inode); 2518 2519 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 
1); 2520 if (ret < 0) { 2521 mlog_errno(ret); 2522 goto out_mutex; 2523 } 2524 2525 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2526 if (IS_ERR(handle)) { 2527 ret = PTR_ERR(handle); 2528 mlog_errno(ret); 2529 goto out_unlock; 2530 } 2531 2532 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2533 bit, bg_blkno, 1); 2534 if (ret < 0) 2535 mlog_errno(ret); 2536 2537 ocfs2_commit_trans(osb, handle); 2538 out_unlock: 2539 ocfs2_inode_unlock(xb_alloc_inode, 1); 2540 brelse(xb_alloc_bh); 2541 out_mutex: 2542 inode_unlock(xb_alloc_inode); 2543 iput(xb_alloc_inode); 2544 out: 2545 brelse(blk_bh); 2546 return ret; 2547 } 2548 2549 /* 2550 * ocfs2_xattr_remove() 2551 * 2552 * Free extended attribute resources associated with this inode. 2553 */ 2554 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2555 { 2556 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2557 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2558 struct ocfs2_refcount_tree *ref_tree = NULL; 2559 struct buffer_head *ref_root_bh = NULL; 2560 struct ocfs2_caching_info *ref_ci = NULL; 2561 handle_t *handle; 2562 int ret; 2563 2564 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2565 return 0; 2566 2567 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2568 return 0; 2569 2570 if (ocfs2_is_refcount_inode(inode)) { 2571 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2572 le64_to_cpu(di->i_refcount_loc), 2573 1, &ref_tree, &ref_root_bh); 2574 if (ret) { 2575 mlog_errno(ret); 2576 goto out; 2577 } 2578 ref_ci = &ref_tree->rf_ci; 2579 2580 } 2581 2582 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2583 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2584 ref_ci, ref_root_bh); 2585 if (ret < 0) { 2586 mlog_errno(ret); 2587 goto out; 2588 } 2589 } 2590 2591 if (di->i_xattr_loc) { 2592 ret = ocfs2_xattr_free_block(inode, 2593 le64_to_cpu(di->i_xattr_loc), 2594 ref_ci, ref_root_bh); 2595 if (ret < 0) { 2596 mlog_errno(ret); 2597 goto out; 2598 } 
2599 } 2600 2601 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2602 OCFS2_INODE_UPDATE_CREDITS); 2603 if (IS_ERR(handle)) { 2604 ret = PTR_ERR(handle); 2605 mlog_errno(ret); 2606 goto out; 2607 } 2608 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2609 OCFS2_JOURNAL_ACCESS_WRITE); 2610 if (ret) { 2611 mlog_errno(ret); 2612 goto out_commit; 2613 } 2614 2615 di->i_xattr_loc = 0; 2616 2617 spin_lock(&oi->ip_lock); 2618 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2619 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2620 spin_unlock(&oi->ip_lock); 2621 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2622 2623 ocfs2_journal_dirty(handle, di_bh); 2624 out_commit: 2625 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2626 out: 2627 if (ref_tree) 2628 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2629 brelse(ref_root_bh); 2630 return ret; 2631 } 2632 2633 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2634 struct ocfs2_dinode *di) 2635 { 2636 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2637 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2638 int free; 2639 2640 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2641 return 0; 2642 2643 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2644 struct ocfs2_inline_data *idata = &di->id2.i_data; 2645 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2646 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2647 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2648 le64_to_cpu(di->i_size); 2649 } else { 2650 struct ocfs2_extent_list *el = &di->id2.i_list; 2651 free = (le16_to_cpu(el->l_count) - 2652 le16_to_cpu(el->l_next_free_rec)) * 2653 sizeof(struct ocfs2_extent_rec); 2654 } 2655 if (free >= xattrsize) 2656 return 1; 2657 2658 return 0; 2659 } 2660 2661 /* 2662 * ocfs2_xattr_ibody_find() 2663 * 2664 * Find extended attribute in inode block and 2665 * fill search info into struct ocfs2_xattr_search. 
2666 */ 2667 static int ocfs2_xattr_ibody_find(struct inode *inode, 2668 int name_index, 2669 const char *name, 2670 struct ocfs2_xattr_search *xs) 2671 { 2672 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2673 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2674 int ret; 2675 int has_space = 0; 2676 2677 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2678 return 0; 2679 2680 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2681 down_read(&oi->ip_alloc_sem); 2682 has_space = ocfs2_xattr_has_space_inline(inode, di); 2683 up_read(&oi->ip_alloc_sem); 2684 if (!has_space) 2685 return 0; 2686 } 2687 2688 xs->xattr_bh = xs->inode_bh; 2689 xs->end = (void *)di + inode->i_sb->s_blocksize; 2690 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2691 xs->header = (struct ocfs2_xattr_header *) 2692 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2693 else 2694 xs->header = (struct ocfs2_xattr_header *) 2695 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2696 xs->base = (void *)xs->header; 2697 xs->here = xs->header->xh_entries; 2698 2699 /* Find the named attribute. 
*/ 2700 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2701 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2702 if (ret && ret != -ENODATA) 2703 return ret; 2704 xs->not_found = ret; 2705 } 2706 2707 return 0; 2708 } 2709 2710 static int ocfs2_xattr_ibody_init(struct inode *inode, 2711 struct buffer_head *di_bh, 2712 struct ocfs2_xattr_set_ctxt *ctxt) 2713 { 2714 int ret; 2715 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2716 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2717 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2718 unsigned int xattrsize = osb->s_xattr_inline_size; 2719 2720 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2721 ret = -ENOSPC; 2722 goto out; 2723 } 2724 2725 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2726 OCFS2_JOURNAL_ACCESS_WRITE); 2727 if (ret) { 2728 mlog_errno(ret); 2729 goto out; 2730 } 2731 2732 /* 2733 * Adjust extent record count or inline data size 2734 * to reserve space for extended attribute. 2735 */ 2736 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2737 struct ocfs2_inline_data *idata = &di->id2.i_data; 2738 le16_add_cpu(&idata->id_count, -xattrsize); 2739 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2740 struct ocfs2_extent_list *el = &di->id2.i_list; 2741 le16_add_cpu(&el->l_count, -(xattrsize / 2742 sizeof(struct ocfs2_extent_rec))); 2743 } 2744 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2745 2746 spin_lock(&oi->ip_lock); 2747 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2748 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2749 spin_unlock(&oi->ip_lock); 2750 2751 ocfs2_journal_dirty(ctxt->handle, di_bh); 2752 2753 out: 2754 return ret; 2755 } 2756 2757 /* 2758 * ocfs2_xattr_ibody_set() 2759 * 2760 * Set, replace or remove an extended attribute into inode block. 
2761 * 2762 */ 2763 static int ocfs2_xattr_ibody_set(struct inode *inode, 2764 struct ocfs2_xattr_info *xi, 2765 struct ocfs2_xattr_search *xs, 2766 struct ocfs2_xattr_set_ctxt *ctxt) 2767 { 2768 int ret; 2769 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2770 struct ocfs2_xa_loc loc; 2771 2772 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2773 return -ENOSPC; 2774 2775 down_write(&oi->ip_alloc_sem); 2776 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2777 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2778 if (ret) { 2779 if (ret != -ENOSPC) 2780 mlog_errno(ret); 2781 goto out; 2782 } 2783 } 2784 2785 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2786 xs->not_found ? NULL : xs->here); 2787 ret = ocfs2_xa_set(&loc, xi, ctxt); 2788 if (ret) { 2789 if (ret != -ENOSPC) 2790 mlog_errno(ret); 2791 goto out; 2792 } 2793 xs->here = loc.xl_entry; 2794 2795 out: 2796 up_write(&oi->ip_alloc_sem); 2797 2798 return ret; 2799 } 2800 2801 /* 2802 * ocfs2_xattr_block_find() 2803 * 2804 * Find extended attribute in external block and 2805 * fill search info into struct ocfs2_xattr_search. 
2806 */ 2807 static int ocfs2_xattr_block_find(struct inode *inode, 2808 int name_index, 2809 const char *name, 2810 struct ocfs2_xattr_search *xs) 2811 { 2812 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2813 struct buffer_head *blk_bh = NULL; 2814 struct ocfs2_xattr_block *xb; 2815 int ret = 0; 2816 2817 if (!di->i_xattr_loc) 2818 return ret; 2819 2820 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2821 &blk_bh); 2822 if (ret < 0) { 2823 mlog_errno(ret); 2824 return ret; 2825 } 2826 2827 xs->xattr_bh = blk_bh; 2828 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2829 2830 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2831 xs->header = &xb->xb_attrs.xb_header; 2832 xs->base = (void *)xs->header; 2833 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2834 xs->here = xs->header->xh_entries; 2835 2836 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2837 } else 2838 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2839 name_index, 2840 name, xs); 2841 2842 if (ret && ret != -ENODATA) { 2843 xs->xattr_bh = NULL; 2844 goto cleanup; 2845 } 2846 xs->not_found = ret; 2847 return 0; 2848 cleanup: 2849 brelse(blk_bh); 2850 2851 return ret; 2852 } 2853 2854 static int ocfs2_create_xattr_block(struct inode *inode, 2855 struct buffer_head *inode_bh, 2856 struct ocfs2_xattr_set_ctxt *ctxt, 2857 int indexed, 2858 struct buffer_head **ret_bh) 2859 { 2860 int ret; 2861 u16 suballoc_bit_start; 2862 u32 num_got; 2863 u64 suballoc_loc, first_blkno; 2864 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2865 struct buffer_head *new_bh = NULL; 2866 struct ocfs2_xattr_block *xblk; 2867 2868 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2869 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2870 if (ret < 0) { 2871 mlog_errno(ret); 2872 goto end; 2873 } 2874 2875 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2876 &suballoc_loc, &suballoc_bit_start, 2877 &num_got, &first_blkno); 2878 if 
(ret < 0) { 2879 mlog_errno(ret); 2880 goto end; 2881 } 2882 2883 new_bh = sb_getblk(inode->i_sb, first_blkno); 2884 if (!new_bh) { 2885 ret = -ENOMEM; 2886 mlog_errno(ret); 2887 goto end; 2888 } 2889 2890 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2891 2892 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2893 new_bh, 2894 OCFS2_JOURNAL_ACCESS_CREATE); 2895 if (ret < 0) { 2896 mlog_errno(ret); 2897 goto end; 2898 } 2899 2900 /* Initialize ocfs2_xattr_block */ 2901 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2902 memset(xblk, 0, inode->i_sb->s_blocksize); 2903 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2904 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2905 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2906 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2907 xblk->xb_fs_generation = 2908 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2909 xblk->xb_blkno = cpu_to_le64(first_blkno); 2910 if (indexed) { 2911 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2912 xr->xt_clusters = cpu_to_le32(1); 2913 xr->xt_last_eb_blk = 0; 2914 xr->xt_list.l_tree_depth = 0; 2915 xr->xt_list.l_count = cpu_to_le16( 2916 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2917 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2918 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2919 } 2920 ocfs2_journal_dirty(ctxt->handle, new_bh); 2921 2922 /* Add it to the inode */ 2923 di->i_xattr_loc = cpu_to_le64(first_blkno); 2924 2925 spin_lock(&OCFS2_I(inode)->ip_lock); 2926 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2927 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2928 spin_unlock(&OCFS2_I(inode)->ip_lock); 2929 2930 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2931 2932 *ret_bh = new_bh; 2933 new_bh = NULL; 2934 2935 end: 2936 brelse(new_bh); 2937 return ret; 2938 } 2939 2940 /* 2941 * ocfs2_xattr_block_set() 2942 * 2943 * Set, replace or remove an extended attribute into 
external block.
 *
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct buffer_head *created_bh = NULL;
	struct ocfs2_xattr_block *xblk = NULL;
	struct ocfs2_xa_loc loc;
	int status;

	if (xs->xattr_bh) {
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	} else {
		/* No external block yet: create a flat one, aim xs at it. */
		status = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
						  0, &created_bh);
		if (status) {
			mlog_errno(status);
			return status;
		}

		xs->xattr_bh = created_bh;
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xblk->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	}

	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		status = ocfs2_xa_set(&loc, xi, ctxt);
		if (!status) {
			xs->here = loc.xl_entry;
		} else if (status != -ENOSPC || ctxt->set_abort) {
			return status;
		} else {
			/* Flat block is full: convert to an indexed block. */
			status = ocfs2_xattr_create_index_block(inode, xs,
								ctxt);
			if (status)
				return status;
		}
	}

	/* Re-read the flag: the conversion above may have set it. */
	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
		status = ocfs2_xattr_set_entry_index_block(inode, xi, xs,
							   ctxt);

	return status;
}

/* Check whether the new xattr can be inserted into the inode.
*/ 2997 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 2998 struct ocfs2_xattr_info *xi, 2999 struct ocfs2_xattr_search *xs) 3000 { 3001 struct ocfs2_xattr_entry *last; 3002 int free, i; 3003 size_t min_offs = xs->end - xs->base; 3004 3005 if (!xs->header) 3006 return 0; 3007 3008 last = xs->header->xh_entries; 3009 3010 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 3011 size_t offs = le16_to_cpu(last->xe_name_offset); 3012 if (offs < min_offs) 3013 min_offs = offs; 3014 last += 1; 3015 } 3016 3017 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3018 if (free < 0) 3019 return 0; 3020 3021 BUG_ON(!xs->not_found); 3022 3023 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3024 return 1; 3025 3026 return 0; 3027 } 3028 3029 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3030 struct ocfs2_dinode *di, 3031 struct ocfs2_xattr_info *xi, 3032 struct ocfs2_xattr_search *xis, 3033 struct ocfs2_xattr_search *xbs, 3034 int *clusters_need, 3035 int *meta_need, 3036 int *credits_need) 3037 { 3038 int ret = 0, old_in_xb = 0; 3039 int clusters_add = 0, meta_add = 0, credits = 0; 3040 struct buffer_head *bh = NULL; 3041 struct ocfs2_xattr_block *xb = NULL; 3042 struct ocfs2_xattr_entry *xe = NULL; 3043 struct ocfs2_xattr_value_root *xv = NULL; 3044 char *base = NULL; 3045 int name_offset, name_len = 0; 3046 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3047 xi->xi_value_len); 3048 u64 value_size; 3049 3050 /* 3051 * Calculate the clusters we need to write. 3052 * No matter whether we replace an old one or add a new one, 3053 * we need this for writing. 
3054 */ 3055 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3056 credits += new_clusters * 3057 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3058 3059 if (xis->not_found && xbs->not_found) { 3060 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3061 3062 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3063 clusters_add += new_clusters; 3064 credits += ocfs2_calc_extend_credits(inode->i_sb, 3065 &def_xv.xv.xr_list); 3066 } 3067 3068 goto meta_guess; 3069 } 3070 3071 if (!xis->not_found) { 3072 xe = xis->here; 3073 name_offset = le16_to_cpu(xe->xe_name_offset); 3074 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3075 base = xis->base; 3076 credits += OCFS2_INODE_UPDATE_CREDITS; 3077 } else { 3078 int i, block_off = 0; 3079 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3080 xe = xbs->here; 3081 name_offset = le16_to_cpu(xe->xe_name_offset); 3082 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3083 i = xbs->here - xbs->header->xh_entries; 3084 old_in_xb = 1; 3085 3086 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3087 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3088 bucket_xh(xbs->bucket), 3089 i, &block_off, 3090 &name_offset); 3091 base = bucket_block(xbs->bucket, block_off); 3092 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3093 } else { 3094 base = xbs->base; 3095 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3096 } 3097 } 3098 3099 /* 3100 * delete a xattr doesn't need metadata and cluster allocation. 3101 * so just calculate the credits and return. 3102 * 3103 * The credits for removing the value tree will be extended 3104 * by ocfs2_remove_extent itself. 3105 */ 3106 if (!xi->xi_value) { 3107 if (!ocfs2_xattr_is_local(xe)) 3108 credits += ocfs2_remove_extent_credits(inode->i_sb); 3109 3110 goto out; 3111 } 3112 3113 /* do cluster allocation guess first. 
*/ 3114 value_size = le64_to_cpu(xe->xe_value_size); 3115 3116 if (old_in_xb) { 3117 /* 3118 * In xattr set, we always try to set the xe in inode first, 3119 * so if it can be inserted into inode successfully, the old 3120 * one will be removed from the xattr block, and this xattr 3121 * will be inserted into inode as a new xattr in inode. 3122 */ 3123 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3124 clusters_add += new_clusters; 3125 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3126 OCFS2_INODE_UPDATE_CREDITS; 3127 if (!ocfs2_xattr_is_local(xe)) 3128 credits += ocfs2_calc_extend_credits( 3129 inode->i_sb, 3130 &def_xv.xv.xr_list); 3131 goto out; 3132 } 3133 } 3134 3135 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3136 /* the new values will be stored outside. */ 3137 u32 old_clusters = 0; 3138 3139 if (!ocfs2_xattr_is_local(xe)) { 3140 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3141 value_size); 3142 xv = (struct ocfs2_xattr_value_root *) 3143 (base + name_offset + name_len); 3144 value_size = OCFS2_XATTR_ROOT_SIZE; 3145 } else 3146 xv = &def_xv.xv; 3147 3148 if (old_clusters >= new_clusters) { 3149 credits += ocfs2_remove_extent_credits(inode->i_sb); 3150 goto out; 3151 } else { 3152 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3153 clusters_add += new_clusters - old_clusters; 3154 credits += ocfs2_calc_extend_credits(inode->i_sb, 3155 &xv->xr_list); 3156 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3157 goto out; 3158 } 3159 } else { 3160 /* 3161 * Now the new value will be stored inside. So if the new 3162 * value is smaller than the size of value root or the old 3163 * value, we don't need any allocation, otherwise we have 3164 * to guess metadata allocation. 3165 */ 3166 if ((ocfs2_xattr_is_local(xe) && 3167 (value_size >= xi->xi_value_len)) || 3168 (!ocfs2_xattr_is_local(xe) && 3169 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3170 goto out; 3171 } 3172 3173 meta_guess: 3174 /* calculate metadata allocation. 
*/ 3175 if (di->i_xattr_loc) { 3176 if (!xbs->xattr_bh) { 3177 ret = ocfs2_read_xattr_block(inode, 3178 le64_to_cpu(di->i_xattr_loc), 3179 &bh); 3180 if (ret) { 3181 mlog_errno(ret); 3182 goto out; 3183 } 3184 3185 xb = (struct ocfs2_xattr_block *)bh->b_data; 3186 } else 3187 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3188 3189 /* 3190 * If there is already an xattr tree, good, we can calculate 3191 * like other b-trees. Otherwise we may have the chance of 3192 * create a tree, the credit calculation is borrowed from 3193 * ocfs2_calc_extend_credits with root_el = NULL. And the 3194 * new tree will be cluster based, so no meta is needed. 3195 */ 3196 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3197 struct ocfs2_extent_list *el = 3198 &xb->xb_attrs.xb_root.xt_list; 3199 meta_add += ocfs2_extend_meta_needed(el); 3200 credits += ocfs2_calc_extend_credits(inode->i_sb, 3201 el); 3202 } else 3203 credits += OCFS2_SUBALLOC_ALLOC + 1; 3204 3205 /* 3206 * This cluster will be used either for new bucket or for 3207 * new xattr block. 3208 * If the cluster size is the same as the bucket size, one 3209 * more is needed since we may need to extend the bucket 3210 * also. 
3211 */ 3212 clusters_add += 1; 3213 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3214 if (OCFS2_XATTR_BUCKET_SIZE == 3215 OCFS2_SB(inode->i_sb)->s_clustersize) { 3216 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3217 clusters_add += 1; 3218 } 3219 } else { 3220 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3221 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3222 struct ocfs2_extent_list *el = &def_xv.xv.xr_list; 3223 meta_add += ocfs2_extend_meta_needed(el); 3224 credits += ocfs2_calc_extend_credits(inode->i_sb, 3225 el); 3226 } else { 3227 meta_add += 1; 3228 } 3229 } 3230 out: 3231 if (clusters_need) 3232 *clusters_need = clusters_add; 3233 if (meta_need) 3234 *meta_need = meta_add; 3235 if (credits_need) 3236 *credits_need = credits; 3237 brelse(bh); 3238 return ret; 3239 } 3240 3241 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3242 struct ocfs2_dinode *di, 3243 struct ocfs2_xattr_info *xi, 3244 struct ocfs2_xattr_search *xis, 3245 struct ocfs2_xattr_search *xbs, 3246 struct ocfs2_xattr_set_ctxt *ctxt, 3247 int extra_meta, 3248 int *credits) 3249 { 3250 int clusters_add, meta_add, ret; 3251 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3252 3253 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3254 3255 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3256 3257 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3258 &clusters_add, &meta_add, credits); 3259 if (ret) { 3260 mlog_errno(ret); 3261 return ret; 3262 } 3263 3264 meta_add += extra_meta; 3265 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3266 clusters_add, *credits); 3267 3268 if (meta_add) { 3269 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3270 &ctxt->meta_ac); 3271 if (ret) { 3272 mlog_errno(ret); 3273 goto out; 3274 } 3275 } 3276 3277 if (clusters_add) { 3278 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3279 if (ret) 3280 mlog_errno(ret); 3281 } 3282 out: 3283 if (ret) { 3284 if (ctxt->meta_ac) { 3285 
/*
 * Apply the set/replace/remove described by @xi inside the transaction
 * carried by ctxt->handle, then refresh the inode's ctime.
 *
 * @inode: inode whose extended attribute is being changed.
 * @di:    on-disk inode; its i_xattr_loc is consulted and its ctime
 *         fields are rewritten on success.
 * @xi:    name/value to store.  A NULL xi->xi_value means "remove".
 *         xi->xi_value and xi->xi_value_len are cleared by this function
 *         when it goes on to delete a stale copy, so @xi is modified.
 * @xis:   search state handed to ocfs2_xattr_ibody_set().
 * @xbs:   search state handed to ocfs2_xattr_block_set().
 * @ctxt:  transaction handle plus the set_abort flag.
 *
 * When a value is supplied it is first written through the inode body.
 * If that succeeds and the name was also found through @xbs, the external
 * copy is removed.  If the inode body reports -ENOSPC and ctxt->set_abort
 * is clear, the value is written through the external block instead and a
 * copy found through @xis is removed afterwards.
 *
 * Returns 0 on success, otherwise the error of the helper that failed.
 */
static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/*
		 * Remove the existing extended attribute from wherever the
		 * search found it.  If it was found in neither place, ret
		 * stays 0 and only the ctime update below runs.
		 */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to set the extended attribute in the inode first. */
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * The set succeeded and the attribute also exists in
			 * the external block, so remove that copy.
			 */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * NOTE(review): xis->not_found is masked only for the
			 * duration of the credit calculation, presumably so the
			 * estimate covers just the block-side removal - confirm
			 * against ocfs2_calc_xattr_set_need().
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			/*
			 * The inode has an external xattr location recorded
			 * but it has not been loaded into xbs yet: search it
			 * now and extend the transaction before using it.
			 */
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				/* Same temporary masking as in the branch above. */
				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * If there is no space in the inode, we set the
			 * extended attribute in the external block.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * The set succeeded and the attribute also
				 * exists in the inode, so remove that copy.
				 * Unlike the two cases above, xbs->not_found
				 * is overwritten here and not restored.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime, both in memory and in the journaled dinode. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode->i_ctime = current_time(inode);
		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}
3481 */ 3482 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3483 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3484 if (!xbs.bucket) { 3485 mlog_errno(-ENOMEM); 3486 return -ENOMEM; 3487 } 3488 } 3489 3490 xis.inode_bh = xbs.inode_bh = di_bh; 3491 di = (struct ocfs2_dinode *)di_bh->b_data; 3492 3493 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3494 3495 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3496 if (ret) 3497 goto cleanup; 3498 if (xis.not_found) { 3499 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3500 if (ret) 3501 goto cleanup; 3502 } 3503 3504 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3505 3506 cleanup: 3507 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3508 brelse(xbs.xattr_bh); 3509 ocfs2_xattr_bucket_free(xbs.bucket); 3510 3511 return ret; 3512 } 3513 3514 /* 3515 * ocfs2_xattr_set() 3516 * 3517 * Set, replace or remove an extended attribute for this inode. 3518 * value is NULL to remove an existing extended attribute, else either 3519 * create or replace an extended attribute. 
/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
 *
 * @inode:      inode to modify.
 * @name_index: attribute namespace index stored in the search info.
 * @name:       NUL-terminated attribute name (strlen() is taken of it).
 * @value:      new value, or NULL to remove.
 * @value_len:  length of @value.
 * @flags:      XATTR_CREATE fails with -EEXIST if the name already exists;
 *              XATTR_REPLACE fails with -ENODATA if it does not.
 *
 * Removing a name that does not exist returns 0 without starting a
 * transaction.  Returns -EOPNOTSUPP when the filesystem lacks xattr
 * support, -ENOMEM when the search bucket cannot be allocated, otherwise
 * 0 or the error of the step that failed.
 */
int ocfs2_xattr_set(struct inode *inode,
		    int name_index,
		    const char *name,
		    const void *value,
		    size_t value_len,
		    int flags)
{
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
	struct ocfs2_refcount_tree *ref_tree = NULL;
	struct ocfs2_lock_holder oh;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	/* Search state for the inode body; starts out as "not found". */
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	/* Search state for the external block; starts out as "not found". */
	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(osb))
		return -EOPNOTSUPP;

	/*
	 * Only xbs will be used on indexed trees.  xis doesn't need a
	 * bucket.
	 */
	xbs.bucket = ocfs2_xattr_bucket_new(inode);
	if (!xbs.bucket) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	/*
	 * Take the inode lock through the tracker; a negative return is an
	 * error, any other value is handed back to the unlock call below.
	 */
	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
	if (had_lock < 0) {
		ret = had_lock;
		mlog_errno(ret);
		goto cleanup_nolock;
	}
	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);
	/*
	 * Scan inode and external block to find the same name
	 * extended attribute and collect search information.
	 */
	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	if (xis.not_found && xbs.not_found) {
		/* Name does not exist: REPLACE fails, removal is a no-op. */
		ret = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		ret = 0;
		if (!value)
			goto cleanup;
	} else {
		/* Name already exists: CREATE fails. */
		ret = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}

	/* Check whether the value is refcounted and do some preparation. */
	if (ocfs2_is_refcount_inode(inode) &&
	    (!xis.not_found || !xbs.not_found)) {
		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
						   &xis, &xbs, &ref_tree,
						   &ref_meta, &ref_credits);
		if (ret) {
			mlog_errno(ret);
			goto cleanup;
		}
	}

	/* Flush the truncate log, if needed, while holding its inode lock. */
	inode_lock(tl_inode);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			inode_unlock(tl_inode);
			mlog_errno(ret);
			goto cleanup;
		}
	}
	inode_unlock(tl_inode);

	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
					&xbs, &ctxt, ref_meta, &credits);
	if (ret) {
		mlog_errno(ret);
		goto cleanup;
	}

	/* We need to update the inode's ctime field, so add credit for it. */
	credits += OCFS2_INODE_UPDATE_CREDITS;
	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
	if (IS_ERR(ctxt.handle)) {
		ret = PTR_ERR(ctxt.handle);
		mlog_errno(ret);
		goto out_free_ac;
	}

	/* The transaction is committed whether or not the set succeeded. */
	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
	ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);

	ocfs2_commit_trans(osb, ctxt.handle);

out_free_ac:
	if (ctxt.data_ac)
		ocfs2_free_alloc_context(ctxt.data_ac);
	if (ctxt.meta_ac)
		ocfs2_free_alloc_context(ctxt.meta_ac);
	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
		ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &ctxt.dealloc);

cleanup:
	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	/* After a successful removal, try to drop the refcount tree too. */
	if (!value && !ret) {
		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
		if (ret)
			mlog_errno(ret);
	}
	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
cleanup_nolock:
	brelse(di_bh);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}
3678 */ 3679 static int ocfs2_xattr_get_rec(struct inode *inode, 3680 u32 name_hash, 3681 u64 *p_blkno, 3682 u32 *e_cpos, 3683 u32 *num_clusters, 3684 struct ocfs2_extent_list *el) 3685 { 3686 int ret = 0, i; 3687 struct buffer_head *eb_bh = NULL; 3688 struct ocfs2_extent_block *eb; 3689 struct ocfs2_extent_rec *rec = NULL; 3690 u64 e_blkno = 0; 3691 3692 if (el->l_tree_depth) { 3693 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3694 &eb_bh); 3695 if (ret) { 3696 mlog_errno(ret); 3697 goto out; 3698 } 3699 3700 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3701 el = &eb->h_list; 3702 3703 if (el->l_tree_depth) { 3704 ret = ocfs2_error(inode->i_sb, 3705 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3706 inode->i_ino, 3707 (unsigned long long)eb_bh->b_blocknr); 3708 goto out; 3709 } 3710 } 3711 3712 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3713 rec = &el->l_recs[i]; 3714 3715 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3716 e_blkno = le64_to_cpu(rec->e_blkno); 3717 break; 3718 } 3719 } 3720 3721 if (!e_blkno) { 3722 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3723 inode->i_ino, 3724 le32_to_cpu(rec->e_cpos), 3725 ocfs2_rec_clusters(el, rec)); 3726 goto out; 3727 } 3728 3729 *p_blkno = le64_to_cpu(rec->e_blkno); 3730 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3731 if (e_cpos) 3732 *e_cpos = le32_to_cpu(rec->e_cpos); 3733 out: 3734 brelse(eb_bh); 3735 return ret; 3736 } 3737 3738 typedef int (xattr_bucket_func)(struct inode *inode, 3739 struct ocfs2_xattr_bucket *bucket, 3740 void *para); 3741 3742 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3743 struct ocfs2_xattr_bucket *bucket, 3744 int name_index, 3745 const char *name, 3746 u32 name_hash, 3747 u16 *xe_index, 3748 int *found) 3749 { 3750 int i, ret = 0, cmp = 1, block_off, new_offset; 3751 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3752 size_t name_len = strlen(name); 3753 struct 
ocfs2_xattr_entry *xe = NULL; 3754 char *xe_name; 3755 3756 /* 3757 * We don't use binary search in the bucket because there 3758 * may be multiple entries with the same name hash. 3759 */ 3760 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3761 xe = &xh->xh_entries[i]; 3762 3763 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3764 continue; 3765 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3766 break; 3767 3768 cmp = name_index - ocfs2_xattr_get_type(xe); 3769 if (!cmp) 3770 cmp = name_len - xe->xe_name_len; 3771 if (cmp) 3772 continue; 3773 3774 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3775 xh, 3776 i, 3777 &block_off, 3778 &new_offset); 3779 if (ret) { 3780 mlog_errno(ret); 3781 break; 3782 } 3783 3784 3785 xe_name = bucket_block(bucket, block_off) + new_offset; 3786 if (!memcmp(name, xe_name, name_len)) { 3787 *xe_index = i; 3788 *found = 1; 3789 ret = 0; 3790 break; 3791 } 3792 } 3793 3794 return ret; 3795 } 3796 3797 /* 3798 * Find the specified xattr entry in a series of buckets. 3799 * This series start from p_blkno and last for num_clusters. 3800 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3801 * the num of the valid buckets. 3802 * 3803 * Return the buffer_head this xattr should reside in. And if the xattr's 3804 * hash is in the gap of 2 buckets, return the lower bucket. 
/*
 * Find the specified xattr entry in a series of buckets.
 *
 * The series starts at @p_blkno.  The xh_num_buckets field of the first
 * bucket's header gives the number of valid buckets, and a binary search
 * over that range compares @name_hash with the first and last entry hash
 * of each probed bucket.
 *
 * On return without a read error, xs->bucket holds the bucket the xattr
 * should reside in and xs->header, xs->base and xs->end describe it.  If
 * the hash falls in the gap between two buckets, the lower bucket is
 * used; if no bucket qualified at all, the first one is used.
 *
 * @first_hash and @num_clusters are accepted but not read here.
 *
 * Returns 0 with xs->here set when the entry exists, -ENODATA when it
 * does not, or another negative error from allocation or bucket reads.
 */
static int ocfs2_xattr_bucket_find(struct inode *inode,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u64 p_blkno,
				   u32 first_hash,
				   u32 num_clusters,
				   struct ocfs2_xattr_search *xs)
{
	int ret, found = 0;
	struct ocfs2_xattr_header *xh = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	u16 index = 0;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int low_bucket = 0, bucket, high_bucket;
	struct ocfs2_xattr_bucket *search;
	u64 blkno, lower_blkno = 0;

	/* Scratch bucket used for probing; xs->bucket is filled in only at the end. */
	search = ocfs2_xattr_bucket_new(inode);
	if (!search) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* Read the first bucket just to learn how many buckets are valid. */
	ret = ocfs2_read_xattr_bucket(search, p_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = bucket_xh(search);
	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
	while (low_bucket <= high_bucket) {
		/* Drop whatever the previous read left in the scratch bucket. */
		ocfs2_xattr_bucket_relse(search);

		bucket = (low_bucket + high_bucket) / 2;
		blkno = p_blkno + bucket * blk_per_bucket;
		ret = ocfs2_read_xattr_bucket(search, blkno);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		xh = bucket_xh(search);
		xe = &xh->xh_entries[0];
		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
			high_bucket = bucket - 1;
			continue;
		}

		/*
		 * Check whether the hash of the last entry in our
		 * bucket is larger than the search one.  For an empty
		 * bucket, the last one is also the first one.
		 */
		if (xh->xh_count)
			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];

		/* Record lower_blkno, which may be the insert place. */
		lower_blkno = blkno;

		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
			low_bucket = bucket + 1;
			continue;
		}

		/* The searched xattr should reside in this bucket if it exists. */
		ret = ocfs2_find_xe_in_bucket(inode, search,
					      name_index, name, name_hash,
					      &index, &found);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		break;
	}

	/*
	 * Record the bucket we have found.
	 * When the xattr's hash value is in the gap of 2 buckets, we will
	 * always set it to the previous bucket.
	 */
	if (!lower_blkno)
		lower_blkno = p_blkno;

	/* This should be in cache - we just read it during the search */
	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xs->header = bucket_xh(xs->bucket);
	xs->base = bucket_block(xs->bucket, 0);
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (found) {
		xs->here = &xs->header->xh_entries[index];
		trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
			name, name_index, name_hash,
			(unsigned long long)bucket_blkno(xs->bucket),
			index);
	} else
		ret = -ENODATA;

out:
	ocfs2_xattr_bucket_free(search);
	return ret;
}
3943 mlog_errno(ret); 3944 goto out; 3945 } 3946 3947 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3948 3949 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3950 name, name_index, first_hash, 3951 (unsigned long long)p_blkno, 3952 num_clusters); 3953 3954 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3955 p_blkno, first_hash, num_clusters, xs); 3956 3957 out: 3958 return ret; 3959 } 3960 3961 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3962 u64 blkno, 3963 u32 clusters, 3964 xattr_bucket_func *func, 3965 void *para) 3966 { 3967 int i, ret = 0; 3968 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3969 u32 num_buckets = clusters * bpc; 3970 struct ocfs2_xattr_bucket *bucket; 3971 3972 bucket = ocfs2_xattr_bucket_new(inode); 3973 if (!bucket) { 3974 mlog_errno(-ENOMEM); 3975 return -ENOMEM; 3976 } 3977 3978 trace_ocfs2_iterate_xattr_buckets( 3979 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3980 (unsigned long long)blkno, clusters); 3981 3982 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3983 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3984 if (ret) { 3985 mlog_errno(ret); 3986 break; 3987 } 3988 3989 /* 3990 * The real bucket num in this series of blocks is stored 3991 * in the 1st bucket. 
3992 */ 3993 if (i == 0) 3994 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3995 3996 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 3997 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3998 if (func) { 3999 ret = func(inode, bucket, para); 4000 if (ret && ret != -ERANGE) 4001 mlog_errno(ret); 4002 /* Fall through to bucket_relse() */ 4003 } 4004 4005 ocfs2_xattr_bucket_relse(bucket); 4006 if (ret) 4007 break; 4008 } 4009 4010 ocfs2_xattr_bucket_free(bucket); 4011 return ret; 4012 } 4013 4014 struct ocfs2_xattr_tree_list { 4015 char *buffer; 4016 size_t buffer_size; 4017 size_t result; 4018 }; 4019 4020 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4021 struct ocfs2_xattr_header *xh, 4022 int index, 4023 int *block_off, 4024 int *new_offset) 4025 { 4026 u16 name_offset; 4027 4028 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4029 return -EINVAL; 4030 4031 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4032 4033 *block_off = name_offset >> sb->s_blocksize_bits; 4034 *new_offset = name_offset % sb->s_blocksize; 4035 4036 return 0; 4037 } 4038 4039 static int ocfs2_list_xattr_bucket(struct inode *inode, 4040 struct ocfs2_xattr_bucket *bucket, 4041 void *para) 4042 { 4043 int ret = 0, type; 4044 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4045 int i, block_off, new_offset; 4046 const char *name; 4047 4048 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4049 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4050 type = ocfs2_xattr_get_type(entry); 4051 4052 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4053 bucket_xh(bucket), 4054 i, 4055 &block_off, 4056 &new_offset); 4057 if (ret) 4058 break; 4059 4060 name = (const char *)bucket_block(bucket, block_off) + 4061 new_offset; 4062 ret = ocfs2_xattr_list_entry(inode->i_sb, 4063 xl->buffer, 4064 xl->buffer_size, 4065 &xl->result, 4066 type, name, 4067 
entry->xe_name_len); 4068 if (ret) 4069 break; 4070 } 4071 4072 return ret; 4073 } 4074 4075 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4076 struct buffer_head *blk_bh, 4077 xattr_tree_rec_func *rec_func, 4078 void *para) 4079 { 4080 struct ocfs2_xattr_block *xb = 4081 (struct ocfs2_xattr_block *)blk_bh->b_data; 4082 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4083 int ret = 0; 4084 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4085 u64 p_blkno = 0; 4086 4087 if (!el->l_next_free_rec || !rec_func) 4088 return 0; 4089 4090 while (name_hash > 0) { 4091 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4092 &e_cpos, &num_clusters, el); 4093 if (ret) { 4094 mlog_errno(ret); 4095 break; 4096 } 4097 4098 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4099 num_clusters, para); 4100 if (ret) { 4101 if (ret != -ERANGE) 4102 mlog_errno(ret); 4103 break; 4104 } 4105 4106 if (e_cpos == 0) 4107 break; 4108 4109 name_hash = e_cpos - 1; 4110 } 4111 4112 return ret; 4113 4114 } 4115 4116 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4117 struct buffer_head *root_bh, 4118 u64 blkno, u32 cpos, u32 len, void *para) 4119 { 4120 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4121 ocfs2_list_xattr_bucket, para); 4122 } 4123 4124 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4125 struct buffer_head *blk_bh, 4126 char *buffer, 4127 size_t buffer_size) 4128 { 4129 int ret; 4130 struct ocfs2_xattr_tree_list xl = { 4131 .buffer = buffer, 4132 .buffer_size = buffer_size, 4133 .result = 0, 4134 }; 4135 4136 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4137 ocfs2_list_xattr_tree_rec, &xl); 4138 if (ret) { 4139 mlog_errno(ret); 4140 goto out; 4141 } 4142 4143 ret = xl.result; 4144 out: 4145 return ret; 4146 } 4147 4148 static int cmp_xe(const void *a, const void *b) 4149 { 4150 const struct ocfs2_xattr_entry *l = a, *r = b; 4151 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4152 u32 r_hash = 
le32_to_cpu(r->xe_name_hash); 4153 4154 if (l_hash > r_hash) 4155 return 1; 4156 if (l_hash < r_hash) 4157 return -1; 4158 return 0; 4159 } 4160 4161 static void swap_xe(void *a, void *b, int size) 4162 { 4163 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4164 4165 tmp = *l; 4166 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4167 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4168 } 4169 4170 /* 4171 * When the ocfs2_xattr_block is filled up, new bucket will be created 4172 * and all the xattr entries will be moved to the new bucket. 4173 * The header goes at the start of the bucket, and the names+values are 4174 * filled from the end. This is why *target starts as the last buffer. 4175 * Note: we need to sort the entries since they are not saved in order 4176 * in the ocfs2_xattr_block. 4177 */ 4178 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4179 struct buffer_head *xb_bh, 4180 struct ocfs2_xattr_bucket *bucket) 4181 { 4182 int i, blocksize = inode->i_sb->s_blocksize; 4183 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4184 u16 offset, size, off_change; 4185 struct ocfs2_xattr_entry *xe; 4186 struct ocfs2_xattr_block *xb = 4187 (struct ocfs2_xattr_block *)xb_bh->b_data; 4188 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4189 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4190 u16 count = le16_to_cpu(xb_xh->xh_count); 4191 char *src = xb_bh->b_data; 4192 char *target = bucket_block(bucket, blks - 1); 4193 4194 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4195 (unsigned long long)xb_bh->b_blocknr, 4196 (unsigned long long)bucket_blkno(bucket)); 4197 4198 for (i = 0; i < blks; i++) 4199 memset(bucket_block(bucket, i), 0, blocksize); 4200 4201 /* 4202 * Since the xe_name_offset is based on ocfs2_xattr_header, 4203 * there is a offset change corresponding to the change of 4204 * ocfs2_xattr_header's position. 
4205 */ 4206 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4207 xe = &xb_xh->xh_entries[count - 1]; 4208 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4209 size = blocksize - offset; 4210 4211 /* copy all the names and values. */ 4212 memcpy(target + offset, src + offset, size); 4213 4214 /* Init new header now. */ 4215 xh->xh_count = xb_xh->xh_count; 4216 xh->xh_num_buckets = cpu_to_le16(1); 4217 xh->xh_name_value_len = cpu_to_le16(size); 4218 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4219 4220 /* copy all the entries. */ 4221 target = bucket_block(bucket, 0); 4222 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4223 size = count * sizeof(struct ocfs2_xattr_entry); 4224 memcpy(target + offset, (char *)xb_xh + offset, size); 4225 4226 /* Change the xe offset for all the xe because of the move. */ 4227 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4228 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4229 for (i = 0; i < count; i++) 4230 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4231 4232 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4233 4234 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4235 cmp_xe, swap_xe); 4236 } 4237 4238 /* 4239 * After we move xattr from block to index btree, we have to 4240 * update ocfs2_xattr_search to the new xe and base. 4241 * 4242 * When the entry is in xattr block, xattr_bh indicates the storage place. 4243 * While if the entry is in index b-tree, "bucket" indicates the 4244 * real place of the xattr. 
4245 */ 4246 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4247 struct ocfs2_xattr_search *xs, 4248 struct buffer_head *old_bh) 4249 { 4250 char *buf = old_bh->b_data; 4251 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4252 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4253 int i; 4254 4255 xs->header = bucket_xh(xs->bucket); 4256 xs->base = bucket_block(xs->bucket, 0); 4257 xs->end = xs->base + inode->i_sb->s_blocksize; 4258 4259 if (xs->not_found) 4260 return; 4261 4262 i = xs->here - old_xh->xh_entries; 4263 xs->here = &xs->header->xh_entries[i]; 4264 } 4265 4266 static int ocfs2_xattr_create_index_block(struct inode *inode, 4267 struct ocfs2_xattr_search *xs, 4268 struct ocfs2_xattr_set_ctxt *ctxt) 4269 { 4270 int ret; 4271 u32 bit_off, len; 4272 u64 blkno; 4273 handle_t *handle = ctxt->handle; 4274 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4275 struct buffer_head *xb_bh = xs->xattr_bh; 4276 struct ocfs2_xattr_block *xb = 4277 (struct ocfs2_xattr_block *)xb_bh->b_data; 4278 struct ocfs2_xattr_tree_root *xr; 4279 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4280 4281 trace_ocfs2_xattr_create_index_block_begin( 4282 (unsigned long long)xb_bh->b_blocknr); 4283 4284 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4285 BUG_ON(!xs->bucket); 4286 4287 /* 4288 * XXX: 4289 * We can use this lock for now, and maybe move to a dedicated mutex 4290 * if performance becomes a problem later. 
4291 */ 4292 down_write(&oi->ip_alloc_sem); 4293 4294 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4295 OCFS2_JOURNAL_ACCESS_WRITE); 4296 if (ret) { 4297 mlog_errno(ret); 4298 goto out; 4299 } 4300 4301 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4302 1, 1, &bit_off, &len); 4303 if (ret) { 4304 mlog_errno(ret); 4305 goto out; 4306 } 4307 4308 /* 4309 * The bucket may spread in many blocks, and 4310 * we will only touch the 1st block and the last block 4311 * in the whole bucket(one for entry and one for data). 4312 */ 4313 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4314 4315 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4316 4317 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4318 if (ret) { 4319 mlog_errno(ret); 4320 goto out; 4321 } 4322 4323 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4324 OCFS2_JOURNAL_ACCESS_CREATE); 4325 if (ret) { 4326 mlog_errno(ret); 4327 goto out; 4328 } 4329 4330 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4331 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4332 4333 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4334 4335 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4336 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4337 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4338 4339 xr = &xb->xb_attrs.xb_root; 4340 xr->xt_clusters = cpu_to_le32(1); 4341 xr->xt_last_eb_blk = 0; 4342 xr->xt_list.l_tree_depth = 0; 4343 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4344 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4345 4346 xr->xt_list.l_recs[0].e_cpos = 0; 4347 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4348 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4349 4350 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4351 4352 ocfs2_journal_dirty(handle, xb_bh); 4353 4354 out: 4355 up_write(&oi->ip_alloc_sem); 4356 4357 return ret; 4358 } 4359 4360 static int 
cmp_xe_offset(const void *a, const void *b) 4361 { 4362 const struct ocfs2_xattr_entry *l = a, *r = b; 4363 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4364 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4365 4366 if (l_name_offset < r_name_offset) 4367 return 1; 4368 if (l_name_offset > r_name_offset) 4369 return -1; 4370 return 0; 4371 } 4372 4373 /* 4374 * defrag a xattr bucket if we find that the bucket has some 4375 * holes beteen name/value pairs. 4376 * We will move all the name/value pairs to the end of the bucket 4377 * so that we can spare some space for insertion. 4378 */ 4379 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4380 handle_t *handle, 4381 struct ocfs2_xattr_bucket *bucket) 4382 { 4383 int ret, i; 4384 size_t end, offset, len; 4385 struct ocfs2_xattr_header *xh; 4386 char *entries, *buf, *bucket_buf = NULL; 4387 u64 blkno = bucket_blkno(bucket); 4388 u16 xh_free_start; 4389 size_t blocksize = inode->i_sb->s_blocksize; 4390 struct ocfs2_xattr_entry *xe; 4391 4392 /* 4393 * In order to make the operation more efficient and generic, 4394 * we copy all the blocks into a contiguous memory and do the 4395 * defragment there, so if anything is error, we will not touch 4396 * the real block. 
4397 */ 4398 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4399 if (!bucket_buf) { 4400 ret = -EIO; 4401 goto out; 4402 } 4403 4404 buf = bucket_buf; 4405 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4406 memcpy(buf, bucket_block(bucket, i), blocksize); 4407 4408 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4409 OCFS2_JOURNAL_ACCESS_WRITE); 4410 if (ret < 0) { 4411 mlog_errno(ret); 4412 goto out; 4413 } 4414 4415 xh = (struct ocfs2_xattr_header *)bucket_buf; 4416 entries = (char *)xh->xh_entries; 4417 xh_free_start = le16_to_cpu(xh->xh_free_start); 4418 4419 trace_ocfs2_defrag_xattr_bucket( 4420 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4421 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4422 4423 /* 4424 * sort all the entries by their offset. 4425 * the largest will be the first, so that we can 4426 * move them to the end one by one. 4427 */ 4428 sort(entries, le16_to_cpu(xh->xh_count), 4429 sizeof(struct ocfs2_xattr_entry), 4430 cmp_xe_offset, swap_xe); 4431 4432 /* Move all name/values to the end of the bucket. */ 4433 xe = xh->xh_entries; 4434 end = OCFS2_XATTR_BUCKET_SIZE; 4435 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4436 offset = le16_to_cpu(xe->xe_name_offset); 4437 len = namevalue_size_xe(xe); 4438 4439 /* 4440 * We must make sure that the name/value pair 4441 * exist in the same block. So adjust end to 4442 * the previous block end if needed. 
4443 */ 4444 if (((end - len) / blocksize != 4445 (end - 1) / blocksize)) 4446 end = end - end % blocksize; 4447 4448 if (end > offset + len) { 4449 memmove(bucket_buf + end - len, 4450 bucket_buf + offset, len); 4451 xe->xe_name_offset = cpu_to_le16(end - len); 4452 } 4453 4454 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4455 "bucket %llu\n", (unsigned long long)blkno); 4456 4457 end -= len; 4458 } 4459 4460 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4461 "bucket %llu\n", (unsigned long long)blkno); 4462 4463 if (xh_free_start == end) 4464 goto out; 4465 4466 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4467 xh->xh_free_start = cpu_to_le16(end); 4468 4469 /* sort the entries by their name_hash. */ 4470 sort(entries, le16_to_cpu(xh->xh_count), 4471 sizeof(struct ocfs2_xattr_entry), 4472 cmp_xe, swap_xe); 4473 4474 buf = bucket_buf; 4475 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4476 memcpy(bucket_block(bucket, i), buf, blocksize); 4477 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4478 4479 out: 4480 kfree(bucket_buf); 4481 return ret; 4482 } 4483 4484 /* 4485 * prev_blkno points to the start of an existing extent. new_blkno 4486 * points to a newly allocated extent. Because we know each of our 4487 * clusters contains more than bucket, we can easily split one cluster 4488 * at a bucket boundary. So we take the last cluster of the existing 4489 * extent and split it down the middle. We move the last half of the 4490 * buckets in the last cluster of the existing extent over to the new 4491 * extent. 4492 * 4493 * first_bh is the buffer at prev_blkno so we can update the existing 4494 * extent's bucket count. header_bh is the bucket were we were hoping 4495 * to insert our xattr. If the bucket move places the target in the new 4496 * extent, we'll update first_bh and header_bh after modifying the old 4497 * extent. 
4498 * 4499 * first_hash will be set as the 1st xe's name_hash in the new extent. 4500 */ 4501 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4502 handle_t *handle, 4503 struct ocfs2_xattr_bucket *first, 4504 struct ocfs2_xattr_bucket *target, 4505 u64 new_blkno, 4506 u32 num_clusters, 4507 u32 *first_hash) 4508 { 4509 int ret; 4510 struct super_block *sb = inode->i_sb; 4511 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4512 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4513 int to_move = num_buckets / 2; 4514 u64 src_blkno; 4515 u64 last_cluster_blkno = bucket_blkno(first) + 4516 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4517 4518 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4519 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4520 4521 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4522 (unsigned long long)last_cluster_blkno, 4523 (unsigned long long)new_blkno); 4524 4525 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4526 last_cluster_blkno, new_blkno, 4527 to_move, first_hash); 4528 if (ret) { 4529 mlog_errno(ret); 4530 goto out; 4531 } 4532 4533 /* This is the first bucket that got moved */ 4534 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4535 4536 /* 4537 * If the target bucket was part of the moved buckets, we need to 4538 * update first and target. 4539 */ 4540 if (bucket_blkno(target) >= src_blkno) { 4541 /* Find the block for the new target bucket */ 4542 src_blkno = new_blkno + 4543 (bucket_blkno(target) - src_blkno); 4544 4545 ocfs2_xattr_bucket_relse(first); 4546 ocfs2_xattr_bucket_relse(target); 4547 4548 /* 4549 * These shouldn't fail - the buffers are in the 4550 * journal from ocfs2_cp_xattr_bucket(). 
4551 */ 4552 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4553 if (ret) { 4554 mlog_errno(ret); 4555 goto out; 4556 } 4557 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4558 if (ret) 4559 mlog_errno(ret); 4560 4561 } 4562 4563 out: 4564 return ret; 4565 } 4566 4567 /* 4568 * Find the suitable pos when we divide a bucket into 2. 4569 * We have to make sure the xattrs with the same hash value exist 4570 * in the same bucket. 4571 * 4572 * If this ocfs2_xattr_header covers more than one hash value, find a 4573 * place where the hash value changes. Try to find the most even split. 4574 * The most common case is that all entries have different hash values, 4575 * and the first check we make will find a place to split. 4576 */ 4577 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4578 { 4579 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4580 int count = le16_to_cpu(xh->xh_count); 4581 int delta, middle = count / 2; 4582 4583 /* 4584 * We start at the middle. Each step gets farther away in both 4585 * directions. We therefore hit the change in hash value 4586 * nearest to the middle. Note that this loop does not execute for 4587 * count < 2. 4588 */ 4589 for (delta = 0; delta < middle; delta++) { 4590 /* Let's check delta earlier than middle */ 4591 if (cmp_xe(&entries[middle - delta - 1], 4592 &entries[middle - delta])) 4593 return middle - delta; 4594 4595 /* For even counts, don't walk off the end */ 4596 if ((middle + delta + 1) == count) 4597 continue; 4598 4599 /* Now try delta past middle */ 4600 if (cmp_xe(&entries[middle + delta], 4601 &entries[middle + delta + 1])) 4602 return middle + delta + 1; 4603 } 4604 4605 /* Every entry had the same hash */ 4606 return count; 4607 } 4608 4609 /* 4610 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4611 * first_hash will record the 1st hash of the new bucket. 4612 * 4613 * Normally half of the xattrs will be moved. 
But we have to make 4614 * sure that the xattrs with the same hash value are stored in the 4615 * same bucket. If all the xattrs in this bucket have the same hash 4616 * value, the new bucket will be initialized as an empty one and the 4617 * first_hash will be initialized as (hash_value+1). 4618 */ 4619 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4620 handle_t *handle, 4621 u64 blk, 4622 u64 new_blk, 4623 u32 *first_hash, 4624 int new_bucket_head) 4625 { 4626 int ret, i; 4627 int count, start, len, name_value_len = 0, name_offset = 0; 4628 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4629 struct ocfs2_xattr_header *xh; 4630 struct ocfs2_xattr_entry *xe; 4631 int blocksize = inode->i_sb->s_blocksize; 4632 4633 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4634 (unsigned long long)new_blk); 4635 4636 s_bucket = ocfs2_xattr_bucket_new(inode); 4637 t_bucket = ocfs2_xattr_bucket_new(inode); 4638 if (!s_bucket || !t_bucket) { 4639 ret = -ENOMEM; 4640 mlog_errno(ret); 4641 goto out; 4642 } 4643 4644 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4645 if (ret) { 4646 mlog_errno(ret); 4647 goto out; 4648 } 4649 4650 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4651 OCFS2_JOURNAL_ACCESS_WRITE); 4652 if (ret) { 4653 mlog_errno(ret); 4654 goto out; 4655 } 4656 4657 /* 4658 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4659 * there's no need to read it. 4660 */ 4661 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4662 if (ret) { 4663 mlog_errno(ret); 4664 goto out; 4665 } 4666 4667 /* 4668 * Hey, if we're overwriting t_bucket, what difference does 4669 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4670 * same part of ocfs2_cp_xattr_bucket(). 4671 */ 4672 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4673 new_bucket_head ? 
4674 OCFS2_JOURNAL_ACCESS_CREATE : 4675 OCFS2_JOURNAL_ACCESS_WRITE); 4676 if (ret) { 4677 mlog_errno(ret); 4678 goto out; 4679 } 4680 4681 xh = bucket_xh(s_bucket); 4682 count = le16_to_cpu(xh->xh_count); 4683 start = ocfs2_xattr_find_divide_pos(xh); 4684 4685 if (start == count) { 4686 xe = &xh->xh_entries[start-1]; 4687 4688 /* 4689 * initialized a new empty bucket here. 4690 * The hash value is set as one larger than 4691 * that of the last entry in the previous bucket. 4692 */ 4693 for (i = 0; i < t_bucket->bu_blocks; i++) 4694 memset(bucket_block(t_bucket, i), 0, blocksize); 4695 4696 xh = bucket_xh(t_bucket); 4697 xh->xh_free_start = cpu_to_le16(blocksize); 4698 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4699 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4700 4701 goto set_num_buckets; 4702 } 4703 4704 /* copy the whole bucket to the new first. */ 4705 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4706 4707 /* update the new bucket. */ 4708 xh = bucket_xh(t_bucket); 4709 4710 /* 4711 * Calculate the total name/value len and xh_free_start for 4712 * the old bucket first. 4713 */ 4714 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4715 name_value_len = 0; 4716 for (i = 0; i < start; i++) { 4717 xe = &xh->xh_entries[i]; 4718 name_value_len += namevalue_size_xe(xe); 4719 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4720 name_offset = le16_to_cpu(xe->xe_name_offset); 4721 } 4722 4723 /* 4724 * Now begin the modification to the new bucket. 4725 * 4726 * In the new bucket, We just move the xattr entry to the beginning 4727 * and don't touch the name/value. So there will be some holes in the 4728 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4729 * called. 
4730 */ 4731 xe = &xh->xh_entries[start]; 4732 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4733 trace_ocfs2_divide_xattr_bucket_move(len, 4734 (int)((char *)xe - (char *)xh), 4735 (int)((char *)xh->xh_entries - (char *)xh)); 4736 memmove((char *)xh->xh_entries, (char *)xe, len); 4737 xe = &xh->xh_entries[count - start]; 4738 len = sizeof(struct ocfs2_xattr_entry) * start; 4739 memset((char *)xe, 0, len); 4740 4741 le16_add_cpu(&xh->xh_count, -start); 4742 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4743 4744 /* Calculate xh_free_start for the new bucket. */ 4745 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4746 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4747 xe = &xh->xh_entries[i]; 4748 if (le16_to_cpu(xe->xe_name_offset) < 4749 le16_to_cpu(xh->xh_free_start)) 4750 xh->xh_free_start = xe->xe_name_offset; 4751 } 4752 4753 set_num_buckets: 4754 /* set xh->xh_num_buckets for the new xh. */ 4755 if (new_bucket_head) 4756 xh->xh_num_buckets = cpu_to_le16(1); 4757 else 4758 xh->xh_num_buckets = 0; 4759 4760 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4761 4762 /* store the first_hash of the new bucket. */ 4763 if (first_hash) 4764 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4765 4766 /* 4767 * Now only update the 1st block of the old bucket. If we 4768 * just added a new empty bucket, there is no need to modify 4769 * it. 4770 */ 4771 if (start == count) 4772 goto out; 4773 4774 xh = bucket_xh(s_bucket); 4775 memset(&xh->xh_entries[start], 0, 4776 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4777 xh->xh_count = cpu_to_le16(start); 4778 xh->xh_free_start = cpu_to_le16(name_offset); 4779 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4780 4781 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4782 4783 out: 4784 ocfs2_xattr_bucket_free(s_bucket); 4785 ocfs2_xattr_bucket_free(t_bucket); 4786 4787 return ret; 4788 } 4789 4790 /* 4791 * Copy xattr from one bucket to another bucket. 
4792 * 4793 * The caller must make sure that the journal transaction 4794 * has enough space for journaling. 4795 */ 4796 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4797 handle_t *handle, 4798 u64 s_blkno, 4799 u64 t_blkno, 4800 int t_is_new) 4801 { 4802 int ret; 4803 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4804 4805 BUG_ON(s_blkno == t_blkno); 4806 4807 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4808 (unsigned long long)t_blkno, 4809 t_is_new); 4810 4811 s_bucket = ocfs2_xattr_bucket_new(inode); 4812 t_bucket = ocfs2_xattr_bucket_new(inode); 4813 if (!s_bucket || !t_bucket) { 4814 ret = -ENOMEM; 4815 mlog_errno(ret); 4816 goto out; 4817 } 4818 4819 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4820 if (ret) 4821 goto out; 4822 4823 /* 4824 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4825 * there's no need to read it. 4826 */ 4827 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4828 if (ret) 4829 goto out; 4830 4831 /* 4832 * Hey, if we're overwriting t_bucket, what difference does 4833 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4834 * cluster to fill, we came here from 4835 * ocfs2_mv_xattr_buckets(), and it is really new - 4836 * ACCESS_CREATE is required. But we also might have moved data 4837 * out of t_bucket before extending back into it. 4838 * ocfs2_add_new_xattr_bucket() can do this - its call to 4839 * ocfs2_add_new_xattr_cluster() may have created a new extent 4840 * and copied out the end of the old extent. Then it re-extends 4841 * the old extent back to create space for new xattrs. That's 4842 * how we get here, and the bucket isn't really new. 4843 */ 4844 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4845 t_is_new ? 
4846 OCFS2_JOURNAL_ACCESS_CREATE : 4847 OCFS2_JOURNAL_ACCESS_WRITE); 4848 if (ret) 4849 goto out; 4850 4851 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4852 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4853 4854 out: 4855 ocfs2_xattr_bucket_free(t_bucket); 4856 ocfs2_xattr_bucket_free(s_bucket); 4857 4858 return ret; 4859 } 4860 4861 /* 4862 * src_blk points to the start of an existing extent. last_blk points to 4863 * last cluster in that extent. to_blk points to a newly allocated 4864 * extent. We copy the buckets from the cluster at last_blk to the new 4865 * extent. If start_bucket is non-zero, we skip that many buckets before 4866 * we start copying. The new extent's xh_num_buckets gets set to the 4867 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4868 * by the same amount. 4869 */ 4870 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4871 u64 src_blk, u64 last_blk, u64 to_blk, 4872 unsigned int start_bucket, 4873 u32 *first_hash) 4874 { 4875 int i, ret, credits; 4876 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4877 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4878 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4879 struct ocfs2_xattr_bucket *old_first, *new_first; 4880 4881 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4882 (unsigned long long)to_blk); 4883 4884 BUG_ON(start_bucket >= num_buckets); 4885 if (start_bucket) { 4886 num_buckets -= start_bucket; 4887 last_blk += (start_bucket * blks_per_bucket); 4888 } 4889 4890 /* The first bucket of the original extent */ 4891 old_first = ocfs2_xattr_bucket_new(inode); 4892 /* The first bucket of the new extent */ 4893 new_first = ocfs2_xattr_bucket_new(inode); 4894 if (!old_first || !new_first) { 4895 ret = -ENOMEM; 4896 mlog_errno(ret); 4897 goto out; 4898 } 4899 4900 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4901 if (ret) { 4902 mlog_errno(ret); 4903 goto out; 4904 } 4905 4906 /* 4907 * We 
need to update the first bucket of the old extent and all 4908 * the buckets going to the new extent. 4909 */ 4910 credits = ((num_buckets + 1) * blks_per_bucket); 4911 ret = ocfs2_extend_trans(handle, credits); 4912 if (ret) { 4913 mlog_errno(ret); 4914 goto out; 4915 } 4916 4917 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4918 OCFS2_JOURNAL_ACCESS_WRITE); 4919 if (ret) { 4920 mlog_errno(ret); 4921 goto out; 4922 } 4923 4924 for (i = 0; i < num_buckets; i++) { 4925 ret = ocfs2_cp_xattr_bucket(inode, handle, 4926 last_blk + (i * blks_per_bucket), 4927 to_blk + (i * blks_per_bucket), 4928 1); 4929 if (ret) { 4930 mlog_errno(ret); 4931 goto out; 4932 } 4933 } 4934 4935 /* 4936 * Get the new bucket ready before we dirty anything 4937 * (This actually shouldn't fail, because we already dirtied 4938 * it once in ocfs2_cp_xattr_bucket()). 4939 */ 4940 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4941 if (ret) { 4942 mlog_errno(ret); 4943 goto out; 4944 } 4945 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4946 OCFS2_JOURNAL_ACCESS_WRITE); 4947 if (ret) { 4948 mlog_errno(ret); 4949 goto out; 4950 } 4951 4952 /* Now update the headers */ 4953 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4954 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4955 4956 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4957 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4958 4959 if (first_hash) 4960 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4961 4962 out: 4963 ocfs2_xattr_bucket_free(new_first); 4964 ocfs2_xattr_bucket_free(old_first); 4965 return ret; 4966 } 4967 4968 /* 4969 * Move some xattrs in this cluster to the new cluster. 4970 * This function should only be called when bucket size == cluster size. 4971 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4972 */ 4973 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4974 handle_t *handle, 4975 u64 prev_blk, 4976 u64 new_blk, 4977 u32 *first_hash) 4978 { 4979 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4980 int ret, credits = 2 * blk_per_bucket; 4981 4982 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4983 4984 ret = ocfs2_extend_trans(handle, credits); 4985 if (ret) { 4986 mlog_errno(ret); 4987 return ret; 4988 } 4989 4990 /* Move half of the xattr in start_blk to the next bucket. */ 4991 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4992 new_blk, first_hash, 1); 4993 } 4994 4995 /* 4996 * Move some xattrs from the old cluster to the new one since they are not 4997 * contiguous in ocfs2 xattr tree. 4998 * 4999 * new_blk starts a new separate cluster, and we will move some xattrs from 5000 * prev_blk to it. v_start will be set as the first name hash value in this 5001 * new cluster so that it can be used as e_cpos during tree insertion and 5002 * don't collide with our original b-tree operations. first_bh and header_bh 5003 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5004 * to extend the insert bucket. 5005 * 5006 * The problem is how much xattr should we move to the new one and when should 5007 * we update first_bh and header_bh? 5008 * 1. If cluster size > bucket size, that means the previous cluster has more 5009 * than 1 bucket, so just move half nums of bucket into the new cluster and 5010 * update the first_bh and header_bh if the insert bucket has been moved 5011 * to the new cluster. 5012 * 2. If cluster_size == bucket_size: 5013 * a) If the previous extent rec has more than one cluster and the insert 5014 * place isn't in the last cluster, copy the entire last cluster to the 5015 * new one. This time, we don't need to upate the first_bh and header_bh 5016 * since they will not be moved into the new cluster. 
5017 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5018 * the new one. And we set the extend flag to zero if the insert place is 5019 * moved into the new allocated cluster since no extend is needed. 5020 */ 5021 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5022 handle_t *handle, 5023 struct ocfs2_xattr_bucket *first, 5024 struct ocfs2_xattr_bucket *target, 5025 u64 new_blk, 5026 u32 prev_clusters, 5027 u32 *v_start, 5028 int *extend) 5029 { 5030 int ret; 5031 5032 trace_ocfs2_adjust_xattr_cross_cluster( 5033 (unsigned long long)bucket_blkno(first), 5034 (unsigned long long)new_blk, prev_clusters); 5035 5036 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5037 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5038 handle, 5039 first, target, 5040 new_blk, 5041 prev_clusters, 5042 v_start); 5043 if (ret) 5044 mlog_errno(ret); 5045 } else { 5046 /* The start of the last cluster in the first extent */ 5047 u64 last_blk = bucket_blkno(first) + 5048 ((prev_clusters - 1) * 5049 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5050 5051 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5052 ret = ocfs2_mv_xattr_buckets(inode, handle, 5053 bucket_blkno(first), 5054 last_blk, new_blk, 0, 5055 v_start); 5056 if (ret) 5057 mlog_errno(ret); 5058 } else { 5059 ret = ocfs2_divide_xattr_cluster(inode, handle, 5060 last_blk, new_blk, 5061 v_start); 5062 if (ret) 5063 mlog_errno(ret); 5064 5065 if ((bucket_blkno(target) == last_blk) && extend) 5066 *extend = 0; 5067 } 5068 } 5069 5070 return ret; 5071 } 5072 5073 /* 5074 * Add a new cluster for xattr storage. 5075 * 5076 * If the new cluster is contiguous with the previous one, it will be 5077 * appended to the same extent record, and num_clusters will be updated. 5078 * If not, we will insert a new extent for it and move some xattrs in 5079 * the last cluster into the new allocated one. 
5080 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5081 * lose the benefits of hashing because we'll have to search large leaves. 5082 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5083 * if it's bigger). 5084 * 5085 * first_bh is the first block of the previous extent rec and header_bh 5086 * indicates the bucket we will insert the new xattrs. They will be updated 5087 * when the header_bh is moved into the new cluster. 5088 */ 5089 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5090 struct buffer_head *root_bh, 5091 struct ocfs2_xattr_bucket *first, 5092 struct ocfs2_xattr_bucket *target, 5093 u32 *num_clusters, 5094 u32 prev_cpos, 5095 int *extend, 5096 struct ocfs2_xattr_set_ctxt *ctxt) 5097 { 5098 int ret; 5099 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5100 u32 prev_clusters = *num_clusters; 5101 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5102 u64 block; 5103 handle_t *handle = ctxt->handle; 5104 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5105 struct ocfs2_extent_tree et; 5106 5107 trace_ocfs2_add_new_xattr_cluster_begin( 5108 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5109 (unsigned long long)bucket_blkno(first), 5110 prev_cpos, prev_clusters); 5111 5112 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5113 5114 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5115 OCFS2_JOURNAL_ACCESS_WRITE); 5116 if (ret < 0) { 5117 mlog_errno(ret); 5118 goto leave; 5119 } 5120 5121 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5122 clusters_to_add, &bit_off, &num_bits); 5123 if (ret < 0) { 5124 if (ret != -ENOSPC) 5125 mlog_errno(ret); 5126 goto leave; 5127 } 5128 5129 BUG_ON(num_bits > clusters_to_add); 5130 5131 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5132 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5133 5134 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5135 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5136 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5137 /* 5138 * If this cluster is contiguous with the old one and 5139 * adding this new cluster, we don't surpass the limit of 5140 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5141 * initialized and used like other buckets in the previous 5142 * cluster. 5143 * So add it as a contiguous one. The caller will handle 5144 * its init process. 5145 */ 5146 v_start = prev_cpos + prev_clusters; 5147 *num_clusters = prev_clusters + num_bits; 5148 } else { 5149 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5150 handle, 5151 first, 5152 target, 5153 block, 5154 prev_clusters, 5155 &v_start, 5156 extend); 5157 if (ret) { 5158 mlog_errno(ret); 5159 goto leave; 5160 } 5161 } 5162 5163 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5164 v_start, num_bits); 5165 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5166 num_bits, 0, ctxt->meta_ac); 5167 if (ret < 0) { 5168 mlog_errno(ret); 5169 goto leave; 5170 } 5171 5172 ocfs2_journal_dirty(handle, root_bh); 5173 5174 leave: 5175 return ret; 5176 } 5177 5178 /* 5179 * We are given an extent. 'first' is the bucket at the very front of 5180 * the extent. The extent has space for an additional bucket past 5181 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5182 * of the target bucket. We wish to shift every bucket past the target 5183 * down one, filling in that additional space. When we get back to the 5184 * target, we split the target between itself and the now-empty bucket 5185 * at target+1 (aka, target_blkno + blks_per_bucket). 
5186 */ 5187 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5188 handle_t *handle, 5189 struct ocfs2_xattr_bucket *first, 5190 u64 target_blk, 5191 u32 num_clusters) 5192 { 5193 int ret, credits; 5194 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5195 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5196 u64 end_blk; 5197 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5198 5199 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5200 (unsigned long long)bucket_blkno(first), 5201 num_clusters, new_bucket); 5202 5203 /* The extent must have room for an additional bucket */ 5204 BUG_ON(new_bucket >= 5205 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5206 5207 /* end_blk points to the last existing bucket */ 5208 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5209 5210 /* 5211 * end_blk is the start of the last existing bucket. 5212 * Thus, (end_blk - target_blk) covers the target bucket and 5213 * every bucket after it up to, but not including, the last 5214 * existing bucket. Then we add the last existing bucket, the 5215 * new bucket, and the first bucket (3 * blk_per_bucket). 5216 */ 5217 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5218 ret = ocfs2_extend_trans(handle, credits); 5219 if (ret) { 5220 mlog_errno(ret); 5221 goto out; 5222 } 5223 5224 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5225 OCFS2_JOURNAL_ACCESS_WRITE); 5226 if (ret) { 5227 mlog_errno(ret); 5228 goto out; 5229 } 5230 5231 while (end_blk != target_blk) { 5232 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5233 end_blk + blk_per_bucket, 0); 5234 if (ret) 5235 goto out; 5236 end_blk -= blk_per_bucket; 5237 } 5238 5239 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5240 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5241 target_blk + blk_per_bucket, NULL, 0); 5242 5243 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5244 ocfs2_xattr_bucket_journal_dirty(handle, first); 5245 5246 out: 5247 return ret; 5248 } 5249 5250 /* 5251 * Add new xattr bucket in an extent record and adjust the buckets 5252 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5253 * bucket we want to insert into. 5254 * 5255 * In the easy case, we will move all the buckets after target down by 5256 * one. Half of target's xattrs will be moved to the next bucket. 5257 * 5258 * If current cluster is full, we'll allocate a new one. This may not 5259 * be contiguous. The underlying calls will make sure that there is 5260 * space for the insert, shifting buckets around if necessary. 5261 * 'target' may be moved by those calls. 5262 */ 5263 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5264 struct buffer_head *xb_bh, 5265 struct ocfs2_xattr_bucket *target, 5266 struct ocfs2_xattr_set_ctxt *ctxt) 5267 { 5268 struct ocfs2_xattr_block *xb = 5269 (struct ocfs2_xattr_block *)xb_bh->b_data; 5270 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5271 struct ocfs2_extent_list *el = &xb_root->xt_list; 5272 u32 name_hash = 5273 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5274 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5275 int ret, num_buckets, extend = 1; 5276 u64 p_blkno; 5277 u32 e_cpos, num_clusters; 5278 /* The bucket at the front of the extent */ 5279 struct ocfs2_xattr_bucket *first; 5280 5281 trace_ocfs2_add_new_xattr_bucket( 5282 (unsigned long long)bucket_blkno(target)); 5283 5284 /* The first bucket of the original extent */ 5285 first = ocfs2_xattr_bucket_new(inode); 5286 if (!first) { 5287 ret = -ENOMEM; 5288 mlog_errno(ret); 5289 goto out; 5290 } 5291 5292 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5293 &num_clusters, el); 5294 if (ret) { 5295 mlog_errno(ret); 
5296 goto out; 5297 } 5298 5299 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5300 if (ret) { 5301 mlog_errno(ret); 5302 goto out; 5303 } 5304 5305 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5306 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5307 /* 5308 * This can move first+target if the target bucket moves 5309 * to the new extent. 5310 */ 5311 ret = ocfs2_add_new_xattr_cluster(inode, 5312 xb_bh, 5313 first, 5314 target, 5315 &num_clusters, 5316 e_cpos, 5317 &extend, 5318 ctxt); 5319 if (ret) { 5320 mlog_errno(ret); 5321 goto out; 5322 } 5323 } 5324 5325 if (extend) { 5326 ret = ocfs2_extend_xattr_bucket(inode, 5327 ctxt->handle, 5328 first, 5329 bucket_blkno(target), 5330 num_clusters); 5331 if (ret) 5332 mlog_errno(ret); 5333 } 5334 5335 out: 5336 ocfs2_xattr_bucket_free(first); 5337 5338 return ret; 5339 } 5340 5341 /* 5342 * Truncate the specified xe_off entry in xattr bucket. 5343 * bucket is indicated by header_bh and len is the new length. 5344 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5345 * 5346 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5347 */ 5348 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5349 struct ocfs2_xattr_bucket *bucket, 5350 int xe_off, 5351 int len, 5352 struct ocfs2_xattr_set_ctxt *ctxt) 5353 { 5354 int ret, offset; 5355 u64 value_blk; 5356 struct ocfs2_xattr_entry *xe; 5357 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5358 size_t blocksize = inode->i_sb->s_blocksize; 5359 struct ocfs2_xattr_value_buf vb = { 5360 .vb_access = ocfs2_journal_access, 5361 }; 5362 5363 xe = &xh->xh_entries[xe_off]; 5364 5365 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5366 5367 offset = le16_to_cpu(xe->xe_name_offset) + 5368 OCFS2_XATTR_SIZE(xe->xe_name_len); 5369 5370 value_blk = offset / blocksize; 5371 5372 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5373 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5374 5375 vb.vb_bh = bucket->bu_bhs[value_blk]; 5376 BUG_ON(!vb.vb_bh); 5377 5378 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5379 (vb.vb_bh->b_data + offset % blocksize); 5380 5381 /* 5382 * From here on out we have to dirty the bucket. The generic 5383 * value calls only modify one of the bucket's bhs, but we need 5384 * to send the bucket at once. So if they error, they *could* have 5385 * modified something. We have to assume they did, and dirty 5386 * the whole bucket. This leaves us in a consistent state. 5387 */ 5388 trace_ocfs2_xattr_bucket_value_truncate( 5389 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5390 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5391 if (ret) { 5392 mlog_errno(ret); 5393 goto out; 5394 } 5395 5396 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5397 OCFS2_JOURNAL_ACCESS_WRITE); 5398 if (ret) { 5399 mlog_errno(ret); 5400 goto out; 5401 } 5402 5403 xe->xe_value_size = cpu_to_le64(len); 5404 5405 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5406 5407 out: 5408 return ret; 5409 } 5410 5411 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5412 struct buffer_head *root_bh, 5413 u64 blkno, 5414 u32 cpos, 5415 u32 len, 5416 void *para) 5417 { 5418 int ret; 5419 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5420 struct inode *tl_inode = osb->osb_tl_inode; 5421 handle_t *handle; 5422 struct ocfs2_xattr_block *xb = 5423 (struct ocfs2_xattr_block *)root_bh->b_data; 5424 struct ocfs2_alloc_context *meta_ac = NULL; 5425 struct ocfs2_cached_dealloc_ctxt dealloc; 5426 struct ocfs2_extent_tree et; 5427 5428 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5429 ocfs2_delete_xattr_in_bucket, para); 5430 if (ret) { 5431 mlog_errno(ret); 5432 return ret; 5433 } 5434 5435 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5436 5437 ocfs2_init_dealloc_ctxt(&dealloc); 5438 5439 
trace_ocfs2_rm_xattr_cluster( 5440 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5441 (unsigned long long)blkno, cpos, len); 5442 5443 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5444 len); 5445 5446 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5447 if (ret) { 5448 mlog_errno(ret); 5449 return ret; 5450 } 5451 5452 inode_lock(tl_inode); 5453 5454 if (ocfs2_truncate_log_needs_flush(osb)) { 5455 ret = __ocfs2_flush_truncate_log(osb); 5456 if (ret < 0) { 5457 mlog_errno(ret); 5458 goto out; 5459 } 5460 } 5461 5462 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5463 if (IS_ERR(handle)) { 5464 ret = -ENOMEM; 5465 mlog_errno(ret); 5466 goto out; 5467 } 5468 5469 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5470 OCFS2_JOURNAL_ACCESS_WRITE); 5471 if (ret) { 5472 mlog_errno(ret); 5473 goto out_commit; 5474 } 5475 5476 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5477 &dealloc); 5478 if (ret) { 5479 mlog_errno(ret); 5480 goto out_commit; 5481 } 5482 5483 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5484 ocfs2_journal_dirty(handle, root_bh); 5485 5486 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5487 if (ret) 5488 mlog_errno(ret); 5489 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5490 5491 out_commit: 5492 ocfs2_commit_trans(osb, handle); 5493 out: 5494 ocfs2_schedule_truncate_log_flush(osb, 1); 5495 5496 inode_unlock(tl_inode); 5497 5498 if (meta_ac) 5499 ocfs2_free_alloc_context(meta_ac); 5500 5501 ocfs2_run_deallocs(osb, &dealloc); 5502 5503 return ret; 5504 } 5505 5506 /* 5507 * check whether the xattr bucket is filled up with the same hash value. 5508 * If we want to insert the xattr with the same hash, return -ENOSPC. 5509 * If we want to insert a xattr with different hash value, go ahead 5510 * and ocfs2_divide_xattr_bucket will handle this. 
5511 */ 5512 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5513 struct ocfs2_xattr_bucket *bucket, 5514 const char *name) 5515 { 5516 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5517 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5518 5519 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5520 return 0; 5521 5522 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5523 xh->xh_entries[0].xe_name_hash) { 5524 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5525 "hash = %u\n", 5526 (unsigned long long)bucket_blkno(bucket), 5527 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5528 return -ENOSPC; 5529 } 5530 5531 return 0; 5532 } 5533 5534 /* 5535 * Try to set the entry in the current bucket. If we fail, the caller 5536 * will handle getting us another bucket. 5537 */ 5538 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5539 struct ocfs2_xattr_info *xi, 5540 struct ocfs2_xattr_search *xs, 5541 struct ocfs2_xattr_set_ctxt *ctxt) 5542 { 5543 int ret; 5544 struct ocfs2_xa_loc loc; 5545 5546 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5547 5548 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5549 xs->not_found ? NULL : xs->here); 5550 ret = ocfs2_xa_set(&loc, xi, ctxt); 5551 if (!ret) { 5552 xs->here = loc.xl_entry; 5553 goto out; 5554 } 5555 if (ret != -ENOSPC) { 5556 mlog_errno(ret); 5557 goto out; 5558 } 5559 5560 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5561 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5562 xs->bucket); 5563 if (ret) { 5564 mlog_errno(ret); 5565 goto out; 5566 } 5567 5568 ret = ocfs2_xa_set(&loc, xi, ctxt); 5569 if (!ret) { 5570 xs->here = loc.xl_entry; 5571 goto out; 5572 } 5573 if (ret != -ENOSPC) 5574 mlog_errno(ret); 5575 5576 5577 out: 5578 return ret; 5579 } 5580 5581 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5582 struct ocfs2_xattr_info *xi, 5583 struct ocfs2_xattr_search *xs, 5584 struct ocfs2_xattr_set_ctxt *ctxt) 5585 { 5586 int ret; 5587 5588 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5589 5590 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5591 if (!ret) 5592 goto out; 5593 if (ret != -ENOSPC) { 5594 mlog_errno(ret); 5595 goto out; 5596 } 5597 5598 /* Ack, need more space. Let's try to get another bucket! */ 5599 5600 /* 5601 * We do not allow for overlapping ranges between buckets. And 5602 * the maximum number of collisions we will allow for then is 5603 * one bucket's worth, so check it here whether we need to 5604 * add a new bucket for the insert. 5605 */ 5606 ret = ocfs2_check_xattr_bucket_collision(inode, 5607 xs->bucket, 5608 xi->xi_name); 5609 if (ret) { 5610 mlog_errno(ret); 5611 goto out; 5612 } 5613 5614 ret = ocfs2_add_new_xattr_bucket(inode, 5615 xs->xattr_bh, 5616 xs->bucket, 5617 ctxt); 5618 if (ret) { 5619 mlog_errno(ret); 5620 goto out; 5621 } 5622 5623 /* 5624 * ocfs2_add_new_xattr_bucket() will have updated 5625 * xs->bucket if it moved, but it will not have updated 5626 * any of the other search fields. Thus, we drop it and 5627 * re-search. Everything should be cached, so it'll be 5628 * quick. 
5629 */ 5630 ocfs2_xattr_bucket_relse(xs->bucket); 5631 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5632 xi->xi_name_index, 5633 xi->xi_name, xs); 5634 if (ret && ret != -ENODATA) 5635 goto out; 5636 xs->not_found = ret; 5637 5638 /* Ok, we have a new bucket, let's try again */ 5639 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5640 if (ret && (ret != -ENOSPC)) 5641 mlog_errno(ret); 5642 5643 out: 5644 return ret; 5645 } 5646 5647 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5648 struct ocfs2_xattr_bucket *bucket, 5649 void *para) 5650 { 5651 int ret = 0, ref_credits; 5652 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5653 u16 i; 5654 struct ocfs2_xattr_entry *xe; 5655 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5656 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5657 int credits = ocfs2_remove_extent_credits(osb->sb) + 5658 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5659 struct ocfs2_xattr_value_root *xv; 5660 struct ocfs2_rm_xattr_bucket_para *args = 5661 (struct ocfs2_rm_xattr_bucket_para *)para; 5662 5663 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5664 5665 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5666 xe = &xh->xh_entries[i]; 5667 if (ocfs2_xattr_is_local(xe)) 5668 continue; 5669 5670 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5671 i, &xv, NULL); 5672 if (ret) { 5673 mlog_errno(ret); 5674 break; 5675 } 5676 5677 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5678 args->ref_ci, 5679 args->ref_root_bh, 5680 &ctxt.meta_ac, 5681 &ref_credits); 5682 5683 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5684 if (IS_ERR(ctxt.handle)) { 5685 ret = PTR_ERR(ctxt.handle); 5686 mlog_errno(ret); 5687 break; 5688 } 5689 5690 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5691 i, 0, &ctxt); 5692 5693 ocfs2_commit_trans(osb, ctxt.handle); 5694 if (ctxt.meta_ac) { 5695 ocfs2_free_alloc_context(ctxt.meta_ac); 5696 ctxt.meta_ac = NULL; 5697 } 5698 if (ret) { 5699 mlog_errno(ret); 
5700 break; 5701 } 5702 } 5703 5704 if (ctxt.meta_ac) 5705 ocfs2_free_alloc_context(ctxt.meta_ac); 5706 ocfs2_schedule_truncate_log_flush(osb, 1); 5707 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5708 return ret; 5709 } 5710 5711 /* 5712 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5713 * or change the extent record flag), we need to recalculate 5714 * the metaecc for the whole bucket. So it is done here. 5715 * 5716 * Note: 5717 * We have to give the extra credits for the caller. 5718 */ 5719 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5720 handle_t *handle, 5721 void *para) 5722 { 5723 int ret; 5724 struct ocfs2_xattr_bucket *bucket = 5725 (struct ocfs2_xattr_bucket *)para; 5726 5727 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5728 OCFS2_JOURNAL_ACCESS_WRITE); 5729 if (ret) { 5730 mlog_errno(ret); 5731 return ret; 5732 } 5733 5734 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5735 5736 return 0; 5737 } 5738 5739 /* 5740 * Special action we need if the xattr value is refcounted. 5741 * 5742 * 1. If the xattr is refcounted, lock the tree. 5743 * 2. CoW the xattr if we are setting the new value and the value 5744 * will be stored outside. 5745 * 3. In other case, decrease_refcount will work for us, so just 5746 * lock the refcount tree, calculate the meta and credits is OK. 5747 * 5748 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5749 * currently CoW is a completed transaction, while this function 5750 * will also lock the allocators and let us deadlock. So we will 5751 * CoW the whole xattr value. 
5752 */ 5753 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5754 struct ocfs2_dinode *di, 5755 struct ocfs2_xattr_info *xi, 5756 struct ocfs2_xattr_search *xis, 5757 struct ocfs2_xattr_search *xbs, 5758 struct ocfs2_refcount_tree **ref_tree, 5759 int *meta_add, 5760 int *credits) 5761 { 5762 int ret = 0; 5763 struct ocfs2_xattr_block *xb; 5764 struct ocfs2_xattr_entry *xe; 5765 char *base; 5766 u32 p_cluster, num_clusters; 5767 unsigned int ext_flags; 5768 int name_offset, name_len; 5769 struct ocfs2_xattr_value_buf vb; 5770 struct ocfs2_xattr_bucket *bucket = NULL; 5771 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5772 struct ocfs2_post_refcount refcount; 5773 struct ocfs2_post_refcount *p = NULL; 5774 struct buffer_head *ref_root_bh = NULL; 5775 5776 if (!xis->not_found) { 5777 xe = xis->here; 5778 name_offset = le16_to_cpu(xe->xe_name_offset); 5779 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5780 base = xis->base; 5781 vb.vb_bh = xis->inode_bh; 5782 vb.vb_access = ocfs2_journal_access_di; 5783 } else { 5784 int i, block_off = 0; 5785 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5786 xe = xbs->here; 5787 name_offset = le16_to_cpu(xe->xe_name_offset); 5788 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5789 i = xbs->here - xbs->header->xh_entries; 5790 5791 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5792 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5793 bucket_xh(xbs->bucket), 5794 i, &block_off, 5795 &name_offset); 5796 if (ret) { 5797 mlog_errno(ret); 5798 goto out; 5799 } 5800 base = bucket_block(xbs->bucket, block_off); 5801 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5802 vb.vb_access = ocfs2_journal_access; 5803 5804 if (ocfs2_meta_ecc(osb)) { 5805 /*create parameters for ocfs2_post_refcount. 
*/ 5806 bucket = xbs->bucket; 5807 refcount.credits = bucket->bu_blocks; 5808 refcount.para = bucket; 5809 refcount.func = 5810 ocfs2_xattr_bucket_post_refcount; 5811 p = &refcount; 5812 } 5813 } else { 5814 base = xbs->base; 5815 vb.vb_bh = xbs->xattr_bh; 5816 vb.vb_access = ocfs2_journal_access_xb; 5817 } 5818 } 5819 5820 if (ocfs2_xattr_is_local(xe)) 5821 goto out; 5822 5823 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5824 (base + name_offset + name_len); 5825 5826 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5827 &num_clusters, &vb.vb_xv->xr_list, 5828 &ext_flags); 5829 if (ret) { 5830 mlog_errno(ret); 5831 goto out; 5832 } 5833 5834 /* 5835 * We just need to check the 1st extent record, since we always 5836 * CoW the whole xattr. So there shouldn't be a xattr with 5837 * some REFCOUNT extent recs after the 1st one. 5838 */ 5839 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5840 goto out; 5841 5842 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5843 1, ref_tree, &ref_root_bh); 5844 if (ret) { 5845 mlog_errno(ret); 5846 goto out; 5847 } 5848 5849 /* 5850 * If we are deleting the xattr or the new size will be stored inside, 5851 * cool, leave it there, the xattr truncate process will remove them 5852 * for us(it still needs the refcount tree lock and the meta, credits). 5853 * And the worse case is that every cluster truncate will split the 5854 * refcount tree, and make the original extent become 3. So we will need 5855 * 2 * cluster more extent recs at most. 
5856 */ 5857 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5858 5859 ret = ocfs2_refcounted_xattr_delete_need(inode, 5860 &(*ref_tree)->rf_ci, 5861 ref_root_bh, vb.vb_xv, 5862 meta_add, credits); 5863 if (ret) 5864 mlog_errno(ret); 5865 goto out; 5866 } 5867 5868 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5869 *ref_tree, ref_root_bh, 0, 5870 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5871 if (ret) 5872 mlog_errno(ret); 5873 5874 out: 5875 brelse(ref_root_bh); 5876 return ret; 5877 } 5878 5879 /* 5880 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5881 * The physical clusters will be added to refcount tree. 5882 */ 5883 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5884 struct ocfs2_xattr_value_root *xv, 5885 struct ocfs2_extent_tree *value_et, 5886 struct ocfs2_caching_info *ref_ci, 5887 struct buffer_head *ref_root_bh, 5888 struct ocfs2_cached_dealloc_ctxt *dealloc, 5889 struct ocfs2_post_refcount *refcount) 5890 { 5891 int ret = 0; 5892 u32 clusters = le32_to_cpu(xv->xr_clusters); 5893 u32 cpos, p_cluster, num_clusters; 5894 struct ocfs2_extent_list *el = &xv->xr_list; 5895 unsigned int ext_flags; 5896 5897 cpos = 0; 5898 while (cpos < clusters) { 5899 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5900 &num_clusters, el, &ext_flags); 5901 if (ret) { 5902 mlog_errno(ret); 5903 break; 5904 } 5905 5906 cpos += num_clusters; 5907 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5908 continue; 5909 5910 BUG_ON(!p_cluster); 5911 5912 ret = ocfs2_add_refcount_flag(inode, value_et, 5913 ref_ci, ref_root_bh, 5914 cpos - num_clusters, 5915 p_cluster, num_clusters, 5916 dealloc, refcount); 5917 if (ret) { 5918 mlog_errno(ret); 5919 break; 5920 } 5921 } 5922 5923 return ret; 5924 } 5925 5926 /* 5927 * Given a normal ocfs2_xattr_header, refcount all the entries which 5928 * have value stored outside. 5929 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5930 */ 5931 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5932 struct ocfs2_xattr_value_buf *vb, 5933 struct ocfs2_xattr_header *header, 5934 struct ocfs2_caching_info *ref_ci, 5935 struct buffer_head *ref_root_bh, 5936 struct ocfs2_cached_dealloc_ctxt *dealloc) 5937 { 5938 5939 struct ocfs2_xattr_entry *xe; 5940 struct ocfs2_xattr_value_root *xv; 5941 struct ocfs2_extent_tree et; 5942 int i, ret = 0; 5943 5944 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5945 xe = &header->xh_entries[i]; 5946 5947 if (ocfs2_xattr_is_local(xe)) 5948 continue; 5949 5950 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5951 le16_to_cpu(xe->xe_name_offset) + 5952 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5953 5954 vb->vb_xv = xv; 5955 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5956 5957 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5958 ref_ci, ref_root_bh, 5959 dealloc, NULL); 5960 if (ret) { 5961 mlog_errno(ret); 5962 break; 5963 } 5964 } 5965 5966 return ret; 5967 } 5968 5969 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5970 struct buffer_head *fe_bh, 5971 struct ocfs2_caching_info *ref_ci, 5972 struct buffer_head *ref_root_bh, 5973 struct ocfs2_cached_dealloc_ctxt *dealloc) 5974 { 5975 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5976 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5977 (fe_bh->b_data + inode->i_sb->s_blocksize - 5978 le16_to_cpu(di->i_xattr_inline_size)); 5979 struct ocfs2_xattr_value_buf vb = { 5980 .vb_bh = fe_bh, 5981 .vb_access = ocfs2_journal_access_di, 5982 }; 5983 5984 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5985 ref_ci, ref_root_bh, dealloc); 5986 } 5987 5988 struct ocfs2_xattr_tree_value_refcount_para { 5989 struct ocfs2_caching_info *ref_ci; 5990 struct buffer_head *ref_root_bh; 5991 struct ocfs2_cached_dealloc_ctxt *dealloc; 5992 }; 5993 5994 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
5995 struct ocfs2_xattr_bucket *bucket, 5996 int offset, 5997 struct ocfs2_xattr_value_root **xv, 5998 struct buffer_head **bh) 5999 { 6000 int ret, block_off, name_offset; 6001 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6002 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6003 void *base; 6004 6005 ret = ocfs2_xattr_bucket_get_name_value(sb, 6006 bucket_xh(bucket), 6007 offset, 6008 &block_off, 6009 &name_offset); 6010 if (ret) { 6011 mlog_errno(ret); 6012 goto out; 6013 } 6014 6015 base = bucket_block(bucket, block_off); 6016 6017 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6018 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6019 6020 if (bh) 6021 *bh = bucket->bu_bhs[block_off]; 6022 out: 6023 return ret; 6024 } 6025 6026 /* 6027 * For a given xattr bucket, refcount all the entries which 6028 * have value stored outside. 6029 */ 6030 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6031 struct ocfs2_xattr_bucket *bucket, 6032 void *para) 6033 { 6034 int i, ret = 0; 6035 struct ocfs2_extent_tree et; 6036 struct ocfs2_xattr_tree_value_refcount_para *ref = 6037 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6038 struct ocfs2_xattr_header *xh = 6039 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6040 struct ocfs2_xattr_entry *xe; 6041 struct ocfs2_xattr_value_buf vb = { 6042 .vb_access = ocfs2_journal_access, 6043 }; 6044 struct ocfs2_post_refcount refcount = { 6045 .credits = bucket->bu_blocks, 6046 .para = bucket, 6047 .func = ocfs2_xattr_bucket_post_refcount, 6048 }; 6049 struct ocfs2_post_refcount *p = NULL; 6050 6051 /* We only need post_refcount if we support metaecc. 
*/ 6052 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6053 p = &refcount; 6054 6055 trace_ocfs2_xattr_bucket_value_refcount( 6056 (unsigned long long)bucket_blkno(bucket), 6057 le16_to_cpu(xh->xh_count)); 6058 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6059 xe = &xh->xh_entries[i]; 6060 6061 if (ocfs2_xattr_is_local(xe)) 6062 continue; 6063 6064 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6065 &vb.vb_xv, &vb.vb_bh); 6066 if (ret) { 6067 mlog_errno(ret); 6068 break; 6069 } 6070 6071 ocfs2_init_xattr_value_extent_tree(&et, 6072 INODE_CACHE(inode), &vb); 6073 6074 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6075 &et, ref->ref_ci, 6076 ref->ref_root_bh, 6077 ref->dealloc, p); 6078 if (ret) { 6079 mlog_errno(ret); 6080 break; 6081 } 6082 } 6083 6084 return ret; 6085 6086 } 6087 6088 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6089 struct buffer_head *root_bh, 6090 u64 blkno, u32 cpos, u32 len, void *para) 6091 { 6092 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6093 ocfs2_xattr_bucket_value_refcount, 6094 para); 6095 } 6096 6097 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6098 struct buffer_head *blk_bh, 6099 struct ocfs2_caching_info *ref_ci, 6100 struct buffer_head *ref_root_bh, 6101 struct ocfs2_cached_dealloc_ctxt *dealloc) 6102 { 6103 int ret = 0; 6104 struct ocfs2_xattr_block *xb = 6105 (struct ocfs2_xattr_block *)blk_bh->b_data; 6106 6107 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6108 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6109 struct ocfs2_xattr_value_buf vb = { 6110 .vb_bh = blk_bh, 6111 .vb_access = ocfs2_journal_access_xb, 6112 }; 6113 6114 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6115 ref_ci, ref_root_bh, 6116 dealloc); 6117 } else { 6118 struct ocfs2_xattr_tree_value_refcount_para para = { 6119 .ref_ci = ref_ci, 6120 .ref_root_bh = ref_root_bh, 6121 .dealloc = dealloc, 6122 }; 6123 6124 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6125 ocfs2_refcount_xattr_tree_rec, 6126 ¶); 6127 } 6128 6129 return ret; 6130 } 6131 6132 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6133 struct buffer_head *fe_bh, 6134 struct ocfs2_caching_info *ref_ci, 6135 struct buffer_head *ref_root_bh, 6136 struct ocfs2_cached_dealloc_ctxt *dealloc) 6137 { 6138 int ret = 0; 6139 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6140 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6141 struct buffer_head *blk_bh = NULL; 6142 6143 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6144 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6145 ref_ci, ref_root_bh, 6146 dealloc); 6147 if (ret) { 6148 mlog_errno(ret); 6149 goto out; 6150 } 6151 } 6152 6153 if (!di->i_xattr_loc) 6154 goto out; 6155 6156 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6157 &blk_bh); 6158 if (ret < 0) { 6159 mlog_errno(ret); 6160 goto out; 6161 } 6162 6163 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6164 ref_root_bh, dealloc); 6165 if (ret) 6166 mlog_errno(ret); 6167 6168 brelse(blk_bh); 6169 out: 6170 6171 return ret; 6172 } 6173 6174 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6175 /* 6176 * Store the information we need in xattr reflink. 6177 * old_bh and new_bh are inode bh for the old and new inode. 6178 */ 6179 struct ocfs2_xattr_reflink { 6180 struct inode *old_inode; 6181 struct inode *new_inode; 6182 struct buffer_head *old_bh; 6183 struct buffer_head *new_bh; 6184 struct ocfs2_caching_info *ref_ci; 6185 struct buffer_head *ref_root_bh; 6186 struct ocfs2_cached_dealloc_ctxt *dealloc; 6187 should_xattr_reflinked *xattr_reflinked; 6188 }; 6189 6190 /* 6191 * Given a xattr header and xe offset, 6192 * return the proper xv and the corresponding bh. 6193 * xattr in inode, block and xattr tree have different implementaions. 
6194 */ 6195 typedef int (get_xattr_value_root)(struct super_block *sb, 6196 struct buffer_head *bh, 6197 struct ocfs2_xattr_header *xh, 6198 int offset, 6199 struct ocfs2_xattr_value_root **xv, 6200 struct buffer_head **ret_bh, 6201 void *para); 6202 6203 /* 6204 * Calculate all the xattr value root metadata stored in this xattr header and 6205 * credits we need if we create them from the scratch. 6206 * We use get_xattr_value_root so that all types of xattr container can use it. 6207 */ 6208 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6209 struct buffer_head *bh, 6210 struct ocfs2_xattr_header *xh, 6211 int *metas, int *credits, 6212 int *num_recs, 6213 get_xattr_value_root *func, 6214 void *para) 6215 { 6216 int i, ret = 0; 6217 struct ocfs2_xattr_value_root *xv; 6218 struct ocfs2_xattr_entry *xe; 6219 6220 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6221 xe = &xh->xh_entries[i]; 6222 if (ocfs2_xattr_is_local(xe)) 6223 continue; 6224 6225 ret = func(sb, bh, xh, i, &xv, NULL, para); 6226 if (ret) { 6227 mlog_errno(ret); 6228 break; 6229 } 6230 6231 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6232 le16_to_cpu(xv->xr_list.l_next_free_rec); 6233 6234 *credits += ocfs2_calc_extend_credits(sb, 6235 &def_xv.xv.xr_list); 6236 6237 /* 6238 * If the value is a tree with depth > 1, We don't go deep 6239 * to the extent block, so just calculate a maximum record num. 6240 */ 6241 if (!xv->xr_list.l_tree_depth) 6242 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6243 else 6244 *num_recs += ocfs2_clusters_for_bytes(sb, 6245 XATTR_SIZE_MAX); 6246 } 6247 6248 return ret; 6249 } 6250 6251 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6252 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6253 struct buffer_head *bh, 6254 struct ocfs2_xattr_header *xh, 6255 int offset, 6256 struct ocfs2_xattr_value_root **xv, 6257 struct buffer_head **ret_bh, 6258 void *para) 6259 { 6260 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6261 6262 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6263 le16_to_cpu(xe->xe_name_offset) + 6264 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6265 6266 if (ret_bh) 6267 *ret_bh = bh; 6268 6269 return 0; 6270 } 6271 6272 /* 6273 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6274 * It is only used for inline xattr and xattr block. 6275 */ 6276 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6277 struct ocfs2_xattr_header *xh, 6278 struct buffer_head *ref_root_bh, 6279 int *credits, 6280 struct ocfs2_alloc_context **meta_ac) 6281 { 6282 int ret, meta_add = 0, num_recs = 0; 6283 struct ocfs2_refcount_block *rb = 6284 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6285 6286 *credits = 0; 6287 6288 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6289 &meta_add, credits, &num_recs, 6290 ocfs2_get_xattr_value_root, 6291 NULL); 6292 if (ret) { 6293 mlog_errno(ret); 6294 goto out; 6295 } 6296 6297 /* 6298 * We need to add/modify num_recs in refcount tree, so just calculate 6299 * an approximate number we need for refcount tree change. 6300 * Sometimes we need to split the tree, and after split, half recs 6301 * will be moved to the new block, and a new block can only provide 6302 * half number of recs. So we multiple new blocks by 2. 
6303 */ 6304 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6305 meta_add += num_recs; 6306 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6307 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6308 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6309 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6310 else 6311 *credits += 1; 6312 6313 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6314 if (ret) 6315 mlog_errno(ret); 6316 6317 out: 6318 return ret; 6319 } 6320 6321 /* 6322 * Given a xattr header, reflink all the xattrs in this container. 6323 * It can be used for inode, block and bucket. 6324 * 6325 * NOTE: 6326 * Before we call this function, the caller has memcpy the xattr in 6327 * old_xh to the new_xh. 6328 * 6329 * If args.xattr_reflinked is set, call it to decide whether the xe should 6330 * be reflinked or not. If not, remove it from the new xattr header. 6331 */ 6332 static int ocfs2_reflink_xattr_header(handle_t *handle, 6333 struct ocfs2_xattr_reflink *args, 6334 struct buffer_head *old_bh, 6335 struct ocfs2_xattr_header *xh, 6336 struct buffer_head *new_bh, 6337 struct ocfs2_xattr_header *new_xh, 6338 struct ocfs2_xattr_value_buf *vb, 6339 struct ocfs2_alloc_context *meta_ac, 6340 get_xattr_value_root *func, 6341 void *para) 6342 { 6343 int ret = 0, i, j; 6344 struct super_block *sb = args->old_inode->i_sb; 6345 struct buffer_head *value_bh; 6346 struct ocfs2_xattr_entry *xe, *last; 6347 struct ocfs2_xattr_value_root *xv, *new_xv; 6348 struct ocfs2_extent_tree data_et; 6349 u32 clusters, cpos, p_cluster, num_clusters; 6350 unsigned int ext_flags = 0; 6351 6352 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6353 le16_to_cpu(xh->xh_count)); 6354 6355 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6356 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6357 xe = &xh->xh_entries[i]; 6358 6359 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6360 xe = &new_xh->xh_entries[j]; 6361 6362 le16_add_cpu(&new_xh->xh_count, -1); 6363 if (new_xh->xh_count) { 6364 memmove(xe, xe + 1, 6365 (void *)last - (void *)xe); 6366 memset(last, 0, 6367 sizeof(struct ocfs2_xattr_entry)); 6368 } 6369 6370 /* 6371 * We don't want j to increase in the next round since 6372 * it is already moved ahead. 6373 */ 6374 j--; 6375 continue; 6376 } 6377 6378 if (ocfs2_xattr_is_local(xe)) 6379 continue; 6380 6381 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6382 if (ret) { 6383 mlog_errno(ret); 6384 break; 6385 } 6386 6387 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6388 if (ret) { 6389 mlog_errno(ret); 6390 break; 6391 } 6392 6393 /* 6394 * For the xattr which has l_tree_depth = 0, all the extent 6395 * recs have already be copied to the new xh with the 6396 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6397 * increase the refount count int the refcount tree. 6398 * 6399 * For the xattr which has l_tree_depth > 0, we need 6400 * to initialize it to the empty default value root, 6401 * and then insert the extents one by one. 
6402 */ 6403 if (xv->xr_list.l_tree_depth) { 6404 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); 6405 vb->vb_xv = new_xv; 6406 vb->vb_bh = value_bh; 6407 ocfs2_init_xattr_value_extent_tree(&data_et, 6408 INODE_CACHE(args->new_inode), vb); 6409 } 6410 6411 clusters = le32_to_cpu(xv->xr_clusters); 6412 cpos = 0; 6413 while (cpos < clusters) { 6414 ret = ocfs2_xattr_get_clusters(args->old_inode, 6415 cpos, 6416 &p_cluster, 6417 &num_clusters, 6418 &xv->xr_list, 6419 &ext_flags); 6420 if (ret) { 6421 mlog_errno(ret); 6422 goto out; 6423 } 6424 6425 BUG_ON(!p_cluster); 6426 6427 if (xv->xr_list.l_tree_depth) { 6428 ret = ocfs2_insert_extent(handle, 6429 &data_et, cpos, 6430 ocfs2_clusters_to_blocks( 6431 args->old_inode->i_sb, 6432 p_cluster), 6433 num_clusters, ext_flags, 6434 meta_ac); 6435 if (ret) { 6436 mlog_errno(ret); 6437 goto out; 6438 } 6439 } 6440 6441 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6442 args->ref_root_bh, 6443 p_cluster, num_clusters, 6444 meta_ac, args->dealloc); 6445 if (ret) { 6446 mlog_errno(ret); 6447 goto out; 6448 } 6449 6450 cpos += num_clusters; 6451 } 6452 } 6453 6454 out: 6455 return ret; 6456 } 6457 6458 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6459 { 6460 int ret = 0, credits = 0; 6461 handle_t *handle; 6462 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6463 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6464 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6465 int header_off = osb->sb->s_blocksize - inline_size; 6466 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6467 (args->old_bh->b_data + header_off); 6468 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6469 (args->new_bh->b_data + header_off); 6470 struct ocfs2_alloc_context *meta_ac = NULL; 6471 struct ocfs2_inode_info *new_oi; 6472 struct ocfs2_dinode *new_di; 6473 struct ocfs2_xattr_value_buf vb = { 6474 .vb_bh = args->new_bh, 6475 .vb_access = 
ocfs2_journal_access_di, 6476 }; 6477 6478 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6479 &credits, &meta_ac); 6480 if (ret) { 6481 mlog_errno(ret); 6482 goto out; 6483 } 6484 6485 handle = ocfs2_start_trans(osb, credits); 6486 if (IS_ERR(handle)) { 6487 ret = PTR_ERR(handle); 6488 mlog_errno(ret); 6489 goto out; 6490 } 6491 6492 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6493 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6494 if (ret) { 6495 mlog_errno(ret); 6496 goto out_commit; 6497 } 6498 6499 memcpy(args->new_bh->b_data + header_off, 6500 args->old_bh->b_data + header_off, inline_size); 6501 6502 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6503 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6504 6505 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6506 args->new_bh, new_xh, &vb, meta_ac, 6507 ocfs2_get_xattr_value_root, NULL); 6508 if (ret) { 6509 mlog_errno(ret); 6510 goto out_commit; 6511 } 6512 6513 new_oi = OCFS2_I(args->new_inode); 6514 /* 6515 * Adjust extent record count to reserve space for extended attribute. 6516 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6517 */ 6518 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6519 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6520 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6521 le16_add_cpu(&el->l_count, -(inline_size / 6522 sizeof(struct ocfs2_extent_rec))); 6523 } 6524 spin_lock(&new_oi->ip_lock); 6525 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6526 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6527 spin_unlock(&new_oi->ip_lock); 6528 6529 ocfs2_journal_dirty(handle, args->new_bh); 6530 6531 out_commit: 6532 ocfs2_commit_trans(osb, handle); 6533 6534 out: 6535 if (meta_ac) 6536 ocfs2_free_alloc_context(meta_ac); 6537 return ret; 6538 } 6539 6540 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6541 struct buffer_head *fe_bh, 6542 struct buffer_head **ret_bh, 6543 int indexed) 6544 { 6545 int ret; 6546 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6547 struct ocfs2_xattr_set_ctxt ctxt; 6548 6549 memset(&ctxt, 0, sizeof(ctxt)); 6550 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6551 if (ret < 0) { 6552 mlog_errno(ret); 6553 return ret; 6554 } 6555 6556 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6557 if (IS_ERR(ctxt.handle)) { 6558 ret = PTR_ERR(ctxt.handle); 6559 mlog_errno(ret); 6560 goto out; 6561 } 6562 6563 trace_ocfs2_create_empty_xattr_block( 6564 (unsigned long long)fe_bh->b_blocknr, indexed); 6565 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6566 ret_bh); 6567 if (ret) 6568 mlog_errno(ret); 6569 6570 ocfs2_commit_trans(osb, ctxt.handle); 6571 out: 6572 ocfs2_free_alloc_context(ctxt.meta_ac); 6573 return ret; 6574 } 6575 6576 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6577 struct buffer_head *blk_bh, 6578 struct buffer_head *new_blk_bh) 6579 { 6580 int ret = 0, credits = 0; 6581 handle_t *handle; 6582 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6583 struct ocfs2_dinode *new_di; 
6584 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6585 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6586 struct ocfs2_xattr_block *xb = 6587 (struct ocfs2_xattr_block *)blk_bh->b_data; 6588 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6589 struct ocfs2_xattr_block *new_xb = 6590 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6591 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6592 struct ocfs2_alloc_context *meta_ac; 6593 struct ocfs2_xattr_value_buf vb = { 6594 .vb_bh = new_blk_bh, 6595 .vb_access = ocfs2_journal_access_xb, 6596 }; 6597 6598 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6599 &credits, &meta_ac); 6600 if (ret) { 6601 mlog_errno(ret); 6602 return ret; 6603 } 6604 6605 /* One more credits in case we need to add xattr flags in new inode. */ 6606 handle = ocfs2_start_trans(osb, credits + 1); 6607 if (IS_ERR(handle)) { 6608 ret = PTR_ERR(handle); 6609 mlog_errno(ret); 6610 goto out; 6611 } 6612 6613 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6614 ret = ocfs2_journal_access_di(handle, 6615 INODE_CACHE(args->new_inode), 6616 args->new_bh, 6617 OCFS2_JOURNAL_ACCESS_WRITE); 6618 if (ret) { 6619 mlog_errno(ret); 6620 goto out_commit; 6621 } 6622 } 6623 6624 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6625 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6626 if (ret) { 6627 mlog_errno(ret); 6628 goto out_commit; 6629 } 6630 6631 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6632 osb->sb->s_blocksize - header_off); 6633 6634 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6635 new_blk_bh, new_xh, &vb, meta_ac, 6636 ocfs2_get_xattr_value_root, NULL); 6637 if (ret) { 6638 mlog_errno(ret); 6639 goto out_commit; 6640 } 6641 6642 ocfs2_journal_dirty(handle, new_blk_bh); 6643 6644 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6645 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6646 
spin_lock(&new_oi->ip_lock); 6647 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6648 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6649 spin_unlock(&new_oi->ip_lock); 6650 6651 ocfs2_journal_dirty(handle, args->new_bh); 6652 } 6653 6654 out_commit: 6655 ocfs2_commit_trans(osb, handle); 6656 6657 out: 6658 ocfs2_free_alloc_context(meta_ac); 6659 return ret; 6660 } 6661 6662 struct ocfs2_reflink_xattr_tree_args { 6663 struct ocfs2_xattr_reflink *reflink; 6664 struct buffer_head *old_blk_bh; 6665 struct buffer_head *new_blk_bh; 6666 struct ocfs2_xattr_bucket *old_bucket; 6667 struct ocfs2_xattr_bucket *new_bucket; 6668 }; 6669 6670 /* 6671 * NOTE: 6672 * We have to handle the case that both old bucket and new bucket 6673 * will call this function to get the right ret_bh. 6674 * So The caller must give us the right bh. 6675 */ 6676 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6677 struct buffer_head *bh, 6678 struct ocfs2_xattr_header *xh, 6679 int offset, 6680 struct ocfs2_xattr_value_root **xv, 6681 struct buffer_head **ret_bh, 6682 void *para) 6683 { 6684 struct ocfs2_reflink_xattr_tree_args *args = 6685 (struct ocfs2_reflink_xattr_tree_args *)para; 6686 struct ocfs2_xattr_bucket *bucket; 6687 6688 if (bh == args->old_bucket->bu_bhs[0]) 6689 bucket = args->old_bucket; 6690 else 6691 bucket = args->new_bucket; 6692 6693 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6694 xv, ret_bh); 6695 } 6696 6697 struct ocfs2_value_tree_metas { 6698 int num_metas; 6699 int credits; 6700 int num_recs; 6701 }; 6702 6703 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6704 struct buffer_head *bh, 6705 struct ocfs2_xattr_header *xh, 6706 int offset, 6707 struct ocfs2_xattr_value_root **xv, 6708 struct buffer_head **ret_bh, 6709 void *para) 6710 { 6711 struct ocfs2_xattr_bucket *bucket = 6712 (struct ocfs2_xattr_bucket *)para; 6713 6714 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6715 xv, 
ret_bh); 6716 } 6717 6718 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6719 struct ocfs2_xattr_bucket *bucket, 6720 void *para) 6721 { 6722 struct ocfs2_value_tree_metas *metas = 6723 (struct ocfs2_value_tree_metas *)para; 6724 struct ocfs2_xattr_header *xh = 6725 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6726 6727 /* Add the credits for this bucket first. */ 6728 metas->credits += bucket->bu_blocks; 6729 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6730 xh, &metas->num_metas, 6731 &metas->credits, &metas->num_recs, 6732 ocfs2_value_tree_metas_in_bucket, 6733 bucket); 6734 } 6735 6736 /* 6737 * Given a xattr extent rec starting from blkno and having len clusters, 6738 * iterate all the buckets calculate how much metadata we need for reflinking 6739 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6740 */ 6741 static int ocfs2_lock_reflink_xattr_rec_allocators( 6742 struct ocfs2_reflink_xattr_tree_args *args, 6743 struct ocfs2_extent_tree *xt_et, 6744 u64 blkno, u32 len, int *credits, 6745 struct ocfs2_alloc_context **meta_ac, 6746 struct ocfs2_alloc_context **data_ac) 6747 { 6748 int ret, num_free_extents; 6749 struct ocfs2_value_tree_metas metas; 6750 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6751 struct ocfs2_refcount_block *rb; 6752 6753 memset(&metas, 0, sizeof(metas)); 6754 6755 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6756 ocfs2_calc_value_tree_metas, &metas); 6757 if (ret) { 6758 mlog_errno(ret); 6759 goto out; 6760 } 6761 6762 *credits = metas.credits; 6763 6764 /* 6765 * Calculate we need for refcount tree change. 6766 * 6767 * We need to add/modify num_recs in refcount tree, so just calculate 6768 * an approximate number we need for refcount tree change. 6769 * Sometimes we need to split the tree, and after split, half recs 6770 * will be moved to the new block, and a new block can only provide 6771 * half number of recs. 
So we multiple new blocks by 2. 6772 * In the end, we have to add credits for modifying the already 6773 * existed refcount block. 6774 */ 6775 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6776 metas.num_recs = 6777 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6778 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6779 metas.num_metas += metas.num_recs; 6780 *credits += metas.num_recs + 6781 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6782 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6783 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6784 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6785 else 6786 *credits += 1; 6787 6788 /* count in the xattr tree change. */ 6789 num_free_extents = ocfs2_num_free_extents(xt_et); 6790 if (num_free_extents < 0) { 6791 ret = num_free_extents; 6792 mlog_errno(ret); 6793 goto out; 6794 } 6795 6796 if (num_free_extents < len) 6797 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6798 6799 *credits += ocfs2_calc_extend_credits(osb->sb, 6800 xt_et->et_root_el); 6801 6802 if (metas.num_metas) { 6803 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6804 meta_ac); 6805 if (ret) { 6806 mlog_errno(ret); 6807 goto out; 6808 } 6809 } 6810 6811 if (len) { 6812 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6813 if (ret) 6814 mlog_errno(ret); 6815 } 6816 out: 6817 if (ret) { 6818 if (*meta_ac) { 6819 ocfs2_free_alloc_context(*meta_ac); 6820 *meta_ac = NULL; 6821 } 6822 } 6823 6824 return ret; 6825 } 6826 6827 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6828 u64 blkno, u64 new_blkno, u32 clusters, 6829 u32 *cpos, int num_buckets, 6830 struct ocfs2_alloc_context *meta_ac, 6831 struct ocfs2_alloc_context *data_ac, 6832 struct ocfs2_reflink_xattr_tree_args *args) 6833 { 6834 int i, j, ret = 0; 6835 struct super_block *sb = args->reflink->old_inode->i_sb; 6836 int bpb = args->old_bucket->bu_blocks; 6837 struct ocfs2_xattr_value_buf vb = { 6838 
.vb_access = ocfs2_journal_access, 6839 }; 6840 6841 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6842 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6843 if (ret) { 6844 mlog_errno(ret); 6845 break; 6846 } 6847 6848 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6849 if (ret) { 6850 mlog_errno(ret); 6851 break; 6852 } 6853 6854 ret = ocfs2_xattr_bucket_journal_access(handle, 6855 args->new_bucket, 6856 OCFS2_JOURNAL_ACCESS_CREATE); 6857 if (ret) { 6858 mlog_errno(ret); 6859 break; 6860 } 6861 6862 for (j = 0; j < bpb; j++) 6863 memcpy(bucket_block(args->new_bucket, j), 6864 bucket_block(args->old_bucket, j), 6865 sb->s_blocksize); 6866 6867 /* 6868 * Record the start cpos so that we can use it to initialize 6869 * our xattr tree we also set the xh_num_bucket for the new 6870 * bucket. 6871 */ 6872 if (i == 0) { 6873 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6874 xh_entries[0].xe_name_hash); 6875 bucket_xh(args->new_bucket)->xh_num_buckets = 6876 cpu_to_le16(num_buckets); 6877 } 6878 6879 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6880 6881 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6882 args->old_bucket->bu_bhs[0], 6883 bucket_xh(args->old_bucket), 6884 args->new_bucket->bu_bhs[0], 6885 bucket_xh(args->new_bucket), 6886 &vb, meta_ac, 6887 ocfs2_get_reflink_xattr_value_root, 6888 args); 6889 if (ret) { 6890 mlog_errno(ret); 6891 break; 6892 } 6893 6894 /* 6895 * Re-access and dirty the bucket to calculate metaecc. 6896 * Because we may extend the transaction in reflink_xattr_header 6897 * which will let the already accessed block gone. 
6898 */ 6899 ret = ocfs2_xattr_bucket_journal_access(handle, 6900 args->new_bucket, 6901 OCFS2_JOURNAL_ACCESS_WRITE); 6902 if (ret) { 6903 mlog_errno(ret); 6904 break; 6905 } 6906 6907 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6908 6909 ocfs2_xattr_bucket_relse(args->old_bucket); 6910 ocfs2_xattr_bucket_relse(args->new_bucket); 6911 } 6912 6913 ocfs2_xattr_bucket_relse(args->old_bucket); 6914 ocfs2_xattr_bucket_relse(args->new_bucket); 6915 return ret; 6916 } 6917 6918 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6919 struct inode *inode, 6920 struct ocfs2_reflink_xattr_tree_args *args, 6921 struct ocfs2_extent_tree *et, 6922 struct ocfs2_alloc_context *meta_ac, 6923 struct ocfs2_alloc_context *data_ac, 6924 u64 blkno, u32 cpos, u32 len) 6925 { 6926 int ret, first_inserted = 0; 6927 u32 p_cluster, num_clusters, reflink_cpos = 0; 6928 u64 new_blkno; 6929 unsigned int num_buckets, reflink_buckets; 6930 unsigned int bpc = 6931 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6932 6933 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6934 if (ret) { 6935 mlog_errno(ret); 6936 goto out; 6937 } 6938 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6939 ocfs2_xattr_bucket_relse(args->old_bucket); 6940 6941 while (len && num_buckets) { 6942 ret = ocfs2_claim_clusters(handle, data_ac, 6943 1, &p_cluster, &num_clusters); 6944 if (ret) { 6945 mlog_errno(ret); 6946 goto out; 6947 } 6948 6949 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6950 reflink_buckets = min(num_buckets, bpc * num_clusters); 6951 6952 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6953 new_blkno, num_clusters, 6954 &reflink_cpos, reflink_buckets, 6955 meta_ac, data_ac, args); 6956 if (ret) { 6957 mlog_errno(ret); 6958 goto out; 6959 } 6960 6961 /* 6962 * For the 1st allocated cluster, we make it use the same cpos 6963 * so that the xattr tree looks the same as the original one 6964 * in the most case. 
6965 */ 6966 if (!first_inserted) { 6967 reflink_cpos = cpos; 6968 first_inserted = 1; 6969 } 6970 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6971 num_clusters, 0, meta_ac); 6972 if (ret) 6973 mlog_errno(ret); 6974 6975 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6976 num_clusters, reflink_cpos); 6977 6978 len -= num_clusters; 6979 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6980 num_buckets -= reflink_buckets; 6981 } 6982 out: 6983 return ret; 6984 } 6985 6986 /* 6987 * Create the same xattr extent record in the new inode's xattr tree. 6988 */ 6989 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6990 struct buffer_head *root_bh, 6991 u64 blkno, 6992 u32 cpos, 6993 u32 len, 6994 void *para) 6995 { 6996 int ret, credits = 0; 6997 handle_t *handle; 6998 struct ocfs2_reflink_xattr_tree_args *args = 6999 (struct ocfs2_reflink_xattr_tree_args *)para; 7000 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7001 struct ocfs2_alloc_context *meta_ac = NULL; 7002 struct ocfs2_alloc_context *data_ac = NULL; 7003 struct ocfs2_extent_tree et; 7004 7005 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7006 7007 ocfs2_init_xattr_tree_extent_tree(&et, 7008 INODE_CACHE(args->reflink->new_inode), 7009 args->new_blk_bh); 7010 7011 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7012 len, &credits, 7013 &meta_ac, &data_ac); 7014 if (ret) { 7015 mlog_errno(ret); 7016 goto out; 7017 } 7018 7019 handle = ocfs2_start_trans(osb, credits); 7020 if (IS_ERR(handle)) { 7021 ret = PTR_ERR(handle); 7022 mlog_errno(ret); 7023 goto out; 7024 } 7025 7026 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7027 meta_ac, data_ac, 7028 blkno, cpos, len); 7029 if (ret) 7030 mlog_errno(ret); 7031 7032 ocfs2_commit_trans(osb, handle); 7033 7034 out: 7035 if (meta_ac) 7036 ocfs2_free_alloc_context(meta_ac); 7037 if (data_ac) 7038 ocfs2_free_alloc_context(data_ac); 7039 return ret; 7040 } 7041 7042 /* 
7043 * Create reflinked xattr buckets. 7044 * We will add bucket one by one, and refcount all the xattrs in the bucket 7045 * if they are stored outside. 7046 */ 7047 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7048 struct buffer_head *blk_bh, 7049 struct buffer_head *new_blk_bh) 7050 { 7051 int ret; 7052 struct ocfs2_reflink_xattr_tree_args para; 7053 7054 memset(¶, 0, sizeof(para)); 7055 para.reflink = args; 7056 para.old_blk_bh = blk_bh; 7057 para.new_blk_bh = new_blk_bh; 7058 7059 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7060 if (!para.old_bucket) { 7061 mlog_errno(-ENOMEM); 7062 return -ENOMEM; 7063 } 7064 7065 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7066 if (!para.new_bucket) { 7067 ret = -ENOMEM; 7068 mlog_errno(ret); 7069 goto out; 7070 } 7071 7072 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7073 ocfs2_reflink_xattr_rec, 7074 ¶); 7075 if (ret) 7076 mlog_errno(ret); 7077 7078 out: 7079 ocfs2_xattr_bucket_free(para.old_bucket); 7080 ocfs2_xattr_bucket_free(para.new_bucket); 7081 return ret; 7082 } 7083 7084 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7085 struct buffer_head *blk_bh) 7086 { 7087 int ret, indexed = 0; 7088 struct buffer_head *new_blk_bh = NULL; 7089 struct ocfs2_xattr_block *xb = 7090 (struct ocfs2_xattr_block *)blk_bh->b_data; 7091 7092 7093 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7094 indexed = 1; 7095 7096 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7097 &new_blk_bh, indexed); 7098 if (ret) { 7099 mlog_errno(ret); 7100 goto out; 7101 } 7102 7103 if (!indexed) 7104 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7105 else 7106 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7107 if (ret) 7108 mlog_errno(ret); 7109 7110 out: 7111 brelse(new_blk_bh); 7112 return ret; 7113 } 7114 7115 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7116 { 7117 int type = 
ocfs2_xattr_get_type(xe); 7118 7119 return type != OCFS2_XATTR_INDEX_SECURITY && 7120 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7121 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7122 } 7123 7124 int ocfs2_reflink_xattrs(struct inode *old_inode, 7125 struct buffer_head *old_bh, 7126 struct inode *new_inode, 7127 struct buffer_head *new_bh, 7128 bool preserve_security) 7129 { 7130 int ret; 7131 struct ocfs2_xattr_reflink args; 7132 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7133 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7134 struct buffer_head *blk_bh = NULL; 7135 struct ocfs2_cached_dealloc_ctxt dealloc; 7136 struct ocfs2_refcount_tree *ref_tree; 7137 struct buffer_head *ref_root_bh = NULL; 7138 7139 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7140 le64_to_cpu(di->i_refcount_loc), 7141 1, &ref_tree, &ref_root_bh); 7142 if (ret) { 7143 mlog_errno(ret); 7144 goto out; 7145 } 7146 7147 ocfs2_init_dealloc_ctxt(&dealloc); 7148 7149 args.old_inode = old_inode; 7150 args.new_inode = new_inode; 7151 args.old_bh = old_bh; 7152 args.new_bh = new_bh; 7153 args.ref_ci = &ref_tree->rf_ci; 7154 args.ref_root_bh = ref_root_bh; 7155 args.dealloc = &dealloc; 7156 if (preserve_security) 7157 args.xattr_reflinked = NULL; 7158 else 7159 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7160 7161 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7162 ret = ocfs2_reflink_xattr_inline(&args); 7163 if (ret) { 7164 mlog_errno(ret); 7165 goto out_unlock; 7166 } 7167 } 7168 7169 if (!di->i_xattr_loc) 7170 goto out_unlock; 7171 7172 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7173 &blk_bh); 7174 if (ret < 0) { 7175 mlog_errno(ret); 7176 goto out_unlock; 7177 } 7178 7179 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7180 if (ret) 7181 mlog_errno(ret); 7182 7183 brelse(blk_bh); 7184 7185 out_unlock: 7186 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7187 ref_tree, 1); 7188 brelse(ref_root_bh); 7189 7190 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7191 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7192 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7193 } 7194 7195 out: 7196 return ret; 7197 } 7198 7199 /* 7200 * Initialize security and acl for a already created inode. 7201 * Used for reflink a non-preserve-security file. 7202 * 7203 * It uses common api like ocfs2_xattr_set, so the caller 7204 * must not hold any lock expect i_rwsem. 7205 */ 7206 int ocfs2_init_security_and_acl(struct inode *dir, 7207 struct inode *inode, 7208 const struct qstr *qstr) 7209 { 7210 int ret = 0; 7211 struct buffer_head *dir_bh = NULL; 7212 7213 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7214 if (ret) { 7215 mlog_errno(ret); 7216 goto leave; 7217 } 7218 7219 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7220 if (ret) { 7221 mlog_errno(ret); 7222 goto leave; 7223 } 7224 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7225 if (ret) 7226 mlog_errno(ret); 7227 7228 ocfs2_inode_unlock(dir, 0); 7229 brelse(dir_bh); 7230 leave: 7231 return ret; 7232 } 7233 7234 /* 7235 * 'security' attributes support 7236 */ 7237 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7238 struct dentry *unused, struct inode *inode, 7239 const char *name, void *buffer, size_t size) 7240 { 7241 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, 7242 name, buffer, size); 7243 } 7244 7245 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7246 struct mnt_idmap *idmap, 7247 struct dentry *unused, struct inode *inode, 7248 const char *name, const void *value, 7249 size_t size, int flags) 7250 { 7251 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7252 name, value, size, flags); 7253 } 7254 7255 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7256 void *fs_info) 7257 { 7258 struct ocfs2_security_xattr_info *si = fs_info; 7259 const struct xattr *xattr; 7260 int err = 0; 
7261 7262 if (si) { 7263 si->value = kmemdup(xattr_array->value, xattr_array->value_len, 7264 GFP_KERNEL); 7265 if (!si->value) 7266 return -ENOMEM; 7267 7268 si->name = xattr_array->name; 7269 si->value_len = xattr_array->value_len; 7270 return 0; 7271 } 7272 7273 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7274 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7275 xattr->name, xattr->value, 7276 xattr->value_len, XATTR_CREATE); 7277 if (err) 7278 break; 7279 } 7280 return err; 7281 } 7282 7283 int ocfs2_init_security_get(struct inode *inode, 7284 struct inode *dir, 7285 const struct qstr *qstr, 7286 struct ocfs2_security_xattr_info *si) 7287 { 7288 int ret; 7289 7290 /* check whether ocfs2 support feature xattr */ 7291 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7292 return -EOPNOTSUPP; 7293 if (si) { 7294 ret = security_inode_init_security(inode, dir, qstr, 7295 &ocfs2_initxattrs, si); 7296 /* 7297 * security_inode_init_security() does not return -EOPNOTSUPP, 7298 * we have to check the xattr ourselves. 
7299 */ 7300 if (!ret && !si->name) 7301 si->enable = 0; 7302 7303 return ret; 7304 } 7305 7306 return security_inode_init_security(inode, dir, qstr, 7307 &ocfs2_initxattrs, NULL); 7308 } 7309 7310 int ocfs2_init_security_set(handle_t *handle, 7311 struct inode *inode, 7312 struct buffer_head *di_bh, 7313 struct ocfs2_security_xattr_info *si, 7314 struct ocfs2_alloc_context *xattr_ac, 7315 struct ocfs2_alloc_context *data_ac) 7316 { 7317 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7318 OCFS2_XATTR_INDEX_SECURITY, 7319 si->name, si->value, si->value_len, 0, 7320 xattr_ac, data_ac); 7321 } 7322 7323 const struct xattr_handler ocfs2_xattr_security_handler = { 7324 .prefix = XATTR_SECURITY_PREFIX, 7325 .get = ocfs2_xattr_security_get, 7326 .set = ocfs2_xattr_security_set, 7327 }; 7328 7329 /* 7330 * 'trusted' attributes support 7331 */ 7332 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7333 struct dentry *unused, struct inode *inode, 7334 const char *name, void *buffer, size_t size) 7335 { 7336 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, 7337 name, buffer, size); 7338 } 7339 7340 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7341 struct mnt_idmap *idmap, 7342 struct dentry *unused, struct inode *inode, 7343 const char *name, const void *value, 7344 size_t size, int flags) 7345 { 7346 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, 7347 name, value, size, flags); 7348 } 7349 7350 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7351 .prefix = XATTR_TRUSTED_PREFIX, 7352 .get = ocfs2_xattr_trusted_get, 7353 .set = ocfs2_xattr_trusted_set, 7354 }; 7355 7356 /* 7357 * 'user' attributes support 7358 */ 7359 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7360 struct dentry *unused, struct inode *inode, 7361 const char *name, void *buffer, size_t size) 7362 { 7363 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7364 7365 if (osb->s_mount_opt & 
OCFS2_MOUNT_NOUSERXATTR) 7366 return -EOPNOTSUPP; 7367 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7368 buffer, size); 7369 } 7370 7371 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7372 struct mnt_idmap *idmap, 7373 struct dentry *unused, struct inode *inode, 7374 const char *name, const void *value, 7375 size_t size, int flags) 7376 { 7377 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7378 7379 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7380 return -EOPNOTSUPP; 7381 7382 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7383 name, value, size, flags); 7384 } 7385 7386 const struct xattr_handler ocfs2_xattr_user_handler = { 7387 .prefix = XATTR_USER_PREFIX, 7388 .get = ocfs2_xattr_user_get, 7389 .set = ocfs2_xattr_user_set, 7390 }; 7391