1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * xattr.c 4 * 5 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 6 * 7 * CREDITS: 8 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 10 */ 11 12 #include <linux/capability.h> 13 #include <linux/fs.h> 14 #include <linux/types.h> 15 #include <linux/slab.h> 16 #include <linux/highmem.h> 17 #include <linux/pagemap.h> 18 #include <linux/uio.h> 19 #include <linux/sched.h> 20 #include <linux/splice.h> 21 #include <linux/mount.h> 22 #include <linux/writeback.h> 23 #include <linux/falloc.h> 24 #include <linux/sort.h> 25 #include <linux/init.h> 26 #include <linux/module.h> 27 #include <linux/string.h> 28 #include <linux/security.h> 29 30 #include <cluster/masklog.h> 31 32 #include "ocfs2.h" 33 #include "alloc.h" 34 #include "blockcheck.h" 35 #include "dlmglue.h" 36 #include "file.h" 37 #include "symlink.h" 38 #include "sysfile.h" 39 #include "inode.h" 40 #include "journal.h" 41 #include "ocfs2_fs.h" 42 #include "suballoc.h" 43 #include "uptodate.h" 44 #include "buffer_head_io.h" 45 #include "super.h" 46 #include "xattr.h" 47 #include "refcounttree.h" 48 #include "acl.h" 49 #include "ocfs2_trace.h" 50 51 struct ocfs2_xattr_def_value_root { 52 /* Must be last as it ends in a flexible-array member. 
*/ 53 TRAILING_OVERLAP(struct ocfs2_xattr_value_root, xv, xr_list.l_recs, 54 struct ocfs2_extent_rec er; 55 ); 56 }; 57 static_assert(offsetof(struct ocfs2_xattr_def_value_root, xv.xr_list.l_recs) == 58 offsetof(struct ocfs2_xattr_def_value_root, er)); 59 60 struct ocfs2_xattr_bucket { 61 /* The inode these xattrs are associated with */ 62 struct inode *bu_inode; 63 64 /* The actual buffers that make up the bucket */ 65 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 66 67 /* How many blocks make up one bucket for this filesystem */ 68 int bu_blocks; 69 }; 70 71 struct ocfs2_xattr_set_ctxt { 72 handle_t *handle; 73 struct ocfs2_alloc_context *meta_ac; 74 struct ocfs2_alloc_context *data_ac; 75 struct ocfs2_cached_dealloc_ctxt dealloc; 76 int set_abort; 77 }; 78 79 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 80 #define OCFS2_XATTR_INLINE_SIZE 80 81 #define OCFS2_XATTR_HEADER_GAP 4 82 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 83 - sizeof(struct ocfs2_xattr_header) \ 84 - OCFS2_XATTR_HEADER_GAP) 85 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 86 - sizeof(struct ocfs2_xattr_block) \ 87 - sizeof(struct ocfs2_xattr_header) \ 88 - OCFS2_XATTR_HEADER_GAP) 89 90 static struct ocfs2_xattr_def_value_root def_xv = { 91 .xv.xr_list.l_count = cpu_to_le16(1), 92 }; 93 94 const struct xattr_handler * const ocfs2_xattr_handlers[] = { 95 &ocfs2_xattr_user_handler, 96 &ocfs2_xattr_trusted_handler, 97 &ocfs2_xattr_security_handler, 98 NULL 99 }; 100 101 static const struct xattr_handler * const ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 102 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 103 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, 104 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default, 105 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 106 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 107 }; 108 109 struct ocfs2_xattr_info { 
110 int xi_name_index; 111 const char *xi_name; 112 int xi_name_len; 113 const void *xi_value; 114 size_t xi_value_len; 115 }; 116 117 struct ocfs2_xattr_search { 118 struct buffer_head *inode_bh; 119 /* 120 * xattr_bh point to the block buffer head which has extended attribute 121 * when extended attribute in inode, xattr_bh is equal to inode_bh. 122 */ 123 struct buffer_head *xattr_bh; 124 struct ocfs2_xattr_header *header; 125 struct ocfs2_xattr_bucket *bucket; 126 void *base; 127 void *end; 128 struct ocfs2_xattr_entry *here; 129 int not_found; 130 }; 131 132 /* Operations on struct ocfs2_xa_entry */ 133 struct ocfs2_xa_loc; 134 struct ocfs2_xa_loc_operations { 135 /* 136 * Journal functions 137 */ 138 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 139 int type); 140 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 141 142 /* 143 * Return a pointer to the appropriate buffer in loc->xl_storage 144 * at the given offset from loc->xl_header. 145 */ 146 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 147 148 /* Can we reuse the existing entry for the new value? */ 149 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 150 struct ocfs2_xattr_info *xi); 151 152 /* How much space is needed for the new value? */ 153 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 154 struct ocfs2_xattr_info *xi); 155 156 /* 157 * Return the offset of the first name+value pair. This is 158 * the start of our downward-filling free space. 159 */ 160 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 161 162 /* 163 * Remove the name+value at this location. Do whatever is 164 * appropriate with the remaining name+value pairs. 
165 */ 166 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 167 168 /* Fill xl_entry with a new entry */ 169 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 170 171 /* Add name+value storage to an entry */ 172 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 173 174 /* 175 * Initialize the value buf's access and bh fields for this entry. 176 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 177 */ 178 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 179 struct ocfs2_xattr_value_buf *vb); 180 }; 181 182 /* 183 * Describes an xattr entry location. This is a memory structure 184 * tracking the on-disk structure. 185 */ 186 struct ocfs2_xa_loc { 187 /* This xattr belongs to this inode */ 188 struct inode *xl_inode; 189 190 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 191 struct ocfs2_xattr_header *xl_header; 192 193 /* Bytes from xl_header to the end of the storage */ 194 int xl_size; 195 196 /* 197 * The ocfs2_xattr_entry this location describes. If this is 198 * NULL, this location describes the on-disk structure where it 199 * would have been. 
200 */ 201 struct ocfs2_xattr_entry *xl_entry; 202 203 /* 204 * Internal housekeeping 205 */ 206 207 /* Buffer(s) containing this entry */ 208 void *xl_storage; 209 210 /* Operations on the storage backing this location */ 211 const struct ocfs2_xa_loc_operations *xl_ops; 212 }; 213 214 /* 215 * Convenience functions to calculate how much space is needed for a 216 * given name+value pair 217 */ 218 static int namevalue_size(int name_len, uint64_t value_len) 219 { 220 if (value_len > OCFS2_XATTR_INLINE_SIZE) 221 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 222 else 223 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 224 } 225 226 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 227 { 228 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 229 } 230 231 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 232 { 233 u64 value_len = le64_to_cpu(xe->xe_value_size); 234 235 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 236 ocfs2_xattr_is_local(xe)); 237 return namevalue_size(xe->xe_name_len, value_len); 238 } 239 240 241 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 242 struct ocfs2_xattr_header *xh, 243 int index, 244 int *block_off, 245 int *new_offset); 246 247 static int ocfs2_xattr_block_find(struct inode *inode, 248 int name_index, 249 const char *name, 250 struct ocfs2_xattr_search *xs); 251 static int ocfs2_xattr_index_block_find(struct inode *inode, 252 struct buffer_head *root_bh, 253 int name_index, 254 const char *name, 255 struct ocfs2_xattr_search *xs); 256 257 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 258 struct buffer_head *blk_bh, 259 char *buffer, 260 size_t buffer_size); 261 262 static int ocfs2_xattr_create_index_block(struct inode *inode, 263 struct ocfs2_xattr_search *xs, 264 struct ocfs2_xattr_set_ctxt *ctxt); 265 266 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 267 struct ocfs2_xattr_info *xi, 268 struct ocfs2_xattr_search *xs, 
269 struct ocfs2_xattr_set_ctxt *ctxt); 270 271 typedef int (xattr_tree_rec_func)(struct inode *inode, 272 struct buffer_head *root_bh, 273 u64 blkno, u32 cpos, u32 len, void *para); 274 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 275 struct buffer_head *root_bh, 276 xattr_tree_rec_func *rec_func, 277 void *para); 278 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 279 struct ocfs2_xattr_bucket *bucket, 280 void *para); 281 static int ocfs2_rm_xattr_cluster(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, 284 u32 cpos, 285 u32 len, 286 void *para); 287 288 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 289 u64 src_blk, u64 last_blk, u64 to_blk, 290 unsigned int start_bucket, 291 u32 *first_hash); 292 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 293 struct ocfs2_dinode *di, 294 struct ocfs2_xattr_info *xi, 295 struct ocfs2_xattr_search *xis, 296 struct ocfs2_xattr_search *xbs, 297 struct ocfs2_refcount_tree **ref_tree, 298 int *meta_need, 299 int *credits); 300 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 301 struct ocfs2_xattr_bucket *bucket, 302 int offset, 303 struct ocfs2_xattr_value_root **xv, 304 struct buffer_head **bh); 305 306 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 307 { 308 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 309 } 310 311 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 312 { 313 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 314 } 315 316 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 317 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 318 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 319 320 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 321 { 322 struct ocfs2_xattr_bucket *bucket; 323 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 324 325 BUG_ON(blks > 
OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 326 327 bucket = kzalloc_obj(struct ocfs2_xattr_bucket, GFP_NOFS); 328 if (bucket) { 329 bucket->bu_inode = inode; 330 bucket->bu_blocks = blks; 331 } 332 333 return bucket; 334 } 335 336 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 337 { 338 int i; 339 340 for (i = 0; i < bucket->bu_blocks; i++) { 341 brelse(bucket->bu_bhs[i]); 342 bucket->bu_bhs[i] = NULL; 343 } 344 } 345 346 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 347 { 348 if (bucket) { 349 ocfs2_xattr_bucket_relse(bucket); 350 bucket->bu_inode = NULL; 351 kfree(bucket); 352 } 353 } 354 355 /* 356 * A bucket that has never been written to disk doesn't need to be 357 * read. We just need the buffer_heads. Don't call this for 358 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 359 * them fully. 360 */ 361 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 362 u64 xb_blkno, int new) 363 { 364 int i, rc = 0; 365 366 for (i = 0; i < bucket->bu_blocks; i++) { 367 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 368 xb_blkno + i); 369 if (!bucket->bu_bhs[i]) { 370 rc = -ENOMEM; 371 mlog_errno(rc); 372 break; 373 } 374 375 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 376 bucket->bu_bhs[i])) { 377 if (new) 378 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 379 bucket->bu_bhs[i]); 380 else { 381 set_buffer_uptodate(bucket->bu_bhs[i]); 382 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 383 bucket->bu_bhs[i]); 384 } 385 } 386 } 387 388 if (rc) 389 ocfs2_xattr_bucket_relse(bucket); 390 return rc; 391 } 392 393 /* Read the xattr bucket at xb_blkno */ 394 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 395 u64 xb_blkno) 396 { 397 int rc; 398 399 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 400 bucket->bu_blocks, bucket->bu_bhs, 0, 401 NULL); 402 if (!rc) { 403 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 404 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 405 bucket->bu_bhs, 406 bucket->bu_blocks, 407 &bucket_xh(bucket)->xh_check); 408 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 409 if (rc) 410 mlog_errno(rc); 411 } 412 413 if (rc) 414 ocfs2_xattr_bucket_relse(bucket); 415 return rc; 416 } 417 418 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 419 struct ocfs2_xattr_bucket *bucket, 420 int type) 421 { 422 int i, rc = 0; 423 424 for (i = 0; i < bucket->bu_blocks; i++) { 425 rc = ocfs2_journal_access(handle, 426 INODE_CACHE(bucket->bu_inode), 427 bucket->bu_bhs[i], type); 428 if (rc) { 429 mlog_errno(rc); 430 break; 431 } 432 } 433 434 return rc; 435 } 436 437 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 438 struct ocfs2_xattr_bucket *bucket) 439 { 440 int i; 441 442 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 443 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 444 bucket->bu_bhs, bucket->bu_blocks, 445 &bucket_xh(bucket)->xh_check); 446 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 447 448 for (i = 0; i < bucket->bu_blocks; i++) 449 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 450 } 451 452 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 453 struct ocfs2_xattr_bucket *src) 454 { 455 int i; 456 int blocksize = src->bu_inode->i_sb->s_blocksize; 457 458 BUG_ON(dest->bu_blocks != src->bu_blocks); 459 BUG_ON(dest->bu_inode != src->bu_inode); 460 461 for (i = 0; i < src->bu_blocks; i++) { 462 memcpy(bucket_block(dest, i), bucket_block(src, i), 463 blocksize); 464 } 465 } 466 467 static int ocfs2_validate_xattr_block(struct super_block *sb, 468 struct buffer_head *bh) 469 { 470 int rc; 471 struct ocfs2_xattr_block *xb = 472 (struct ocfs2_xattr_block *)bh->b_data; 473 474 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 475 476 BUG_ON(!buffer_uptodate(bh)); 477 478 
/* 479 * If the ecc fails, we return the error but otherwise 480 * leave the filesystem running. We know any error is 481 * local to this block. 482 */ 483 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 484 if (rc) 485 return rc; 486 487 /* 488 * Errors after here are fatal 489 */ 490 491 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 492 return ocfs2_error(sb, 493 "Extended attribute block #%llu has bad signature %.*s\n", 494 (unsigned long long)bh->b_blocknr, 7, 495 xb->xb_signature); 496 } 497 498 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 499 return ocfs2_error(sb, 500 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 501 (unsigned long long)bh->b_blocknr, 502 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 503 } 504 505 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 506 return ocfs2_error(sb, 507 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 508 (unsigned long long)bh->b_blocknr, 509 le32_to_cpu(xb->xb_fs_generation)); 510 } 511 512 return 0; 513 } 514 515 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 516 struct buffer_head **bh) 517 { 518 int rc; 519 struct buffer_head *tmp = *bh; 520 521 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 522 ocfs2_validate_xattr_block); 523 524 /* If ocfs2_read_block() got us a new bh, pass it up. */ 525 if (!rc && !*bh) 526 *bh = tmp; 527 528 return rc; 529 } 530 531 static inline const char *ocfs2_xattr_prefix(int name_index) 532 { 533 const struct xattr_handler *handler = NULL; 534 535 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 536 handler = ocfs2_xattr_handler_map[name_index]; 537 return handler ? 
xattr_prefix(handler) : NULL; 538 } 539 540 static u32 ocfs2_xattr_name_hash(struct inode *inode, 541 const char *name, 542 int name_len) 543 { 544 /* Get hash value of uuid from super block */ 545 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 546 int i; 547 548 /* hash extended attribute name */ 549 for (i = 0; i < name_len; i++) { 550 hash = (hash << OCFS2_HASH_SHIFT) ^ 551 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 552 *name++; 553 } 554 555 return hash; 556 } 557 558 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 559 { 560 return namevalue_size(name_len, value_len) + 561 sizeof(struct ocfs2_xattr_entry); 562 } 563 564 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 565 { 566 return namevalue_size_xi(xi) + 567 sizeof(struct ocfs2_xattr_entry); 568 } 569 570 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 571 { 572 return namevalue_size_xe(xe) + 573 sizeof(struct ocfs2_xattr_entry); 574 } 575 576 int ocfs2_calc_security_init(struct inode *dir, 577 struct ocfs2_security_xattr_info *si, 578 int *want_clusters, 579 int *xattr_credits, 580 struct ocfs2_alloc_context **xattr_ac) 581 { 582 int ret = 0; 583 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 584 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 585 si->value_len); 586 587 /* 588 * The max space of security xattr taken inline is 589 * 256(name) + 80(value) + 16(entry) = 352 bytes, 590 * So reserve one metadata block for it is ok. 
591 */ 592 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 593 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 594 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 595 if (ret) { 596 mlog_errno(ret); 597 return ret; 598 } 599 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 600 } 601 602 /* reserve clusters for xattr value which will be set in B tree*/ 603 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 604 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 605 si->value_len); 606 607 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 608 new_clusters); 609 *want_clusters += new_clusters; 610 } 611 return ret; 612 } 613 614 int ocfs2_calc_xattr_init(struct inode *dir, 615 struct buffer_head *dir_bh, 616 umode_t mode, 617 struct ocfs2_security_xattr_info *si, 618 int *want_clusters, 619 int *xattr_credits, 620 int *want_meta) 621 { 622 int ret = 0; 623 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 624 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 625 626 if (si->enable) 627 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 628 si->value_len); 629 630 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 631 down_read(&OCFS2_I(dir)->ip_xattr_sem); 632 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 633 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 634 "", NULL, 0); 635 up_read(&OCFS2_I(dir)->ip_xattr_sem); 636 if (acl_len > 0) { 637 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 638 if (S_ISDIR(mode)) 639 a_size <<= 1; 640 } else if (acl_len != 0 && acl_len != -ENODATA) { 641 ret = acl_len; 642 mlog_errno(ret); 643 return ret; 644 } 645 } 646 647 if (!(s_size + a_size)) 648 return ret; 649 650 /* 651 * The max space of security xattr taken inline is 652 * 256(name) + 80(value) + 16(entry) = 352 bytes, 653 * The max space of acl xattr taken inline is 654 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 655 * when blocksize = 512, may reserve one more cluster for 656 * xattr bucket, otherwise reserve one metadata block 657 * for them is ok. 
658 * If this is a new directory with inline data, 659 * we choose to reserve the entire inline area for 660 * directory contents and force an external xattr block. 661 */ 662 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 663 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 664 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 665 *want_meta = *want_meta + 1; 666 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 667 } 668 669 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 670 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 671 *want_clusters += 1; 672 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 673 } 674 675 /* 676 * reserve credits and clusters for xattrs which has large value 677 * and have to be set outside 678 */ 679 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 680 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 681 si->value_len); 682 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 683 new_clusters); 684 *want_clusters += new_clusters; 685 } 686 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 687 acl_len > OCFS2_XATTR_INLINE_SIZE) { 688 /* for directory, it has DEFAULT and ACCESS two types of acls */ 689 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 690 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 691 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 692 new_clusters); 693 *want_clusters += new_clusters; 694 } 695 696 return ret; 697 } 698 699 static int ocfs2_xattr_extend_allocation(struct inode *inode, 700 u32 clusters_to_add, 701 struct ocfs2_xattr_value_buf *vb, 702 struct ocfs2_xattr_set_ctxt *ctxt) 703 { 704 int status = 0, credits; 705 handle_t *handle = ctxt->handle; 706 enum ocfs2_alloc_restarted why; 707 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 708 struct ocfs2_extent_tree et; 709 710 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 711 712 while (clusters_to_add) { 713 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 714 715 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 716 OCFS2_JOURNAL_ACCESS_WRITE); 717 if (status < 0) { 718 mlog_errno(status); 719 break; 720 } 721 722 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 723 status = ocfs2_add_clusters_in_btree(handle, 724 &et, 725 &logical_start, 726 clusters_to_add, 727 0, 728 ctxt->data_ac, 729 ctxt->meta_ac, 730 &why); 731 if ((status < 0) && (status != -EAGAIN)) { 732 if (status != -ENOSPC) 733 mlog_errno(status); 734 break; 735 } 736 737 ocfs2_journal_dirty(handle, vb->vb_bh); 738 739 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 740 prev_clusters; 741 742 if (why != RESTART_NONE && clusters_to_add) { 743 /* 744 * We can only fail in case the alloc file doesn't give 745 * up enough clusters. 
746 */ 747 BUG_ON(why == RESTART_META); 748 749 credits = ocfs2_calc_extend_credits(inode->i_sb, 750 &vb->vb_xv->xr_list); 751 status = ocfs2_extend_trans(handle, credits); 752 if (status < 0) { 753 status = -ENOMEM; 754 mlog_errno(status); 755 break; 756 } 757 } 758 } 759 760 return status; 761 } 762 763 static int __ocfs2_remove_xattr_range(struct inode *inode, 764 struct ocfs2_xattr_value_buf *vb, 765 u32 cpos, u32 phys_cpos, u32 len, 766 unsigned int ext_flags, 767 struct ocfs2_xattr_set_ctxt *ctxt) 768 { 769 int ret; 770 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 771 handle_t *handle = ctxt->handle; 772 struct ocfs2_extent_tree et; 773 774 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 775 776 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 777 OCFS2_JOURNAL_ACCESS_WRITE); 778 if (ret) { 779 mlog_errno(ret); 780 goto out; 781 } 782 783 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 784 &ctxt->dealloc); 785 if (ret) { 786 mlog_errno(ret); 787 goto out; 788 } 789 790 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 791 ocfs2_journal_dirty(handle, vb->vb_bh); 792 793 if (ext_flags & OCFS2_EXT_REFCOUNTED) 794 ret = ocfs2_decrease_refcount(inode, handle, 795 ocfs2_blocks_to_clusters(inode->i_sb, 796 phys_blkno), 797 len, ctxt->meta_ac, &ctxt->dealloc, 1); 798 else 799 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 800 phys_blkno, len); 801 if (ret) 802 mlog_errno(ret); 803 804 out: 805 return ret; 806 } 807 808 static int ocfs2_xattr_shrink_size(struct inode *inode, 809 u32 old_clusters, 810 u32 new_clusters, 811 struct ocfs2_xattr_value_buf *vb, 812 struct ocfs2_xattr_set_ctxt *ctxt) 813 { 814 int ret = 0; 815 unsigned int ext_flags; 816 u32 trunc_len, cpos, phys_cpos, alloc_size; 817 u64 block; 818 819 if (old_clusters <= new_clusters) 820 return 0; 821 822 cpos = new_clusters; 823 trunc_len = old_clusters - new_clusters; 824 while (trunc_len) { 825 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 826 &alloc_size, 827 &vb->vb_xv->xr_list, &ext_flags); 828 if (ret) { 829 mlog_errno(ret); 830 goto out; 831 } 832 833 if (alloc_size > trunc_len) 834 alloc_size = trunc_len; 835 836 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 837 phys_cpos, alloc_size, 838 ext_flags, ctxt); 839 if (ret) { 840 mlog_errno(ret); 841 goto out; 842 } 843 844 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 845 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 846 block, alloc_size); 847 cpos += alloc_size; 848 trunc_len -= alloc_size; 849 } 850 851 out: 852 return ret; 853 } 854 855 static int ocfs2_xattr_value_truncate(struct inode *inode, 856 struct ocfs2_xattr_value_buf *vb, 857 int len, 858 struct ocfs2_xattr_set_ctxt *ctxt) 859 { 860 int ret; 861 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 862 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 863 864 if (new_clusters == old_clusters) 865 return 0; 866 867 if (new_clusters > old_clusters) 868 ret = ocfs2_xattr_extend_allocation(inode, 869 new_clusters - old_clusters, 870 vb, ctxt); 871 else 872 ret = ocfs2_xattr_shrink_size(inode, 873 old_clusters, new_clusters, 874 vb, ctxt); 875 876 return ret; 877 } 878 879 static int ocfs2_xattr_list_entry(struct super_block *sb, 880 char *buffer, size_t size, 881 size_t *result, int type, 882 const char *name, int name_len) 883 { 884 char *p = buffer + *result; 885 const char *prefix; 886 int prefix_len; 887 int total_len; 888 889 switch(type) { 890 case OCFS2_XATTR_INDEX_USER: 891 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 892 return 0; 893 break; 894 895 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 896 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 897 if (!(sb->s_flags & SB_POSIXACL)) 898 return 0; 899 break; 900 901 case OCFS2_XATTR_INDEX_TRUSTED: 902 if (!capable(CAP_SYS_ADMIN)) 903 return 0; 904 break; 905 } 906 907 prefix = ocfs2_xattr_prefix(type); 908 if (!prefix) 909 return 0; 910 prefix_len = strlen(prefix); 911 
total_len = prefix_len + name_len + 1; 912 *result += total_len; 913 914 /* No buffer means we are only looking for the required size. */ 915 if (!buffer) 916 return 0; 917 918 if (*result > size) 919 return -ERANGE; 920 921 memcpy(p, prefix, prefix_len); 922 memcpy(p + prefix_len, name, name_len); 923 p[prefix_len + name_len] = '\0'; 924 925 return 0; 926 } 927 928 static int ocfs2_xattr_list_entries(struct inode *inode, 929 struct ocfs2_xattr_header *header, 930 char *buffer, size_t buffer_size) 931 { 932 size_t result = 0; 933 int i, type, ret; 934 const char *name; 935 936 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 937 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 938 type = ocfs2_xattr_get_type(entry); 939 name = (const char *)header + 940 le16_to_cpu(entry->xe_name_offset); 941 942 ret = ocfs2_xattr_list_entry(inode->i_sb, 943 buffer, buffer_size, 944 &result, type, name, 945 entry->xe_name_len); 946 if (ret) 947 return ret; 948 } 949 950 return result; 951 } 952 953 static int ocfs2_xattr_ibody_lookup_header(struct inode *inode, 954 struct ocfs2_dinode *di, 955 struct ocfs2_xattr_header **header) 956 { 957 u16 xattr_count; 958 size_t max_entries; 959 u16 inline_size = le16_to_cpu(di->i_xattr_inline_size); 960 961 if (inline_size > inode->i_sb->s_blocksize || 962 inline_size < sizeof(struct ocfs2_xattr_header)) { 963 ocfs2_error(inode->i_sb, 964 "Invalid xattr inline size %u in inode %llu\n", 965 inline_size, 966 (unsigned long long)OCFS2_I(inode)->ip_blkno); 967 return -EFSCORRUPTED; 968 } 969 970 *header = (struct ocfs2_xattr_header *) 971 ((void *)di + inode->i_sb->s_blocksize - inline_size); 972 973 xattr_count = le16_to_cpu((*header)->xh_count); 974 max_entries = (inline_size - sizeof(struct ocfs2_xattr_header)) / 975 sizeof(struct ocfs2_xattr_entry); 976 977 if (xattr_count > max_entries) { 978 ocfs2_error(inode->i_sb, 979 "xattr entry count %u exceeds maximum %zu in inode %llu\n", 980 xattr_count, max_entries, 981 (unsigned long 
long)OCFS2_I(inode)->ip_blkno); 982 return -EFSCORRUPTED; 983 } 984 985 return 0; 986 } 987 988 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 989 struct ocfs2_dinode *di) 990 { 991 struct ocfs2_xattr_header *xh; 992 int ret; 993 int i; 994 995 ret = ocfs2_xattr_ibody_lookup_header(inode, di, &xh); 996 if (ret) 997 return 1; 998 999 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 1000 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 1001 return 1; 1002 1003 return 0; 1004 } 1005 1006 static int ocfs2_xattr_ibody_list(struct inode *inode, 1007 struct ocfs2_dinode *di, 1008 char *buffer, 1009 size_t buffer_size) 1010 { 1011 struct ocfs2_xattr_header *header = NULL; 1012 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1013 int ret = 0; 1014 1015 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1016 return ret; 1017 1018 ret = ocfs2_xattr_ibody_lookup_header(inode, di, &header); 1019 if (ret) 1020 return ret; 1021 1022 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 1023 1024 return ret; 1025 } 1026 1027 static int ocfs2_xattr_block_list(struct inode *inode, 1028 struct ocfs2_dinode *di, 1029 char *buffer, 1030 size_t buffer_size) 1031 { 1032 struct buffer_head *blk_bh = NULL; 1033 struct ocfs2_xattr_block *xb; 1034 int ret = 0; 1035 1036 if (!di->i_xattr_loc) 1037 return ret; 1038 1039 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1040 &blk_bh); 1041 if (ret < 0) { 1042 mlog_errno(ret); 1043 return ret; 1044 } 1045 1046 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1047 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1048 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1049 ret = ocfs2_xattr_list_entries(inode, header, 1050 buffer, buffer_size); 1051 } else 1052 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1053 buffer, buffer_size); 1054 1055 brelse(blk_bh); 1056 1057 return ret; 1058 } 1059 1060 ssize_t ocfs2_listxattr(struct dentry *dentry, 1061 char *buffer, 1062 size_t size) 
1063 { 1064 int ret = 0, i_ret = 0, b_ret = 0; 1065 struct buffer_head *di_bh = NULL; 1066 struct ocfs2_dinode *di = NULL; 1067 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1068 1069 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1070 return -EOPNOTSUPP; 1071 1072 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1073 return ret; 1074 1075 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1076 if (ret < 0) { 1077 mlog_errno(ret); 1078 return ret; 1079 } 1080 1081 di = (struct ocfs2_dinode *)di_bh->b_data; 1082 1083 down_read(&oi->ip_xattr_sem); 1084 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1085 if (i_ret < 0) 1086 b_ret = 0; 1087 else { 1088 if (buffer) { 1089 buffer += i_ret; 1090 size -= i_ret; 1091 } 1092 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1093 buffer, size); 1094 if (b_ret < 0) 1095 i_ret = 0; 1096 } 1097 up_read(&oi->ip_xattr_sem); 1098 ocfs2_inode_unlock(d_inode(dentry), 0); 1099 1100 brelse(di_bh); 1101 1102 return i_ret + b_ret; 1103 } 1104 1105 static int ocfs2_xattr_find_entry(struct inode *inode, int name_index, 1106 const char *name, 1107 struct ocfs2_xattr_search *xs) 1108 { 1109 struct ocfs2_xattr_entry *entry; 1110 size_t name_len; 1111 int i, name_offset, cmp = 1; 1112 1113 if (name == NULL) 1114 return -EINVAL; 1115 1116 name_len = strlen(name); 1117 entry = xs->here; 1118 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1119 if ((void *)entry >= xs->end) { 1120 ocfs2_error(inode->i_sb, "corrupted xattr entries"); 1121 return -EFSCORRUPTED; 1122 } 1123 cmp = name_index - ocfs2_xattr_get_type(entry); 1124 if (!cmp) 1125 cmp = name_len - entry->xe_name_len; 1126 if (!cmp) { 1127 name_offset = le16_to_cpu(entry->xe_name_offset); 1128 if ((xs->base + name_offset + name_len) > xs->end) { 1129 ocfs2_error(inode->i_sb, 1130 "corrupted xattr entries"); 1131 return -EFSCORRUPTED; 1132 } 1133 cmp = memcmp(name, (xs->base + name_offset), name_len); 1134 } 1135 if (cmp == 0) 1136 break; 1137 
entry += 1; 1138 } 1139 xs->here = entry; 1140 1141 return cmp ? -ENODATA : 0; 1142 } 1143 1144 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1145 struct ocfs2_xattr_value_root *xv, 1146 void *buffer, 1147 size_t len) 1148 { 1149 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1150 u64 blkno; 1151 int i, ret = 0; 1152 size_t cplen, blocksize; 1153 struct buffer_head *bh = NULL; 1154 struct ocfs2_extent_list *el; 1155 1156 el = &xv->xr_list; 1157 clusters = le32_to_cpu(xv->xr_clusters); 1158 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1159 blocksize = inode->i_sb->s_blocksize; 1160 1161 cpos = 0; 1162 while (cpos < clusters) { 1163 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1164 &num_clusters, el, NULL); 1165 if (ret) { 1166 mlog_errno(ret); 1167 goto out; 1168 } 1169 1170 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1171 /* Copy ocfs2_xattr_value */ 1172 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1173 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1174 &bh, NULL); 1175 if (ret) { 1176 mlog_errno(ret); 1177 goto out; 1178 } 1179 1180 cplen = len >= blocksize ? 
blocksize : len; 1181 memcpy(buffer, bh->b_data, cplen); 1182 len -= cplen; 1183 buffer += cplen; 1184 1185 brelse(bh); 1186 bh = NULL; 1187 if (len == 0) 1188 break; 1189 } 1190 cpos += num_clusters; 1191 } 1192 out: 1193 return ret; 1194 } 1195 1196 static int ocfs2_xattr_ibody_get(struct inode *inode, 1197 int name_index, 1198 const char *name, 1199 void *buffer, 1200 size_t buffer_size, 1201 struct ocfs2_xattr_search *xs) 1202 { 1203 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1204 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1205 struct ocfs2_xattr_value_root *xv; 1206 size_t size; 1207 int ret = 0; 1208 1209 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1210 return -ENODATA; 1211 1212 xs->end = (void *)di + inode->i_sb->s_blocksize; 1213 ret = ocfs2_xattr_ibody_lookup_header(inode, di, &xs->header); 1214 if (ret) 1215 return ret; 1216 xs->base = (void *)xs->header; 1217 xs->here = xs->header->xh_entries; 1218 1219 ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); 1220 if (ret) 1221 return ret; 1222 size = le64_to_cpu(xs->here->xe_value_size); 1223 if (buffer) { 1224 if (size > buffer_size) 1225 return -ERANGE; 1226 if (ocfs2_xattr_is_local(xs->here)) { 1227 memcpy(buffer, (void *)xs->base + 1228 le16_to_cpu(xs->here->xe_name_offset) + 1229 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1230 } else { 1231 xv = (struct ocfs2_xattr_value_root *) 1232 (xs->base + le16_to_cpu( 1233 xs->here->xe_name_offset) + 1234 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1235 ret = ocfs2_xattr_get_value_outside(inode, xv, 1236 buffer, size); 1237 if (ret < 0) { 1238 mlog_errno(ret); 1239 return ret; 1240 } 1241 } 1242 } 1243 1244 return size; 1245 } 1246 1247 static int ocfs2_xattr_block_get(struct inode *inode, 1248 int name_index, 1249 const char *name, 1250 void *buffer, 1251 size_t buffer_size, 1252 struct ocfs2_xattr_search *xs) 1253 { 1254 struct ocfs2_xattr_block *xb; 1255 struct ocfs2_xattr_value_root *xv; 1256 size_t size; 1257 
int ret = -ENODATA, name_offset, name_len, i; 1258 int block_off; 1259 1260 xs->bucket = ocfs2_xattr_bucket_new(inode); 1261 if (!xs->bucket) { 1262 ret = -ENOMEM; 1263 mlog_errno(ret); 1264 goto cleanup; 1265 } 1266 1267 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1268 if (ret) { 1269 mlog_errno(ret); 1270 goto cleanup; 1271 } 1272 1273 if (xs->not_found) { 1274 ret = -ENODATA; 1275 goto cleanup; 1276 } 1277 1278 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1279 size = le64_to_cpu(xs->here->xe_value_size); 1280 if (buffer) { 1281 ret = -ERANGE; 1282 if (size > buffer_size) 1283 goto cleanup; 1284 1285 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1286 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1287 i = xs->here - xs->header->xh_entries; 1288 1289 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1290 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1291 bucket_xh(xs->bucket), 1292 i, 1293 &block_off, 1294 &name_offset); 1295 if (ret) { 1296 mlog_errno(ret); 1297 goto cleanup; 1298 } 1299 xs->base = bucket_block(xs->bucket, block_off); 1300 } 1301 if (ocfs2_xattr_is_local(xs->here)) { 1302 memcpy(buffer, (void *)xs->base + 1303 name_offset + name_len, size); 1304 } else { 1305 xv = (struct ocfs2_xattr_value_root *) 1306 (xs->base + name_offset + name_len); 1307 ret = ocfs2_xattr_get_value_outside(inode, xv, 1308 buffer, size); 1309 if (ret < 0) { 1310 mlog_errno(ret); 1311 goto cleanup; 1312 } 1313 } 1314 } 1315 ret = size; 1316 cleanup: 1317 ocfs2_xattr_bucket_free(xs->bucket); 1318 1319 brelse(xs->xattr_bh); 1320 xs->xattr_bh = NULL; 1321 return ret; 1322 } 1323 1324 int ocfs2_xattr_get_nolock(struct inode *inode, 1325 struct buffer_head *di_bh, 1326 int name_index, 1327 const char *name, 1328 void *buffer, 1329 size_t buffer_size) 1330 { 1331 int ret; 1332 struct ocfs2_dinode *di = NULL; 1333 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1334 struct ocfs2_xattr_search xis = { 1335 .not_found = -ENODATA, 1336 }; 
1337 struct ocfs2_xattr_search xbs = { 1338 .not_found = -ENODATA, 1339 }; 1340 1341 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1342 return -EOPNOTSUPP; 1343 1344 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1345 return -ENODATA; 1346 1347 xis.inode_bh = xbs.inode_bh = di_bh; 1348 di = (struct ocfs2_dinode *)di_bh->b_data; 1349 1350 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1351 buffer_size, &xis); 1352 if (ret == -ENODATA && di->i_xattr_loc) 1353 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1354 buffer_size, &xbs); 1355 1356 return ret; 1357 } 1358 1359 /* ocfs2_xattr_get() 1360 * 1361 * Copy an extended attribute into the buffer provided. 1362 * Buffer is NULL to compute the size of buffer required. 1363 */ 1364 static int ocfs2_xattr_get(struct inode *inode, 1365 int name_index, 1366 const char *name, 1367 void *buffer, 1368 size_t buffer_size) 1369 { 1370 int ret, had_lock; 1371 struct buffer_head *di_bh = NULL; 1372 struct ocfs2_lock_holder oh; 1373 1374 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); 1375 if (had_lock < 0) { 1376 mlog_errno(had_lock); 1377 return had_lock; 1378 } 1379 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1380 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1381 name, buffer, buffer_size); 1382 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1383 1384 ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); 1385 1386 brelse(di_bh); 1387 1388 return ret; 1389 } 1390 1391 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1392 handle_t *handle, 1393 struct ocfs2_xattr_value_buf *vb, 1394 const void *value, 1395 int value_len) 1396 { 1397 int ret = 0, i, cp_len; 1398 u16 blocksize = inode->i_sb->s_blocksize; 1399 u32 p_cluster, num_clusters; 1400 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1401 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1402 u64 blkno; 1403 struct buffer_head *bh = NULL; 1404 unsigned int ext_flags; 1405 struct 
ocfs2_xattr_value_root *xv = vb->vb_xv; 1406 1407 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1408 1409 while (cpos < clusters) { 1410 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1411 &num_clusters, &xv->xr_list, 1412 &ext_flags); 1413 if (ret) { 1414 mlog_errno(ret); 1415 goto out; 1416 } 1417 1418 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1419 1420 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1421 1422 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1423 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1424 &bh, NULL); 1425 if (ret) { 1426 mlog_errno(ret); 1427 goto out; 1428 } 1429 1430 ret = ocfs2_journal_access(handle, 1431 INODE_CACHE(inode), 1432 bh, 1433 OCFS2_JOURNAL_ACCESS_WRITE); 1434 if (ret < 0) { 1435 mlog_errno(ret); 1436 goto out; 1437 } 1438 1439 cp_len = value_len > blocksize ? blocksize : value_len; 1440 memcpy(bh->b_data, value, cp_len); 1441 value_len -= cp_len; 1442 value += cp_len; 1443 if (cp_len < blocksize) 1444 memset(bh->b_data + cp_len, 0, 1445 blocksize - cp_len); 1446 1447 ocfs2_journal_dirty(handle, bh); 1448 brelse(bh); 1449 bh = NULL; 1450 1451 /* 1452 * XXX: do we need to empty all the following 1453 * blocks in this cluster? 
1454 */ 1455 if (!value_len) 1456 break; 1457 } 1458 cpos += num_clusters; 1459 } 1460 out: 1461 brelse(bh); 1462 1463 return ret; 1464 } 1465 1466 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1467 int num_entries) 1468 { 1469 int free_space; 1470 1471 if (!needed_space) 1472 return 0; 1473 1474 free_space = free_start - 1475 sizeof(struct ocfs2_xattr_header) - 1476 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1477 OCFS2_XATTR_HEADER_GAP; 1478 if (free_space < 0) 1479 return -EIO; 1480 if (free_space < needed_space) 1481 return -ENOSPC; 1482 1483 return 0; 1484 } 1485 1486 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1487 int type) 1488 { 1489 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1490 } 1491 1492 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1493 { 1494 loc->xl_ops->xlo_journal_dirty(handle, loc); 1495 } 1496 1497 /* Give a pointer into the storage for the given offset */ 1498 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1499 { 1500 BUG_ON(offset >= loc->xl_size); 1501 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1502 } 1503 1504 /* 1505 * Wipe the name+value pair and allow the storage to reclaim it. This 1506 * must be followed by either removal of the entry or a call to 1507 * ocfs2_xa_add_namevalue(). 1508 */ 1509 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1510 { 1511 loc->xl_ops->xlo_wipe_namevalue(loc); 1512 } 1513 1514 /* 1515 * Find lowest offset to a name+value pair. This is the start of our 1516 * downward-growing free space. 1517 */ 1518 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1519 { 1520 return loc->xl_ops->xlo_get_free_start(loc); 1521 } 1522 1523 /* Can we reuse loc->xl_entry for xi? 
*/ 1524 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1525 struct ocfs2_xattr_info *xi) 1526 { 1527 return loc->xl_ops->xlo_can_reuse(loc, xi); 1528 } 1529 1530 /* How much free space is needed to set the new value */ 1531 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1532 struct ocfs2_xattr_info *xi) 1533 { 1534 return loc->xl_ops->xlo_check_space(loc, xi); 1535 } 1536 1537 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1538 { 1539 loc->xl_ops->xlo_add_entry(loc, name_hash); 1540 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1541 /* 1542 * We can't leave the new entry's xe_name_offset at zero or 1543 * add_namevalue() will go nuts. We set it to the size of our 1544 * storage so that it can never be less than any other entry. 1545 */ 1546 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1547 } 1548 1549 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1550 struct ocfs2_xattr_info *xi) 1551 { 1552 int size = namevalue_size_xi(xi); 1553 int nameval_offset; 1554 char *nameval_buf; 1555 1556 loc->xl_ops->xlo_add_namevalue(loc, size); 1557 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1558 loc->xl_entry->xe_name_len = xi->xi_name_len; 1559 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1560 ocfs2_xattr_set_local(loc->xl_entry, 1561 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1562 1563 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1564 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1565 memset(nameval_buf, 0, size); 1566 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1567 } 1568 1569 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1570 struct ocfs2_xattr_value_buf *vb) 1571 { 1572 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1573 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1574 1575 /* Value bufs are for value trees */ 1576 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1577 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1578 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1579 1580 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1581 vb->vb_xv = 1582 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1583 nameval_offset + 1584 name_size); 1585 } 1586 1587 static int ocfs2_xa_block_journal_access(handle_t *handle, 1588 struct ocfs2_xa_loc *loc, int type) 1589 { 1590 struct buffer_head *bh = loc->xl_storage; 1591 ocfs2_journal_access_func access; 1592 1593 if (loc->xl_size == (bh->b_size - 1594 offsetof(struct ocfs2_xattr_block, 1595 xb_attrs.xb_header))) 1596 access = ocfs2_journal_access_xb; 1597 else 1598 access = ocfs2_journal_access_di; 1599 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1600 } 1601 1602 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1603 struct ocfs2_xa_loc *loc) 1604 { 1605 struct buffer_head *bh = loc->xl_storage; 1606 1607 ocfs2_journal_dirty(handle, bh); 1608 } 1609 1610 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1611 int offset) 1612 { 1613 return (char *)loc->xl_header + offset; 1614 } 1615 1616 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1617 struct ocfs2_xattr_info *xi) 1618 { 1619 /* 1620 * Block storage is strict. If the sizes aren't exact, we will 1621 * remove the old one and reinsert the new. 
1622 */ 1623 return namevalue_size_xe(loc->xl_entry) == 1624 namevalue_size_xi(xi); 1625 } 1626 1627 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1628 { 1629 struct ocfs2_xattr_header *xh = loc->xl_header; 1630 int i, count = le16_to_cpu(xh->xh_count); 1631 int offset, free_start = loc->xl_size; 1632 1633 for (i = 0; i < count; i++) { 1634 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1635 if (offset < free_start) 1636 free_start = offset; 1637 } 1638 1639 return free_start; 1640 } 1641 1642 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1643 struct ocfs2_xattr_info *xi) 1644 { 1645 int count = le16_to_cpu(loc->xl_header->xh_count); 1646 int free_start = ocfs2_xa_get_free_start(loc); 1647 int needed_space = ocfs2_xi_entry_usage(xi); 1648 1649 /* 1650 * Block storage will reclaim the original entry before inserting 1651 * the new value, so we only need the difference. If the new 1652 * entry is smaller than the old one, we don't need anything. 1653 */ 1654 if (loc->xl_entry) { 1655 /* Don't need space if we're reusing! */ 1656 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1657 needed_space = 0; 1658 else 1659 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1660 } 1661 if (needed_space < 0) 1662 needed_space = 0; 1663 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1664 } 1665 1666 /* 1667 * Block storage for xattrs keeps the name+value pairs compacted. When 1668 * we remove one, we have to shift any that preceded it towards the end. 
1669 */ 1670 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1671 { 1672 int i, offset; 1673 int namevalue_offset, first_namevalue_offset, namevalue_size; 1674 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1675 struct ocfs2_xattr_header *xh = loc->xl_header; 1676 int count = le16_to_cpu(xh->xh_count); 1677 1678 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1679 namevalue_size = namevalue_size_xe(entry); 1680 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1681 1682 /* Shift the name+value pairs */ 1683 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1684 (char *)xh + first_namevalue_offset, 1685 namevalue_offset - first_namevalue_offset); 1686 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1687 1688 /* Now tell xh->xh_entries about it */ 1689 for (i = 0; i < count; i++) { 1690 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1691 if (offset <= namevalue_offset) 1692 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1693 namevalue_size); 1694 } 1695 1696 /* 1697 * Note that we don't update xh_free_start or xh_name_value_len 1698 * because they're not used in block-stored xattrs. 
1699 */ 1700 } 1701 1702 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1703 { 1704 int count = le16_to_cpu(loc->xl_header->xh_count); 1705 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1706 le16_add_cpu(&loc->xl_header->xh_count, 1); 1707 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1708 } 1709 1710 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1711 { 1712 int free_start = ocfs2_xa_get_free_start(loc); 1713 1714 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1715 } 1716 1717 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1718 struct ocfs2_xattr_value_buf *vb) 1719 { 1720 struct buffer_head *bh = loc->xl_storage; 1721 1722 if (loc->xl_size == (bh->b_size - 1723 offsetof(struct ocfs2_xattr_block, 1724 xb_attrs.xb_header))) 1725 vb->vb_access = ocfs2_journal_access_xb; 1726 else 1727 vb->vb_access = ocfs2_journal_access_di; 1728 vb->vb_bh = bh; 1729 } 1730 1731 /* 1732 * Operations for xattrs stored in blocks. This includes inline inode 1733 * storage and unindexed ocfs2_xattr_blocks. 
1734 */ 1735 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1736 .xlo_journal_access = ocfs2_xa_block_journal_access, 1737 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1738 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1739 .xlo_check_space = ocfs2_xa_block_check_space, 1740 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1741 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1742 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1743 .xlo_add_entry = ocfs2_xa_block_add_entry, 1744 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1745 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1746 }; 1747 1748 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1749 struct ocfs2_xa_loc *loc, int type) 1750 { 1751 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1752 1753 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1754 } 1755 1756 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1757 struct ocfs2_xa_loc *loc) 1758 { 1759 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1760 1761 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1762 } 1763 1764 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1765 int offset) 1766 { 1767 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1768 int block, block_offset; 1769 1770 /* The header is at the front of the bucket */ 1771 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1772 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1773 1774 return bucket_block(bucket, block) + block_offset; 1775 } 1776 1777 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1778 struct ocfs2_xattr_info *xi) 1779 { 1780 return namevalue_size_xe(loc->xl_entry) >= 1781 namevalue_size_xi(xi); 1782 } 1783 1784 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1785 { 1786 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1787 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1788 } 1789 1790 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1791 int free_start, int size) 1792 { 1793 /* 1794 * We need to make sure that the name+value pair fits within 1795 * one block. 1796 */ 1797 if (((free_start - size) >> sb->s_blocksize_bits) != 1798 ((free_start - 1) >> sb->s_blocksize_bits)) 1799 free_start -= free_start % sb->s_blocksize; 1800 1801 return free_start; 1802 } 1803 1804 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1805 struct ocfs2_xattr_info *xi) 1806 { 1807 int rc; 1808 int count = le16_to_cpu(loc->xl_header->xh_count); 1809 int free_start = ocfs2_xa_get_free_start(loc); 1810 int needed_space = ocfs2_xi_entry_usage(xi); 1811 int size = namevalue_size_xi(xi); 1812 struct super_block *sb = loc->xl_inode->i_sb; 1813 1814 /* 1815 * Bucket storage does not reclaim name+value pairs it cannot 1816 * reuse. They live as holes until the bucket fills, and then 1817 * the bucket is defragmented. However, the bucket can reclaim 1818 * the ocfs2_xattr_entry. 1819 */ 1820 if (loc->xl_entry) { 1821 /* Don't need space if we're reusing! */ 1822 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1823 needed_space = 0; 1824 else 1825 needed_space -= sizeof(struct ocfs2_xattr_entry); 1826 } 1827 BUG_ON(needed_space < 0); 1828 1829 if (free_start < size) { 1830 if (needed_space) 1831 return -ENOSPC; 1832 } else { 1833 /* 1834 * First we check if it would fit in the first place. 1835 * Below, we align the free start to a block. This may 1836 * slide us below the minimum gap. By checking unaligned 1837 * first, we avoid that error. 
1838 */ 1839 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1840 count); 1841 if (rc) 1842 return rc; 1843 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1844 size); 1845 } 1846 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1847 } 1848 1849 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1850 { 1851 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1852 -namevalue_size_xe(loc->xl_entry)); 1853 } 1854 1855 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1856 { 1857 struct ocfs2_xattr_header *xh = loc->xl_header; 1858 int count = le16_to_cpu(xh->xh_count); 1859 int low = 0, high = count - 1, tmp; 1860 struct ocfs2_xattr_entry *tmp_xe; 1861 1862 /* 1863 * We keep buckets sorted by name_hash, so we need to find 1864 * our insert place. 1865 */ 1866 while (low <= high && count) { 1867 tmp = (low + high) / 2; 1868 tmp_xe = &xh->xh_entries[tmp]; 1869 1870 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1871 low = tmp + 1; 1872 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1873 high = tmp - 1; 1874 else { 1875 low = tmp; 1876 break; 1877 } 1878 } 1879 1880 if (low != count) 1881 memmove(&xh->xh_entries[low + 1], 1882 &xh->xh_entries[low], 1883 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1884 1885 le16_add_cpu(&xh->xh_count, 1); 1886 loc->xl_entry = &xh->xh_entries[low]; 1887 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1888 } 1889 1890 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1891 { 1892 int free_start = ocfs2_xa_get_free_start(loc); 1893 struct ocfs2_xattr_header *xh = loc->xl_header; 1894 struct super_block *sb = loc->xl_inode->i_sb; 1895 int nameval_offset; 1896 1897 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1898 nameval_offset = free_start - size; 1899 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1900 xh->xh_free_start = cpu_to_le16(nameval_offset); 1901 
le16_add_cpu(&xh->xh_name_value_len, size); 1902 1903 } 1904 1905 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1906 struct ocfs2_xattr_value_buf *vb) 1907 { 1908 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1909 struct super_block *sb = loc->xl_inode->i_sb; 1910 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1911 int size = namevalue_size_xe(loc->xl_entry); 1912 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1913 1914 /* Values are not allowed to straddle block boundaries */ 1915 BUG_ON(block_offset != 1916 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1917 /* We expect the bucket to be filled in */ 1918 BUG_ON(!bucket->bu_bhs[block_offset]); 1919 1920 vb->vb_access = ocfs2_journal_access; 1921 vb->vb_bh = bucket->bu_bhs[block_offset]; 1922 } 1923 1924 /* Operations for xattrs stored in buckets. */ 1925 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1926 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1927 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1928 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1929 .xlo_check_space = ocfs2_xa_bucket_check_space, 1930 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1931 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1932 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1933 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1934 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1935 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1936 }; 1937 1938 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1939 { 1940 struct ocfs2_xattr_value_buf vb; 1941 1942 if (ocfs2_xattr_is_local(loc->xl_entry)) 1943 return 0; 1944 1945 ocfs2_xa_fill_value_buf(loc, &vb); 1946 return le32_to_cpu(vb.vb_xv->xr_clusters); 1947 } 1948 1949 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1950 struct ocfs2_xattr_set_ctxt *ctxt) 1951 { 1952 int trunc_rc, access_rc; 1953 struct 
ocfs2_xattr_value_buf vb; 1954 1955 ocfs2_xa_fill_value_buf(loc, &vb); 1956 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1957 ctxt); 1958 1959 /* 1960 * The caller of ocfs2_xa_value_truncate() has already called 1961 * ocfs2_xa_journal_access on the loc. However, The truncate code 1962 * calls ocfs2_extend_trans(). This may commit the previous 1963 * transaction and open a new one. If this is a bucket, truncate 1964 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1965 * the caller is expecting to dirty the entire bucket. So we must 1966 * reset the journal work. We do this even if truncate has failed, 1967 * as it could have failed after committing the extend. 1968 */ 1969 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1970 OCFS2_JOURNAL_ACCESS_WRITE); 1971 1972 /* Errors in truncate take precedence */ 1973 return trunc_rc ? trunc_rc : access_rc; 1974 } 1975 1976 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1977 { 1978 int index, count; 1979 struct ocfs2_xattr_header *xh = loc->xl_header; 1980 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1981 1982 ocfs2_xa_wipe_namevalue(loc); 1983 loc->xl_entry = NULL; 1984 1985 count = le16_to_cpu(xh->xh_count) - 1; 1986 1987 /* 1988 * Only zero out the entry if there are more remaining. This is 1989 * important for an empty bucket, as it keeps track of the 1990 * bucket's hash value. It doesn't hurt empty block storage. 
1991 */ 1992 if (count) { 1993 index = ((char *)entry - (char *)&xh->xh_entries) / 1994 sizeof(struct ocfs2_xattr_entry); 1995 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1996 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1997 memset(&xh->xh_entries[count], 0, 1998 sizeof(struct ocfs2_xattr_entry)); 1999 } 2000 2001 xh->xh_count = cpu_to_le16(count); 2002 } 2003 2004 /* 2005 * If we have a problem adjusting the size of an external value during 2006 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 2007 * in an intermediate state. For example, the value may be partially 2008 * truncated. 2009 * 2010 * If the value tree hasn't changed, the extend/truncate went nowhere. 2011 * We have nothing to do. The caller can treat it as a straight error. 2012 * 2013 * If the value tree got partially truncated, we now have a corrupted 2014 * extended attribute. We're going to wipe its entry and leak the 2015 * clusters. Better to leak some storage than leave a corrupt entry. 2016 * 2017 * If the value tree grew, it obviously didn't grow enough for the 2018 * new entry. We're not going to try and reclaim those clusters either. 2019 * If there was already an external value there (orig_clusters != 0), 2020 * the new clusters are attached safely and we can just leave the old 2021 * value in place. If there was no external value there, we remove 2022 * the entry. 2023 * 2024 * This way, the xattr block we store in the journal will be consistent. 2025 * If the size change broke because of the journal, no changes will hit 2026 * disk anyway. 
2027 */ 2028 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 2029 const char *what, 2030 unsigned int orig_clusters) 2031 { 2032 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 2033 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 2034 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2035 2036 if (new_clusters < orig_clusters) { 2037 mlog(ML_ERROR, 2038 "Partial truncate while %s xattr %.*s. Leaking " 2039 "%u clusters and removing the entry\n", 2040 what, loc->xl_entry->xe_name_len, nameval_buf, 2041 orig_clusters - new_clusters); 2042 ocfs2_xa_remove_entry(loc); 2043 } else if (!orig_clusters) { 2044 mlog(ML_ERROR, 2045 "Unable to allocate an external value for xattr " 2046 "%.*s safely. Leaking %u clusters and removing the " 2047 "entry\n", 2048 loc->xl_entry->xe_name_len, nameval_buf, 2049 new_clusters - orig_clusters); 2050 ocfs2_xa_remove_entry(loc); 2051 } else if (new_clusters > orig_clusters) 2052 mlog(ML_ERROR, 2053 "Unable to grow xattr %.*s safely. %u new clusters " 2054 "have been added, but the value will not be " 2055 "modified\n", 2056 loc->xl_entry->xe_name_len, nameval_buf, 2057 new_clusters - orig_clusters); 2058 } 2059 2060 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2061 struct ocfs2_xattr_set_ctxt *ctxt) 2062 { 2063 int rc = 0; 2064 unsigned int orig_clusters; 2065 2066 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2067 orig_clusters = ocfs2_xa_value_clusters(loc); 2068 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2069 if (rc) { 2070 mlog_errno(rc); 2071 /* 2072 * Since this is remove, we can return 0 if 2073 * ocfs2_xa_cleanup_value_truncate() is going to 2074 * wipe the entry anyway. So we check the 2075 * cluster count as well. 
2076 */ 2077 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2078 rc = 0; 2079 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2080 orig_clusters); 2081 goto out; 2082 } 2083 } 2084 2085 ocfs2_xa_remove_entry(loc); 2086 2087 out: 2088 return rc; 2089 } 2090 2091 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2092 { 2093 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2094 char *nameval_buf; 2095 2096 nameval_buf = ocfs2_xa_offset_pointer(loc, 2097 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2098 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2099 } 2100 2101 /* 2102 * Take an existing entry and make it ready for the new value. This 2103 * won't allocate space, but it may free space. It should be ready for 2104 * ocfs2_xa_prepare_entry() to finish the work. 2105 */ 2106 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 2107 struct ocfs2_xattr_info *xi, 2108 struct ocfs2_xattr_set_ctxt *ctxt) 2109 { 2110 int rc = 0; 2111 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2112 unsigned int orig_clusters; 2113 char *nameval_buf; 2114 int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 2115 int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 2116 2117 BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 2118 name_size); 2119 2120 nameval_buf = ocfs2_xa_offset_pointer(loc, 2121 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2122 if (xe_local) { 2123 memset(nameval_buf + name_size, 0, 2124 namevalue_size_xe(loc->xl_entry) - name_size); 2125 if (!xi_local) 2126 ocfs2_xa_install_value_root(loc); 2127 } else { 2128 orig_clusters = ocfs2_xa_value_clusters(loc); 2129 if (xi_local) { 2130 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2131 if (rc < 0) 2132 mlog_errno(rc); 2133 else 2134 memset(nameval_buf + name_size, 0, 2135 namevalue_size_xe(loc->xl_entry) - 2136 name_size); 2137 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 2138 xi->xi_value_len) { 2139 rc = ocfs2_xa_value_truncate(loc, 
xi->xi_value_len, 2140 ctxt); 2141 if (rc < 0) 2142 mlog_errno(rc); 2143 } 2144 2145 if (rc) { 2146 ocfs2_xa_cleanup_value_truncate(loc, "reusing", 2147 orig_clusters); 2148 goto out; 2149 } 2150 } 2151 2152 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 2153 ocfs2_xattr_set_local(loc->xl_entry, xi_local); 2154 2155 out: 2156 return rc; 2157 } 2158 2159 /* 2160 * Prepares loc->xl_entry to receive the new xattr. This includes 2161 * properly setting up the name+value pair region. If loc->xl_entry 2162 * already exists, it will take care of modifying it appropriately. 2163 * 2164 * Note that this modifies the data. You did journal_access already, 2165 * right? 2166 */ 2167 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2168 struct ocfs2_xattr_info *xi, 2169 u32 name_hash, 2170 struct ocfs2_xattr_set_ctxt *ctxt) 2171 { 2172 int rc = 0; 2173 unsigned int orig_clusters; 2174 __le64 orig_value_size = 0; 2175 2176 rc = ocfs2_xa_check_space(loc, xi); 2177 if (rc) 2178 goto out; 2179 2180 if (loc->xl_entry) { 2181 if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2182 orig_value_size = loc->xl_entry->xe_value_size; 2183 rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2184 if (rc) 2185 goto out; 2186 goto alloc_value; 2187 } 2188 2189 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2190 orig_clusters = ocfs2_xa_value_clusters(loc); 2191 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2192 if (rc) { 2193 mlog_errno(rc); 2194 ocfs2_xa_cleanup_value_truncate(loc, 2195 "overwriting", 2196 orig_clusters); 2197 goto out; 2198 } 2199 } 2200 ocfs2_xa_wipe_namevalue(loc); 2201 } else 2202 ocfs2_xa_add_entry(loc, name_hash); 2203 2204 /* 2205 * If we get here, we have a blank entry. Fill it. We grow our 2206 * name+value pair back from the end. 
2207 */ 2208 ocfs2_xa_add_namevalue(loc, xi); 2209 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2210 ocfs2_xa_install_value_root(loc); 2211 2212 alloc_value: 2213 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2214 orig_clusters = ocfs2_xa_value_clusters(loc); 2215 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2216 if (rc < 0) { 2217 ctxt->set_abort = 1; 2218 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2219 orig_clusters); 2220 /* 2221 * If we were growing an existing value, 2222 * ocfs2_xa_cleanup_value_truncate() won't remove 2223 * the entry. We need to restore the original value 2224 * size. 2225 */ 2226 if (loc->xl_entry) { 2227 BUG_ON(!orig_value_size); 2228 loc->xl_entry->xe_value_size = orig_value_size; 2229 } 2230 mlog_errno(rc); 2231 } 2232 } 2233 2234 out: 2235 return rc; 2236 } 2237 2238 /* 2239 * Store the value portion of the name+value pair. This will skip 2240 * values that are stored externally. Their tree roots were set up 2241 * by ocfs2_xa_prepare_entry(). 
2242 */ 2243 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2244 struct ocfs2_xattr_info *xi, 2245 struct ocfs2_xattr_set_ctxt *ctxt) 2246 { 2247 int rc = 0; 2248 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2249 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2250 char *nameval_buf; 2251 struct ocfs2_xattr_value_buf vb; 2252 2253 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2254 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2255 ocfs2_xa_fill_value_buf(loc, &vb); 2256 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2257 ctxt->handle, &vb, 2258 xi->xi_value, 2259 xi->xi_value_len); 2260 } else 2261 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2262 2263 return rc; 2264 } 2265 2266 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2267 struct ocfs2_xattr_info *xi, 2268 struct ocfs2_xattr_set_ctxt *ctxt) 2269 { 2270 int ret; 2271 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2272 xi->xi_name_len); 2273 2274 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2275 OCFS2_JOURNAL_ACCESS_WRITE); 2276 if (ret) { 2277 mlog_errno(ret); 2278 goto out; 2279 } 2280 2281 /* 2282 * From here on out, everything is going to modify the buffer a 2283 * little. Errors are going to leave the xattr header in a 2284 * sane state. Thus, even with errors we dirty the sucker. 
2285 */ 2286 2287 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2288 if (!xi->xi_value) { 2289 ret = ocfs2_xa_remove(loc, ctxt); 2290 goto out_dirty; 2291 } 2292 2293 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2294 if (ret) { 2295 if (ret != -ENOSPC) 2296 mlog_errno(ret); 2297 goto out_dirty; 2298 } 2299 2300 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2301 if (ret) 2302 mlog_errno(ret); 2303 2304 out_dirty: 2305 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2306 2307 out: 2308 return ret; 2309 } 2310 2311 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2312 struct inode *inode, 2313 struct buffer_head *bh, 2314 struct ocfs2_xattr_entry *entry) 2315 { 2316 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2317 2318 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2319 2320 loc->xl_inode = inode; 2321 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2322 loc->xl_storage = bh; 2323 loc->xl_entry = entry; 2324 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2325 loc->xl_header = 2326 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2327 loc->xl_size); 2328 } 2329 2330 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2331 struct inode *inode, 2332 struct buffer_head *bh, 2333 struct ocfs2_xattr_entry *entry) 2334 { 2335 struct ocfs2_xattr_block *xb = 2336 (struct ocfs2_xattr_block *)bh->b_data; 2337 2338 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2339 2340 loc->xl_inode = inode; 2341 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2342 loc->xl_storage = bh; 2343 loc->xl_header = &(xb->xb_attrs.xb_header); 2344 loc->xl_entry = entry; 2345 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2346 xb_attrs.xb_header); 2347 } 2348 2349 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2350 struct ocfs2_xattr_bucket *bucket, 2351 struct ocfs2_xattr_entry *entry) 2352 { 2353 loc->xl_inode = bucket->bu_inode; 2354 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2355 loc->xl_storage = bucket; 2356 loc->xl_header = bucket_xh(bucket); 2357 loc->xl_entry = entry; 2358 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2359 } 2360 2361 /* 2362 * In xattr remove, if it is stored outside and refcounted, we may have 2363 * the chance to split the refcount tree. So need the allocators. 2364 */ 2365 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2366 struct ocfs2_xattr_value_root *xv, 2367 struct ocfs2_caching_info *ref_ci, 2368 struct buffer_head *ref_root_bh, 2369 struct ocfs2_alloc_context **meta_ac, 2370 int *ref_credits) 2371 { 2372 int ret, meta_add = 0; 2373 u32 p_cluster, num_clusters; 2374 unsigned int ext_flags; 2375 2376 *ref_credits = 0; 2377 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2378 &num_clusters, 2379 &xv->xr_list, 2380 &ext_flags); 2381 if (ret) { 2382 mlog_errno(ret); 2383 goto out; 2384 } 2385 2386 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2387 goto out; 2388 2389 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2390 ref_root_bh, xv, 2391 &meta_add, ref_credits); 2392 if (ret) { 2393 mlog_errno(ret); 2394 goto out; 2395 } 2396 2397 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2398 meta_add, meta_ac); 2399 if (ret) 2400 mlog_errno(ret); 2401 2402 out: 2403 return ret; 2404 } 2405 2406 static int ocfs2_remove_value_outside(struct inode*inode, 2407 struct ocfs2_xattr_value_buf *vb, 2408 struct ocfs2_xattr_header *header, 2409 struct ocfs2_caching_info *ref_ci, 2410 struct buffer_head *ref_root_bh) 2411 { 2412 int ret = 0, i, ref_credits; 2413 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2414 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2415 void *val; 2416 2417 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2418 2419 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2420 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2421 2422 if (ocfs2_xattr_is_local(entry)) 2423 continue; 2424 2425 val = (void *)header + 2426 
le16_to_cpu(entry->xe_name_offset); 2427 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2428 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2429 2430 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2431 ref_ci, ref_root_bh, 2432 &ctxt.meta_ac, 2433 &ref_credits); 2434 2435 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2436 ocfs2_remove_extent_credits(osb->sb)); 2437 if (IS_ERR(ctxt.handle)) { 2438 ret = PTR_ERR(ctxt.handle); 2439 mlog_errno(ret); 2440 break; 2441 } 2442 2443 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2444 2445 ocfs2_commit_trans(osb, ctxt.handle); 2446 if (ctxt.meta_ac) { 2447 ocfs2_free_alloc_context(ctxt.meta_ac); 2448 ctxt.meta_ac = NULL; 2449 } 2450 2451 if (ret < 0) { 2452 mlog_errno(ret); 2453 break; 2454 } 2455 2456 } 2457 2458 if (ctxt.meta_ac) 2459 ocfs2_free_alloc_context(ctxt.meta_ac); 2460 ocfs2_schedule_truncate_log_flush(osb, 1); 2461 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2462 return ret; 2463 } 2464 2465 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2466 struct buffer_head *di_bh, 2467 struct ocfs2_caching_info *ref_ci, 2468 struct buffer_head *ref_root_bh) 2469 { 2470 2471 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2472 struct ocfs2_xattr_header *header; 2473 int ret; 2474 struct ocfs2_xattr_value_buf vb = { 2475 .vb_bh = di_bh, 2476 .vb_access = ocfs2_journal_access_di, 2477 }; 2478 2479 ret = ocfs2_xattr_ibody_lookup_header(inode, di, &header); 2480 if (ret) 2481 return ret; 2482 2483 ret = ocfs2_remove_value_outside(inode, &vb, header, 2484 ref_ci, ref_root_bh); 2485 2486 return ret; 2487 } 2488 2489 struct ocfs2_rm_xattr_bucket_para { 2490 struct ocfs2_caching_info *ref_ci; 2491 struct buffer_head *ref_root_bh; 2492 }; 2493 2494 static int ocfs2_xattr_block_remove(struct inode *inode, 2495 struct buffer_head *blk_bh, 2496 struct ocfs2_caching_info *ref_ci, 2497 struct buffer_head *ref_root_bh) 2498 { 2499 struct ocfs2_xattr_block *xb; 2500 int ret = 0; 2501 struct 
ocfs2_xattr_value_buf vb = { 2502 .vb_bh = blk_bh, 2503 .vb_access = ocfs2_journal_access_xb, 2504 }; 2505 struct ocfs2_rm_xattr_bucket_para args = { 2506 .ref_ci = ref_ci, 2507 .ref_root_bh = ref_root_bh, 2508 }; 2509 2510 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2511 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2512 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2513 ret = ocfs2_remove_value_outside(inode, &vb, header, 2514 ref_ci, ref_root_bh); 2515 } else 2516 ret = ocfs2_iterate_xattr_index_block(inode, 2517 blk_bh, 2518 ocfs2_rm_xattr_cluster, 2519 &args); 2520 2521 return ret; 2522 } 2523 2524 static int ocfs2_xattr_free_block(struct inode *inode, 2525 u64 block, 2526 struct ocfs2_caching_info *ref_ci, 2527 struct buffer_head *ref_root_bh) 2528 { 2529 struct inode *xb_alloc_inode; 2530 struct buffer_head *xb_alloc_bh = NULL; 2531 struct buffer_head *blk_bh = NULL; 2532 struct ocfs2_xattr_block *xb; 2533 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2534 handle_t *handle; 2535 int ret = 0; 2536 u64 blk, bg_blkno; 2537 u16 bit; 2538 2539 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2540 if (ret < 0) { 2541 mlog_errno(ret); 2542 goto out; 2543 } 2544 2545 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2546 if (ret < 0) { 2547 mlog_errno(ret); 2548 goto out; 2549 } 2550 2551 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2552 blk = le64_to_cpu(xb->xb_blkno); 2553 bit = le16_to_cpu(xb->xb_suballoc_bit); 2554 if (xb->xb_suballoc_loc) 2555 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2556 else 2557 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2558 2559 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2560 EXTENT_ALLOC_SYSTEM_INODE, 2561 le16_to_cpu(xb->xb_suballoc_slot)); 2562 if (!xb_alloc_inode) { 2563 ret = -ENOMEM; 2564 mlog_errno(ret); 2565 goto out; 2566 } 2567 inode_lock(xb_alloc_inode); 2568 2569 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1); 2570 if (ret < 0) { 2571 
mlog_errno(ret); 2572 goto out_mutex; 2573 } 2574 2575 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2576 if (IS_ERR(handle)) { 2577 ret = PTR_ERR(handle); 2578 mlog_errno(ret); 2579 goto out_unlock; 2580 } 2581 2582 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2583 bit, bg_blkno, 1); 2584 if (ret < 0) 2585 mlog_errno(ret); 2586 2587 ocfs2_commit_trans(osb, handle); 2588 out_unlock: 2589 ocfs2_inode_unlock(xb_alloc_inode, 1); 2590 brelse(xb_alloc_bh); 2591 out_mutex: 2592 inode_unlock(xb_alloc_inode); 2593 iput(xb_alloc_inode); 2594 out: 2595 brelse(blk_bh); 2596 return ret; 2597 } 2598 2599 /* 2600 * ocfs2_xattr_remove() 2601 * 2602 * Free extended attribute resources associated with this inode. 2603 */ 2604 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2605 { 2606 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2607 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2608 struct ocfs2_refcount_tree *ref_tree = NULL; 2609 struct buffer_head *ref_root_bh = NULL; 2610 struct ocfs2_caching_info *ref_ci = NULL; 2611 handle_t *handle; 2612 int ret; 2613 2614 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2615 return 0; 2616 2617 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2618 return 0; 2619 2620 if (ocfs2_is_refcount_inode(inode)) { 2621 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2622 le64_to_cpu(di->i_refcount_loc), 2623 1, &ref_tree, &ref_root_bh); 2624 if (ret) { 2625 mlog_errno(ret); 2626 goto out; 2627 } 2628 ref_ci = &ref_tree->rf_ci; 2629 2630 } 2631 2632 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2633 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2634 ref_ci, ref_root_bh); 2635 if (ret < 0) { 2636 mlog_errno(ret); 2637 goto out; 2638 } 2639 } 2640 2641 if (di->i_xattr_loc) { 2642 ret = ocfs2_xattr_free_block(inode, 2643 le64_to_cpu(di->i_xattr_loc), 2644 ref_ci, ref_root_bh); 2645 if (ret < 0) { 2646 mlog_errno(ret); 2647 goto out; 2648 } 2649 } 2650 2651 handle = 
ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2652 OCFS2_INODE_UPDATE_CREDITS); 2653 if (IS_ERR(handle)) { 2654 ret = PTR_ERR(handle); 2655 mlog_errno(ret); 2656 goto out; 2657 } 2658 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2659 OCFS2_JOURNAL_ACCESS_WRITE); 2660 if (ret) { 2661 mlog_errno(ret); 2662 goto out_commit; 2663 } 2664 2665 di->i_xattr_loc = 0; 2666 2667 spin_lock(&oi->ip_lock); 2668 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2669 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2670 spin_unlock(&oi->ip_lock); 2671 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2672 2673 ocfs2_journal_dirty(handle, di_bh); 2674 out_commit: 2675 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2676 out: 2677 if (ref_tree) 2678 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2679 brelse(ref_root_bh); 2680 return ret; 2681 } 2682 2683 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2684 struct ocfs2_dinode *di) 2685 { 2686 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2687 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2688 int free; 2689 2690 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2691 return 0; 2692 2693 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2694 struct ocfs2_inline_data *idata = &di->id2.i_data; 2695 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2696 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2697 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2698 le64_to_cpu(di->i_size); 2699 } else { 2700 struct ocfs2_extent_list *el = &di->id2.i_list; 2701 free = (le16_to_cpu(el->l_count) - 2702 le16_to_cpu(el->l_next_free_rec)) * 2703 sizeof(struct ocfs2_extent_rec); 2704 } 2705 if (free >= xattrsize) 2706 return 1; 2707 2708 return 0; 2709 } 2710 2711 /* 2712 * ocfs2_xattr_ibody_find() 2713 * 2714 * Find extended attribute in inode block and 2715 * fill search info into struct ocfs2_xattr_search. 
2716 */ 2717 static int ocfs2_xattr_ibody_find(struct inode *inode, 2718 int name_index, 2719 const char *name, 2720 struct ocfs2_xattr_search *xs) 2721 { 2722 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2723 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2724 int ret; 2725 int has_space = 0; 2726 2727 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2728 return 0; 2729 2730 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2731 down_read(&oi->ip_alloc_sem); 2732 has_space = ocfs2_xattr_has_space_inline(inode, di); 2733 up_read(&oi->ip_alloc_sem); 2734 if (!has_space) 2735 return 0; 2736 } 2737 2738 xs->xattr_bh = xs->inode_bh; 2739 xs->end = (void *)di + inode->i_sb->s_blocksize; 2740 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2741 ret = ocfs2_xattr_ibody_lookup_header(inode, di, &xs->header); 2742 if (ret) 2743 return ret; 2744 } else { 2745 xs->header = (struct ocfs2_xattr_header *) 2746 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2747 } 2748 xs->base = (void *)xs->header; 2749 xs->here = xs->header->xh_entries; 2750 2751 /* Find the named attribute. 
*/ 2752 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2753 ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); 2754 if (ret && ret != -ENODATA) 2755 return ret; 2756 xs->not_found = ret; 2757 } 2758 2759 return 0; 2760 } 2761 2762 static int ocfs2_xattr_ibody_init(struct inode *inode, 2763 struct buffer_head *di_bh, 2764 struct ocfs2_xattr_set_ctxt *ctxt) 2765 { 2766 int ret; 2767 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2768 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2769 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2770 unsigned int xattrsize = osb->s_xattr_inline_size; 2771 2772 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2773 ret = -ENOSPC; 2774 goto out; 2775 } 2776 2777 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2778 OCFS2_JOURNAL_ACCESS_WRITE); 2779 if (ret) { 2780 mlog_errno(ret); 2781 goto out; 2782 } 2783 2784 /* 2785 * Adjust extent record count or inline data size 2786 * to reserve space for extended attribute. 2787 */ 2788 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2789 struct ocfs2_inline_data *idata = &di->id2.i_data; 2790 le16_add_cpu(&idata->id_count, -xattrsize); 2791 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2792 struct ocfs2_extent_list *el = &di->id2.i_list; 2793 le16_add_cpu(&el->l_count, -(xattrsize / 2794 sizeof(struct ocfs2_extent_rec))); 2795 } 2796 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2797 2798 spin_lock(&oi->ip_lock); 2799 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2800 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2801 spin_unlock(&oi->ip_lock); 2802 2803 ocfs2_journal_dirty(ctxt->handle, di_bh); 2804 2805 out: 2806 return ret; 2807 } 2808 2809 /* 2810 * ocfs2_xattr_ibody_set() 2811 * 2812 * Set, replace or remove an extended attribute into inode block. 
2813 * 2814 */ 2815 static int ocfs2_xattr_ibody_set(struct inode *inode, 2816 struct ocfs2_xattr_info *xi, 2817 struct ocfs2_xattr_search *xs, 2818 struct ocfs2_xattr_set_ctxt *ctxt) 2819 { 2820 int ret; 2821 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2822 struct ocfs2_xa_loc loc; 2823 2824 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2825 return -ENOSPC; 2826 2827 down_write(&oi->ip_alloc_sem); 2828 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2829 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2830 if (ret) { 2831 if (ret != -ENOSPC) 2832 mlog_errno(ret); 2833 goto out; 2834 } 2835 } 2836 2837 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2838 xs->not_found ? NULL : xs->here); 2839 ret = ocfs2_xa_set(&loc, xi, ctxt); 2840 if (ret) { 2841 if (ret != -ENOSPC) 2842 mlog_errno(ret); 2843 goto out; 2844 } 2845 xs->here = loc.xl_entry; 2846 2847 out: 2848 up_write(&oi->ip_alloc_sem); 2849 2850 return ret; 2851 } 2852 2853 /* 2854 * ocfs2_xattr_block_find() 2855 * 2856 * Find extended attribute in external block and 2857 * fill search info into struct ocfs2_xattr_search. 
*/
static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	/* No external xattr block: nothing to search. */
	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	xs->xattr_bh = blk_bh;
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		/* Flat block: search the single header directly. */
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
		xs->here = xs->header->xh_entries;

		ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
	} else
		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
						   name_index,
						   name, xs);

	/* -ENODATA is recorded in xs->not_found, not treated as an error. */
	if (ret && ret != -ENODATA) {
		xs->xattr_bh = NULL;
		goto cleanup;
	}
	xs->not_found = ret;
	/* On success the buffer stays referenced through xs->xattr_bh. */
	return 0;
cleanup:
	brelse(blk_bh);

	return ret;
}

/*
 * Allocate and initialise a new xattr block for @inode and link it from
 * the dinode in @inode_bh (i_xattr_loc plus OCFS2_HAS_XATTR_FL).
 *
 * @ctxt supplies the transaction handle and the metadata allocator.
 * When @indexed is non-zero the block is set up as an xattr tree root
 * with one record in use; otherwise its attribute area is left zeroed.
 *
 * On success the new buffer is handed to the caller through *@ret_bh.
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_create_xattr_block(struct inode *inode,
				    struct buffer_head *inode_bh,
				    struct ocfs2_xattr_set_ctxt *ctxt,
				    int indexed,
				    struct buffer_head **ret_bh)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 suballoc_loc, first_blkno;
	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
	struct buffer_head *new_bh = NULL;
	struct ocfs2_xattr_block *xblk;

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	/* Claim a single metadata block for the new xattr block. */
	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
				   &suballoc_loc, &suballoc_bit_start,
				   &num_got, &first_blkno);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	new_bh = sb_getblk(inode->i_sb, first_blkno);
	if (!new_bh) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto end;
	}

	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);

	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
				      new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	/* Initialize ocfs2_xattr_block */
	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
	memset(xblk, 0, inode->i_sb->s_blocksize);
	strscpy(xblk->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE);
	/* Record where the block came from so it can be freed later. */
	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	xblk->xb_fs_generation =
		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
	xblk->xb_blkno = cpu_to_le64(first_blkno);
	if (indexed) {
		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
		xr->xt_clusters = cpu_to_le32(1);
		xr->xt_last_eb_blk = 0;
		xr->xt_list.l_tree_depth = 0;
		xr->xt_list.l_count = cpu_to_le16(
					ocfs2_xattr_recs_per_xb(inode->i_sb));
		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
	}
	ocfs2_journal_dirty(ctxt->handle, new_bh);

	/* Add it to the inode */
	di->i_xattr_loc = cpu_to_le64(first_blkno);

	spin_lock(&OCFS2_I(inode)->ip_lock);
	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, inode_bh);

	/* Ownership moves to the caller; keep the brelse() below a no-op. */
	*ret_bh = new_bh;
	new_bh = NULL;

end:
	brelse(new_bh);
	return ret;
}

/*
 * ocfs2_xattr_block_set()
 *
 * Set, replace or remove an extended
attribute into external block.
 *
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xa_loc loc;
	int status;

	if (xs->xattr_bh) {
		xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	} else {
		struct buffer_head *created = NULL;

		/* No xattr block yet: create a flat (non-indexed) one. */
		status = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
						  0, &created);
		if (status) {
			mlog_errno(status);
			return status;
		}

		/* Point the search state at the empty header. */
		xs->xattr_bh = created;
		xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xb + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	}

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		status = ocfs2_xa_set(&loc, xi, ctxt);
		if (status) {
			/* Only a clean -ENOSPC is worth retrying. */
			if ((status != -ENOSPC) || ctxt->set_abort)
				return status;

			/*
			 * The flat block is full: convert it to an indexed
			 * block and let the indexed path below insert.
			 */
			status = ocfs2_xattr_create_index_block(inode, xs,
								ctxt);
			if (status)
				return status;
		} else {
			xs->here = loc.xl_entry;
		}
	}

	/* Re-read the flag: it changes if the block was just converted. */
	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
		status = ocfs2_xattr_set_entry_index_block(inode, xi, xs,
							   ctxt);

	return status;
}

/* Check whether the new xattr can be inserted into the inode.
*/
static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
				       struct ocfs2_xattr_info *xi,
				       struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *last;
	int free, i;
	size_t min_offs = xs->end - xs->base;

	if (!xs->header)
		return 0;

	last = xs->header->xh_entries;

	/* Find the lowest name/value offset; 'last' ends past the entries. */
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		size_t offs = le16_to_cpu(last->xe_name_offset);
		if (offs < min_offs)
			min_offs = offs;
		last += 1;
	}

	/* Gap between the end of the entry array and the lowest value. */
	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
	if (free < 0)
		return 0;

	BUG_ON(!xs->not_found);

	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
		return 1;

	return 0;
}

/*
 * Estimate what setting (or removing) @xi will need, given the search
 * results for the inode body (@xis) and the external block (@xbs):
 * clusters to allocate, metadata blocks to reserve and journal credits.
 *
 * Each of @clusters_need, @meta_need and @credits_need may be NULL, in
 * which case that result is not reported.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_calc_xattr_set_need(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     int *clusters_need,
				     int *meta_need,
				     int *credits_need)
{
	int ret = 0, old_in_xb = 0;
	int clusters_add = 0, meta_add = 0, credits = 0;
	struct buffer_head *bh = NULL;
	struct ocfs2_xattr_block *xb = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	struct ocfs2_xattr_value_root *xv = NULL;
	char *base = NULL;
	int name_offset, name_len = 0;
	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
						    xi->xi_value_len);
	u64 value_size;

	/*
	 * Calculate the clusters we need to write.
	 * No matter whether we replace an old one or add a new one,
	 * we need this for writing.
	 */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		credits += new_clusters *
			   ocfs2_clusters_to_blocks(inode->i_sb, 1);

	/* Brand-new attribute: found neither inline nor in the block. */
	if (xis->not_found && xbs->not_found) {
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);

		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			clusters_add += new_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							&def_xv.xv.xr_list);
		}

		goto meta_guess;
	}

	/* Locate the existing entry and the base its offsets refer to. */
	if (!xis->not_found) {
		xe = xis->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		base = xis->base;
		credits += OCFS2_INODE_UPDATE_CREDITS;
	} else {
		int i, block_off = 0;
		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
		xe = xbs->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		i = xbs->here - xbs->header->xh_entries;
		old_in_xb = 1;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * NOTE(review): ret is not checked before block_off
			 * is used; it is only returned later if nothing
			 * overwrites it.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xbs->bucket),
							i, &block_off,
							&name_offset);
			base = bucket_block(xbs->bucket, block_off);
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		} else {
			base = xbs->base;
			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
		}
	}

	/*
	 * delete a xattr doesn't need metadata and cluster allocation.
	 * so just calculate the credits and return.
	 *
	 * The credits for removing the value tree will be extended
	 * by ocfs2_remove_extent itself.
	 */
	if (!xi->xi_value) {
		if (!ocfs2_xattr_is_local(xe))
			credits += ocfs2_remove_extent_credits(inode->i_sb);

		goto out;
	}

	/* do cluster allocation guess first. */
	value_size = le64_to_cpu(xe->xe_value_size);

	if (old_in_xb) {
		/*
		 * In xattr set, we always try to set the xe in inode first,
		 * so if it can be inserted into inode successfully, the old
		 * one will be removed from the xattr block, and this xattr
		 * will be inserted into inode as a new xattr in inode.
		 */
		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
			clusters_add += new_clusters;
			credits += ocfs2_remove_extent_credits(inode->i_sb) +
				    OCFS2_INODE_UPDATE_CREDITS;
			if (!ocfs2_xattr_is_local(xe))
				credits += ocfs2_calc_extend_credits(
							inode->i_sb,
							&def_xv.xv.xr_list);
			goto out;
		}
	}

	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* the new values will be stored outside. */
		u32 old_clusters = 0;

		if (!ocfs2_xattr_is_local(xe)) {
			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
								 value_size);
			xv = (struct ocfs2_xattr_value_root *)
			     (base + name_offset + name_len);
			value_size = OCFS2_XATTR_ROOT_SIZE;
		} else
			xv = &def_xv.xv;

		if (old_clusters >= new_clusters) {
			/* Shrinking or same size: only removal credits. */
			credits += ocfs2_remove_extent_credits(inode->i_sb);
			goto out;
		} else {
			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
			clusters_add += new_clusters - old_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     &xv->xr_list);
			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
				goto out;
		}
	} else {
		/*
		 * Now the new value will be stored inside. So if the new
		 * value is smaller than the size of value root or the old
		 * value, we don't need any allocation, otherwise we have
		 * to guess metadata allocation.
		 */
		if ((ocfs2_xattr_is_local(xe) &&
		     (value_size >= xi->xi_value_len)) ||
		    (!ocfs2_xattr_is_local(xe) &&
		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
			goto out;
	}

meta_guess:
	/* calculate metadata allocation. */
	if (di->i_xattr_loc) {
		if (!xbs->xattr_bh) {
			/* Read the block ourselves; released at 'out'. */
			ret = ocfs2_read_xattr_block(inode,
						     le64_to_cpu(di->i_xattr_loc),
						     &bh);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			xb = (struct ocfs2_xattr_block *)bh->b_data;
		} else
			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;

		/*
		 * If there is already an xattr tree, good, we can calculate
		 * like other b-trees. Otherwise we may have the chance of
		 * create a tree, the credit calculation is borrowed from
		 * ocfs2_calc_extend_credits with root_el = NULL. And the
		 * new tree will be cluster based, so no meta is needed.
		 */
		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			struct ocfs2_extent_list *el =
				 &xb->xb_attrs.xb_root.xt_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el);
		} else
			credits += OCFS2_SUBALLOC_ALLOC + 1;

		/*
		 * This cluster will be used either for new bucket or for
		 * new xattr block.
		 * If the cluster size is the same as the bucket size, one
		 * more is needed since we may need to extend the bucket
		 * also.
		 */
		clusters_add += 1;
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		if (OCFS2_XATTR_BUCKET_SIZE ==
			OCFS2_SB(inode->i_sb)->s_clustersize) {
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
			clusters_add += 1;
		}
	} else {
		/* No xattr block exists yet: one will have to be created. */
		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el);
		} else {
			meta_add += 1;
		}
	}
out:
	if (clusters_need)
		*clusters_need = clusters_add;
	if (meta_need)
		*meta_need = meta_add;
	if (credits_need)
		*credits_need = credits;
	brelse(bh);
	return ret;
}

/*
 * Zero @ctxt, initialise its dealloc context and reserve the metadata
 * blocks and clusters that ocfs2_calc_xattr_set_need() estimates for
 * @xi, plus @extra_meta additional metadata blocks.  The estimated
 * journal credits are returned through @credits.
 *
 * Returns 0 on success or a negative error code; on failure any
 * metadata reservation taken here is released again.
 */
static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     struct ocfs2_xattr_set_ctxt *ctxt,
				     int extra_meta,
				     int *credits)
{
	int clusters_add, meta_add, ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));

	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);

	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
					&clusters_add, &meta_add, credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	meta_add += extra_meta;
	trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
					clusters_add, *credits);

	if (meta_add) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
							&ctxt->meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (clusters_add) {
		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
		if (ret)
			mlog_errno(ret);
	}
out:
	if (ret) {
		if (ctxt->meta_ac) {
			ocfs2_free_alloc_context(ctxt->meta_ac);
			ctxt->meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null ctxt->data_ac.
		 */
	}

	return ret;
}

/*
 * Perform the set/replace/remove described by @xi inside the running
 * transaction in @ctxt.
 *
 * Removal (NULL xi->xi_value) targets wherever the attribute was found.
 * Otherwise the inode body is tried first and the external block is the
 * fallback on -ENOSPC; a stale copy left in the other location is
 * removed afterwards, extending the transaction as needed.  On success
 * the inode ctime is updated.
 *
 * Note that @xi is modified (value cleared) when a stale copy has to be
 * removed.
 *
 * Returns 0 on success or a negative error code.
 */
static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/* Remove existing extended attribute */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to set the extended attribute into the inode first */
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * If that succeeded and the extended attribute also
			 * exists in the external block, remove it from there.
			 */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * Temporarily mark the inode search as "not found"
			 * so the credits are calculated for the block copy.
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				/* The block has not been searched yet. */
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * If that succeeded and the extended
				 * attribute exists in the inode, remove it.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode_set_ctime_current(inode);
		di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
		di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}

/*
 * This function is only called while creating an inode, to initialise
 * the security/acl xattrs of the new inode.
 * All transaction credits have been reserved in mknod.
 *
 * The caller supplies the running @handle and the allocators
 * (@meta_ac, @data_ac).  @flags is not used by this function.
 *
 * Returns 0 on success, -EOPNOTSUPP when the filesystem does not
 * support xattrs, -ENOMEM if the bucket cannot be allocated, or another
 * negative error code.
 */
int ocfs2_xattr_set_handle(handle_t *handle,
			   struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   const void *value,
			   size_t value_len,
			   int flags,
			   struct ocfs2_alloc_context *meta_ac,
			   struct ocfs2_alloc_context *data_ac)
{
	struct ocfs2_dinode *di;
	int ret;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_set_ctxt ctxt = {
		.handle = handle,
		.meta_ac = meta_ac,
		.data_ac = data_ac,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	/*
	 * In extreme situation, may need xattr bucket when
	 * block size is too small. And we have already reserved
	 * the credits for bucket in mknod.
	 */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
		xbs.bucket = ocfs2_xattr_bucket_new(inode);
		if (!xbs.bucket) {
			mlog_errno(-ENOMEM);
			return -ENOMEM;
		}
	}

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);

	/* Search the inode body first, then the external block. */
	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);

cleanup:
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}

/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
3572 */ 3573 int ocfs2_xattr_set(struct inode *inode, 3574 int name_index, 3575 const char *name, 3576 const void *value, 3577 size_t value_len, 3578 int flags) 3579 { 3580 struct buffer_head *di_bh = NULL; 3581 struct ocfs2_dinode *di; 3582 int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; 3583 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3584 struct inode *tl_inode = osb->osb_tl_inode; 3585 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3586 struct ocfs2_refcount_tree *ref_tree = NULL; 3587 struct ocfs2_lock_holder oh; 3588 3589 struct ocfs2_xattr_info xi = { 3590 .xi_name_index = name_index, 3591 .xi_name = name, 3592 .xi_name_len = strlen(name), 3593 .xi_value = value, 3594 .xi_value_len = value_len, 3595 }; 3596 3597 struct ocfs2_xattr_search xis = { 3598 .not_found = -ENODATA, 3599 }; 3600 3601 struct ocfs2_xattr_search xbs = { 3602 .not_found = -ENODATA, 3603 }; 3604 3605 if (!ocfs2_supports_xattr(osb)) 3606 return -EOPNOTSUPP; 3607 3608 /* 3609 * Only xbs will be used on indexed trees. xis doesn't need a 3610 * bucket. 3611 */ 3612 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3613 if (!xbs.bucket) { 3614 mlog_errno(-ENOMEM); 3615 return -ENOMEM; 3616 } 3617 3618 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); 3619 if (had_lock < 0) { 3620 ret = had_lock; 3621 mlog_errno(ret); 3622 goto cleanup_nolock; 3623 } 3624 xis.inode_bh = xbs.inode_bh = di_bh; 3625 di = (struct ocfs2_dinode *)di_bh->b_data; 3626 3627 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3628 /* 3629 * Scan inode and external block to find the same name 3630 * extended attribute and collect search information. 
3631 */ 3632 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3633 if (ret) 3634 goto cleanup; 3635 if (xis.not_found) { 3636 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3637 if (ret) 3638 goto cleanup; 3639 } 3640 3641 if (xis.not_found && xbs.not_found) { 3642 ret = -ENODATA; 3643 if (flags & XATTR_REPLACE) 3644 goto cleanup; 3645 ret = 0; 3646 if (!value) 3647 goto cleanup; 3648 } else { 3649 ret = -EEXIST; 3650 if (flags & XATTR_CREATE) 3651 goto cleanup; 3652 } 3653 3654 /* Check whether the value is refcounted and do some preparation. */ 3655 if (ocfs2_is_refcount_inode(inode) && 3656 (!xis.not_found || !xbs.not_found)) { 3657 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3658 &xis, &xbs, &ref_tree, 3659 &ref_meta, &ref_credits); 3660 if (ret) { 3661 mlog_errno(ret); 3662 goto cleanup; 3663 } 3664 } 3665 3666 inode_lock(tl_inode); 3667 3668 if (ocfs2_truncate_log_needs_flush(osb)) { 3669 ret = __ocfs2_flush_truncate_log(osb); 3670 if (ret < 0) { 3671 inode_unlock(tl_inode); 3672 mlog_errno(ret); 3673 goto cleanup; 3674 } 3675 } 3676 inode_unlock(tl_inode); 3677 3678 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3679 &xbs, &ctxt, ref_meta, &credits); 3680 if (ret) { 3681 mlog_errno(ret); 3682 goto cleanup; 3683 } 3684 3685 /* we need to update inode's ctime field, so add credit for it. 
*/ 3686 credits += OCFS2_INODE_UPDATE_CREDITS; 3687 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3688 if (IS_ERR(ctxt.handle)) { 3689 ret = PTR_ERR(ctxt.handle); 3690 mlog_errno(ret); 3691 goto out_free_ac; 3692 } 3693 3694 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3695 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3696 3697 ocfs2_commit_trans(osb, ctxt.handle); 3698 3699 out_free_ac: 3700 if (ctxt.data_ac) 3701 ocfs2_free_alloc_context(ctxt.data_ac); 3702 if (ctxt.meta_ac) 3703 ocfs2_free_alloc_context(ctxt.meta_ac); 3704 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3705 ocfs2_schedule_truncate_log_flush(osb, 1); 3706 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3707 3708 cleanup: 3709 if (ref_tree) 3710 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3711 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3712 if (!value && !ret) { 3713 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3714 if (ret) 3715 mlog_errno(ret); 3716 } 3717 ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); 3718 cleanup_nolock: 3719 brelse(di_bh); 3720 brelse(xbs.xattr_bh); 3721 ocfs2_xattr_bucket_free(xbs.bucket); 3722 3723 return ret; 3724 } 3725 3726 /* 3727 * Find the xattr extent rec which may contains name_hash. 3728 * e_cpos will be the first name hash of the xattr rec. 3729 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3730 */ 3731 static int ocfs2_xattr_get_rec(struct inode *inode, 3732 u32 name_hash, 3733 u64 *p_blkno, 3734 u32 *e_cpos, 3735 u32 *num_clusters, 3736 struct ocfs2_extent_list *el) 3737 { 3738 int ret = 0, i; 3739 struct buffer_head *eb_bh = NULL; 3740 struct ocfs2_extent_block *eb; 3741 struct ocfs2_extent_rec *rec = NULL; 3742 u64 e_blkno = 0; 3743 3744 if (el->l_tree_depth) { 3745 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3746 &eb_bh); 3747 if (ret) { 3748 mlog_errno(ret); 3749 goto out; 3750 } 3751 3752 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3753 el = &eb->h_list; 3754 3755 if (el->l_tree_depth) { 3756 ret = ocfs2_error(inode->i_sb, 3757 "Inode %llu has non zero tree depth in xattr tree block %llu\n", 3758 inode->i_ino, 3759 (unsigned long long)eb_bh->b_blocknr); 3760 goto out; 3761 } 3762 } 3763 3764 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3765 rec = &el->l_recs[i]; 3766 3767 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3768 e_blkno = le64_to_cpu(rec->e_blkno); 3769 break; 3770 } 3771 } 3772 3773 if (!e_blkno) { 3774 ret = ocfs2_error(inode->i_sb, "Inode %llu has bad extent record (%u, %u, 0) in xattr\n", 3775 inode->i_ino, 3776 le32_to_cpu(rec->e_cpos), 3777 ocfs2_rec_clusters(el, rec)); 3778 goto out; 3779 } 3780 3781 *p_blkno = le64_to_cpu(rec->e_blkno); 3782 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3783 if (e_cpos) 3784 *e_cpos = le32_to_cpu(rec->e_cpos); 3785 out: 3786 brelse(eb_bh); 3787 return ret; 3788 } 3789 3790 typedef int (xattr_bucket_func)(struct inode *inode, 3791 struct ocfs2_xattr_bucket *bucket, 3792 void *para); 3793 3794 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3795 struct ocfs2_xattr_bucket *bucket, 3796 int name_index, 3797 const char *name, 3798 u32 name_hash, 3799 u16 *xe_index, 3800 int *found) 3801 { 3802 int i, ret = 0, cmp = 1, block_off, new_offset; 3803 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3804 size_t name_len = strlen(name); 3805 struct 
ocfs2_xattr_entry *xe = NULL; 3806 char *xe_name; 3807 3808 /* 3809 * We don't use binary search in the bucket because there 3810 * may be multiple entries with the same name hash. 3811 */ 3812 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3813 xe = &xh->xh_entries[i]; 3814 3815 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3816 continue; 3817 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3818 break; 3819 3820 cmp = name_index - ocfs2_xattr_get_type(xe); 3821 if (!cmp) 3822 cmp = name_len - xe->xe_name_len; 3823 if (cmp) 3824 continue; 3825 3826 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3827 xh, 3828 i, 3829 &block_off, 3830 &new_offset); 3831 if (ret) { 3832 mlog_errno(ret); 3833 break; 3834 } 3835 3836 3837 xe_name = bucket_block(bucket, block_off) + new_offset; 3838 if (!memcmp(name, xe_name, name_len)) { 3839 *xe_index = i; 3840 *found = 1; 3841 ret = 0; 3842 break; 3843 } 3844 } 3845 3846 return ret; 3847 } 3848 3849 /* 3850 * Find the specified xattr entry in a series of buckets. 3851 * This series start from p_blkno and last for num_clusters. 3852 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3853 * the num of the valid buckets. 3854 * 3855 * Return the buffer_head this xattr should reside in. And if the xattr's 3856 * hash is in the gap of 2 buckets, return the lower bucket. 
3857 */ 3858 static int ocfs2_xattr_bucket_find(struct inode *inode, 3859 int name_index, 3860 const char *name, 3861 u32 name_hash, 3862 u64 p_blkno, 3863 u32 first_hash, 3864 u32 num_clusters, 3865 struct ocfs2_xattr_search *xs) 3866 { 3867 int ret, found = 0; 3868 struct ocfs2_xattr_header *xh = NULL; 3869 struct ocfs2_xattr_entry *xe = NULL; 3870 u16 index = 0; 3871 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3872 int low_bucket = 0, bucket, high_bucket; 3873 struct ocfs2_xattr_bucket *search; 3874 u64 blkno, lower_blkno = 0; 3875 3876 search = ocfs2_xattr_bucket_new(inode); 3877 if (!search) { 3878 ret = -ENOMEM; 3879 mlog_errno(ret); 3880 goto out; 3881 } 3882 3883 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3884 if (ret) { 3885 mlog_errno(ret); 3886 goto out; 3887 } 3888 3889 xh = bucket_xh(search); 3890 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3891 while (low_bucket <= high_bucket) { 3892 ocfs2_xattr_bucket_relse(search); 3893 3894 bucket = (low_bucket + high_bucket) / 2; 3895 blkno = p_blkno + bucket * blk_per_bucket; 3896 ret = ocfs2_read_xattr_bucket(search, blkno); 3897 if (ret) { 3898 mlog_errno(ret); 3899 goto out; 3900 } 3901 3902 xh = bucket_xh(search); 3903 xe = &xh->xh_entries[0]; 3904 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3905 high_bucket = bucket - 1; 3906 continue; 3907 } 3908 3909 /* 3910 * Check whether the hash of the last entry in our 3911 * bucket is larger than the search one. for an empty 3912 * bucket, the last one is also the first one. 3913 */ 3914 if (xh->xh_count) 3915 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3916 3917 /* record lower_blkno which may be the insert place. */ 3918 lower_blkno = blkno; 3919 3920 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3921 low_bucket = bucket + 1; 3922 continue; 3923 } 3924 3925 /* the searched xattr should reside in this bucket if exists. 
*/ 3926 ret = ocfs2_find_xe_in_bucket(inode, search, 3927 name_index, name, name_hash, 3928 &index, &found); 3929 if (ret) { 3930 mlog_errno(ret); 3931 goto out; 3932 } 3933 break; 3934 } 3935 3936 /* 3937 * Record the bucket we have found. 3938 * When the xattr's hash value is in the gap of 2 buckets, we will 3939 * always set it to the previous bucket. 3940 */ 3941 if (!lower_blkno) 3942 lower_blkno = p_blkno; 3943 3944 /* This should be in cache - we just read it during the search */ 3945 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3946 if (ret) { 3947 mlog_errno(ret); 3948 goto out; 3949 } 3950 3951 xs->header = bucket_xh(xs->bucket); 3952 xs->base = bucket_block(xs->bucket, 0); 3953 xs->end = xs->base + inode->i_sb->s_blocksize; 3954 3955 if (found) { 3956 xs->here = &xs->header->xh_entries[index]; 3957 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3958 name, name_index, name_hash, 3959 (unsigned long long)bucket_blkno(xs->bucket), 3960 index); 3961 } else 3962 ret = -ENODATA; 3963 3964 out: 3965 ocfs2_xattr_bucket_free(search); 3966 return ret; 3967 } 3968 3969 static int ocfs2_xattr_index_block_find(struct inode *inode, 3970 struct buffer_head *root_bh, 3971 int name_index, 3972 const char *name, 3973 struct ocfs2_xattr_search *xs) 3974 { 3975 int ret; 3976 struct ocfs2_xattr_block *xb = 3977 (struct ocfs2_xattr_block *)root_bh->b_data; 3978 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3979 struct ocfs2_extent_list *el = &xb_root->xt_list; 3980 u64 p_blkno = 0; 3981 u32 first_hash, num_clusters = 0; 3982 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3983 3984 if (le16_to_cpu(el->l_next_free_rec) == 0) 3985 return -ENODATA; 3986 3987 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3988 name, name_index, name_hash, 3989 (unsigned long long)root_bh->b_blocknr, 3990 -1); 3991 3992 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3993 &num_clusters, el); 3994 if (ret) { 
3995 mlog_errno(ret); 3996 goto out; 3997 } 3998 3999 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 4000 4001 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 4002 name, name_index, first_hash, 4003 (unsigned long long)p_blkno, 4004 num_clusters); 4005 4006 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 4007 p_blkno, first_hash, num_clusters, xs); 4008 4009 out: 4010 return ret; 4011 } 4012 4013 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 4014 u64 blkno, 4015 u32 clusters, 4016 xattr_bucket_func *func, 4017 void *para) 4018 { 4019 int i, ret = 0; 4020 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 4021 u32 num_buckets = clusters * bpc; 4022 struct ocfs2_xattr_bucket *bucket; 4023 4024 bucket = ocfs2_xattr_bucket_new(inode); 4025 if (!bucket) { 4026 mlog_errno(-ENOMEM); 4027 return -ENOMEM; 4028 } 4029 4030 trace_ocfs2_iterate_xattr_buckets( 4031 (unsigned long long)OCFS2_I(inode)->ip_blkno, 4032 (unsigned long long)blkno, clusters); 4033 4034 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 4035 ret = ocfs2_read_xattr_bucket(bucket, blkno); 4036 if (ret) { 4037 mlog_errno(ret); 4038 break; 4039 } 4040 4041 /* 4042 * The real bucket num in this series of blocks is stored 4043 * in the 1st bucket. 
4044 */ 4045 if (i == 0) 4046 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 4047 4048 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 4049 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 4050 if (func) { 4051 ret = func(inode, bucket, para); 4052 if (ret && ret != -ERANGE) 4053 mlog_errno(ret); 4054 /* Fall through to bucket_relse() */ 4055 } 4056 4057 ocfs2_xattr_bucket_relse(bucket); 4058 if (ret) 4059 break; 4060 } 4061 4062 ocfs2_xattr_bucket_free(bucket); 4063 return ret; 4064 } 4065 4066 struct ocfs2_xattr_tree_list { 4067 char *buffer; 4068 size_t buffer_size; 4069 size_t result; 4070 }; 4071 4072 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4073 struct ocfs2_xattr_header *xh, 4074 int index, 4075 int *block_off, 4076 int *new_offset) 4077 { 4078 u16 name_offset; 4079 4080 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4081 return -EINVAL; 4082 4083 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4084 4085 *block_off = name_offset >> sb->s_blocksize_bits; 4086 *new_offset = name_offset % sb->s_blocksize; 4087 4088 return 0; 4089 } 4090 4091 static int ocfs2_list_xattr_bucket(struct inode *inode, 4092 struct ocfs2_xattr_bucket *bucket, 4093 void *para) 4094 { 4095 int ret = 0, type; 4096 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4097 int i, block_off, new_offset; 4098 const char *name; 4099 4100 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4101 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4102 type = ocfs2_xattr_get_type(entry); 4103 4104 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4105 bucket_xh(bucket), 4106 i, 4107 &block_off, 4108 &new_offset); 4109 if (ret) 4110 break; 4111 4112 name = (const char *)bucket_block(bucket, block_off) + 4113 new_offset; 4114 ret = ocfs2_xattr_list_entry(inode->i_sb, 4115 xl->buffer, 4116 xl->buffer_size, 4117 &xl->result, 4118 type, name, 4119 
entry->xe_name_len); 4120 if (ret) 4121 break; 4122 } 4123 4124 return ret; 4125 } 4126 4127 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4128 struct buffer_head *blk_bh, 4129 xattr_tree_rec_func *rec_func, 4130 void *para) 4131 { 4132 struct ocfs2_xattr_block *xb = 4133 (struct ocfs2_xattr_block *)blk_bh->b_data; 4134 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4135 int ret = 0; 4136 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4137 u64 p_blkno = 0; 4138 4139 if (!el->l_next_free_rec || !rec_func) 4140 return 0; 4141 4142 while (name_hash > 0) { 4143 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4144 &e_cpos, &num_clusters, el); 4145 if (ret) { 4146 mlog_errno(ret); 4147 break; 4148 } 4149 4150 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4151 num_clusters, para); 4152 if (ret) { 4153 if (ret != -ERANGE) 4154 mlog_errno(ret); 4155 break; 4156 } 4157 4158 if (e_cpos == 0) 4159 break; 4160 4161 name_hash = e_cpos - 1; 4162 } 4163 4164 return ret; 4165 4166 } 4167 4168 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4169 struct buffer_head *root_bh, 4170 u64 blkno, u32 cpos, u32 len, void *para) 4171 { 4172 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4173 ocfs2_list_xattr_bucket, para); 4174 } 4175 4176 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4177 struct buffer_head *blk_bh, 4178 char *buffer, 4179 size_t buffer_size) 4180 { 4181 int ret; 4182 struct ocfs2_xattr_tree_list xl = { 4183 .buffer = buffer, 4184 .buffer_size = buffer_size, 4185 .result = 0, 4186 }; 4187 4188 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4189 ocfs2_list_xattr_tree_rec, &xl); 4190 if (ret) { 4191 mlog_errno(ret); 4192 goto out; 4193 } 4194 4195 ret = xl.result; 4196 out: 4197 return ret; 4198 } 4199 4200 static int cmp_xe(const void *a, const void *b) 4201 { 4202 const struct ocfs2_xattr_entry *l = a, *r = b; 4203 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4204 u32 r_hash = 
le32_to_cpu(r->xe_name_hash); 4205 4206 if (l_hash > r_hash) 4207 return 1; 4208 if (l_hash < r_hash) 4209 return -1; 4210 return 0; 4211 } 4212 4213 /* 4214 * When the ocfs2_xattr_block is filled up, new bucket will be created 4215 * and all the xattr entries will be moved to the new bucket. 4216 * The header goes at the start of the bucket, and the names+values are 4217 * filled from the end. This is why *target starts as the last buffer. 4218 * Note: we need to sort the entries since they are not saved in order 4219 * in the ocfs2_xattr_block. 4220 */ 4221 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4222 struct buffer_head *xb_bh, 4223 struct ocfs2_xattr_bucket *bucket) 4224 { 4225 int i, blocksize = inode->i_sb->s_blocksize; 4226 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4227 u16 offset, size, off_change; 4228 struct ocfs2_xattr_entry *xe; 4229 struct ocfs2_xattr_block *xb = 4230 (struct ocfs2_xattr_block *)xb_bh->b_data; 4231 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4232 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4233 u16 count = le16_to_cpu(xb_xh->xh_count); 4234 char *src = xb_bh->b_data; 4235 char *target = bucket_block(bucket, blks - 1); 4236 4237 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4238 (unsigned long long)xb_bh->b_blocknr, 4239 (unsigned long long)bucket_blkno(bucket)); 4240 4241 for (i = 0; i < blks; i++) 4242 memset(bucket_block(bucket, i), 0, blocksize); 4243 4244 /* 4245 * Since the xe_name_offset is based on ocfs2_xattr_header, 4246 * there is a offset change corresponding to the change of 4247 * ocfs2_xattr_header's position. 4248 */ 4249 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4250 xe = &xb_xh->xh_entries[count - 1]; 4251 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4252 size = blocksize - offset; 4253 4254 /* copy all the names and values. */ 4255 memcpy(target + offset, src + offset, size); 4256 4257 /* Init new header now. 
*/ 4258 xh->xh_count = xb_xh->xh_count; 4259 xh->xh_num_buckets = cpu_to_le16(1); 4260 xh->xh_name_value_len = cpu_to_le16(size); 4261 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4262 4263 /* copy all the entries. */ 4264 target = bucket_block(bucket, 0); 4265 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4266 size = count * sizeof(struct ocfs2_xattr_entry); 4267 memcpy(target + offset, (char *)xb_xh + offset, size); 4268 4269 /* Change the xe offset for all the xe because of the move. */ 4270 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4271 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4272 for (i = 0; i < count; i++) 4273 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4274 4275 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4276 4277 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4278 cmp_xe, NULL); 4279 } 4280 4281 /* 4282 * After we move xattr from block to index btree, we have to 4283 * update ocfs2_xattr_search to the new xe and base. 4284 * 4285 * When the entry is in xattr block, xattr_bh indicates the storage place. 4286 * While if the entry is in index b-tree, "bucket" indicates the 4287 * real place of the xattr. 
4288 */ 4289 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4290 struct ocfs2_xattr_search *xs, 4291 struct buffer_head *old_bh) 4292 { 4293 char *buf = old_bh->b_data; 4294 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4295 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4296 int i; 4297 4298 xs->header = bucket_xh(xs->bucket); 4299 xs->base = bucket_block(xs->bucket, 0); 4300 xs->end = xs->base + inode->i_sb->s_blocksize; 4301 4302 if (xs->not_found) 4303 return; 4304 4305 i = xs->here - old_xh->xh_entries; 4306 xs->here = &xs->header->xh_entries[i]; 4307 } 4308 4309 static int ocfs2_xattr_create_index_block(struct inode *inode, 4310 struct ocfs2_xattr_search *xs, 4311 struct ocfs2_xattr_set_ctxt *ctxt) 4312 { 4313 int ret; 4314 u32 bit_off, len; 4315 u64 blkno; 4316 handle_t *handle = ctxt->handle; 4317 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4318 struct buffer_head *xb_bh = xs->xattr_bh; 4319 struct ocfs2_xattr_block *xb = 4320 (struct ocfs2_xattr_block *)xb_bh->b_data; 4321 struct ocfs2_xattr_tree_root *xr; 4322 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4323 4324 trace_ocfs2_xattr_create_index_block_begin( 4325 (unsigned long long)xb_bh->b_blocknr); 4326 4327 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4328 BUG_ON(!xs->bucket); 4329 4330 /* 4331 * XXX: 4332 * We can use this lock for now, and maybe move to a dedicated mutex 4333 * if performance becomes a problem later. 
4334 */ 4335 down_write(&oi->ip_alloc_sem); 4336 4337 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4338 OCFS2_JOURNAL_ACCESS_WRITE); 4339 if (ret) { 4340 mlog_errno(ret); 4341 goto out; 4342 } 4343 4344 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4345 1, 1, &bit_off, &len); 4346 if (ret) { 4347 mlog_errno(ret); 4348 goto out; 4349 } 4350 4351 /* 4352 * The bucket may spread in many blocks, and 4353 * we will only touch the 1st block and the last block 4354 * in the whole bucket(one for entry and one for data). 4355 */ 4356 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4357 4358 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4359 4360 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4361 if (ret) { 4362 mlog_errno(ret); 4363 goto out; 4364 } 4365 4366 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4367 OCFS2_JOURNAL_ACCESS_CREATE); 4368 if (ret) { 4369 mlog_errno(ret); 4370 goto out; 4371 } 4372 4373 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4374 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4375 4376 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4377 4378 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4379 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4380 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4381 4382 xr = &xb->xb_attrs.xb_root; 4383 xr->xt_clusters = cpu_to_le32(1); 4384 xr->xt_last_eb_blk = 0; 4385 xr->xt_list.l_tree_depth = 0; 4386 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4387 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4388 4389 xr->xt_list.l_recs[0].e_cpos = 0; 4390 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4391 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4392 4393 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4394 4395 ocfs2_journal_dirty(handle, xb_bh); 4396 4397 out: 4398 up_write(&oi->ip_alloc_sem); 4399 4400 return ret; 4401 } 4402 4403 static int 
cmp_xe_offset(const void *a, const void *b) 4404 { 4405 const struct ocfs2_xattr_entry *l = a, *r = b; 4406 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4407 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4408 4409 if (l_name_offset < r_name_offset) 4410 return 1; 4411 if (l_name_offset > r_name_offset) 4412 return -1; 4413 return 0; 4414 } 4415 4416 /* 4417 * defrag a xattr bucket if we find that the bucket has some 4418 * holes between name/value pairs. 4419 * We will move all the name/value pairs to the end of the bucket 4420 * so that we can spare some space for insertion. 4421 */ 4422 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4423 handle_t *handle, 4424 struct ocfs2_xattr_bucket *bucket) 4425 { 4426 int ret, i; 4427 size_t end, offset, len; 4428 struct ocfs2_xattr_header *xh; 4429 char *entries, *buf, *bucket_buf = NULL; 4430 u64 blkno = bucket_blkno(bucket); 4431 u16 xh_free_start; 4432 size_t blocksize = inode->i_sb->s_blocksize; 4433 struct ocfs2_xattr_entry *xe; 4434 4435 /* 4436 * In order to make the operation more efficient and generic, 4437 * we copy all the blocks into a contiguous memory and do the 4438 * defragment there, so if anything is error, we will not touch 4439 * the real block. 
4440 */ 4441 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4442 if (!bucket_buf) { 4443 ret = -EIO; 4444 goto out; 4445 } 4446 4447 buf = bucket_buf; 4448 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4449 memcpy(buf, bucket_block(bucket, i), blocksize); 4450 4451 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4452 OCFS2_JOURNAL_ACCESS_WRITE); 4453 if (ret < 0) { 4454 mlog_errno(ret); 4455 goto out; 4456 } 4457 4458 xh = (struct ocfs2_xattr_header *)bucket_buf; 4459 entries = (char *)xh->xh_entries; 4460 xh_free_start = le16_to_cpu(xh->xh_free_start); 4461 4462 trace_ocfs2_defrag_xattr_bucket( 4463 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4464 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4465 4466 /* 4467 * sort all the entries by their offset. 4468 * the largest will be the first, so that we can 4469 * move them to the end one by one. 4470 */ 4471 sort(entries, le16_to_cpu(xh->xh_count), 4472 sizeof(struct ocfs2_xattr_entry), 4473 cmp_xe_offset, NULL); 4474 4475 /* Move all name/values to the end of the bucket. */ 4476 xe = xh->xh_entries; 4477 end = OCFS2_XATTR_BUCKET_SIZE; 4478 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4479 offset = le16_to_cpu(xe->xe_name_offset); 4480 len = namevalue_size_xe(xe); 4481 4482 /* 4483 * We must make sure that the name/value pair 4484 * exist in the same block. So adjust end to 4485 * the previous block end if needed. 
4486 */ 4487 if (((end - len) / blocksize != 4488 (end - 1) / blocksize)) 4489 end = end - end % blocksize; 4490 4491 if (end > offset + len) { 4492 memmove(bucket_buf + end - len, 4493 bucket_buf + offset, len); 4494 xe->xe_name_offset = cpu_to_le16(end - len); 4495 } 4496 4497 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4498 "bucket %llu\n", (unsigned long long)blkno); 4499 4500 end -= len; 4501 } 4502 4503 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4504 "bucket %llu\n", (unsigned long long)blkno); 4505 4506 if (xh_free_start == end) 4507 goto out; 4508 4509 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4510 xh->xh_free_start = cpu_to_le16(end); 4511 4512 /* sort the entries by their name_hash. */ 4513 sort(entries, le16_to_cpu(xh->xh_count), 4514 sizeof(struct ocfs2_xattr_entry), 4515 cmp_xe, NULL); 4516 4517 buf = bucket_buf; 4518 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4519 memcpy(bucket_block(bucket, i), buf, blocksize); 4520 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4521 4522 out: 4523 kfree(bucket_buf); 4524 return ret; 4525 } 4526 4527 /* 4528 * prev_blkno points to the start of an existing extent. new_blkno 4529 * points to a newly allocated extent. Because we know each of our 4530 * clusters contains more than bucket, we can easily split one cluster 4531 * at a bucket boundary. So we take the last cluster of the existing 4532 * extent and split it down the middle. We move the last half of the 4533 * buckets in the last cluster of the existing extent over to the new 4534 * extent. 4535 * 4536 * first_bh is the buffer at prev_blkno so we can update the existing 4537 * extent's bucket count. header_bh is the bucket were we were hoping 4538 * to insert our xattr. If the bucket move places the target in the new 4539 * extent, we'll update first_bh and header_bh after modifying the old 4540 * extent. 4541 * 4542 * first_hash will be set as the 1st xe's name_hash in the new extent. 
4543 */ 4544 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4545 handle_t *handle, 4546 struct ocfs2_xattr_bucket *first, 4547 struct ocfs2_xattr_bucket *target, 4548 u64 new_blkno, 4549 u32 num_clusters, 4550 u32 *first_hash) 4551 { 4552 int ret; 4553 struct super_block *sb = inode->i_sb; 4554 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4555 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4556 int to_move = num_buckets / 2; 4557 u64 src_blkno; 4558 u64 last_cluster_blkno = bucket_blkno(first) + 4559 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4560 4561 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4562 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4563 4564 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4565 (unsigned long long)last_cluster_blkno, 4566 (unsigned long long)new_blkno); 4567 4568 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4569 last_cluster_blkno, new_blkno, 4570 to_move, first_hash); 4571 if (ret) { 4572 mlog_errno(ret); 4573 goto out; 4574 } 4575 4576 /* This is the first bucket that got moved */ 4577 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4578 4579 /* 4580 * If the target bucket was part of the moved buckets, we need to 4581 * update first and target. 4582 */ 4583 if (bucket_blkno(target) >= src_blkno) { 4584 /* Find the block for the new target bucket */ 4585 src_blkno = new_blkno + 4586 (bucket_blkno(target) - src_blkno); 4587 4588 ocfs2_xattr_bucket_relse(first); 4589 ocfs2_xattr_bucket_relse(target); 4590 4591 /* 4592 * These shouldn't fail - the buffers are in the 4593 * journal from ocfs2_cp_xattr_bucket(). 
4594 */ 4595 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4596 if (ret) { 4597 mlog_errno(ret); 4598 goto out; 4599 } 4600 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4601 if (ret) 4602 mlog_errno(ret); 4603 4604 } 4605 4606 out: 4607 return ret; 4608 } 4609 4610 /* 4611 * Find the suitable pos when we divide a bucket into 2. 4612 * We have to make sure the xattrs with the same hash value exist 4613 * in the same bucket. 4614 * 4615 * If this ocfs2_xattr_header covers more than one hash value, find a 4616 * place where the hash value changes. Try to find the most even split. 4617 * The most common case is that all entries have different hash values, 4618 * and the first check we make will find a place to split. 4619 */ 4620 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4621 { 4622 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4623 int count = le16_to_cpu(xh->xh_count); 4624 int delta, middle = count / 2; 4625 4626 /* 4627 * We start at the middle. Each step gets farther away in both 4628 * directions. We therefore hit the change in hash value 4629 * nearest to the middle. Note that this loop does not execute for 4630 * count < 2. 4631 */ 4632 for (delta = 0; delta < middle; delta++) { 4633 /* Let's check delta earlier than middle */ 4634 if (cmp_xe(&entries[middle - delta - 1], 4635 &entries[middle - delta])) 4636 return middle - delta; 4637 4638 /* For even counts, don't walk off the end */ 4639 if ((middle + delta + 1) == count) 4640 continue; 4641 4642 /* Now try delta past middle */ 4643 if (cmp_xe(&entries[middle + delta], 4644 &entries[middle + delta + 1])) 4645 return middle + delta + 1; 4646 } 4647 4648 /* Every entry had the same hash */ 4649 return count; 4650 } 4651 4652 /* 4653 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4654 * first_hash will record the 1st hash of the new bucket. 4655 * 4656 * Normally half of the xattrs will be moved. 
But we have to make 4657 * sure that the xattrs with the same hash value are stored in the 4658 * same bucket. If all the xattrs in this bucket have the same hash 4659 * value, the new bucket will be initialized as an empty one and the 4660 * first_hash will be initialized as (hash_value+1). 4661 */ 4662 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4663 handle_t *handle, 4664 u64 blk, 4665 u64 new_blk, 4666 u32 *first_hash, 4667 int new_bucket_head) 4668 { 4669 int ret, i; 4670 int count, start, len, name_value_len = 0, name_offset = 0; 4671 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4672 struct ocfs2_xattr_header *xh; 4673 struct ocfs2_xattr_entry *xe; 4674 int blocksize = inode->i_sb->s_blocksize; 4675 4676 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4677 (unsigned long long)new_blk); 4678 4679 s_bucket = ocfs2_xattr_bucket_new(inode); 4680 t_bucket = ocfs2_xattr_bucket_new(inode); 4681 if (!s_bucket || !t_bucket) { 4682 ret = -ENOMEM; 4683 mlog_errno(ret); 4684 goto out; 4685 } 4686 4687 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4688 if (ret) { 4689 mlog_errno(ret); 4690 goto out; 4691 } 4692 4693 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4694 OCFS2_JOURNAL_ACCESS_WRITE); 4695 if (ret) { 4696 mlog_errno(ret); 4697 goto out; 4698 } 4699 4700 /* 4701 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4702 * there's no need to read it. 4703 */ 4704 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4705 if (ret) { 4706 mlog_errno(ret); 4707 goto out; 4708 } 4709 4710 /* 4711 * Hey, if we're overwriting t_bucket, what difference does 4712 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4713 * same part of ocfs2_cp_xattr_bucket(). 4714 */ 4715 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4716 new_bucket_head ? 
4717 OCFS2_JOURNAL_ACCESS_CREATE : 4718 OCFS2_JOURNAL_ACCESS_WRITE); 4719 if (ret) { 4720 mlog_errno(ret); 4721 goto out; 4722 } 4723 4724 xh = bucket_xh(s_bucket); 4725 count = le16_to_cpu(xh->xh_count); 4726 start = ocfs2_xattr_find_divide_pos(xh); 4727 4728 if (start == count) { 4729 xe = &xh->xh_entries[start-1]; 4730 4731 /* 4732 * initialized a new empty bucket here. 4733 * The hash value is set as one larger than 4734 * that of the last entry in the previous bucket. 4735 */ 4736 for (i = 0; i < t_bucket->bu_blocks; i++) 4737 memset(bucket_block(t_bucket, i), 0, blocksize); 4738 4739 xh = bucket_xh(t_bucket); 4740 xh->xh_free_start = cpu_to_le16(blocksize); 4741 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4742 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4743 4744 goto set_num_buckets; 4745 } 4746 4747 /* copy the whole bucket to the new first. */ 4748 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4749 4750 /* update the new bucket. */ 4751 xh = bucket_xh(t_bucket); 4752 4753 /* 4754 * Calculate the total name/value len and xh_free_start for 4755 * the old bucket first. 4756 */ 4757 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4758 name_value_len = 0; 4759 for (i = 0; i < start; i++) { 4760 xe = &xh->xh_entries[i]; 4761 name_value_len += namevalue_size_xe(xe); 4762 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4763 name_offset = le16_to_cpu(xe->xe_name_offset); 4764 } 4765 4766 /* 4767 * Now begin the modification to the new bucket. 4768 * 4769 * In the new bucket, We just move the xattr entry to the beginning 4770 * and don't touch the name/value. So there will be some holes in the 4771 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4772 * called. 
4773 */ 4774 xe = &xh->xh_entries[start]; 4775 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4776 trace_ocfs2_divide_xattr_bucket_move(len, 4777 (int)((char *)xe - (char *)xh), 4778 (int)((char *)xh->xh_entries - (char *)xh)); 4779 memmove((char *)xh->xh_entries, (char *)xe, len); 4780 xe = &xh->xh_entries[count - start]; 4781 len = sizeof(struct ocfs2_xattr_entry) * start; 4782 memset((char *)xe, 0, len); 4783 4784 le16_add_cpu(&xh->xh_count, -start); 4785 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4786 4787 /* Calculate xh_free_start for the new bucket. */ 4788 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4789 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4790 xe = &xh->xh_entries[i]; 4791 if (le16_to_cpu(xe->xe_name_offset) < 4792 le16_to_cpu(xh->xh_free_start)) 4793 xh->xh_free_start = xe->xe_name_offset; 4794 } 4795 4796 set_num_buckets: 4797 /* set xh->xh_num_buckets for the new xh. */ 4798 if (new_bucket_head) 4799 xh->xh_num_buckets = cpu_to_le16(1); 4800 else 4801 xh->xh_num_buckets = 0; 4802 4803 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4804 4805 /* store the first_hash of the new bucket. */ 4806 if (first_hash) 4807 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4808 4809 /* 4810 * Now only update the 1st block of the old bucket. If we 4811 * just added a new empty bucket, there is no need to modify 4812 * it. 4813 */ 4814 if (start == count) 4815 goto out; 4816 4817 xh = bucket_xh(s_bucket); 4818 memset(&xh->xh_entries[start], 0, 4819 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4820 xh->xh_count = cpu_to_le16(start); 4821 xh->xh_free_start = cpu_to_le16(name_offset); 4822 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4823 4824 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4825 4826 out: 4827 ocfs2_xattr_bucket_free(s_bucket); 4828 ocfs2_xattr_bucket_free(t_bucket); 4829 4830 return ret; 4831 } 4832 4833 /* 4834 * Copy xattr from one bucket to another bucket. 
4835 * 4836 * The caller must make sure that the journal transaction 4837 * has enough space for journaling. 4838 */ 4839 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4840 handle_t *handle, 4841 u64 s_blkno, 4842 u64 t_blkno, 4843 int t_is_new) 4844 { 4845 int ret; 4846 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4847 4848 BUG_ON(s_blkno == t_blkno); 4849 4850 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4851 (unsigned long long)t_blkno, 4852 t_is_new); 4853 4854 s_bucket = ocfs2_xattr_bucket_new(inode); 4855 t_bucket = ocfs2_xattr_bucket_new(inode); 4856 if (!s_bucket || !t_bucket) { 4857 ret = -ENOMEM; 4858 mlog_errno(ret); 4859 goto out; 4860 } 4861 4862 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4863 if (ret) 4864 goto out; 4865 4866 /* 4867 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4868 * there's no need to read it. 4869 */ 4870 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4871 if (ret) 4872 goto out; 4873 4874 /* 4875 * Hey, if we're overwriting t_bucket, what difference does 4876 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4877 * cluster to fill, we came here from 4878 * ocfs2_mv_xattr_buckets(), and it is really new - 4879 * ACCESS_CREATE is required. But we also might have moved data 4880 * out of t_bucket before extending back into it. 4881 * ocfs2_add_new_xattr_bucket() can do this - its call to 4882 * ocfs2_add_new_xattr_cluster() may have created a new extent 4883 * and copied out the end of the old extent. Then it re-extends 4884 * the old extent back to create space for new xattrs. That's 4885 * how we get here, and the bucket isn't really new. 4886 */ 4887 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4888 t_is_new ? 
4889 OCFS2_JOURNAL_ACCESS_CREATE : 4890 OCFS2_JOURNAL_ACCESS_WRITE); 4891 if (ret) 4892 goto out; 4893 4894 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4895 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4896 4897 out: 4898 ocfs2_xattr_bucket_free(t_bucket); 4899 ocfs2_xattr_bucket_free(s_bucket); 4900 4901 return ret; 4902 } 4903 4904 /* 4905 * src_blk points to the start of an existing extent. last_blk points to 4906 * last cluster in that extent. to_blk points to a newly allocated 4907 * extent. We copy the buckets from the cluster at last_blk to the new 4908 * extent. If start_bucket is non-zero, we skip that many buckets before 4909 * we start copying. The new extent's xh_num_buckets gets set to the 4910 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4911 * by the same amount. 4912 */ 4913 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4914 u64 src_blk, u64 last_blk, u64 to_blk, 4915 unsigned int start_bucket, 4916 u32 *first_hash) 4917 { 4918 int i, ret, credits; 4919 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4920 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4921 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4922 struct ocfs2_xattr_bucket *old_first, *new_first; 4923 4924 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4925 (unsigned long long)to_blk); 4926 4927 BUG_ON(start_bucket >= num_buckets); 4928 if (start_bucket) { 4929 num_buckets -= start_bucket; 4930 last_blk += (start_bucket * blks_per_bucket); 4931 } 4932 4933 /* The first bucket of the original extent */ 4934 old_first = ocfs2_xattr_bucket_new(inode); 4935 /* The first bucket of the new extent */ 4936 new_first = ocfs2_xattr_bucket_new(inode); 4937 if (!old_first || !new_first) { 4938 ret = -ENOMEM; 4939 mlog_errno(ret); 4940 goto out; 4941 } 4942 4943 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4944 if (ret) { 4945 mlog_errno(ret); 4946 goto out; 4947 } 4948 4949 /* 4950 * We 
need to update the first bucket of the old extent and all 4951 * the buckets going to the new extent. 4952 */ 4953 credits = ((num_buckets + 1) * blks_per_bucket); 4954 ret = ocfs2_extend_trans(handle, credits); 4955 if (ret) { 4956 mlog_errno(ret); 4957 goto out; 4958 } 4959 4960 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4961 OCFS2_JOURNAL_ACCESS_WRITE); 4962 if (ret) { 4963 mlog_errno(ret); 4964 goto out; 4965 } 4966 4967 for (i = 0; i < num_buckets; i++) { 4968 ret = ocfs2_cp_xattr_bucket(inode, handle, 4969 last_blk + (i * blks_per_bucket), 4970 to_blk + (i * blks_per_bucket), 4971 1); 4972 if (ret) { 4973 mlog_errno(ret); 4974 goto out; 4975 } 4976 } 4977 4978 /* 4979 * Get the new bucket ready before we dirty anything 4980 * (This actually shouldn't fail, because we already dirtied 4981 * it once in ocfs2_cp_xattr_bucket()). 4982 */ 4983 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4984 if (ret) { 4985 mlog_errno(ret); 4986 goto out; 4987 } 4988 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4989 OCFS2_JOURNAL_ACCESS_WRITE); 4990 if (ret) { 4991 mlog_errno(ret); 4992 goto out; 4993 } 4994 4995 /* Now update the headers */ 4996 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4997 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4998 4999 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 5000 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 5001 5002 if (first_hash) 5003 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 5004 5005 out: 5006 ocfs2_xattr_bucket_free(new_first); 5007 ocfs2_xattr_bucket_free(old_first); 5008 return ret; 5009 } 5010 5011 /* 5012 * Move some xattrs in this cluster to the new cluster. 5013 * This function should only be called when bucket size == cluster size. 5014 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
5015 */ 5016 static int ocfs2_divide_xattr_cluster(struct inode *inode, 5017 handle_t *handle, 5018 u64 prev_blk, 5019 u64 new_blk, 5020 u32 *first_hash) 5021 { 5022 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5023 int ret, credits = 2 * blk_per_bucket; 5024 5025 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 5026 5027 ret = ocfs2_extend_trans(handle, credits); 5028 if (ret) { 5029 mlog_errno(ret); 5030 return ret; 5031 } 5032 5033 /* Move half of the xattr in start_blk to the next bucket. */ 5034 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 5035 new_blk, first_hash, 1); 5036 } 5037 5038 /* 5039 * Move some xattrs from the old cluster to the new one since they are not 5040 * contiguous in ocfs2 xattr tree. 5041 * 5042 * new_blk starts a new separate cluster, and we will move some xattrs from 5043 * prev_blk to it. v_start will be set as the first name hash value in this 5044 * new cluster so that it can be used as e_cpos during tree insertion and 5045 * don't collide with our original b-tree operations. first_bh and header_bh 5046 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5047 * to extend the insert bucket. 5048 * 5049 * The problem is how much xattr should we move to the new one and when should 5050 * we update first_bh and header_bh? 5051 * 1. If cluster size > bucket size, that means the previous cluster has more 5052 * than 1 bucket, so just move half nums of bucket into the new cluster and 5053 * update the first_bh and header_bh if the insert bucket has been moved 5054 * to the new cluster. 5055 * 2. If cluster_size == bucket_size: 5056 * a) If the previous extent rec has more than one cluster and the insert 5057 * place isn't in the last cluster, copy the entire last cluster to the 5058 * new one. This time, we don't need to update the first_bh and header_bh 5059 * since they will not be moved into the new cluster. 
5060 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5061 * the new one. And we set the extend flag to zero if the insert place is 5062 * moved into the new allocated cluster since no extend is needed. 5063 */ 5064 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5065 handle_t *handle, 5066 struct ocfs2_xattr_bucket *first, 5067 struct ocfs2_xattr_bucket *target, 5068 u64 new_blk, 5069 u32 prev_clusters, 5070 u32 *v_start, 5071 int *extend) 5072 { 5073 int ret; 5074 5075 trace_ocfs2_adjust_xattr_cross_cluster( 5076 (unsigned long long)bucket_blkno(first), 5077 (unsigned long long)new_blk, prev_clusters); 5078 5079 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5080 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5081 handle, 5082 first, target, 5083 new_blk, 5084 prev_clusters, 5085 v_start); 5086 if (ret) 5087 mlog_errno(ret); 5088 } else { 5089 /* The start of the last cluster in the first extent */ 5090 u64 last_blk = bucket_blkno(first) + 5091 ((prev_clusters - 1) * 5092 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5093 5094 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5095 ret = ocfs2_mv_xattr_buckets(inode, handle, 5096 bucket_blkno(first), 5097 last_blk, new_blk, 0, 5098 v_start); 5099 if (ret) 5100 mlog_errno(ret); 5101 } else { 5102 ret = ocfs2_divide_xattr_cluster(inode, handle, 5103 last_blk, new_blk, 5104 v_start); 5105 if (ret) 5106 mlog_errno(ret); 5107 5108 if ((bucket_blkno(target) == last_blk) && extend) 5109 *extend = 0; 5110 } 5111 } 5112 5113 return ret; 5114 } 5115 5116 /* 5117 * Add a new cluster for xattr storage. 5118 * 5119 * If the new cluster is contiguous with the previous one, it will be 5120 * appended to the same extent record, and num_clusters will be updated. 5121 * If not, we will insert a new extent for it and move some xattrs in 5122 * the last cluster into the new allocated one. 
5123 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5124 * lose the benefits of hashing because we'll have to search large leaves. 5125 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5126 * if it's bigger). 5127 * 5128 * first_bh is the first block of the previous extent rec and header_bh 5129 * indicates the bucket we will insert the new xattrs. They will be updated 5130 * when the header_bh is moved into the new cluster. 5131 */ 5132 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5133 struct buffer_head *root_bh, 5134 struct ocfs2_xattr_bucket *first, 5135 struct ocfs2_xattr_bucket *target, 5136 u32 *num_clusters, 5137 u32 prev_cpos, 5138 int *extend, 5139 struct ocfs2_xattr_set_ctxt *ctxt) 5140 { 5141 int ret; 5142 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5143 u32 prev_clusters = *num_clusters; 5144 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5145 u64 block; 5146 handle_t *handle = ctxt->handle; 5147 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5148 struct ocfs2_extent_tree et; 5149 5150 trace_ocfs2_add_new_xattr_cluster_begin( 5151 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5152 (unsigned long long)bucket_blkno(first), 5153 prev_cpos, prev_clusters); 5154 5155 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5156 5157 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5158 OCFS2_JOURNAL_ACCESS_WRITE); 5159 if (ret < 0) { 5160 mlog_errno(ret); 5161 goto leave; 5162 } 5163 5164 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5165 clusters_to_add, &bit_off, &num_bits); 5166 if (ret < 0) { 5167 if (ret != -ENOSPC) 5168 mlog_errno(ret); 5169 goto leave; 5170 } 5171 5172 BUG_ON(num_bits > clusters_to_add); 5173 5174 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5175 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5176 5177 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5178 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5179 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5180 /* 5181 * If this cluster is contiguous with the old one and 5182 * adding this new cluster, we don't surpass the limit of 5183 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5184 * initialized and used like other buckets in the previous 5185 * cluster. 5186 * So add it as a contiguous one. The caller will handle 5187 * its init process. 5188 */ 5189 v_start = prev_cpos + prev_clusters; 5190 *num_clusters = prev_clusters + num_bits; 5191 } else { 5192 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5193 handle, 5194 first, 5195 target, 5196 block, 5197 prev_clusters, 5198 &v_start, 5199 extend); 5200 if (ret) { 5201 mlog_errno(ret); 5202 goto leave; 5203 } 5204 } 5205 5206 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5207 v_start, num_bits); 5208 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5209 num_bits, 0, ctxt->meta_ac); 5210 if (ret < 0) { 5211 mlog_errno(ret); 5212 goto leave; 5213 } 5214 5215 ocfs2_journal_dirty(handle, root_bh); 5216 5217 leave: 5218 return ret; 5219 } 5220 5221 /* 5222 * We are given an extent. 'first' is the bucket at the very front of 5223 * the extent. The extent has space for an additional bucket past 5224 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5225 * of the target bucket. We wish to shift every bucket past the target 5226 * down one, filling in that additional space. When we get back to the 5227 * target, we split the target between itself and the now-empty bucket 5228 * at target+1 (aka, target_blkno + blks_per_bucket). 
5229 */ 5230 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5231 handle_t *handle, 5232 struct ocfs2_xattr_bucket *first, 5233 u64 target_blk, 5234 u32 num_clusters) 5235 { 5236 int ret, credits; 5237 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5238 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5239 u64 end_blk; 5240 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5241 5242 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5243 (unsigned long long)bucket_blkno(first), 5244 num_clusters, new_bucket); 5245 5246 /* The extent must have room for an additional bucket */ 5247 BUG_ON(new_bucket >= 5248 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5249 5250 /* end_blk points to the last existing bucket */ 5251 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5252 5253 /* 5254 * end_blk is the start of the last existing bucket. 5255 * Thus, (end_blk - target_blk) covers the target bucket and 5256 * every bucket after it up to, but not including, the last 5257 * existing bucket. Then we add the last existing bucket, the 5258 * new bucket, and the first bucket (3 * blk_per_bucket). 5259 */ 5260 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5261 ret = ocfs2_extend_trans(handle, credits); 5262 if (ret) { 5263 mlog_errno(ret); 5264 goto out; 5265 } 5266 5267 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5268 OCFS2_JOURNAL_ACCESS_WRITE); 5269 if (ret) { 5270 mlog_errno(ret); 5271 goto out; 5272 } 5273 5274 while (end_blk != target_blk) { 5275 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5276 end_blk + blk_per_bucket, 0); 5277 if (ret) 5278 goto out; 5279 end_blk -= blk_per_bucket; 5280 } 5281 5282 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5283 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5284 target_blk + blk_per_bucket, NULL, 0); 5285 5286 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5287 ocfs2_xattr_bucket_journal_dirty(handle, first); 5288 5289 out: 5290 return ret; 5291 } 5292 5293 /* 5294 * Add new xattr bucket in an extent record and adjust the buckets 5295 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5296 * bucket we want to insert into. 5297 * 5298 * In the easy case, we will move all the buckets after target down by 5299 * one. Half of target's xattrs will be moved to the next bucket. 5300 * 5301 * If current cluster is full, we'll allocate a new one. This may not 5302 * be contiguous. The underlying calls will make sure that there is 5303 * space for the insert, shifting buckets around if necessary. 5304 * 'target' may be moved by those calls. 5305 */ 5306 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5307 struct buffer_head *xb_bh, 5308 struct ocfs2_xattr_bucket *target, 5309 struct ocfs2_xattr_set_ctxt *ctxt) 5310 { 5311 struct ocfs2_xattr_block *xb = 5312 (struct ocfs2_xattr_block *)xb_bh->b_data; 5313 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5314 struct ocfs2_extent_list *el = &xb_root->xt_list; 5315 u32 name_hash = 5316 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5317 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5318 int ret, num_buckets, extend = 1; 5319 u64 p_blkno; 5320 u32 e_cpos, num_clusters; 5321 /* The bucket at the front of the extent */ 5322 struct ocfs2_xattr_bucket *first; 5323 5324 trace_ocfs2_add_new_xattr_bucket( 5325 (unsigned long long)bucket_blkno(target)); 5326 5327 /* The first bucket of the original extent */ 5328 first = ocfs2_xattr_bucket_new(inode); 5329 if (!first) { 5330 ret = -ENOMEM; 5331 mlog_errno(ret); 5332 goto out; 5333 } 5334 5335 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5336 &num_clusters, el); 5337 if (ret) { 5338 mlog_errno(ret); 
5339 goto out; 5340 } 5341 5342 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5343 if (ret) { 5344 mlog_errno(ret); 5345 goto out; 5346 } 5347 5348 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5349 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5350 /* 5351 * This can move first+target if the target bucket moves 5352 * to the new extent. 5353 */ 5354 ret = ocfs2_add_new_xattr_cluster(inode, 5355 xb_bh, 5356 first, 5357 target, 5358 &num_clusters, 5359 e_cpos, 5360 &extend, 5361 ctxt); 5362 if (ret) { 5363 mlog_errno(ret); 5364 goto out; 5365 } 5366 } 5367 5368 if (extend) { 5369 ret = ocfs2_extend_xattr_bucket(inode, 5370 ctxt->handle, 5371 first, 5372 bucket_blkno(target), 5373 num_clusters); 5374 if (ret) 5375 mlog_errno(ret); 5376 } 5377 5378 out: 5379 ocfs2_xattr_bucket_free(first); 5380 5381 return ret; 5382 } 5383 5384 /* 5385 * Truncate the specified xe_off entry in xattr bucket. 5386 * bucket is indicated by header_bh and len is the new length. 5387 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5388 * 5389 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5390 */ 5391 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5392 struct ocfs2_xattr_bucket *bucket, 5393 int xe_off, 5394 int len, 5395 struct ocfs2_xattr_set_ctxt *ctxt) 5396 { 5397 int ret, offset; 5398 u64 value_blk; 5399 struct ocfs2_xattr_entry *xe; 5400 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5401 size_t blocksize = inode->i_sb->s_blocksize; 5402 struct ocfs2_xattr_value_buf vb = { 5403 .vb_access = ocfs2_journal_access, 5404 }; 5405 5406 xe = &xh->xh_entries[xe_off]; 5407 5408 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5409 5410 offset = le16_to_cpu(xe->xe_name_offset) + 5411 OCFS2_XATTR_SIZE(xe->xe_name_len); 5412 5413 value_blk = offset / blocksize; 5414 5415 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5416 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5417 5418 vb.vb_bh = bucket->bu_bhs[value_blk]; 5419 BUG_ON(!vb.vb_bh); 5420 5421 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5422 (vb.vb_bh->b_data + offset % blocksize); 5423 5424 /* 5425 * From here on out we have to dirty the bucket. The generic 5426 * value calls only modify one of the bucket's bhs, but we need 5427 * to send the bucket at once. So if they error, they *could* have 5428 * modified something. We have to assume they did, and dirty 5429 * the whole bucket. This leaves us in a consistent state. 5430 */ 5431 trace_ocfs2_xattr_bucket_value_truncate( 5432 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5433 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5434 if (ret) { 5435 mlog_errno(ret); 5436 goto out; 5437 } 5438 5439 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5440 OCFS2_JOURNAL_ACCESS_WRITE); 5441 if (ret) { 5442 mlog_errno(ret); 5443 goto out; 5444 } 5445 5446 xe->xe_value_size = cpu_to_le64(len); 5447 5448 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5449 5450 out: 5451 return ret; 5452 } 5453 5454 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5455 struct buffer_head *root_bh, 5456 u64 blkno, 5457 u32 cpos, 5458 u32 len, 5459 void *para) 5460 { 5461 int ret; 5462 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5463 struct inode *tl_inode = osb->osb_tl_inode; 5464 handle_t *handle; 5465 struct ocfs2_xattr_block *xb = 5466 (struct ocfs2_xattr_block *)root_bh->b_data; 5467 struct ocfs2_alloc_context *meta_ac = NULL; 5468 struct ocfs2_cached_dealloc_ctxt dealloc; 5469 struct ocfs2_extent_tree et; 5470 5471 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5472 ocfs2_delete_xattr_in_bucket, para); 5473 if (ret) { 5474 mlog_errno(ret); 5475 return ret; 5476 } 5477 5478 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5479 5480 ocfs2_init_dealloc_ctxt(&dealloc); 5481 5482 
trace_ocfs2_rm_xattr_cluster( 5483 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5484 (unsigned long long)blkno, cpos, len); 5485 5486 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5487 len); 5488 5489 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5490 if (ret) { 5491 mlog_errno(ret); 5492 return ret; 5493 } 5494 5495 inode_lock(tl_inode); 5496 5497 if (ocfs2_truncate_log_needs_flush(osb)) { 5498 ret = __ocfs2_flush_truncate_log(osb); 5499 if (ret < 0) { 5500 mlog_errno(ret); 5501 goto out; 5502 } 5503 } 5504 5505 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5506 if (IS_ERR(handle)) { 5507 ret = -ENOMEM; 5508 mlog_errno(ret); 5509 goto out; 5510 } 5511 5512 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5513 OCFS2_JOURNAL_ACCESS_WRITE); 5514 if (ret) { 5515 mlog_errno(ret); 5516 goto out_commit; 5517 } 5518 5519 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5520 &dealloc); 5521 if (ret) { 5522 mlog_errno(ret); 5523 goto out_commit; 5524 } 5525 5526 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5527 ocfs2_journal_dirty(handle, root_bh); 5528 5529 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5530 if (ret) 5531 mlog_errno(ret); 5532 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5533 5534 out_commit: 5535 ocfs2_commit_trans(osb, handle); 5536 out: 5537 ocfs2_schedule_truncate_log_flush(osb, 1); 5538 5539 inode_unlock(tl_inode); 5540 5541 if (meta_ac) 5542 ocfs2_free_alloc_context(meta_ac); 5543 5544 ocfs2_run_deallocs(osb, &dealloc); 5545 5546 return ret; 5547 } 5548 5549 /* 5550 * check whether the xattr bucket is filled up with the same hash value. 5551 * If we want to insert the xattr with the same hash, return -ENOSPC. 5552 * If we want to insert a xattr with different hash value, go ahead 5553 * and ocfs2_divide_xattr_bucket will handle this. 
5554 */ 5555 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5556 struct ocfs2_xattr_bucket *bucket, 5557 const char *name) 5558 { 5559 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5560 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5561 5562 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5563 return 0; 5564 5565 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5566 xh->xh_entries[0].xe_name_hash) { 5567 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5568 "hash = %u\n", 5569 (unsigned long long)bucket_blkno(bucket), 5570 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5571 return -ENOSPC; 5572 } 5573 5574 return 0; 5575 } 5576 5577 /* 5578 * Try to set the entry in the current bucket. If we fail, the caller 5579 * will handle getting us another bucket. 5580 */ 5581 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5582 struct ocfs2_xattr_info *xi, 5583 struct ocfs2_xattr_search *xs, 5584 struct ocfs2_xattr_set_ctxt *ctxt) 5585 { 5586 int ret; 5587 struct ocfs2_xa_loc loc; 5588 5589 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5590 5591 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5592 xs->not_found ? NULL : xs->here); 5593 ret = ocfs2_xa_set(&loc, xi, ctxt); 5594 if (!ret) { 5595 xs->here = loc.xl_entry; 5596 goto out; 5597 } 5598 if (ret != -ENOSPC) { 5599 mlog_errno(ret); 5600 goto out; 5601 } 5602 5603 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5604 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5605 xs->bucket); 5606 if (ret) { 5607 mlog_errno(ret); 5608 goto out; 5609 } 5610 5611 ret = ocfs2_xa_set(&loc, xi, ctxt); 5612 if (!ret) { 5613 xs->here = loc.xl_entry; 5614 goto out; 5615 } 5616 if (ret != -ENOSPC) 5617 mlog_errno(ret); 5618 5619 5620 out: 5621 return ret; 5622 } 5623 5624 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5625 struct ocfs2_xattr_info *xi, 5626 struct ocfs2_xattr_search *xs, 5627 struct ocfs2_xattr_set_ctxt *ctxt) 5628 { 5629 int ret; 5630 5631 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5632 5633 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5634 if (!ret) 5635 goto out; 5636 if (ret != -ENOSPC) { 5637 mlog_errno(ret); 5638 goto out; 5639 } 5640 5641 /* Ack, need more space. Let's try to get another bucket! */ 5642 5643 /* 5644 * We do not allow for overlapping ranges between buckets. And 5645 * the maximum number of collisions we will allow for then is 5646 * one bucket's worth, so check it here whether we need to 5647 * add a new bucket for the insert. 5648 */ 5649 ret = ocfs2_check_xattr_bucket_collision(inode, 5650 xs->bucket, 5651 xi->xi_name); 5652 if (ret) { 5653 mlog_errno(ret); 5654 goto out; 5655 } 5656 5657 ret = ocfs2_add_new_xattr_bucket(inode, 5658 xs->xattr_bh, 5659 xs->bucket, 5660 ctxt); 5661 if (ret) { 5662 mlog_errno(ret); 5663 goto out; 5664 } 5665 5666 /* 5667 * ocfs2_add_new_xattr_bucket() will have updated 5668 * xs->bucket if it moved, but it will not have updated 5669 * any of the other search fields. Thus, we drop it and 5670 * re-search. Everything should be cached, so it'll be 5671 * quick. 
5672 */ 5673 ocfs2_xattr_bucket_relse(xs->bucket); 5674 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5675 xi->xi_name_index, 5676 xi->xi_name, xs); 5677 if (ret && ret != -ENODATA) 5678 goto out; 5679 xs->not_found = ret; 5680 5681 /* Ok, we have a new bucket, let's try again */ 5682 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5683 if (ret && (ret != -ENOSPC)) 5684 mlog_errno(ret); 5685 5686 out: 5687 return ret; 5688 } 5689 5690 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5691 struct ocfs2_xattr_bucket *bucket, 5692 void *para) 5693 { 5694 int ret = 0, ref_credits; 5695 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5696 u16 i; 5697 struct ocfs2_xattr_entry *xe; 5698 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5699 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5700 int credits = ocfs2_remove_extent_credits(osb->sb) + 5701 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5702 struct ocfs2_xattr_value_root *xv; 5703 struct ocfs2_rm_xattr_bucket_para *args = 5704 (struct ocfs2_rm_xattr_bucket_para *)para; 5705 5706 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5707 5708 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5709 xe = &xh->xh_entries[i]; 5710 if (ocfs2_xattr_is_local(xe)) 5711 continue; 5712 5713 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5714 i, &xv, NULL); 5715 if (ret) { 5716 mlog_errno(ret); 5717 break; 5718 } 5719 5720 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5721 args->ref_ci, 5722 args->ref_root_bh, 5723 &ctxt.meta_ac, 5724 &ref_credits); 5725 5726 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5727 if (IS_ERR(ctxt.handle)) { 5728 ret = PTR_ERR(ctxt.handle); 5729 mlog_errno(ret); 5730 break; 5731 } 5732 5733 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5734 i, 0, &ctxt); 5735 5736 ocfs2_commit_trans(osb, ctxt.handle); 5737 if (ctxt.meta_ac) { 5738 ocfs2_free_alloc_context(ctxt.meta_ac); 5739 ctxt.meta_ac = NULL; 5740 } 5741 if (ret) { 5742 mlog_errno(ret); 
5743 break; 5744 } 5745 } 5746 5747 if (ctxt.meta_ac) 5748 ocfs2_free_alloc_context(ctxt.meta_ac); 5749 ocfs2_schedule_truncate_log_flush(osb, 1); 5750 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5751 return ret; 5752 } 5753 5754 /* 5755 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5756 * or change the extent record flag), we need to recalculate 5757 * the metaecc for the whole bucket. So it is done here. 5758 * 5759 * Note: 5760 * We have to give the extra credits for the caller. 5761 */ 5762 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5763 handle_t *handle, 5764 void *para) 5765 { 5766 int ret; 5767 struct ocfs2_xattr_bucket *bucket = 5768 (struct ocfs2_xattr_bucket *)para; 5769 5770 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5771 OCFS2_JOURNAL_ACCESS_WRITE); 5772 if (ret) { 5773 mlog_errno(ret); 5774 return ret; 5775 } 5776 5777 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5778 5779 return 0; 5780 } 5781 5782 /* 5783 * Special action we need if the xattr value is refcounted. 5784 * 5785 * 1. If the xattr is refcounted, lock the tree. 5786 * 2. CoW the xattr if we are setting the new value and the value 5787 * will be stored outside. 5788 * 3. In other case, decrease_refcount will work for us, so just 5789 * lock the refcount tree, calculate the meta and credits is OK. 5790 * 5791 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5792 * currently CoW is a completed transaction, while this function 5793 * will also lock the allocators and let us deadlock. So we will 5794 * CoW the whole xattr value. 
5795 */ 5796 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5797 struct ocfs2_dinode *di, 5798 struct ocfs2_xattr_info *xi, 5799 struct ocfs2_xattr_search *xis, 5800 struct ocfs2_xattr_search *xbs, 5801 struct ocfs2_refcount_tree **ref_tree, 5802 int *meta_add, 5803 int *credits) 5804 { 5805 int ret = 0; 5806 struct ocfs2_xattr_block *xb; 5807 struct ocfs2_xattr_entry *xe; 5808 char *base; 5809 u32 p_cluster, num_clusters; 5810 unsigned int ext_flags; 5811 int name_offset, name_len; 5812 struct ocfs2_xattr_value_buf vb; 5813 struct ocfs2_xattr_bucket *bucket = NULL; 5814 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5815 struct ocfs2_post_refcount refcount; 5816 struct ocfs2_post_refcount *p = NULL; 5817 struct buffer_head *ref_root_bh = NULL; 5818 5819 if (!xis->not_found) { 5820 xe = xis->here; 5821 name_offset = le16_to_cpu(xe->xe_name_offset); 5822 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5823 base = xis->base; 5824 vb.vb_bh = xis->inode_bh; 5825 vb.vb_access = ocfs2_journal_access_di; 5826 } else { 5827 int i, block_off = 0; 5828 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5829 xe = xbs->here; 5830 name_offset = le16_to_cpu(xe->xe_name_offset); 5831 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5832 i = xbs->here - xbs->header->xh_entries; 5833 5834 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5835 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5836 bucket_xh(xbs->bucket), 5837 i, &block_off, 5838 &name_offset); 5839 if (ret) { 5840 mlog_errno(ret); 5841 goto out; 5842 } 5843 base = bucket_block(xbs->bucket, block_off); 5844 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5845 vb.vb_access = ocfs2_journal_access; 5846 5847 if (ocfs2_meta_ecc(osb)) { 5848 /*create parameters for ocfs2_post_refcount. 
*/ 5849 bucket = xbs->bucket; 5850 refcount.credits = bucket->bu_blocks; 5851 refcount.para = bucket; 5852 refcount.func = 5853 ocfs2_xattr_bucket_post_refcount; 5854 p = &refcount; 5855 } 5856 } else { 5857 base = xbs->base; 5858 vb.vb_bh = xbs->xattr_bh; 5859 vb.vb_access = ocfs2_journal_access_xb; 5860 } 5861 } 5862 5863 if (ocfs2_xattr_is_local(xe)) 5864 goto out; 5865 5866 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5867 (base + name_offset + name_len); 5868 5869 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5870 &num_clusters, &vb.vb_xv->xr_list, 5871 &ext_flags); 5872 if (ret) { 5873 mlog_errno(ret); 5874 goto out; 5875 } 5876 5877 /* 5878 * We just need to check the 1st extent record, since we always 5879 * CoW the whole xattr. So there shouldn't be a xattr with 5880 * some REFCOUNT extent recs after the 1st one. 5881 */ 5882 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5883 goto out; 5884 5885 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5886 1, ref_tree, &ref_root_bh); 5887 if (ret) { 5888 mlog_errno(ret); 5889 goto out; 5890 } 5891 5892 /* 5893 * If we are deleting the xattr or the new size will be stored inside, 5894 * cool, leave it there, the xattr truncate process will remove them 5895 * for us(it still needs the refcount tree lock and the meta, credits). 5896 * And the worse case is that every cluster truncate will split the 5897 * refcount tree, and make the original extent become 3. So we will need 5898 * 2 * cluster more extent recs at most. 
5899 */ 5900 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5901 5902 ret = ocfs2_refcounted_xattr_delete_need(inode, 5903 &(*ref_tree)->rf_ci, 5904 ref_root_bh, vb.vb_xv, 5905 meta_add, credits); 5906 if (ret) 5907 mlog_errno(ret); 5908 goto out; 5909 } 5910 5911 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5912 *ref_tree, ref_root_bh, 0, 5913 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5914 if (ret) 5915 mlog_errno(ret); 5916 5917 out: 5918 brelse(ref_root_bh); 5919 return ret; 5920 } 5921 5922 /* 5923 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5924 * The physical clusters will be added to refcount tree. 5925 */ 5926 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5927 struct ocfs2_xattr_value_root *xv, 5928 struct ocfs2_extent_tree *value_et, 5929 struct ocfs2_caching_info *ref_ci, 5930 struct buffer_head *ref_root_bh, 5931 struct ocfs2_cached_dealloc_ctxt *dealloc, 5932 struct ocfs2_post_refcount *refcount) 5933 { 5934 int ret = 0; 5935 u32 clusters = le32_to_cpu(xv->xr_clusters); 5936 u32 cpos, p_cluster, num_clusters; 5937 struct ocfs2_extent_list *el = &xv->xr_list; 5938 unsigned int ext_flags; 5939 5940 cpos = 0; 5941 while (cpos < clusters) { 5942 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5943 &num_clusters, el, &ext_flags); 5944 if (ret) { 5945 mlog_errno(ret); 5946 break; 5947 } 5948 5949 cpos += num_clusters; 5950 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5951 continue; 5952 5953 BUG_ON(!p_cluster); 5954 5955 ret = ocfs2_add_refcount_flag(inode, value_et, 5956 ref_ci, ref_root_bh, 5957 cpos - num_clusters, 5958 p_cluster, num_clusters, 5959 dealloc, refcount); 5960 if (ret) { 5961 mlog_errno(ret); 5962 break; 5963 } 5964 } 5965 5966 return ret; 5967 } 5968 5969 /* 5970 * Given a normal ocfs2_xattr_header, refcount all the entries which 5971 * have value stored outside. 5972 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5973 */ 5974 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5975 struct ocfs2_xattr_value_buf *vb, 5976 struct ocfs2_xattr_header *header, 5977 struct ocfs2_caching_info *ref_ci, 5978 struct buffer_head *ref_root_bh, 5979 struct ocfs2_cached_dealloc_ctxt *dealloc) 5980 { 5981 5982 struct ocfs2_xattr_entry *xe; 5983 struct ocfs2_xattr_value_root *xv; 5984 struct ocfs2_extent_tree et; 5985 int i, ret = 0; 5986 5987 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5988 xe = &header->xh_entries[i]; 5989 5990 if (ocfs2_xattr_is_local(xe)) 5991 continue; 5992 5993 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5994 le16_to_cpu(xe->xe_name_offset) + 5995 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5996 5997 vb->vb_xv = xv; 5998 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5999 6000 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 6001 ref_ci, ref_root_bh, 6002 dealloc, NULL); 6003 if (ret) { 6004 mlog_errno(ret); 6005 break; 6006 } 6007 } 6008 6009 return ret; 6010 } 6011 6012 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 6013 struct buffer_head *fe_bh, 6014 struct ocfs2_caching_info *ref_ci, 6015 struct buffer_head *ref_root_bh, 6016 struct ocfs2_cached_dealloc_ctxt *dealloc) 6017 { 6018 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6019 struct ocfs2_xattr_header *header; 6020 int ret; 6021 struct ocfs2_xattr_value_buf vb = { 6022 .vb_bh = fe_bh, 6023 .vb_access = ocfs2_journal_access_di, 6024 }; 6025 6026 ret = ocfs2_xattr_ibody_lookup_header(inode, di, &header); 6027 if (ret) 6028 return ret; 6029 6030 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6031 ref_ci, ref_root_bh, dealloc); 6032 } 6033 6034 struct ocfs2_xattr_tree_value_refcount_para { 6035 struct ocfs2_caching_info *ref_ci; 6036 struct buffer_head *ref_root_bh; 6037 struct ocfs2_cached_dealloc_ctxt *dealloc; 6038 }; 6039 6040 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 6041 struct 
ocfs2_xattr_bucket *bucket, 6042 int offset, 6043 struct ocfs2_xattr_value_root **xv, 6044 struct buffer_head **bh) 6045 { 6046 int ret, block_off, name_offset; 6047 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6048 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6049 void *base; 6050 6051 ret = ocfs2_xattr_bucket_get_name_value(sb, 6052 bucket_xh(bucket), 6053 offset, 6054 &block_off, 6055 &name_offset); 6056 if (ret) { 6057 mlog_errno(ret); 6058 goto out; 6059 } 6060 6061 base = bucket_block(bucket, block_off); 6062 6063 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6064 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6065 6066 if (bh) 6067 *bh = bucket->bu_bhs[block_off]; 6068 out: 6069 return ret; 6070 } 6071 6072 /* 6073 * For a given xattr bucket, refcount all the entries which 6074 * have value stored outside. 6075 */ 6076 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6077 struct ocfs2_xattr_bucket *bucket, 6078 void *para) 6079 { 6080 int i, ret = 0; 6081 struct ocfs2_extent_tree et; 6082 struct ocfs2_xattr_tree_value_refcount_para *ref = 6083 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6084 struct ocfs2_xattr_header *xh = 6085 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6086 struct ocfs2_xattr_entry *xe; 6087 struct ocfs2_xattr_value_buf vb = { 6088 .vb_access = ocfs2_journal_access, 6089 }; 6090 struct ocfs2_post_refcount refcount = { 6091 .credits = bucket->bu_blocks, 6092 .para = bucket, 6093 .func = ocfs2_xattr_bucket_post_refcount, 6094 }; 6095 struct ocfs2_post_refcount *p = NULL; 6096 6097 /* We only need post_refcount if we support metaecc. 
*/ 6098 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6099 p = &refcount; 6100 6101 trace_ocfs2_xattr_bucket_value_refcount( 6102 (unsigned long long)bucket_blkno(bucket), 6103 le16_to_cpu(xh->xh_count)); 6104 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6105 xe = &xh->xh_entries[i]; 6106 6107 if (ocfs2_xattr_is_local(xe)) 6108 continue; 6109 6110 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6111 &vb.vb_xv, &vb.vb_bh); 6112 if (ret) { 6113 mlog_errno(ret); 6114 break; 6115 } 6116 6117 ocfs2_init_xattr_value_extent_tree(&et, 6118 INODE_CACHE(inode), &vb); 6119 6120 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6121 &et, ref->ref_ci, 6122 ref->ref_root_bh, 6123 ref->dealloc, p); 6124 if (ret) { 6125 mlog_errno(ret); 6126 break; 6127 } 6128 } 6129 6130 return ret; 6131 6132 } 6133 6134 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6135 struct buffer_head *root_bh, 6136 u64 blkno, u32 cpos, u32 len, void *para) 6137 { 6138 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6139 ocfs2_xattr_bucket_value_refcount, 6140 para); 6141 } 6142 6143 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6144 struct buffer_head *blk_bh, 6145 struct ocfs2_caching_info *ref_ci, 6146 struct buffer_head *ref_root_bh, 6147 struct ocfs2_cached_dealloc_ctxt *dealloc) 6148 { 6149 int ret = 0; 6150 struct ocfs2_xattr_block *xb = 6151 (struct ocfs2_xattr_block *)blk_bh->b_data; 6152 6153 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6154 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6155 struct ocfs2_xattr_value_buf vb = { 6156 .vb_bh = blk_bh, 6157 .vb_access = ocfs2_journal_access_xb, 6158 }; 6159 6160 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6161 ref_ci, ref_root_bh, 6162 dealloc); 6163 } else { 6164 struct ocfs2_xattr_tree_value_refcount_para para = { 6165 .ref_ci = ref_ci, 6166 .ref_root_bh = ref_root_bh, 6167 .dealloc = dealloc, 6168 }; 6169 6170 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6171 ocfs2_refcount_xattr_tree_rec, 6172 ¶); 6173 } 6174 6175 return ret; 6176 } 6177 6178 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6179 struct buffer_head *fe_bh, 6180 struct ocfs2_caching_info *ref_ci, 6181 struct buffer_head *ref_root_bh, 6182 struct ocfs2_cached_dealloc_ctxt *dealloc) 6183 { 6184 int ret = 0; 6185 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6186 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6187 struct buffer_head *blk_bh = NULL; 6188 6189 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6190 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6191 ref_ci, ref_root_bh, 6192 dealloc); 6193 if (ret) { 6194 mlog_errno(ret); 6195 goto out; 6196 } 6197 } 6198 6199 if (!di->i_xattr_loc) 6200 goto out; 6201 6202 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6203 &blk_bh); 6204 if (ret < 0) { 6205 mlog_errno(ret); 6206 goto out; 6207 } 6208 6209 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6210 ref_root_bh, dealloc); 6211 if (ret) 6212 mlog_errno(ret); 6213 6214 brelse(blk_bh); 6215 out: 6216 6217 return ret; 6218 } 6219 6220 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6221 /* 6222 * Store the information we need in xattr reflink. 6223 * old_bh and new_bh are inode bh for the old and new inode. 6224 */ 6225 struct ocfs2_xattr_reflink { 6226 struct inode *old_inode; 6227 struct inode *new_inode; 6228 struct buffer_head *old_bh; 6229 struct buffer_head *new_bh; 6230 struct ocfs2_caching_info *ref_ci; 6231 struct buffer_head *ref_root_bh; 6232 struct ocfs2_cached_dealloc_ctxt *dealloc; 6233 should_xattr_reflinked *xattr_reflinked; 6234 }; 6235 6236 /* 6237 * Given a xattr header and xe offset, 6238 * return the proper xv and the corresponding bh. 6239 * xattr in inode, block and xattr tree have different implementations. 
6240 */ 6241 typedef int (get_xattr_value_root)(struct super_block *sb, 6242 struct buffer_head *bh, 6243 struct ocfs2_xattr_header *xh, 6244 int offset, 6245 struct ocfs2_xattr_value_root **xv, 6246 struct buffer_head **ret_bh, 6247 void *para); 6248 6249 /* 6250 * Calculate all the xattr value root metadata stored in this xattr header and 6251 * credits we need if we create them from the scratch. 6252 * We use get_xattr_value_root so that all types of xattr container can use it. 6253 */ 6254 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6255 struct buffer_head *bh, 6256 struct ocfs2_xattr_header *xh, 6257 int *metas, int *credits, 6258 int *num_recs, 6259 get_xattr_value_root *func, 6260 void *para) 6261 { 6262 int i, ret = 0; 6263 struct ocfs2_xattr_value_root *xv; 6264 struct ocfs2_xattr_entry *xe; 6265 6266 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6267 xe = &xh->xh_entries[i]; 6268 if (ocfs2_xattr_is_local(xe)) 6269 continue; 6270 6271 ret = func(sb, bh, xh, i, &xv, NULL, para); 6272 if (ret) { 6273 mlog_errno(ret); 6274 break; 6275 } 6276 6277 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6278 le16_to_cpu(xv->xr_list.l_next_free_rec); 6279 6280 *credits += ocfs2_calc_extend_credits(sb, 6281 &def_xv.xv.xr_list); 6282 6283 /* 6284 * If the value is a tree with depth > 1, We don't go deep 6285 * to the extent block, so just calculate a maximum record num. 6286 */ 6287 if (!xv->xr_list.l_tree_depth) 6288 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6289 else 6290 *num_recs += ocfs2_clusters_for_bytes(sb, 6291 XATTR_SIZE_MAX); 6292 } 6293 6294 return ret; 6295 } 6296 6297 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6298 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6299 struct buffer_head *bh, 6300 struct ocfs2_xattr_header *xh, 6301 int offset, 6302 struct ocfs2_xattr_value_root **xv, 6303 struct buffer_head **ret_bh, 6304 void *para) 6305 { 6306 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6307 6308 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6309 le16_to_cpu(xe->xe_name_offset) + 6310 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6311 6312 if (ret_bh) 6313 *ret_bh = bh; 6314 6315 return 0; 6316 } 6317 6318 /* 6319 * Lock the meta_ac and calculate how much credits we need for reflink xattrs. 6320 * It is only used for inline xattr and xattr block. 6321 */ 6322 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6323 struct ocfs2_xattr_header *xh, 6324 struct buffer_head *ref_root_bh, 6325 int *credits, 6326 struct ocfs2_alloc_context **meta_ac) 6327 { 6328 int ret, meta_add = 0, num_recs = 0; 6329 struct ocfs2_refcount_block *rb = 6330 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6331 6332 *credits = 0; 6333 6334 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6335 &meta_add, credits, &num_recs, 6336 ocfs2_get_xattr_value_root, 6337 NULL); 6338 if (ret) { 6339 mlog_errno(ret); 6340 goto out; 6341 } 6342 6343 /* 6344 * We need to add/modify num_recs in refcount tree, so just calculate 6345 * an approximate number we need for refcount tree change. 6346 * Sometimes we need to split the tree, and after split, half recs 6347 * will be moved to the new block, and a new block can only provide 6348 * half number of recs. So we multiple new blocks by 2. 
6349 */ 6350 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6351 meta_add += num_recs; 6352 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6353 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6354 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6355 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6356 else 6357 *credits += 1; 6358 6359 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6360 if (ret) 6361 mlog_errno(ret); 6362 6363 out: 6364 return ret; 6365 } 6366 6367 /* 6368 * Given a xattr header, reflink all the xattrs in this container. 6369 * It can be used for inode, block and bucket. 6370 * 6371 * NOTE: 6372 * Before we call this function, the caller has memcpy the xattr in 6373 * old_xh to the new_xh. 6374 * 6375 * If args.xattr_reflinked is set, call it to decide whether the xe should 6376 * be reflinked or not. If not, remove it from the new xattr header. 6377 */ 6378 static int ocfs2_reflink_xattr_header(handle_t *handle, 6379 struct ocfs2_xattr_reflink *args, 6380 struct buffer_head *old_bh, 6381 struct ocfs2_xattr_header *xh, 6382 struct buffer_head *new_bh, 6383 struct ocfs2_xattr_header *new_xh, 6384 struct ocfs2_xattr_value_buf *vb, 6385 struct ocfs2_alloc_context *meta_ac, 6386 get_xattr_value_root *func, 6387 void *para) 6388 { 6389 int ret = 0, i, j; 6390 struct super_block *sb = args->old_inode->i_sb; 6391 struct buffer_head *value_bh; 6392 struct ocfs2_xattr_entry *xe, *last; 6393 struct ocfs2_xattr_value_root *xv, *new_xv; 6394 struct ocfs2_extent_tree data_et; 6395 u32 clusters, cpos, p_cluster, num_clusters; 6396 unsigned int ext_flags = 0; 6397 6398 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6399 le16_to_cpu(xh->xh_count)); 6400 6401 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1; 6402 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6403 xe = &xh->xh_entries[i]; 6404 6405 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6406 xe = &new_xh->xh_entries[j]; 6407 6408 le16_add_cpu(&new_xh->xh_count, -1); 6409 if (new_xh->xh_count) { 6410 memmove(xe, xe + 1, 6411 (void *)last - (void *)xe); 6412 memset(last, 0, 6413 sizeof(struct ocfs2_xattr_entry)); 6414 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1; 6415 } else { 6416 memset(xe, 0, sizeof(struct ocfs2_xattr_entry)); 6417 last = NULL; 6418 } 6419 6420 /* 6421 * We don't want j to increase in the next round since 6422 * it is already moved ahead. 6423 */ 6424 j--; 6425 continue; 6426 } 6427 6428 if (ocfs2_xattr_is_local(xe)) 6429 continue; 6430 6431 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6432 if (ret) { 6433 mlog_errno(ret); 6434 break; 6435 } 6436 6437 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6438 if (ret) { 6439 mlog_errno(ret); 6440 break; 6441 } 6442 6443 /* 6444 * For the xattr which has l_tree_depth = 0, all the extent 6445 * recs have already be copied to the new xh with the 6446 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6447 * increase the refount count int the refcount tree. 6448 * 6449 * For the xattr which has l_tree_depth > 0, we need 6450 * to initialize it to the empty default value root, 6451 * and then insert the extents one by one. 
6452 */ 6453 if (xv->xr_list.l_tree_depth) { 6454 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); 6455 vb->vb_xv = new_xv; 6456 vb->vb_bh = value_bh; 6457 ocfs2_init_xattr_value_extent_tree(&data_et, 6458 INODE_CACHE(args->new_inode), vb); 6459 } 6460 6461 clusters = le32_to_cpu(xv->xr_clusters); 6462 cpos = 0; 6463 while (cpos < clusters) { 6464 ret = ocfs2_xattr_get_clusters(args->old_inode, 6465 cpos, 6466 &p_cluster, 6467 &num_clusters, 6468 &xv->xr_list, 6469 &ext_flags); 6470 if (ret) { 6471 mlog_errno(ret); 6472 goto out; 6473 } 6474 6475 BUG_ON(!p_cluster); 6476 6477 if (xv->xr_list.l_tree_depth) { 6478 ret = ocfs2_insert_extent(handle, 6479 &data_et, cpos, 6480 ocfs2_clusters_to_blocks( 6481 args->old_inode->i_sb, 6482 p_cluster), 6483 num_clusters, ext_flags, 6484 meta_ac); 6485 if (ret) { 6486 mlog_errno(ret); 6487 goto out; 6488 } 6489 } 6490 6491 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6492 args->ref_root_bh, 6493 p_cluster, num_clusters, 6494 meta_ac, args->dealloc); 6495 if (ret) { 6496 mlog_errno(ret); 6497 goto out; 6498 } 6499 6500 cpos += num_clusters; 6501 } 6502 } 6503 6504 out: 6505 return ret; 6506 } 6507 6508 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6509 { 6510 int ret = 0, credits = 0; 6511 handle_t *handle; 6512 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6513 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6514 int inline_size; 6515 int header_off; 6516 struct ocfs2_xattr_header *xh; 6517 struct ocfs2_xattr_header *new_xh; 6518 struct ocfs2_alloc_context *meta_ac = NULL; 6519 struct ocfs2_inode_info *new_oi; 6520 struct ocfs2_dinode *new_di; 6521 struct ocfs2_xattr_value_buf vb = { 6522 .vb_bh = args->new_bh, 6523 .vb_access = ocfs2_journal_access_di, 6524 }; 6525 6526 ret = ocfs2_xattr_ibody_lookup_header(args->old_inode, di, &xh); 6527 if (ret) 6528 goto out; 6529 6530 inline_size = le16_to_cpu(di->i_xattr_inline_size); 6531 header_off = 
osb->sb->s_blocksize - inline_size; 6532 new_xh = (struct ocfs2_xattr_header *) 6533 (args->new_bh->b_data + header_off); 6534 6535 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6536 &credits, &meta_ac); 6537 if (ret) { 6538 mlog_errno(ret); 6539 goto out; 6540 } 6541 6542 handle = ocfs2_start_trans(osb, credits); 6543 if (IS_ERR(handle)) { 6544 ret = PTR_ERR(handle); 6545 mlog_errno(ret); 6546 goto out; 6547 } 6548 6549 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6550 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6551 if (ret) { 6552 mlog_errno(ret); 6553 goto out_commit; 6554 } 6555 6556 memcpy(args->new_bh->b_data + header_off, 6557 args->old_bh->b_data + header_off, inline_size); 6558 6559 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6560 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6561 6562 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6563 args->new_bh, new_xh, &vb, meta_ac, 6564 ocfs2_get_xattr_value_root, NULL); 6565 if (ret) { 6566 mlog_errno(ret); 6567 goto out_commit; 6568 } 6569 6570 new_oi = OCFS2_I(args->new_inode); 6571 6572 spin_lock(&new_oi->ip_lock); 6573 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6574 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6575 spin_unlock(&new_oi->ip_lock); 6576 6577 ocfs2_journal_dirty(handle, args->new_bh); 6578 6579 out_commit: 6580 ocfs2_commit_trans(osb, handle); 6581 6582 out: 6583 if (meta_ac) 6584 ocfs2_free_alloc_context(meta_ac); 6585 return ret; 6586 } 6587 6588 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6589 struct buffer_head *fe_bh, 6590 struct buffer_head **ret_bh, 6591 int indexed) 6592 { 6593 int ret; 6594 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6595 struct ocfs2_xattr_set_ctxt ctxt; 6596 6597 memset(&ctxt, 0, sizeof(ctxt)); 6598 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6599 if (ret < 0) { 6600 mlog_errno(ret); 6601 return 
ret; 6602 } 6603 6604 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6605 if (IS_ERR(ctxt.handle)) { 6606 ret = PTR_ERR(ctxt.handle); 6607 mlog_errno(ret); 6608 goto out; 6609 } 6610 6611 trace_ocfs2_create_empty_xattr_block( 6612 (unsigned long long)fe_bh->b_blocknr, indexed); 6613 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6614 ret_bh); 6615 if (ret) 6616 mlog_errno(ret); 6617 6618 ocfs2_commit_trans(osb, ctxt.handle); 6619 out: 6620 ocfs2_free_alloc_context(ctxt.meta_ac); 6621 return ret; 6622 } 6623 6624 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6625 struct buffer_head *blk_bh, 6626 struct buffer_head *new_blk_bh) 6627 { 6628 int ret = 0, credits = 0; 6629 handle_t *handle; 6630 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6631 struct ocfs2_dinode *new_di; 6632 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6633 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6634 struct ocfs2_xattr_block *xb = 6635 (struct ocfs2_xattr_block *)blk_bh->b_data; 6636 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6637 struct ocfs2_xattr_block *new_xb = 6638 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6639 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6640 struct ocfs2_alloc_context *meta_ac; 6641 struct ocfs2_xattr_value_buf vb = { 6642 .vb_bh = new_blk_bh, 6643 .vb_access = ocfs2_journal_access_xb, 6644 }; 6645 6646 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6647 &credits, &meta_ac); 6648 if (ret) { 6649 mlog_errno(ret); 6650 return ret; 6651 } 6652 6653 /* One more credits in case we need to add xattr flags in new inode. 
*/ 6654 handle = ocfs2_start_trans(osb, credits + 1); 6655 if (IS_ERR(handle)) { 6656 ret = PTR_ERR(handle); 6657 mlog_errno(ret); 6658 goto out; 6659 } 6660 6661 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6662 ret = ocfs2_journal_access_di(handle, 6663 INODE_CACHE(args->new_inode), 6664 args->new_bh, 6665 OCFS2_JOURNAL_ACCESS_WRITE); 6666 if (ret) { 6667 mlog_errno(ret); 6668 goto out_commit; 6669 } 6670 } 6671 6672 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6673 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6674 if (ret) { 6675 mlog_errno(ret); 6676 goto out_commit; 6677 } 6678 6679 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6680 osb->sb->s_blocksize - header_off); 6681 6682 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6683 new_blk_bh, new_xh, &vb, meta_ac, 6684 ocfs2_get_xattr_value_root, NULL); 6685 if (ret) { 6686 mlog_errno(ret); 6687 goto out_commit; 6688 } 6689 6690 ocfs2_journal_dirty(handle, new_blk_bh); 6691 6692 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6693 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6694 spin_lock(&new_oi->ip_lock); 6695 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6696 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6697 spin_unlock(&new_oi->ip_lock); 6698 6699 ocfs2_journal_dirty(handle, args->new_bh); 6700 } 6701 6702 out_commit: 6703 ocfs2_commit_trans(osb, handle); 6704 6705 out: 6706 ocfs2_free_alloc_context(meta_ac); 6707 return ret; 6708 } 6709 6710 struct ocfs2_reflink_xattr_tree_args { 6711 struct ocfs2_xattr_reflink *reflink; 6712 struct buffer_head *old_blk_bh; 6713 struct buffer_head *new_blk_bh; 6714 struct ocfs2_xattr_bucket *old_bucket; 6715 struct ocfs2_xattr_bucket *new_bucket; 6716 }; 6717 6718 /* 6719 * NOTE: 6720 * We have to handle the case that both old bucket and new bucket 6721 * will call this function to get the right ret_bh. 6722 * So The caller must give us the right bh. 
6723 */ 6724 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6725 struct buffer_head *bh, 6726 struct ocfs2_xattr_header *xh, 6727 int offset, 6728 struct ocfs2_xattr_value_root **xv, 6729 struct buffer_head **ret_bh, 6730 void *para) 6731 { 6732 struct ocfs2_reflink_xattr_tree_args *args = 6733 (struct ocfs2_reflink_xattr_tree_args *)para; 6734 struct ocfs2_xattr_bucket *bucket; 6735 6736 if (bh == args->old_bucket->bu_bhs[0]) 6737 bucket = args->old_bucket; 6738 else 6739 bucket = args->new_bucket; 6740 6741 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6742 xv, ret_bh); 6743 } 6744 6745 struct ocfs2_value_tree_metas { 6746 int num_metas; 6747 int credits; 6748 int num_recs; 6749 }; 6750 6751 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6752 struct buffer_head *bh, 6753 struct ocfs2_xattr_header *xh, 6754 int offset, 6755 struct ocfs2_xattr_value_root **xv, 6756 struct buffer_head **ret_bh, 6757 void *para) 6758 { 6759 struct ocfs2_xattr_bucket *bucket = 6760 (struct ocfs2_xattr_bucket *)para; 6761 6762 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6763 xv, ret_bh); 6764 } 6765 6766 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6767 struct ocfs2_xattr_bucket *bucket, 6768 void *para) 6769 { 6770 struct ocfs2_value_tree_metas *metas = 6771 (struct ocfs2_value_tree_metas *)para; 6772 struct ocfs2_xattr_header *xh = 6773 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6774 6775 /* Add the credits for this bucket first. 
*/ 6776 metas->credits += bucket->bu_blocks; 6777 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6778 xh, &metas->num_metas, 6779 &metas->credits, &metas->num_recs, 6780 ocfs2_value_tree_metas_in_bucket, 6781 bucket); 6782 } 6783 6784 /* 6785 * Given a xattr extent rec starting from blkno and having len clusters, 6786 * iterate all the buckets calculate how much metadata we need for reflinking 6787 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6788 */ 6789 static int ocfs2_lock_reflink_xattr_rec_allocators( 6790 struct ocfs2_reflink_xattr_tree_args *args, 6791 struct ocfs2_extent_tree *xt_et, 6792 u64 blkno, u32 len, int *credits, 6793 struct ocfs2_alloc_context **meta_ac, 6794 struct ocfs2_alloc_context **data_ac) 6795 { 6796 int ret, num_free_extents; 6797 struct ocfs2_value_tree_metas metas; 6798 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6799 struct ocfs2_refcount_block *rb; 6800 6801 memset(&metas, 0, sizeof(metas)); 6802 6803 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6804 ocfs2_calc_value_tree_metas, &metas); 6805 if (ret) { 6806 mlog_errno(ret); 6807 goto out; 6808 } 6809 6810 *credits = metas.credits; 6811 6812 /* 6813 * Calculate we need for refcount tree change. 6814 * 6815 * We need to add/modify num_recs in refcount tree, so just calculate 6816 * an approximate number we need for refcount tree change. 6817 * Sometimes we need to split the tree, and after split, half recs 6818 * will be moved to the new block, and a new block can only provide 6819 * half number of recs. So we multiple new blocks by 2. 6820 * In the end, we have to add credits for modifying the already 6821 * existed refcount block. 
6822 */ 6823 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6824 metas.num_recs = 6825 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6826 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6827 metas.num_metas += metas.num_recs; 6828 *credits += metas.num_recs + 6829 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6830 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6831 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6832 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6833 else 6834 *credits += 1; 6835 6836 /* count in the xattr tree change. */ 6837 num_free_extents = ocfs2_num_free_extents(xt_et); 6838 if (num_free_extents < 0) { 6839 ret = num_free_extents; 6840 mlog_errno(ret); 6841 goto out; 6842 } 6843 6844 if (num_free_extents < len) 6845 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6846 6847 *credits += ocfs2_calc_extend_credits(osb->sb, 6848 xt_et->et_root_el); 6849 6850 if (metas.num_metas) { 6851 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6852 meta_ac); 6853 if (ret) { 6854 mlog_errno(ret); 6855 goto out; 6856 } 6857 } 6858 6859 if (len) { 6860 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6861 if (ret) 6862 mlog_errno(ret); 6863 } 6864 out: 6865 if (ret) { 6866 if (*meta_ac) { 6867 ocfs2_free_alloc_context(*meta_ac); 6868 *meta_ac = NULL; 6869 } 6870 } 6871 6872 return ret; 6873 } 6874 6875 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6876 u64 blkno, u64 new_blkno, u32 clusters, 6877 u32 *cpos, int num_buckets, 6878 struct ocfs2_alloc_context *meta_ac, 6879 struct ocfs2_alloc_context *data_ac, 6880 struct ocfs2_reflink_xattr_tree_args *args) 6881 { 6882 int i, j, ret = 0; 6883 struct super_block *sb = args->reflink->old_inode->i_sb; 6884 int bpb = args->old_bucket->bu_blocks; 6885 struct ocfs2_xattr_value_buf vb = { 6886 .vb_access = ocfs2_journal_access, 6887 }; 6888 6889 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6890 ret = 
ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6891 if (ret) { 6892 mlog_errno(ret); 6893 break; 6894 } 6895 6896 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6897 if (ret) { 6898 mlog_errno(ret); 6899 break; 6900 } 6901 6902 ret = ocfs2_xattr_bucket_journal_access(handle, 6903 args->new_bucket, 6904 OCFS2_JOURNAL_ACCESS_CREATE); 6905 if (ret) { 6906 mlog_errno(ret); 6907 break; 6908 } 6909 6910 for (j = 0; j < bpb; j++) 6911 memcpy(bucket_block(args->new_bucket, j), 6912 bucket_block(args->old_bucket, j), 6913 sb->s_blocksize); 6914 6915 /* 6916 * Record the start cpos so that we can use it to initialize 6917 * our xattr tree we also set the xh_num_bucket for the new 6918 * bucket. 6919 */ 6920 if (i == 0) { 6921 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6922 xh_entries[0].xe_name_hash); 6923 bucket_xh(args->new_bucket)->xh_num_buckets = 6924 cpu_to_le16(num_buckets); 6925 } 6926 6927 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6928 6929 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6930 args->old_bucket->bu_bhs[0], 6931 bucket_xh(args->old_bucket), 6932 args->new_bucket->bu_bhs[0], 6933 bucket_xh(args->new_bucket), 6934 &vb, meta_ac, 6935 ocfs2_get_reflink_xattr_value_root, 6936 args); 6937 if (ret) { 6938 mlog_errno(ret); 6939 break; 6940 } 6941 6942 /* 6943 * Re-access and dirty the bucket to calculate metaecc. 6944 * Because we may extend the transaction in reflink_xattr_header 6945 * which will let the already accessed block gone. 
6946 */ 6947 ret = ocfs2_xattr_bucket_journal_access(handle, 6948 args->new_bucket, 6949 OCFS2_JOURNAL_ACCESS_WRITE); 6950 if (ret) { 6951 mlog_errno(ret); 6952 break; 6953 } 6954 6955 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6956 6957 ocfs2_xattr_bucket_relse(args->old_bucket); 6958 ocfs2_xattr_bucket_relse(args->new_bucket); 6959 } 6960 6961 ocfs2_xattr_bucket_relse(args->old_bucket); 6962 ocfs2_xattr_bucket_relse(args->new_bucket); 6963 return ret; 6964 } 6965 6966 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6967 struct inode *inode, 6968 struct ocfs2_reflink_xattr_tree_args *args, 6969 struct ocfs2_extent_tree *et, 6970 struct ocfs2_alloc_context *meta_ac, 6971 struct ocfs2_alloc_context *data_ac, 6972 u64 blkno, u32 cpos, u32 len) 6973 { 6974 int ret, first_inserted = 0; 6975 u32 p_cluster, num_clusters, reflink_cpos = 0; 6976 u64 new_blkno; 6977 unsigned int num_buckets, reflink_buckets; 6978 unsigned int bpc = 6979 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6980 6981 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6982 if (ret) { 6983 mlog_errno(ret); 6984 goto out; 6985 } 6986 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6987 ocfs2_xattr_bucket_relse(args->old_bucket); 6988 6989 while (len && num_buckets) { 6990 ret = ocfs2_claim_clusters(handle, data_ac, 6991 1, &p_cluster, &num_clusters); 6992 if (ret) { 6993 mlog_errno(ret); 6994 goto out; 6995 } 6996 6997 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6998 reflink_buckets = min(num_buckets, bpc * num_clusters); 6999 7000 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 7001 new_blkno, num_clusters, 7002 &reflink_cpos, reflink_buckets, 7003 meta_ac, data_ac, args); 7004 if (ret) { 7005 mlog_errno(ret); 7006 goto out; 7007 } 7008 7009 /* 7010 * For the 1st allocated cluster, we make it use the same cpos 7011 * so that the xattr tree looks the same as the original one 7012 * in the most case. 
7013 */ 7014 if (!first_inserted) { 7015 reflink_cpos = cpos; 7016 first_inserted = 1; 7017 } 7018 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 7019 num_clusters, 0, meta_ac); 7020 if (ret) 7021 mlog_errno(ret); 7022 7023 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 7024 num_clusters, reflink_cpos); 7025 7026 len -= num_clusters; 7027 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 7028 num_buckets -= reflink_buckets; 7029 } 7030 out: 7031 return ret; 7032 } 7033 7034 /* 7035 * Create the same xattr extent record in the new inode's xattr tree. 7036 */ 7037 static int ocfs2_reflink_xattr_rec(struct inode *inode, 7038 struct buffer_head *root_bh, 7039 u64 blkno, 7040 u32 cpos, 7041 u32 len, 7042 void *para) 7043 { 7044 int ret, credits = 0; 7045 handle_t *handle; 7046 struct ocfs2_reflink_xattr_tree_args *args = 7047 (struct ocfs2_reflink_xattr_tree_args *)para; 7048 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7049 struct ocfs2_alloc_context *meta_ac = NULL; 7050 struct ocfs2_alloc_context *data_ac = NULL; 7051 struct ocfs2_extent_tree et; 7052 7053 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7054 7055 ocfs2_init_xattr_tree_extent_tree(&et, 7056 INODE_CACHE(args->reflink->new_inode), 7057 args->new_blk_bh); 7058 7059 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7060 len, &credits, 7061 &meta_ac, &data_ac); 7062 if (ret) { 7063 mlog_errno(ret); 7064 goto out; 7065 } 7066 7067 handle = ocfs2_start_trans(osb, credits); 7068 if (IS_ERR(handle)) { 7069 ret = PTR_ERR(handle); 7070 mlog_errno(ret); 7071 goto out; 7072 } 7073 7074 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7075 meta_ac, data_ac, 7076 blkno, cpos, len); 7077 if (ret) 7078 mlog_errno(ret); 7079 7080 ocfs2_commit_trans(osb, handle); 7081 7082 out: 7083 if (meta_ac) 7084 ocfs2_free_alloc_context(meta_ac); 7085 if (data_ac) 7086 ocfs2_free_alloc_context(data_ac); 7087 return ret; 7088 } 7089 7090 /* 
7091 * Create reflinked xattr buckets. 7092 * We will add bucket one by one, and refcount all the xattrs in the bucket 7093 * if they are stored outside. 7094 */ 7095 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7096 struct buffer_head *blk_bh, 7097 struct buffer_head *new_blk_bh) 7098 { 7099 int ret; 7100 struct ocfs2_reflink_xattr_tree_args para; 7101 7102 memset(¶, 0, sizeof(para)); 7103 para.reflink = args; 7104 para.old_blk_bh = blk_bh; 7105 para.new_blk_bh = new_blk_bh; 7106 7107 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7108 if (!para.old_bucket) { 7109 mlog_errno(-ENOMEM); 7110 return -ENOMEM; 7111 } 7112 7113 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7114 if (!para.new_bucket) { 7115 ret = -ENOMEM; 7116 mlog_errno(ret); 7117 goto out; 7118 } 7119 7120 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7121 ocfs2_reflink_xattr_rec, 7122 ¶); 7123 if (ret) 7124 mlog_errno(ret); 7125 7126 out: 7127 ocfs2_xattr_bucket_free(para.old_bucket); 7128 ocfs2_xattr_bucket_free(para.new_bucket); 7129 return ret; 7130 } 7131 7132 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7133 struct buffer_head *blk_bh) 7134 { 7135 int ret, indexed = 0; 7136 struct buffer_head *new_blk_bh = NULL; 7137 struct ocfs2_xattr_block *xb = 7138 (struct ocfs2_xattr_block *)blk_bh->b_data; 7139 7140 7141 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7142 indexed = 1; 7143 7144 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7145 &new_blk_bh, indexed); 7146 if (ret) { 7147 mlog_errno(ret); 7148 goto out; 7149 } 7150 7151 if (!indexed) 7152 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7153 else 7154 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7155 if (ret) 7156 mlog_errno(ret); 7157 7158 out: 7159 brelse(new_blk_bh); 7160 return ret; 7161 } 7162 7163 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7164 { 7165 int type = 
ocfs2_xattr_get_type(xe); 7166 7167 return type != OCFS2_XATTR_INDEX_SECURITY && 7168 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7169 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7170 } 7171 7172 int ocfs2_reflink_xattrs(struct inode *old_inode, 7173 struct buffer_head *old_bh, 7174 struct inode *new_inode, 7175 struct buffer_head *new_bh, 7176 bool preserve_security) 7177 { 7178 int ret; 7179 struct ocfs2_xattr_reflink args; 7180 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7181 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7182 struct buffer_head *blk_bh = NULL; 7183 struct ocfs2_cached_dealloc_ctxt dealloc; 7184 struct ocfs2_refcount_tree *ref_tree; 7185 struct buffer_head *ref_root_bh = NULL; 7186 7187 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7188 le64_to_cpu(di->i_refcount_loc), 7189 1, &ref_tree, &ref_root_bh); 7190 if (ret) { 7191 mlog_errno(ret); 7192 goto out; 7193 } 7194 7195 ocfs2_init_dealloc_ctxt(&dealloc); 7196 7197 args.old_inode = old_inode; 7198 args.new_inode = new_inode; 7199 args.old_bh = old_bh; 7200 args.new_bh = new_bh; 7201 args.ref_ci = &ref_tree->rf_ci; 7202 args.ref_root_bh = ref_root_bh; 7203 args.dealloc = &dealloc; 7204 if (preserve_security) 7205 args.xattr_reflinked = NULL; 7206 else 7207 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7208 7209 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7210 ret = ocfs2_reflink_xattr_inline(&args); 7211 if (ret) { 7212 mlog_errno(ret); 7213 goto out_unlock; 7214 } 7215 } 7216 7217 if (!di->i_xattr_loc) 7218 goto out_unlock; 7219 7220 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7221 &blk_bh); 7222 if (ret < 0) { 7223 mlog_errno(ret); 7224 goto out_unlock; 7225 } 7226 7227 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7228 if (ret) 7229 mlog_errno(ret); 7230 7231 brelse(blk_bh); 7232 7233 out_unlock: 7234 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7235 ref_tree, 1); 7236 brelse(ref_root_bh); 7237 7238 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7239 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7240 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7241 } 7242 7243 out: 7244 return ret; 7245 } 7246 7247 /* 7248 * Initialize security and acl for a already created inode. 7249 * Used for reflink a non-preserve-security file. 7250 * 7251 * It uses common api like ocfs2_xattr_set, so the caller 7252 * must not hold any lock expect i_rwsem. 7253 */ 7254 int ocfs2_init_security_and_acl(struct inode *dir, 7255 struct inode *inode, 7256 const struct qstr *qstr) 7257 { 7258 int ret = 0; 7259 struct buffer_head *dir_bh = NULL; 7260 7261 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7262 if (ret) { 7263 mlog_errno(ret); 7264 goto leave; 7265 } 7266 7267 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7268 if (ret) { 7269 mlog_errno(ret); 7270 goto leave; 7271 } 7272 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7273 if (ret) 7274 mlog_errno(ret); 7275 7276 ocfs2_inode_unlock(dir, 0); 7277 brelse(dir_bh); 7278 leave: 7279 return ret; 7280 } 7281 7282 /* 7283 * 'security' attributes support 7284 */ 7285 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7286 struct dentry *unused, struct inode *inode, 7287 const char *name, void *buffer, size_t size) 7288 { 7289 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, 7290 name, buffer, size); 7291 } 7292 7293 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7294 struct mnt_idmap *idmap, 7295 struct dentry *unused, struct inode *inode, 7296 const char *name, const void *value, 7297 size_t size, int flags) 7298 { 7299 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7300 name, value, size, flags); 7301 } 7302 7303 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7304 void *fs_info) 7305 { 7306 struct ocfs2_security_xattr_info *si = fs_info; 7307 const struct xattr *xattr; 7308 int err = 0; 
7309 7310 if (si) { 7311 si->value = kmemdup(xattr_array->value, xattr_array->value_len, 7312 GFP_KERNEL); 7313 if (!si->value) 7314 return -ENOMEM; 7315 7316 si->name = xattr_array->name; 7317 si->value_len = xattr_array->value_len; 7318 return 0; 7319 } 7320 7321 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7322 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7323 xattr->name, xattr->value, 7324 xattr->value_len, XATTR_CREATE); 7325 if (err) 7326 break; 7327 } 7328 return err; 7329 } 7330 7331 int ocfs2_init_security_get(struct inode *inode, 7332 struct inode *dir, 7333 const struct qstr *qstr, 7334 struct ocfs2_security_xattr_info *si) 7335 { 7336 int ret; 7337 7338 /* check whether ocfs2 support feature xattr */ 7339 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7340 return -EOPNOTSUPP; 7341 if (si) { 7342 ret = security_inode_init_security(inode, dir, qstr, 7343 &ocfs2_initxattrs, si); 7344 /* 7345 * security_inode_init_security() does not return -EOPNOTSUPP, 7346 * we have to check the xattr ourselves. 
7347 */ 7348 if (!ret && !si->name) 7349 si->enable = 0; 7350 7351 return ret; 7352 } 7353 7354 return security_inode_init_security(inode, dir, qstr, 7355 &ocfs2_initxattrs, NULL); 7356 } 7357 7358 int ocfs2_init_security_set(handle_t *handle, 7359 struct inode *inode, 7360 struct buffer_head *di_bh, 7361 struct ocfs2_security_xattr_info *si, 7362 struct ocfs2_alloc_context *xattr_ac, 7363 struct ocfs2_alloc_context *data_ac) 7364 { 7365 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7366 OCFS2_XATTR_INDEX_SECURITY, 7367 si->name, si->value, si->value_len, 0, 7368 xattr_ac, data_ac); 7369 } 7370 7371 const struct xattr_handler ocfs2_xattr_security_handler = { 7372 .prefix = XATTR_SECURITY_PREFIX, 7373 .get = ocfs2_xattr_security_get, 7374 .set = ocfs2_xattr_security_set, 7375 }; 7376 7377 /* 7378 * 'trusted' attributes support 7379 */ 7380 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7381 struct dentry *unused, struct inode *inode, 7382 const char *name, void *buffer, size_t size) 7383 { 7384 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, 7385 name, buffer, size); 7386 } 7387 7388 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7389 struct mnt_idmap *idmap, 7390 struct dentry *unused, struct inode *inode, 7391 const char *name, const void *value, 7392 size_t size, int flags) 7393 { 7394 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, 7395 name, value, size, flags); 7396 } 7397 7398 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7399 .prefix = XATTR_TRUSTED_PREFIX, 7400 .get = ocfs2_xattr_trusted_get, 7401 .set = ocfs2_xattr_trusted_set, 7402 }; 7403 7404 /* 7405 * 'user' attributes support 7406 */ 7407 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7408 struct dentry *unused, struct inode *inode, 7409 const char *name, void *buffer, size_t size) 7410 { 7411 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7412 7413 if (osb->s_mount_opt & 
OCFS2_MOUNT_NOUSERXATTR) 7414 return -EOPNOTSUPP; 7415 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7416 buffer, size); 7417 } 7418 7419 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7420 struct mnt_idmap *idmap, 7421 struct dentry *unused, struct inode *inode, 7422 const char *name, const void *value, 7423 size_t size, int flags) 7424 { 7425 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7426 7427 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7428 return -EOPNOTSUPP; 7429 7430 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7431 name, value, size, flags); 7432 } 7433 7434 const struct xattr_handler ocfs2_xattr_user_handler = { 7435 .prefix = XATTR_USER_PREFIX, 7436 .get = ocfs2_xattr_user_get, 7437 .set = ocfs2_xattr_user_set, 7438 }; 7439