1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * xattr.c 5 * 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 7 * 8 * CREDITS: 9 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public 14 * License version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 */ 21 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/types.h> 25 #include <linux/slab.h> 26 #include <linux/highmem.h> 27 #include <linux/pagemap.h> 28 #include <linux/uio.h> 29 #include <linux/sched.h> 30 #include <linux/splice.h> 31 #include <linux/mount.h> 32 #include <linux/writeback.h> 33 #include <linux/falloc.h> 34 #include <linux/sort.h> 35 #include <linux/init.h> 36 #include <linux/module.h> 37 #include <linux/string.h> 38 #include <linux/security.h> 39 40 #include <cluster/masklog.h> 41 42 #include "ocfs2.h" 43 #include "alloc.h" 44 #include "blockcheck.h" 45 #include "dlmglue.h" 46 #include "file.h" 47 #include "symlink.h" 48 #include "sysfile.h" 49 #include "inode.h" 50 #include "journal.h" 51 #include "ocfs2_fs.h" 52 #include "suballoc.h" 53 #include "uptodate.h" 54 #include "buffer_head_io.h" 55 #include "super.h" 56 #include "xattr.h" 57 #include "refcounttree.h" 58 #include "acl.h" 59 #include "ocfs2_trace.h" 60 61 struct ocfs2_xattr_def_value_root { 62 struct ocfs2_xattr_value_root xv; 63 struct ocfs2_extent_rec er; 64 }; 65 66 struct ocfs2_xattr_bucket { 67 /* The inode these xattrs are associated with */ 68 struct inode *bu_inode; 69 70 /* The actual buffers that 
make up the bucket */ 71 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 72 73 /* How many blocks make up one bucket for this filesystem */ 74 int bu_blocks; 75 }; 76 77 struct ocfs2_xattr_set_ctxt { 78 handle_t *handle; 79 struct ocfs2_alloc_context *meta_ac; 80 struct ocfs2_alloc_context *data_ac; 81 struct ocfs2_cached_dealloc_ctxt dealloc; 82 int set_abort; 83 }; 84 85 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 86 #define OCFS2_XATTR_INLINE_SIZE 80 87 #define OCFS2_XATTR_HEADER_GAP 4 88 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 89 - sizeof(struct ocfs2_xattr_header) \ 90 - OCFS2_XATTR_HEADER_GAP) 91 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 92 - sizeof(struct ocfs2_xattr_block) \ 93 - sizeof(struct ocfs2_xattr_header) \ 94 - OCFS2_XATTR_HEADER_GAP) 95 96 static struct ocfs2_xattr_def_value_root def_xv = { 97 .xv.xr_list.l_count = cpu_to_le16(1), 98 }; 99 100 const struct xattr_handler *ocfs2_xattr_handlers[] = { 101 &ocfs2_xattr_user_handler, 102 &ocfs2_xattr_acl_access_handler, 103 &ocfs2_xattr_acl_default_handler, 104 &ocfs2_xattr_trusted_handler, 105 &ocfs2_xattr_security_handler, 106 NULL 107 }; 108 109 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 110 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 112 = &ocfs2_xattr_acl_access_handler, 113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 114 = &ocfs2_xattr_acl_default_handler, 115 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 117 }; 118 119 struct ocfs2_xattr_info { 120 int xi_name_index; 121 const char *xi_name; 122 int xi_name_len; 123 const void *xi_value; 124 size_t xi_value_len; 125 }; 126 127 struct ocfs2_xattr_search { 128 struct buffer_head *inode_bh; 129 /* 130 * xattr_bh point to the block buffer head which has extended attribute 131 * when extended 
attribute in inode, xattr_bh is equal to inode_bh. 132 */ 133 struct buffer_head *xattr_bh; 134 struct ocfs2_xattr_header *header; 135 struct ocfs2_xattr_bucket *bucket; 136 void *base; 137 void *end; 138 struct ocfs2_xattr_entry *here; 139 int not_found; 140 }; 141 142 /* Operations on struct ocfs2_xa_entry */ 143 struct ocfs2_xa_loc; 144 struct ocfs2_xa_loc_operations { 145 /* 146 * Journal functions 147 */ 148 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 149 int type); 150 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 151 152 /* 153 * Return a pointer to the appropriate buffer in loc->xl_storage 154 * at the given offset from loc->xl_header. 155 */ 156 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 157 158 /* Can we reuse the existing entry for the new value? */ 159 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 160 struct ocfs2_xattr_info *xi); 161 162 /* How much space is needed for the new value? */ 163 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 164 struct ocfs2_xattr_info *xi); 165 166 /* 167 * Return the offset of the first name+value pair. This is 168 * the start of our downward-filling free space. 169 */ 170 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 171 172 /* 173 * Remove the name+value at this location. Do whatever is 174 * appropriate with the remaining name+value pairs. 175 */ 176 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 177 178 /* Fill xl_entry with a new entry */ 179 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 180 181 /* Add name+value storage to an entry */ 182 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 183 184 /* 185 * Initialize the value buf's access and bh fields for this entry. 186 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 187 */ 188 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 189 struct ocfs2_xattr_value_buf *vb); 190 }; 191 192 /* 193 * Describes an xattr entry location. 
This is a memory structure 194 * tracking the on-disk structure. 195 */ 196 struct ocfs2_xa_loc { 197 /* This xattr belongs to this inode */ 198 struct inode *xl_inode; 199 200 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 201 struct ocfs2_xattr_header *xl_header; 202 203 /* Bytes from xl_header to the end of the storage */ 204 int xl_size; 205 206 /* 207 * The ocfs2_xattr_entry this location describes. If this is 208 * NULL, this location describes the on-disk structure where it 209 * would have been. 210 */ 211 struct ocfs2_xattr_entry *xl_entry; 212 213 /* 214 * Internal housekeeping 215 */ 216 217 /* Buffer(s) containing this entry */ 218 void *xl_storage; 219 220 /* Operations on the storage backing this location */ 221 const struct ocfs2_xa_loc_operations *xl_ops; 222 }; 223 224 /* 225 * Convenience functions to calculate how much space is needed for a 226 * given name+value pair 227 */ 228 static int namevalue_size(int name_len, uint64_t value_len) 229 { 230 if (value_len > OCFS2_XATTR_INLINE_SIZE) 231 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 232 else 233 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 234 } 235 236 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 237 { 238 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 239 } 240 241 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 242 { 243 u64 value_len = le64_to_cpu(xe->xe_value_size); 244 245 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 246 ocfs2_xattr_is_local(xe)); 247 return namevalue_size(xe->xe_name_len, value_len); 248 } 249 250 251 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 252 struct ocfs2_xattr_header *xh, 253 int index, 254 int *block_off, 255 int *new_offset); 256 257 static int ocfs2_xattr_block_find(struct inode *inode, 258 int name_index, 259 const char *name, 260 struct ocfs2_xattr_search *xs); 261 static int ocfs2_xattr_index_block_find(struct inode *inode, 262 struct 
buffer_head *root_bh, 263 int name_index, 264 const char *name, 265 struct ocfs2_xattr_search *xs); 266 267 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 268 struct buffer_head *blk_bh, 269 char *buffer, 270 size_t buffer_size); 271 272 static int ocfs2_xattr_create_index_block(struct inode *inode, 273 struct ocfs2_xattr_search *xs, 274 struct ocfs2_xattr_set_ctxt *ctxt); 275 276 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 277 struct ocfs2_xattr_info *xi, 278 struct ocfs2_xattr_search *xs, 279 struct ocfs2_xattr_set_ctxt *ctxt); 280 281 typedef int (xattr_tree_rec_func)(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, u32 cpos, u32 len, void *para); 284 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 285 struct buffer_head *root_bh, 286 xattr_tree_rec_func *rec_func, 287 void *para); 288 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 289 struct ocfs2_xattr_bucket *bucket, 290 void *para); 291 static int ocfs2_rm_xattr_cluster(struct inode *inode, 292 struct buffer_head *root_bh, 293 u64 blkno, 294 u32 cpos, 295 u32 len, 296 void *para); 297 298 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 299 u64 src_blk, u64 last_blk, u64 to_blk, 300 unsigned int start_bucket, 301 u32 *first_hash); 302 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 303 struct ocfs2_dinode *di, 304 struct ocfs2_xattr_info *xi, 305 struct ocfs2_xattr_search *xis, 306 struct ocfs2_xattr_search *xbs, 307 struct ocfs2_refcount_tree **ref_tree, 308 int *meta_need, 309 int *credits); 310 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 311 struct ocfs2_xattr_bucket *bucket, 312 int offset, 313 struct ocfs2_xattr_value_root **xv, 314 struct buffer_head **bh); 315 316 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 317 { 318 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 319 } 320 321 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 322 { 323 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 324 } 325 326 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 327 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 328 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 329 330 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 331 { 332 struct ocfs2_xattr_bucket *bucket; 333 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 334 335 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 336 337 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 338 if (bucket) { 339 bucket->bu_inode = inode; 340 bucket->bu_blocks = blks; 341 } 342 343 return bucket; 344 } 345 346 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 347 { 348 int i; 349 350 for (i = 0; i < bucket->bu_blocks; i++) { 351 brelse(bucket->bu_bhs[i]); 352 bucket->bu_bhs[i] = NULL; 353 } 354 } 355 356 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 357 { 358 if (bucket) { 359 ocfs2_xattr_bucket_relse(bucket); 360 bucket->bu_inode = NULL; 361 kfree(bucket); 362 } 363 } 364 365 /* 366 * A bucket that has never been written to disk doesn't need to be 367 * read. We just need the buffer_heads. Don't call this for 368 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 369 * them fully. 
370 */ 371 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 372 u64 xb_blkno) 373 { 374 int i, rc = 0; 375 376 for (i = 0; i < bucket->bu_blocks; i++) { 377 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 378 xb_blkno + i); 379 if (!bucket->bu_bhs[i]) { 380 rc = -ENOMEM; 381 mlog_errno(rc); 382 break; 383 } 384 385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 386 bucket->bu_bhs[i])) 387 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 388 bucket->bu_bhs[i]); 389 } 390 391 if (rc) 392 ocfs2_xattr_bucket_relse(bucket); 393 return rc; 394 } 395 396 /* Read the xattr bucket at xb_blkno */ 397 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 398 u64 xb_blkno) 399 { 400 int rc; 401 402 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 403 bucket->bu_blocks, bucket->bu_bhs, 0, 404 NULL); 405 if (!rc) { 406 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 407 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 408 bucket->bu_bhs, 409 bucket->bu_blocks, 410 &bucket_xh(bucket)->xh_check); 411 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 412 if (rc) 413 mlog_errno(rc); 414 } 415 416 if (rc) 417 ocfs2_xattr_bucket_relse(bucket); 418 return rc; 419 } 420 421 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 422 struct ocfs2_xattr_bucket *bucket, 423 int type) 424 { 425 int i, rc = 0; 426 427 for (i = 0; i < bucket->bu_blocks; i++) { 428 rc = ocfs2_journal_access(handle, 429 INODE_CACHE(bucket->bu_inode), 430 bucket->bu_bhs[i], type); 431 if (rc) { 432 mlog_errno(rc); 433 break; 434 } 435 } 436 437 return rc; 438 } 439 440 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 441 struct ocfs2_xattr_bucket *bucket) 442 { 443 int i; 444 445 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 446 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 447 bucket->bu_bhs, bucket->bu_blocks, 448 &bucket_xh(bucket)->xh_check); 449 
spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 450 451 for (i = 0; i < bucket->bu_blocks; i++) 452 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 453 } 454 455 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 456 struct ocfs2_xattr_bucket *src) 457 { 458 int i; 459 int blocksize = src->bu_inode->i_sb->s_blocksize; 460 461 BUG_ON(dest->bu_blocks != src->bu_blocks); 462 BUG_ON(dest->bu_inode != src->bu_inode); 463 464 for (i = 0; i < src->bu_blocks; i++) { 465 memcpy(bucket_block(dest, i), bucket_block(src, i), 466 blocksize); 467 } 468 } 469 470 static int ocfs2_validate_xattr_block(struct super_block *sb, 471 struct buffer_head *bh) 472 { 473 int rc; 474 struct ocfs2_xattr_block *xb = 475 (struct ocfs2_xattr_block *)bh->b_data; 476 477 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 478 479 BUG_ON(!buffer_uptodate(bh)); 480 481 /* 482 * If the ecc fails, we return the error but otherwise 483 * leave the filesystem running. We know any error is 484 * local to this block. 
485 */ 486 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 487 if (rc) 488 return rc; 489 490 /* 491 * Errors after here are fatal 492 */ 493 494 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 495 ocfs2_error(sb, 496 "Extended attribute block #%llu has bad " 497 "signature %.*s", 498 (unsigned long long)bh->b_blocknr, 7, 499 xb->xb_signature); 500 return -EINVAL; 501 } 502 503 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 504 ocfs2_error(sb, 505 "Extended attribute block #%llu has an " 506 "invalid xb_blkno of %llu", 507 (unsigned long long)bh->b_blocknr, 508 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 509 return -EINVAL; 510 } 511 512 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 513 ocfs2_error(sb, 514 "Extended attribute block #%llu has an invalid " 515 "xb_fs_generation of #%u", 516 (unsigned long long)bh->b_blocknr, 517 le32_to_cpu(xb->xb_fs_generation)); 518 return -EINVAL; 519 } 520 521 return 0; 522 } 523 524 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 525 struct buffer_head **bh) 526 { 527 int rc; 528 struct buffer_head *tmp = *bh; 529 530 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 531 ocfs2_validate_xattr_block); 532 533 /* If ocfs2_read_block() got us a new bh, pass it up. */ 534 if (!rc && !*bh) 535 *bh = tmp; 536 537 return rc; 538 } 539 540 static inline const char *ocfs2_xattr_prefix(int name_index) 541 { 542 const struct xattr_handler *handler = NULL; 543 544 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 545 handler = ocfs2_xattr_handler_map[name_index]; 546 547 return handler ? 
handler->prefix : NULL; 548 } 549 550 static u32 ocfs2_xattr_name_hash(struct inode *inode, 551 const char *name, 552 int name_len) 553 { 554 /* Get hash value of uuid from super block */ 555 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 556 int i; 557 558 /* hash extended attribute name */ 559 for (i = 0; i < name_len; i++) { 560 hash = (hash << OCFS2_HASH_SHIFT) ^ 561 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 562 *name++; 563 } 564 565 return hash; 566 } 567 568 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 569 { 570 return namevalue_size(name_len, value_len) + 571 sizeof(struct ocfs2_xattr_entry); 572 } 573 574 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 575 { 576 return namevalue_size_xi(xi) + 577 sizeof(struct ocfs2_xattr_entry); 578 } 579 580 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 581 { 582 return namevalue_size_xe(xe) + 583 sizeof(struct ocfs2_xattr_entry); 584 } 585 586 int ocfs2_calc_security_init(struct inode *dir, 587 struct ocfs2_security_xattr_info *si, 588 int *want_clusters, 589 int *xattr_credits, 590 struct ocfs2_alloc_context **xattr_ac) 591 { 592 int ret = 0; 593 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 594 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 595 si->value_len); 596 597 /* 598 * The max space of security xattr taken inline is 599 * 256(name) + 80(value) + 16(entry) = 352 bytes, 600 * So reserve one metadata block for it is ok. 
601 */ 602 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 603 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 604 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 605 if (ret) { 606 mlog_errno(ret); 607 return ret; 608 } 609 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 610 } 611 612 /* reserve clusters for xattr value which will be set in B tree*/ 613 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 614 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 615 si->value_len); 616 617 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 618 new_clusters); 619 *want_clusters += new_clusters; 620 } 621 return ret; 622 } 623 624 int ocfs2_calc_xattr_init(struct inode *dir, 625 struct buffer_head *dir_bh, 626 umode_t mode, 627 struct ocfs2_security_xattr_info *si, 628 int *want_clusters, 629 int *xattr_credits, 630 int *want_meta) 631 { 632 int ret = 0; 633 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 634 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 635 636 if (si->enable) 637 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 638 si->value_len); 639 640 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 641 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 642 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 643 "", NULL, 0); 644 if (acl_len > 0) { 645 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 646 if (S_ISDIR(mode)) 647 a_size <<= 1; 648 } else if (acl_len != 0 && acl_len != -ENODATA) { 649 mlog_errno(ret); 650 return ret; 651 } 652 } 653 654 if (!(s_size + a_size)) 655 return ret; 656 657 /* 658 * The max space of security xattr taken inline is 659 * 256(name) + 80(value) + 16(entry) = 352 bytes, 660 * The max space of acl xattr taken inline is 661 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 662 * when blocksize = 512, may reserve one more cluser for 663 * xattr bucket, otherwise reserve one metadata block 664 * for them is ok. 
665 * If this is a new directory with inline data, 666 * we choose to reserve the entire inline area for 667 * directory contents and force an external xattr block. 668 */ 669 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 670 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 671 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 672 *want_meta = *want_meta + 1; 673 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 674 } 675 676 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 677 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 678 *want_clusters += 1; 679 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 680 } 681 682 /* 683 * reserve credits and clusters for xattrs which has large value 684 * and have to be set outside 685 */ 686 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 687 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 688 si->value_len); 689 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 690 new_clusters); 691 *want_clusters += new_clusters; 692 } 693 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 694 acl_len > OCFS2_XATTR_INLINE_SIZE) { 695 /* for directory, it has DEFAULT and ACCESS two types of acls */ 696 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 697 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 698 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 699 new_clusters); 700 *want_clusters += new_clusters; 701 } 702 703 return ret; 704 } 705 706 static int ocfs2_xattr_extend_allocation(struct inode *inode, 707 u32 clusters_to_add, 708 struct ocfs2_xattr_value_buf *vb, 709 struct ocfs2_xattr_set_ctxt *ctxt) 710 { 711 int status = 0, credits; 712 handle_t *handle = ctxt->handle; 713 enum ocfs2_alloc_restarted why; 714 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 715 struct ocfs2_extent_tree et; 716 717 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 718 719 while (clusters_to_add) { 720 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 721 722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 723 OCFS2_JOURNAL_ACCESS_WRITE); 724 if (status < 0) { 725 mlog_errno(status); 726 break; 727 } 728 729 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 730 status = ocfs2_add_clusters_in_btree(handle, 731 &et, 732 &logical_start, 733 clusters_to_add, 734 0, 735 ctxt->data_ac, 736 ctxt->meta_ac, 737 &why); 738 if ((status < 0) && (status != -EAGAIN)) { 739 if (status != -ENOSPC) 740 mlog_errno(status); 741 break; 742 } 743 744 ocfs2_journal_dirty(handle, vb->vb_bh); 745 746 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 747 prev_clusters; 748 749 if (why != RESTART_NONE && clusters_to_add) { 750 /* 751 * We can only fail in case the alloc file doesn't give 752 * up enough clusters. 
753 */ 754 BUG_ON(why == RESTART_META); 755 756 credits = ocfs2_calc_extend_credits(inode->i_sb, 757 &vb->vb_xv->xr_list); 758 status = ocfs2_extend_trans(handle, credits); 759 if (status < 0) { 760 status = -ENOMEM; 761 mlog_errno(status); 762 break; 763 } 764 } 765 } 766 767 return status; 768 } 769 770 static int __ocfs2_remove_xattr_range(struct inode *inode, 771 struct ocfs2_xattr_value_buf *vb, 772 u32 cpos, u32 phys_cpos, u32 len, 773 unsigned int ext_flags, 774 struct ocfs2_xattr_set_ctxt *ctxt) 775 { 776 int ret; 777 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 778 handle_t *handle = ctxt->handle; 779 struct ocfs2_extent_tree et; 780 781 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 782 783 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 784 OCFS2_JOURNAL_ACCESS_WRITE); 785 if (ret) { 786 mlog_errno(ret); 787 goto out; 788 } 789 790 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 791 &ctxt->dealloc); 792 if (ret) { 793 mlog_errno(ret); 794 goto out; 795 } 796 797 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 798 ocfs2_journal_dirty(handle, vb->vb_bh); 799 800 if (ext_flags & OCFS2_EXT_REFCOUNTED) 801 ret = ocfs2_decrease_refcount(inode, handle, 802 ocfs2_blocks_to_clusters(inode->i_sb, 803 phys_blkno), 804 len, ctxt->meta_ac, &ctxt->dealloc, 1); 805 else 806 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 807 phys_blkno, len); 808 if (ret) 809 mlog_errno(ret); 810 811 out: 812 return ret; 813 } 814 815 static int ocfs2_xattr_shrink_size(struct inode *inode, 816 u32 old_clusters, 817 u32 new_clusters, 818 struct ocfs2_xattr_value_buf *vb, 819 struct ocfs2_xattr_set_ctxt *ctxt) 820 { 821 int ret = 0; 822 unsigned int ext_flags; 823 u32 trunc_len, cpos, phys_cpos, alloc_size; 824 u64 block; 825 826 if (old_clusters <= new_clusters) 827 return 0; 828 829 cpos = new_clusters; 830 trunc_len = old_clusters - new_clusters; 831 while (trunc_len) { 832 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 833 &alloc_size, 834 &vb->vb_xv->xr_list, &ext_flags); 835 if (ret) { 836 mlog_errno(ret); 837 goto out; 838 } 839 840 if (alloc_size > trunc_len) 841 alloc_size = trunc_len; 842 843 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 844 phys_cpos, alloc_size, 845 ext_flags, ctxt); 846 if (ret) { 847 mlog_errno(ret); 848 goto out; 849 } 850 851 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 852 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 853 block, alloc_size); 854 cpos += alloc_size; 855 trunc_len -= alloc_size; 856 } 857 858 out: 859 return ret; 860 } 861 862 static int ocfs2_xattr_value_truncate(struct inode *inode, 863 struct ocfs2_xattr_value_buf *vb, 864 int len, 865 struct ocfs2_xattr_set_ctxt *ctxt) 866 { 867 int ret; 868 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 869 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 870 871 if (new_clusters == old_clusters) 872 return 0; 873 874 if (new_clusters > old_clusters) 875 ret = ocfs2_xattr_extend_allocation(inode, 876 new_clusters - old_clusters, 877 vb, ctxt); 878 else 879 ret = ocfs2_xattr_shrink_size(inode, 880 old_clusters, new_clusters, 881 vb, ctxt); 882 883 return ret; 884 } 885 886 static int ocfs2_xattr_list_entry(char *buffer, size_t size, 887 size_t *result, const char *prefix, 888 const char *name, int name_len) 889 { 890 char *p = buffer + *result; 891 int prefix_len = strlen(prefix); 892 int total_len = prefix_len + name_len + 1; 893 894 *result += total_len; 895 896 /* we are just looking for how big our buffer needs to be */ 897 if (!size) 898 return 0; 899 900 if (*result > size) 901 return -ERANGE; 902 903 memcpy(p, prefix, prefix_len); 904 memcpy(p + prefix_len, name, name_len); 905 p[prefix_len + name_len] = '\0'; 906 907 return 0; 908 } 909 910 static int ocfs2_xattr_list_entries(struct inode *inode, 911 struct ocfs2_xattr_header *header, 912 char *buffer, size_t buffer_size) 913 { 914 size_t result = 0; 915 int i, type, ret; 916 
const char *prefix, *name; 917 918 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 919 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 920 type = ocfs2_xattr_get_type(entry); 921 prefix = ocfs2_xattr_prefix(type); 922 923 if (prefix) { 924 name = (const char *)header + 925 le16_to_cpu(entry->xe_name_offset); 926 927 ret = ocfs2_xattr_list_entry(buffer, buffer_size, 928 &result, prefix, name, 929 entry->xe_name_len); 930 if (ret) 931 return ret; 932 } 933 } 934 935 return result; 936 } 937 938 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 939 struct ocfs2_dinode *di) 940 { 941 struct ocfs2_xattr_header *xh; 942 int i; 943 944 xh = (struct ocfs2_xattr_header *) 945 ((void *)di + inode->i_sb->s_blocksize - 946 le16_to_cpu(di->i_xattr_inline_size)); 947 948 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 949 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 950 return 1; 951 952 return 0; 953 } 954 955 static int ocfs2_xattr_ibody_list(struct inode *inode, 956 struct ocfs2_dinode *di, 957 char *buffer, 958 size_t buffer_size) 959 { 960 struct ocfs2_xattr_header *header = NULL; 961 struct ocfs2_inode_info *oi = OCFS2_I(inode); 962 int ret = 0; 963 964 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 965 return ret; 966 967 header = (struct ocfs2_xattr_header *) 968 ((void *)di + inode->i_sb->s_blocksize - 969 le16_to_cpu(di->i_xattr_inline_size)); 970 971 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 972 973 return ret; 974 } 975 976 static int ocfs2_xattr_block_list(struct inode *inode, 977 struct ocfs2_dinode *di, 978 char *buffer, 979 size_t buffer_size) 980 { 981 struct buffer_head *blk_bh = NULL; 982 struct ocfs2_xattr_block *xb; 983 int ret = 0; 984 985 if (!di->i_xattr_loc) 986 return ret; 987 988 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 989 &blk_bh); 990 if (ret < 0) { 991 mlog_errno(ret); 992 return ret; 993 } 994 995 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 996 if 
(!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 997 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 998 ret = ocfs2_xattr_list_entries(inode, header, 999 buffer, buffer_size); 1000 } else 1001 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1002 buffer, buffer_size); 1003 1004 brelse(blk_bh); 1005 1006 return ret; 1007 } 1008 1009 ssize_t ocfs2_listxattr(struct dentry *dentry, 1010 char *buffer, 1011 size_t size) 1012 { 1013 int ret = 0, i_ret = 0, b_ret = 0; 1014 struct buffer_head *di_bh = NULL; 1015 struct ocfs2_dinode *di = NULL; 1016 struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode); 1017 1018 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1019 return -EOPNOTSUPP; 1020 1021 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1022 return ret; 1023 1024 ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0); 1025 if (ret < 0) { 1026 mlog_errno(ret); 1027 return ret; 1028 } 1029 1030 di = (struct ocfs2_dinode *)di_bh->b_data; 1031 1032 down_read(&oi->ip_xattr_sem); 1033 i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size); 1034 if (i_ret < 0) 1035 b_ret = 0; 1036 else { 1037 if (buffer) { 1038 buffer += i_ret; 1039 size -= i_ret; 1040 } 1041 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di, 1042 buffer, size); 1043 if (b_ret < 0) 1044 i_ret = 0; 1045 } 1046 up_read(&oi->ip_xattr_sem); 1047 ocfs2_inode_unlock(dentry->d_inode, 0); 1048 1049 brelse(di_bh); 1050 1051 return i_ret + b_ret; 1052 } 1053 1054 static int ocfs2_xattr_find_entry(int name_index, 1055 const char *name, 1056 struct ocfs2_xattr_search *xs) 1057 { 1058 struct ocfs2_xattr_entry *entry; 1059 size_t name_len; 1060 int i, cmp = 1; 1061 1062 if (name == NULL) 1063 return -EINVAL; 1064 1065 name_len = strlen(name); 1066 entry = xs->here; 1067 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1068 cmp = name_index - ocfs2_xattr_get_type(entry); 1069 if (!cmp) 1070 cmp = name_len - entry->xe_name_len; 1071 if (!cmp) 1072 cmp = memcmp(name, (xs->base + 
1073 le16_to_cpu(entry->xe_name_offset)), 1074 name_len); 1075 if (cmp == 0) 1076 break; 1077 entry += 1; 1078 } 1079 xs->here = entry; 1080 1081 return cmp ? -ENODATA : 0; 1082 } 1083 1084 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1085 struct ocfs2_xattr_value_root *xv, 1086 void *buffer, 1087 size_t len) 1088 { 1089 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1090 u64 blkno; 1091 int i, ret = 0; 1092 size_t cplen, blocksize; 1093 struct buffer_head *bh = NULL; 1094 struct ocfs2_extent_list *el; 1095 1096 el = &xv->xr_list; 1097 clusters = le32_to_cpu(xv->xr_clusters); 1098 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1099 blocksize = inode->i_sb->s_blocksize; 1100 1101 cpos = 0; 1102 while (cpos < clusters) { 1103 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1104 &num_clusters, el, NULL); 1105 if (ret) { 1106 mlog_errno(ret); 1107 goto out; 1108 } 1109 1110 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1111 /* Copy ocfs2_xattr_value */ 1112 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1113 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1114 &bh, NULL); 1115 if (ret) { 1116 mlog_errno(ret); 1117 goto out; 1118 } 1119 1120 cplen = len >= blocksize ? 
blocksize : len; 1121 memcpy(buffer, bh->b_data, cplen); 1122 len -= cplen; 1123 buffer += cplen; 1124 1125 brelse(bh); 1126 bh = NULL; 1127 if (len == 0) 1128 break; 1129 } 1130 cpos += num_clusters; 1131 } 1132 out: 1133 return ret; 1134 } 1135 1136 static int ocfs2_xattr_ibody_get(struct inode *inode, 1137 int name_index, 1138 const char *name, 1139 void *buffer, 1140 size_t buffer_size, 1141 struct ocfs2_xattr_search *xs) 1142 { 1143 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1144 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1145 struct ocfs2_xattr_value_root *xv; 1146 size_t size; 1147 int ret = 0; 1148 1149 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1150 return -ENODATA; 1151 1152 xs->end = (void *)di + inode->i_sb->s_blocksize; 1153 xs->header = (struct ocfs2_xattr_header *) 1154 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1155 xs->base = (void *)xs->header; 1156 xs->here = xs->header->xh_entries; 1157 1158 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1159 if (ret) 1160 return ret; 1161 size = le64_to_cpu(xs->here->xe_value_size); 1162 if (buffer) { 1163 if (size > buffer_size) 1164 return -ERANGE; 1165 if (ocfs2_xattr_is_local(xs->here)) { 1166 memcpy(buffer, (void *)xs->base + 1167 le16_to_cpu(xs->here->xe_name_offset) + 1168 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1169 } else { 1170 xv = (struct ocfs2_xattr_value_root *) 1171 (xs->base + le16_to_cpu( 1172 xs->here->xe_name_offset) + 1173 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1174 ret = ocfs2_xattr_get_value_outside(inode, xv, 1175 buffer, size); 1176 if (ret < 0) { 1177 mlog_errno(ret); 1178 return ret; 1179 } 1180 } 1181 } 1182 1183 return size; 1184 } 1185 1186 static int ocfs2_xattr_block_get(struct inode *inode, 1187 int name_index, 1188 const char *name, 1189 void *buffer, 1190 size_t buffer_size, 1191 struct ocfs2_xattr_search *xs) 1192 { 1193 struct ocfs2_xattr_block *xb; 1194 struct ocfs2_xattr_value_root *xv; 1195 size_t size; 1196 
int ret = -ENODATA, name_offset, name_len, i; 1197 int uninitialized_var(block_off); 1198 1199 xs->bucket = ocfs2_xattr_bucket_new(inode); 1200 if (!xs->bucket) { 1201 ret = -ENOMEM; 1202 mlog_errno(ret); 1203 goto cleanup; 1204 } 1205 1206 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1207 if (ret) { 1208 mlog_errno(ret); 1209 goto cleanup; 1210 } 1211 1212 if (xs->not_found) { 1213 ret = -ENODATA; 1214 goto cleanup; 1215 } 1216 1217 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1218 size = le64_to_cpu(xs->here->xe_value_size); 1219 if (buffer) { 1220 ret = -ERANGE; 1221 if (size > buffer_size) 1222 goto cleanup; 1223 1224 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1225 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1226 i = xs->here - xs->header->xh_entries; 1227 1228 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1229 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1230 bucket_xh(xs->bucket), 1231 i, 1232 &block_off, 1233 &name_offset); 1234 xs->base = bucket_block(xs->bucket, block_off); 1235 } 1236 if (ocfs2_xattr_is_local(xs->here)) { 1237 memcpy(buffer, (void *)xs->base + 1238 name_offset + name_len, size); 1239 } else { 1240 xv = (struct ocfs2_xattr_value_root *) 1241 (xs->base + name_offset + name_len); 1242 ret = ocfs2_xattr_get_value_outside(inode, xv, 1243 buffer, size); 1244 if (ret < 0) { 1245 mlog_errno(ret); 1246 goto cleanup; 1247 } 1248 } 1249 } 1250 ret = size; 1251 cleanup: 1252 ocfs2_xattr_bucket_free(xs->bucket); 1253 1254 brelse(xs->xattr_bh); 1255 xs->xattr_bh = NULL; 1256 return ret; 1257 } 1258 1259 int ocfs2_xattr_get_nolock(struct inode *inode, 1260 struct buffer_head *di_bh, 1261 int name_index, 1262 const char *name, 1263 void *buffer, 1264 size_t buffer_size) 1265 { 1266 int ret; 1267 struct ocfs2_dinode *di = NULL; 1268 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1269 struct ocfs2_xattr_search xis = { 1270 .not_found = -ENODATA, 1271 }; 1272 struct ocfs2_xattr_search xbs = { 1273 
.not_found = -ENODATA, 1274 }; 1275 1276 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1277 return -EOPNOTSUPP; 1278 1279 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1280 ret = -ENODATA; 1281 1282 xis.inode_bh = xbs.inode_bh = di_bh; 1283 di = (struct ocfs2_dinode *)di_bh->b_data; 1284 1285 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1286 buffer_size, &xis); 1287 if (ret == -ENODATA && di->i_xattr_loc) 1288 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1289 buffer_size, &xbs); 1290 1291 return ret; 1292 } 1293 1294 /* ocfs2_xattr_get() 1295 * 1296 * Copy an extended attribute into the buffer provided. 1297 * Buffer is NULL to compute the size of buffer required. 1298 */ 1299 static int ocfs2_xattr_get(struct inode *inode, 1300 int name_index, 1301 const char *name, 1302 void *buffer, 1303 size_t buffer_size) 1304 { 1305 int ret; 1306 struct buffer_head *di_bh = NULL; 1307 1308 ret = ocfs2_inode_lock(inode, &di_bh, 0); 1309 if (ret < 0) { 1310 mlog_errno(ret); 1311 return ret; 1312 } 1313 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1314 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1315 name, buffer, buffer_size); 1316 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1317 1318 ocfs2_inode_unlock(inode, 0); 1319 1320 brelse(di_bh); 1321 1322 return ret; 1323 } 1324 1325 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1326 handle_t *handle, 1327 struct ocfs2_xattr_value_buf *vb, 1328 const void *value, 1329 int value_len) 1330 { 1331 int ret = 0, i, cp_len; 1332 u16 blocksize = inode->i_sb->s_blocksize; 1333 u32 p_cluster, num_clusters; 1334 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1335 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1336 u64 blkno; 1337 struct buffer_head *bh = NULL; 1338 unsigned int ext_flags; 1339 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1340 1341 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1342 1343 while (cpos < clusters) { 1344 ret = 
ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1345 &num_clusters, &xv->xr_list, 1346 &ext_flags); 1347 if (ret) { 1348 mlog_errno(ret); 1349 goto out; 1350 } 1351 1352 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1353 1354 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1355 1356 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1357 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1358 &bh, NULL); 1359 if (ret) { 1360 mlog_errno(ret); 1361 goto out; 1362 } 1363 1364 ret = ocfs2_journal_access(handle, 1365 INODE_CACHE(inode), 1366 bh, 1367 OCFS2_JOURNAL_ACCESS_WRITE); 1368 if (ret < 0) { 1369 mlog_errno(ret); 1370 goto out; 1371 } 1372 1373 cp_len = value_len > blocksize ? blocksize : value_len; 1374 memcpy(bh->b_data, value, cp_len); 1375 value_len -= cp_len; 1376 value += cp_len; 1377 if (cp_len < blocksize) 1378 memset(bh->b_data + cp_len, 0, 1379 blocksize - cp_len); 1380 1381 ocfs2_journal_dirty(handle, bh); 1382 brelse(bh); 1383 bh = NULL; 1384 1385 /* 1386 * XXX: do we need to empty all the following 1387 * blocks in this cluster? 
1388 */ 1389 if (!value_len) 1390 break; 1391 } 1392 cpos += num_clusters; 1393 } 1394 out: 1395 brelse(bh); 1396 1397 return ret; 1398 } 1399 1400 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1401 int num_entries) 1402 { 1403 int free_space; 1404 1405 if (!needed_space) 1406 return 0; 1407 1408 free_space = free_start - 1409 sizeof(struct ocfs2_xattr_header) - 1410 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1411 OCFS2_XATTR_HEADER_GAP; 1412 if (free_space < 0) 1413 return -EIO; 1414 if (free_space < needed_space) 1415 return -ENOSPC; 1416 1417 return 0; 1418 } 1419 1420 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1421 int type) 1422 { 1423 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1424 } 1425 1426 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1427 { 1428 loc->xl_ops->xlo_journal_dirty(handle, loc); 1429 } 1430 1431 /* Give a pointer into the storage for the given offset */ 1432 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1433 { 1434 BUG_ON(offset >= loc->xl_size); 1435 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1436 } 1437 1438 /* 1439 * Wipe the name+value pair and allow the storage to reclaim it. This 1440 * must be followed by either removal of the entry or a call to 1441 * ocfs2_xa_add_namevalue(). 1442 */ 1443 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1444 { 1445 loc->xl_ops->xlo_wipe_namevalue(loc); 1446 } 1447 1448 /* 1449 * Find lowest offset to a name+value pair. This is the start of our 1450 * downward-growing free space. 1451 */ 1452 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1453 { 1454 return loc->xl_ops->xlo_get_free_start(loc); 1455 } 1456 1457 /* Can we reuse loc->xl_entry for xi? 
*/ 1458 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1459 struct ocfs2_xattr_info *xi) 1460 { 1461 return loc->xl_ops->xlo_can_reuse(loc, xi); 1462 } 1463 1464 /* How much free space is needed to set the new value */ 1465 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1466 struct ocfs2_xattr_info *xi) 1467 { 1468 return loc->xl_ops->xlo_check_space(loc, xi); 1469 } 1470 1471 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1472 { 1473 loc->xl_ops->xlo_add_entry(loc, name_hash); 1474 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1475 /* 1476 * We can't leave the new entry's xe_name_offset at zero or 1477 * add_namevalue() will go nuts. We set it to the size of our 1478 * storage so that it can never be less than any other entry. 1479 */ 1480 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1481 } 1482 1483 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1484 struct ocfs2_xattr_info *xi) 1485 { 1486 int size = namevalue_size_xi(xi); 1487 int nameval_offset; 1488 char *nameval_buf; 1489 1490 loc->xl_ops->xlo_add_namevalue(loc, size); 1491 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1492 loc->xl_entry->xe_name_len = xi->xi_name_len; 1493 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1494 ocfs2_xattr_set_local(loc->xl_entry, 1495 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1496 1497 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1498 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1499 memset(nameval_buf, 0, size); 1500 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1501 } 1502 1503 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1504 struct ocfs2_xattr_value_buf *vb) 1505 { 1506 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1507 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1508 1509 /* Value bufs are for value trees */ 1510 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1511 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1512 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1513 1514 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1515 vb->vb_xv = 1516 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1517 nameval_offset + 1518 name_size); 1519 } 1520 1521 static int ocfs2_xa_block_journal_access(handle_t *handle, 1522 struct ocfs2_xa_loc *loc, int type) 1523 { 1524 struct buffer_head *bh = loc->xl_storage; 1525 ocfs2_journal_access_func access; 1526 1527 if (loc->xl_size == (bh->b_size - 1528 offsetof(struct ocfs2_xattr_block, 1529 xb_attrs.xb_header))) 1530 access = ocfs2_journal_access_xb; 1531 else 1532 access = ocfs2_journal_access_di; 1533 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1534 } 1535 1536 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1537 struct ocfs2_xa_loc *loc) 1538 { 1539 struct buffer_head *bh = loc->xl_storage; 1540 1541 ocfs2_journal_dirty(handle, bh); 1542 } 1543 1544 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1545 int offset) 1546 { 1547 return (char *)loc->xl_header + offset; 1548 } 1549 1550 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1551 struct ocfs2_xattr_info *xi) 1552 { 1553 /* 1554 * Block storage is strict. If the sizes aren't exact, we will 1555 * remove the old one and reinsert the new. 
1556 */ 1557 return namevalue_size_xe(loc->xl_entry) == 1558 namevalue_size_xi(xi); 1559 } 1560 1561 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1562 { 1563 struct ocfs2_xattr_header *xh = loc->xl_header; 1564 int i, count = le16_to_cpu(xh->xh_count); 1565 int offset, free_start = loc->xl_size; 1566 1567 for (i = 0; i < count; i++) { 1568 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1569 if (offset < free_start) 1570 free_start = offset; 1571 } 1572 1573 return free_start; 1574 } 1575 1576 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1577 struct ocfs2_xattr_info *xi) 1578 { 1579 int count = le16_to_cpu(loc->xl_header->xh_count); 1580 int free_start = ocfs2_xa_get_free_start(loc); 1581 int needed_space = ocfs2_xi_entry_usage(xi); 1582 1583 /* 1584 * Block storage will reclaim the original entry before inserting 1585 * the new value, so we only need the difference. If the new 1586 * entry is smaller than the old one, we don't need anything. 1587 */ 1588 if (loc->xl_entry) { 1589 /* Don't need space if we're reusing! */ 1590 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1591 needed_space = 0; 1592 else 1593 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1594 } 1595 if (needed_space < 0) 1596 needed_space = 0; 1597 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1598 } 1599 1600 /* 1601 * Block storage for xattrs keeps the name+value pairs compacted. When 1602 * we remove one, we have to shift any that preceded it towards the end. 
1603 */ 1604 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1605 { 1606 int i, offset; 1607 int namevalue_offset, first_namevalue_offset, namevalue_size; 1608 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1609 struct ocfs2_xattr_header *xh = loc->xl_header; 1610 int count = le16_to_cpu(xh->xh_count); 1611 1612 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1613 namevalue_size = namevalue_size_xe(entry); 1614 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1615 1616 /* Shift the name+value pairs */ 1617 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1618 (char *)xh + first_namevalue_offset, 1619 namevalue_offset - first_namevalue_offset); 1620 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1621 1622 /* Now tell xh->xh_entries about it */ 1623 for (i = 0; i < count; i++) { 1624 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1625 if (offset <= namevalue_offset) 1626 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1627 namevalue_size); 1628 } 1629 1630 /* 1631 * Note that we don't update xh_free_start or xh_name_value_len 1632 * because they're not used in block-stored xattrs. 
1633 */ 1634 } 1635 1636 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1637 { 1638 int count = le16_to_cpu(loc->xl_header->xh_count); 1639 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1640 le16_add_cpu(&loc->xl_header->xh_count, 1); 1641 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1642 } 1643 1644 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1645 { 1646 int free_start = ocfs2_xa_get_free_start(loc); 1647 1648 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1649 } 1650 1651 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1652 struct ocfs2_xattr_value_buf *vb) 1653 { 1654 struct buffer_head *bh = loc->xl_storage; 1655 1656 if (loc->xl_size == (bh->b_size - 1657 offsetof(struct ocfs2_xattr_block, 1658 xb_attrs.xb_header))) 1659 vb->vb_access = ocfs2_journal_access_xb; 1660 else 1661 vb->vb_access = ocfs2_journal_access_di; 1662 vb->vb_bh = bh; 1663 } 1664 1665 /* 1666 * Operations for xattrs stored in blocks. This includes inline inode 1667 * storage and unindexed ocfs2_xattr_blocks. 
1668 */ 1669 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1670 .xlo_journal_access = ocfs2_xa_block_journal_access, 1671 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1672 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1673 .xlo_check_space = ocfs2_xa_block_check_space, 1674 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1675 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1676 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1677 .xlo_add_entry = ocfs2_xa_block_add_entry, 1678 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1679 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1680 }; 1681 1682 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1683 struct ocfs2_xa_loc *loc, int type) 1684 { 1685 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1686 1687 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1688 } 1689 1690 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1691 struct ocfs2_xa_loc *loc) 1692 { 1693 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1694 1695 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1696 } 1697 1698 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1699 int offset) 1700 { 1701 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1702 int block, block_offset; 1703 1704 /* The header is at the front of the bucket */ 1705 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1706 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1707 1708 return bucket_block(bucket, block) + block_offset; 1709 } 1710 1711 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1712 struct ocfs2_xattr_info *xi) 1713 { 1714 return namevalue_size_xe(loc->xl_entry) >= 1715 namevalue_size_xi(xi); 1716 } 1717 1718 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1719 { 1720 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1721 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1722 } 1723 1724 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1725 int free_start, int size) 1726 { 1727 /* 1728 * We need to make sure that the name+value pair fits within 1729 * one block. 1730 */ 1731 if (((free_start - size) >> sb->s_blocksize_bits) != 1732 ((free_start - 1) >> sb->s_blocksize_bits)) 1733 free_start -= free_start % sb->s_blocksize; 1734 1735 return free_start; 1736 } 1737 1738 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1739 struct ocfs2_xattr_info *xi) 1740 { 1741 int rc; 1742 int count = le16_to_cpu(loc->xl_header->xh_count); 1743 int free_start = ocfs2_xa_get_free_start(loc); 1744 int needed_space = ocfs2_xi_entry_usage(xi); 1745 int size = namevalue_size_xi(xi); 1746 struct super_block *sb = loc->xl_inode->i_sb; 1747 1748 /* 1749 * Bucket storage does not reclaim name+value pairs it cannot 1750 * reuse. They live as holes until the bucket fills, and then 1751 * the bucket is defragmented. However, the bucket can reclaim 1752 * the ocfs2_xattr_entry. 1753 */ 1754 if (loc->xl_entry) { 1755 /* Don't need space if we're reusing! */ 1756 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1757 needed_space = 0; 1758 else 1759 needed_space -= sizeof(struct ocfs2_xattr_entry); 1760 } 1761 BUG_ON(needed_space < 0); 1762 1763 if (free_start < size) { 1764 if (needed_space) 1765 return -ENOSPC; 1766 } else { 1767 /* 1768 * First we check if it would fit in the first place. 1769 * Below, we align the free start to a block. This may 1770 * slide us below the minimum gap. By checking unaligned 1771 * first, we avoid that error. 
1772 */ 1773 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1774 count); 1775 if (rc) 1776 return rc; 1777 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1778 size); 1779 } 1780 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1781 } 1782 1783 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1784 { 1785 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1786 -namevalue_size_xe(loc->xl_entry)); 1787 } 1788 1789 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1790 { 1791 struct ocfs2_xattr_header *xh = loc->xl_header; 1792 int count = le16_to_cpu(xh->xh_count); 1793 int low = 0, high = count - 1, tmp; 1794 struct ocfs2_xattr_entry *tmp_xe; 1795 1796 /* 1797 * We keep buckets sorted by name_hash, so we need to find 1798 * our insert place. 1799 */ 1800 while (low <= high && count) { 1801 tmp = (low + high) / 2; 1802 tmp_xe = &xh->xh_entries[tmp]; 1803 1804 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1805 low = tmp + 1; 1806 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1807 high = tmp - 1; 1808 else { 1809 low = tmp; 1810 break; 1811 } 1812 } 1813 1814 if (low != count) 1815 memmove(&xh->xh_entries[low + 1], 1816 &xh->xh_entries[low], 1817 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1818 1819 le16_add_cpu(&xh->xh_count, 1); 1820 loc->xl_entry = &xh->xh_entries[low]; 1821 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1822 } 1823 1824 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1825 { 1826 int free_start = ocfs2_xa_get_free_start(loc); 1827 struct ocfs2_xattr_header *xh = loc->xl_header; 1828 struct super_block *sb = loc->xl_inode->i_sb; 1829 int nameval_offset; 1830 1831 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1832 nameval_offset = free_start - size; 1833 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1834 xh->xh_free_start = cpu_to_le16(nameval_offset); 1835 
le16_add_cpu(&xh->xh_name_value_len, size); 1836 1837 } 1838 1839 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1840 struct ocfs2_xattr_value_buf *vb) 1841 { 1842 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1843 struct super_block *sb = loc->xl_inode->i_sb; 1844 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1845 int size = namevalue_size_xe(loc->xl_entry); 1846 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1847 1848 /* Values are not allowed to straddle block boundaries */ 1849 BUG_ON(block_offset != 1850 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1851 /* We expect the bucket to be filled in */ 1852 BUG_ON(!bucket->bu_bhs[block_offset]); 1853 1854 vb->vb_access = ocfs2_journal_access; 1855 vb->vb_bh = bucket->bu_bhs[block_offset]; 1856 } 1857 1858 /* Operations for xattrs stored in buckets. */ 1859 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1860 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1861 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1862 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1863 .xlo_check_space = ocfs2_xa_bucket_check_space, 1864 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1865 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1866 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1867 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1868 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1869 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1870 }; 1871 1872 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1873 { 1874 struct ocfs2_xattr_value_buf vb; 1875 1876 if (ocfs2_xattr_is_local(loc->xl_entry)) 1877 return 0; 1878 1879 ocfs2_xa_fill_value_buf(loc, &vb); 1880 return le32_to_cpu(vb.vb_xv->xr_clusters); 1881 } 1882 1883 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1884 struct ocfs2_xattr_set_ctxt *ctxt) 1885 { 1886 int trunc_rc, access_rc; 1887 struct 
ocfs2_xattr_value_buf vb; 1888 1889 ocfs2_xa_fill_value_buf(loc, &vb); 1890 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1891 ctxt); 1892 1893 /* 1894 * The caller of ocfs2_xa_value_truncate() has already called 1895 * ocfs2_xa_journal_access on the loc. However, The truncate code 1896 * calls ocfs2_extend_trans(). This may commit the previous 1897 * transaction and open a new one. If this is a bucket, truncate 1898 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1899 * the caller is expecting to dirty the entire bucket. So we must 1900 * reset the journal work. We do this even if truncate has failed, 1901 * as it could have failed after committing the extend. 1902 */ 1903 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1904 OCFS2_JOURNAL_ACCESS_WRITE); 1905 1906 /* Errors in truncate take precedence */ 1907 return trunc_rc ? trunc_rc : access_rc; 1908 } 1909 1910 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1911 { 1912 int index, count; 1913 struct ocfs2_xattr_header *xh = loc->xl_header; 1914 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1915 1916 ocfs2_xa_wipe_namevalue(loc); 1917 loc->xl_entry = NULL; 1918 1919 le16_add_cpu(&xh->xh_count, -1); 1920 count = le16_to_cpu(xh->xh_count); 1921 1922 /* 1923 * Only zero out the entry if there are more remaining. This is 1924 * important for an empty bucket, as it keeps track of the 1925 * bucket's hash value. It doesn't hurt empty block storage. 
1926 */ 1927 if (count) { 1928 index = ((char *)entry - (char *)&xh->xh_entries) / 1929 sizeof(struct ocfs2_xattr_entry); 1930 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1931 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1932 memset(&xh->xh_entries[count], 0, 1933 sizeof(struct ocfs2_xattr_entry)); 1934 } 1935 } 1936 1937 /* 1938 * If we have a problem adjusting the size of an external value during 1939 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1940 * in an intermediate state. For example, the value may be partially 1941 * truncated. 1942 * 1943 * If the value tree hasn't changed, the extend/truncate went nowhere. 1944 * We have nothing to do. The caller can treat it as a straight error. 1945 * 1946 * If the value tree got partially truncated, we now have a corrupted 1947 * extended attribute. We're going to wipe its entry and leak the 1948 * clusters. Better to leak some storage than leave a corrupt entry. 1949 * 1950 * If the value tree grew, it obviously didn't grow enough for the 1951 * new entry. We're not going to try and reclaim those clusters either. 1952 * If there was already an external value there (orig_clusters != 0), 1953 * the new clusters are attached safely and we can just leave the old 1954 * value in place. If there was no external value there, we remove 1955 * the entry. 1956 * 1957 * This way, the xattr block we store in the journal will be consistent. 1958 * If the size change broke because of the journal, no changes will hit 1959 * disk anyway. 1960 */ 1961 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1962 const char *what, 1963 unsigned int orig_clusters) 1964 { 1965 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1966 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1967 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1968 1969 if (new_clusters < orig_clusters) { 1970 mlog(ML_ERROR, 1971 "Partial truncate while %s xattr %.*s. 
Leaking " 1972 "%u clusters and removing the entry\n", 1973 what, loc->xl_entry->xe_name_len, nameval_buf, 1974 orig_clusters - new_clusters); 1975 ocfs2_xa_remove_entry(loc); 1976 } else if (!orig_clusters) { 1977 mlog(ML_ERROR, 1978 "Unable to allocate an external value for xattr " 1979 "%.*s safely. Leaking %u clusters and removing the " 1980 "entry\n", 1981 loc->xl_entry->xe_name_len, nameval_buf, 1982 new_clusters - orig_clusters); 1983 ocfs2_xa_remove_entry(loc); 1984 } else if (new_clusters > orig_clusters) 1985 mlog(ML_ERROR, 1986 "Unable to grow xattr %.*s safely. %u new clusters " 1987 "have been added, but the value will not be " 1988 "modified\n", 1989 loc->xl_entry->xe_name_len, nameval_buf, 1990 new_clusters - orig_clusters); 1991 } 1992 1993 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 1994 struct ocfs2_xattr_set_ctxt *ctxt) 1995 { 1996 int rc = 0; 1997 unsigned int orig_clusters; 1998 1999 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2000 orig_clusters = ocfs2_xa_value_clusters(loc); 2001 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2002 if (rc) { 2003 mlog_errno(rc); 2004 /* 2005 * Since this is remove, we can return 0 if 2006 * ocfs2_xa_cleanup_value_truncate() is going to 2007 * wipe the entry anyway. So we check the 2008 * cluster count as well. 2009 */ 2010 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2011 rc = 0; 2012 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2013 orig_clusters); 2014 if (rc) 2015 goto out; 2016 } 2017 } 2018 2019 ocfs2_xa_remove_entry(loc); 2020 2021 out: 2022 return rc; 2023 } 2024 2025 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2026 { 2027 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2028 char *nameval_buf; 2029 2030 nameval_buf = ocfs2_xa_offset_pointer(loc, 2031 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2032 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2033 } 2034 2035 /* 2036 * Take an existing entry and make it ready for the new value. 
/*
 * Take an existing entry and make it ready for the new value.  This
 * won't allocate space, but it may free space.  It should be ready for
 * ocfs2_xa_prepare_entry() to finish the work.
 *
 * The four combinations of old/new storage are handled as follows:
 *   local    -> local:    zero the old value bytes.
 *   local    -> external: zero the old value bytes, install a value root.
 *   external -> local:    truncate the tree to 0, then zero the root.
 *   external -> external: truncate only if the new value is smaller.
 *
 * Returns 0 on success with xe_value_size and the local flag updated.
 * On a truncate failure the error is returned after
 * ocfs2_xa_cleanup_value_truncate() has run, and the size and flag are
 * left untouched.
 */
static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	unsigned int orig_clusters;
	char *nameval_buf;
	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;

	/* Reuse only makes sense if the padded name sizes match. */
	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
	       name_size);

	nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));
	if (xe_local) {
		/* Old value is inline: clear everything after the name. */
		memset(nameval_buf + name_size, 0,
		       namevalue_size_xe(loc->xl_entry) - name_size);
		if (!xi_local)
			ocfs2_xa_install_value_root(loc);
	} else {
		/* Remember the tree size so cleanup can tell what changed. */
		orig_clusters = ocfs2_xa_value_clusters(loc);
		if (xi_local) {
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc < 0)
				mlog_errno(rc);
			else
				memset(nameval_buf + name_size, 0,
				       namevalue_size_xe(loc->xl_entry) -
				       name_size);
		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
			   xi->xi_value_len) {
			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
						     ctxt);
			if (rc < 0)
				mlog_errno(rc);
		}

		/* Any non-zero rc goes through cleanup, logged or not. */
		if (rc) {
			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
							orig_clusters);
			goto out;
		}
	}

	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	ocfs2_xattr_set_local(loc->xl_entry, xi_local);

out:
	return rc;
}
/*
 * Prepares loc->xl_entry to receive the new xattr.  This includes
 * properly setting up the name+value pair region.  If loc->xl_entry
 * already exists, it will take care of modifying it appropriately.
 *
 * Note that this modifies the data.  You did journal_access already,
 * right?
 *
 * Three paths lead to the value allocation at alloc_value:
 *   - an existing entry that can be reused (ocfs2_xa_reuse_entry());
 *   - an existing entry that cannot: its external value is truncated
 *     to 0, its name+value pair is wiped, and a new pair is added;
 *   - no existing entry: a new entry is added first.
 *
 * Returns 0 on success or a negative error.  A failure to grow the
 * external value also sets ctxt->set_abort.  Error paths that went
 * through ocfs2_xa_cleanup_value_truncate() may leave loc->xl_entry
 * NULL, because that helper can remove the entry.
 */
static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
				  struct ocfs2_xattr_info *xi,
				  u32 name_hash,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;
	__le64 orig_value_size = 0;

	rc = ocfs2_xa_check_space(loc, xi);
	if (rc)
		goto out;

	if (loc->xl_entry) {
		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
			/* Saved so a failed grow below can restore it. */
			orig_value_size = loc->xl_entry->xe_value_size;
			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
			if (rc)
				goto out;
			goto alloc_value;
		}

		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
			orig_clusters = ocfs2_xa_value_clusters(loc);
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc) {
				mlog_errno(rc);
				ocfs2_xa_cleanup_value_truncate(loc,
								"overwriting",
								orig_clusters);
				goto out;
			}
		}
		ocfs2_xa_wipe_namevalue(loc);
	} else
		ocfs2_xa_add_entry(loc, name_hash);

	/*
	 * If we get here, we have a blank entry.  Fill it.  We grow our
	 * name+value pair back from the end.
	 */
	ocfs2_xa_add_namevalue(loc, xi);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xa_install_value_root(loc);

alloc_value:
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
		if (rc < 0) {
			ctxt->set_abort = 1;
			ocfs2_xa_cleanup_value_truncate(loc, "growing",
							orig_clusters);
			/*
			 * If we were growing an existing value,
			 * ocfs2_xa_cleanup_value_truncate() won't remove
			 * the entry. We need to restore the original value
			 * size.
			 */
			if (loc->xl_entry) {
				BUG_ON(!orig_value_size);
				loc->xl_entry->xe_value_size = orig_value_size;
			}
			mlog_errno(rc);
		}
	}

out:
	return rc;
}
This will skip 2174 * values that are stored externally. Their tree roots were set up 2175 * by ocfs2_xa_prepare_entry(). 2176 */ 2177 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2178 struct ocfs2_xattr_info *xi, 2179 struct ocfs2_xattr_set_ctxt *ctxt) 2180 { 2181 int rc = 0; 2182 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2183 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2184 char *nameval_buf; 2185 struct ocfs2_xattr_value_buf vb; 2186 2187 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2188 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2189 ocfs2_xa_fill_value_buf(loc, &vb); 2190 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2191 ctxt->handle, &vb, 2192 xi->xi_value, 2193 xi->xi_value_len); 2194 } else 2195 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2196 2197 return rc; 2198 } 2199 2200 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2201 struct ocfs2_xattr_info *xi, 2202 struct ocfs2_xattr_set_ctxt *ctxt) 2203 { 2204 int ret; 2205 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2206 xi->xi_name_len); 2207 2208 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2209 OCFS2_JOURNAL_ACCESS_WRITE); 2210 if (ret) { 2211 mlog_errno(ret); 2212 goto out; 2213 } 2214 2215 /* 2216 * From here on out, everything is going to modify the buffer a 2217 * little. Errors are going to leave the xattr header in a 2218 * sane state. Thus, even with errors we dirty the sucker. 
2219 */ 2220 2221 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2222 if (!xi->xi_value) { 2223 ret = ocfs2_xa_remove(loc, ctxt); 2224 goto out_dirty; 2225 } 2226 2227 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2228 if (ret) { 2229 if (ret != -ENOSPC) 2230 mlog_errno(ret); 2231 goto out_dirty; 2232 } 2233 2234 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2235 if (ret) 2236 mlog_errno(ret); 2237 2238 out_dirty: 2239 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2240 2241 out: 2242 return ret; 2243 } 2244 2245 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2246 struct inode *inode, 2247 struct buffer_head *bh, 2248 struct ocfs2_xattr_entry *entry) 2249 { 2250 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2251 2252 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2253 2254 loc->xl_inode = inode; 2255 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2256 loc->xl_storage = bh; 2257 loc->xl_entry = entry; 2258 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2259 loc->xl_header = 2260 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2261 loc->xl_size); 2262 } 2263 2264 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2265 struct inode *inode, 2266 struct buffer_head *bh, 2267 struct ocfs2_xattr_entry *entry) 2268 { 2269 struct ocfs2_xattr_block *xb = 2270 (struct ocfs2_xattr_block *)bh->b_data; 2271 2272 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2273 2274 loc->xl_inode = inode; 2275 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2276 loc->xl_storage = bh; 2277 loc->xl_header = &(xb->xb_attrs.xb_header); 2278 loc->xl_entry = entry; 2279 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2280 xb_attrs.xb_header); 2281 } 2282 2283 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2284 struct ocfs2_xattr_bucket *bucket, 2285 struct ocfs2_xattr_entry *entry) 2286 { 2287 loc->xl_inode = bucket->bu_inode; 2288 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2289 loc->xl_storage = bucket; 2290 loc->xl_header = bucket_xh(bucket); 2291 loc->xl_entry = entry; 2292 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2293 } 2294 2295 /* 2296 * In xattr remove, if it is stored outside and refcounted, we may have 2297 * the chance to split the refcount tree. So need the allocators. 2298 */ 2299 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2300 struct ocfs2_xattr_value_root *xv, 2301 struct ocfs2_caching_info *ref_ci, 2302 struct buffer_head *ref_root_bh, 2303 struct ocfs2_alloc_context **meta_ac, 2304 int *ref_credits) 2305 { 2306 int ret, meta_add = 0; 2307 u32 p_cluster, num_clusters; 2308 unsigned int ext_flags; 2309 2310 *ref_credits = 0; 2311 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2312 &num_clusters, 2313 &xv->xr_list, 2314 &ext_flags); 2315 if (ret) { 2316 mlog_errno(ret); 2317 goto out; 2318 } 2319 2320 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2321 goto out; 2322 2323 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2324 ref_root_bh, xv, 2325 &meta_add, ref_credits); 2326 if (ret) { 2327 mlog_errno(ret); 2328 goto out; 2329 } 2330 2331 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2332 meta_add, meta_ac); 2333 if (ret) 2334 mlog_errno(ret); 2335 2336 out: 2337 return ret; 2338 } 2339 2340 static int ocfs2_remove_value_outside(struct inode*inode, 2341 struct ocfs2_xattr_value_buf *vb, 2342 struct ocfs2_xattr_header *header, 2343 struct ocfs2_caching_info *ref_ci, 2344 struct buffer_head *ref_root_bh) 2345 { 2346 int ret = 0, i, ref_credits; 2347 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2348 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2349 void *val; 2350 2351 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2352 2353 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2354 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2355 2356 if (ocfs2_xattr_is_local(entry)) 2357 continue; 2358 2359 val = (void *)header + 2360 
le16_to_cpu(entry->xe_name_offset); 2361 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2362 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2363 2364 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2365 ref_ci, ref_root_bh, 2366 &ctxt.meta_ac, 2367 &ref_credits); 2368 2369 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2370 ocfs2_remove_extent_credits(osb->sb)); 2371 if (IS_ERR(ctxt.handle)) { 2372 ret = PTR_ERR(ctxt.handle); 2373 mlog_errno(ret); 2374 break; 2375 } 2376 2377 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2378 2379 ocfs2_commit_trans(osb, ctxt.handle); 2380 if (ctxt.meta_ac) { 2381 ocfs2_free_alloc_context(ctxt.meta_ac); 2382 ctxt.meta_ac = NULL; 2383 } 2384 2385 if (ret < 0) { 2386 mlog_errno(ret); 2387 break; 2388 } 2389 2390 } 2391 2392 if (ctxt.meta_ac) 2393 ocfs2_free_alloc_context(ctxt.meta_ac); 2394 ocfs2_schedule_truncate_log_flush(osb, 1); 2395 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2396 return ret; 2397 } 2398 2399 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2400 struct buffer_head *di_bh, 2401 struct ocfs2_caching_info *ref_ci, 2402 struct buffer_head *ref_root_bh) 2403 { 2404 2405 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2406 struct ocfs2_xattr_header *header; 2407 int ret; 2408 struct ocfs2_xattr_value_buf vb = { 2409 .vb_bh = di_bh, 2410 .vb_access = ocfs2_journal_access_di, 2411 }; 2412 2413 header = (struct ocfs2_xattr_header *) 2414 ((void *)di + inode->i_sb->s_blocksize - 2415 le16_to_cpu(di->i_xattr_inline_size)); 2416 2417 ret = ocfs2_remove_value_outside(inode, &vb, header, 2418 ref_ci, ref_root_bh); 2419 2420 return ret; 2421 } 2422 2423 struct ocfs2_rm_xattr_bucket_para { 2424 struct ocfs2_caching_info *ref_ci; 2425 struct buffer_head *ref_root_bh; 2426 }; 2427 2428 static int ocfs2_xattr_block_remove(struct inode *inode, 2429 struct buffer_head *blk_bh, 2430 struct ocfs2_caching_info *ref_ci, 2431 struct buffer_head *ref_root_bh) 2432 { 2433 struct ocfs2_xattr_block 
*xb; 2434 int ret = 0; 2435 struct ocfs2_xattr_value_buf vb = { 2436 .vb_bh = blk_bh, 2437 .vb_access = ocfs2_journal_access_xb, 2438 }; 2439 struct ocfs2_rm_xattr_bucket_para args = { 2440 .ref_ci = ref_ci, 2441 .ref_root_bh = ref_root_bh, 2442 }; 2443 2444 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2445 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2446 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2447 ret = ocfs2_remove_value_outside(inode, &vb, header, 2448 ref_ci, ref_root_bh); 2449 } else 2450 ret = ocfs2_iterate_xattr_index_block(inode, 2451 blk_bh, 2452 ocfs2_rm_xattr_cluster, 2453 &args); 2454 2455 return ret; 2456 } 2457 2458 static int ocfs2_xattr_free_block(struct inode *inode, 2459 u64 block, 2460 struct ocfs2_caching_info *ref_ci, 2461 struct buffer_head *ref_root_bh) 2462 { 2463 struct inode *xb_alloc_inode; 2464 struct buffer_head *xb_alloc_bh = NULL; 2465 struct buffer_head *blk_bh = NULL; 2466 struct ocfs2_xattr_block *xb; 2467 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2468 handle_t *handle; 2469 int ret = 0; 2470 u64 blk, bg_blkno; 2471 u16 bit; 2472 2473 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2474 if (ret < 0) { 2475 mlog_errno(ret); 2476 goto out; 2477 } 2478 2479 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2480 if (ret < 0) { 2481 mlog_errno(ret); 2482 goto out; 2483 } 2484 2485 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2486 blk = le64_to_cpu(xb->xb_blkno); 2487 bit = le16_to_cpu(xb->xb_suballoc_bit); 2488 if (xb->xb_suballoc_loc) 2489 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2490 else 2491 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2492 2493 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2494 EXTENT_ALLOC_SYSTEM_INODE, 2495 le16_to_cpu(xb->xb_suballoc_slot)); 2496 if (!xb_alloc_inode) { 2497 ret = -ENOMEM; 2498 mlog_errno(ret); 2499 goto out; 2500 } 2501 mutex_lock(&xb_alloc_inode->i_mutex); 2502 2503 ret = ocfs2_inode_lock(xb_alloc_inode, 
&xb_alloc_bh, 1); 2504 if (ret < 0) { 2505 mlog_errno(ret); 2506 goto out_mutex; 2507 } 2508 2509 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2510 if (IS_ERR(handle)) { 2511 ret = PTR_ERR(handle); 2512 mlog_errno(ret); 2513 goto out_unlock; 2514 } 2515 2516 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2517 bit, bg_blkno, 1); 2518 if (ret < 0) 2519 mlog_errno(ret); 2520 2521 ocfs2_commit_trans(osb, handle); 2522 out_unlock: 2523 ocfs2_inode_unlock(xb_alloc_inode, 1); 2524 brelse(xb_alloc_bh); 2525 out_mutex: 2526 mutex_unlock(&xb_alloc_inode->i_mutex); 2527 iput(xb_alloc_inode); 2528 out: 2529 brelse(blk_bh); 2530 return ret; 2531 } 2532 2533 /* 2534 * ocfs2_xattr_remove() 2535 * 2536 * Free extended attribute resources associated with this inode. 2537 */ 2538 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2539 { 2540 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2541 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2542 struct ocfs2_refcount_tree *ref_tree = NULL; 2543 struct buffer_head *ref_root_bh = NULL; 2544 struct ocfs2_caching_info *ref_ci = NULL; 2545 handle_t *handle; 2546 int ret; 2547 2548 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2549 return 0; 2550 2551 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2552 return 0; 2553 2554 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { 2555 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2556 le64_to_cpu(di->i_refcount_loc), 2557 1, &ref_tree, &ref_root_bh); 2558 if (ret) { 2559 mlog_errno(ret); 2560 goto out; 2561 } 2562 ref_ci = &ref_tree->rf_ci; 2563 2564 } 2565 2566 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2567 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2568 ref_ci, ref_root_bh); 2569 if (ret < 0) { 2570 mlog_errno(ret); 2571 goto out; 2572 } 2573 } 2574 2575 if (di->i_xattr_loc) { 2576 ret = ocfs2_xattr_free_block(inode, 2577 le64_to_cpu(di->i_xattr_loc), 2578 ref_ci, ref_root_bh); 2579 if (ret < 0) 
{ 2580 mlog_errno(ret); 2581 goto out; 2582 } 2583 } 2584 2585 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2586 OCFS2_INODE_UPDATE_CREDITS); 2587 if (IS_ERR(handle)) { 2588 ret = PTR_ERR(handle); 2589 mlog_errno(ret); 2590 goto out; 2591 } 2592 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2593 OCFS2_JOURNAL_ACCESS_WRITE); 2594 if (ret) { 2595 mlog_errno(ret); 2596 goto out_commit; 2597 } 2598 2599 di->i_xattr_loc = 0; 2600 2601 spin_lock(&oi->ip_lock); 2602 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2603 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2604 spin_unlock(&oi->ip_lock); 2605 2606 ocfs2_journal_dirty(handle, di_bh); 2607 out_commit: 2608 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2609 out: 2610 if (ref_tree) 2611 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2612 brelse(ref_root_bh); 2613 return ret; 2614 } 2615 2616 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2617 struct ocfs2_dinode *di) 2618 { 2619 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2620 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2621 int free; 2622 2623 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2624 return 0; 2625 2626 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2627 struct ocfs2_inline_data *idata = &di->id2.i_data; 2628 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2629 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2630 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2631 le64_to_cpu(di->i_size); 2632 } else { 2633 struct ocfs2_extent_list *el = &di->id2.i_list; 2634 free = (le16_to_cpu(el->l_count) - 2635 le16_to_cpu(el->l_next_free_rec)) * 2636 sizeof(struct ocfs2_extent_rec); 2637 } 2638 if (free >= xattrsize) 2639 return 1; 2640 2641 return 0; 2642 } 2643 2644 /* 2645 * ocfs2_xattr_ibody_find() 2646 * 2647 * Find extended attribute in inode block and 2648 * fill search info into struct ocfs2_xattr_search. 
2649 */ 2650 static int ocfs2_xattr_ibody_find(struct inode *inode, 2651 int name_index, 2652 const char *name, 2653 struct ocfs2_xattr_search *xs) 2654 { 2655 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2656 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2657 int ret; 2658 int has_space = 0; 2659 2660 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2661 return 0; 2662 2663 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2664 down_read(&oi->ip_alloc_sem); 2665 has_space = ocfs2_xattr_has_space_inline(inode, di); 2666 up_read(&oi->ip_alloc_sem); 2667 if (!has_space) 2668 return 0; 2669 } 2670 2671 xs->xattr_bh = xs->inode_bh; 2672 xs->end = (void *)di + inode->i_sb->s_blocksize; 2673 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2674 xs->header = (struct ocfs2_xattr_header *) 2675 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2676 else 2677 xs->header = (struct ocfs2_xattr_header *) 2678 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2679 xs->base = (void *)xs->header; 2680 xs->here = xs->header->xh_entries; 2681 2682 /* Find the named attribute. 
*/ 2683 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2684 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2685 if (ret && ret != -ENODATA) 2686 return ret; 2687 xs->not_found = ret; 2688 } 2689 2690 return 0; 2691 } 2692 2693 static int ocfs2_xattr_ibody_init(struct inode *inode, 2694 struct buffer_head *di_bh, 2695 struct ocfs2_xattr_set_ctxt *ctxt) 2696 { 2697 int ret; 2698 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2699 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2700 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2701 unsigned int xattrsize = osb->s_xattr_inline_size; 2702 2703 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2704 ret = -ENOSPC; 2705 goto out; 2706 } 2707 2708 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2709 OCFS2_JOURNAL_ACCESS_WRITE); 2710 if (ret) { 2711 mlog_errno(ret); 2712 goto out; 2713 } 2714 2715 /* 2716 * Adjust extent record count or inline data size 2717 * to reserve space for extended attribute. 2718 */ 2719 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2720 struct ocfs2_inline_data *idata = &di->id2.i_data; 2721 le16_add_cpu(&idata->id_count, -xattrsize); 2722 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2723 struct ocfs2_extent_list *el = &di->id2.i_list; 2724 le16_add_cpu(&el->l_count, -(xattrsize / 2725 sizeof(struct ocfs2_extent_rec))); 2726 } 2727 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2728 2729 spin_lock(&oi->ip_lock); 2730 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2731 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2732 spin_unlock(&oi->ip_lock); 2733 2734 ocfs2_journal_dirty(ctxt->handle, di_bh); 2735 2736 out: 2737 return ret; 2738 } 2739 2740 /* 2741 * ocfs2_xattr_ibody_set() 2742 * 2743 * Set, replace or remove an extended attribute into inode block. 
2744 * 2745 */ 2746 static int ocfs2_xattr_ibody_set(struct inode *inode, 2747 struct ocfs2_xattr_info *xi, 2748 struct ocfs2_xattr_search *xs, 2749 struct ocfs2_xattr_set_ctxt *ctxt) 2750 { 2751 int ret; 2752 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2753 struct ocfs2_xa_loc loc; 2754 2755 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2756 return -ENOSPC; 2757 2758 down_write(&oi->ip_alloc_sem); 2759 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2760 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2761 if (ret) { 2762 if (ret != -ENOSPC) 2763 mlog_errno(ret); 2764 goto out; 2765 } 2766 } 2767 2768 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2769 xs->not_found ? NULL : xs->here); 2770 ret = ocfs2_xa_set(&loc, xi, ctxt); 2771 if (ret) { 2772 if (ret != -ENOSPC) 2773 mlog_errno(ret); 2774 goto out; 2775 } 2776 xs->here = loc.xl_entry; 2777 2778 out: 2779 up_write(&oi->ip_alloc_sem); 2780 2781 return ret; 2782 } 2783 2784 /* 2785 * ocfs2_xattr_block_find() 2786 * 2787 * Find extended attribute in external block and 2788 * fill search info into struct ocfs2_xattr_search. 
2789 */ 2790 static int ocfs2_xattr_block_find(struct inode *inode, 2791 int name_index, 2792 const char *name, 2793 struct ocfs2_xattr_search *xs) 2794 { 2795 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2796 struct buffer_head *blk_bh = NULL; 2797 struct ocfs2_xattr_block *xb; 2798 int ret = 0; 2799 2800 if (!di->i_xattr_loc) 2801 return ret; 2802 2803 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2804 &blk_bh); 2805 if (ret < 0) { 2806 mlog_errno(ret); 2807 return ret; 2808 } 2809 2810 xs->xattr_bh = blk_bh; 2811 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2812 2813 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2814 xs->header = &xb->xb_attrs.xb_header; 2815 xs->base = (void *)xs->header; 2816 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2817 xs->here = xs->header->xh_entries; 2818 2819 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2820 } else 2821 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2822 name_index, 2823 name, xs); 2824 2825 if (ret && ret != -ENODATA) { 2826 xs->xattr_bh = NULL; 2827 goto cleanup; 2828 } 2829 xs->not_found = ret; 2830 return 0; 2831 cleanup: 2832 brelse(blk_bh); 2833 2834 return ret; 2835 } 2836 2837 static int ocfs2_create_xattr_block(struct inode *inode, 2838 struct buffer_head *inode_bh, 2839 struct ocfs2_xattr_set_ctxt *ctxt, 2840 int indexed, 2841 struct buffer_head **ret_bh) 2842 { 2843 int ret; 2844 u16 suballoc_bit_start; 2845 u32 num_got; 2846 u64 suballoc_loc, first_blkno; 2847 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2848 struct buffer_head *new_bh = NULL; 2849 struct ocfs2_xattr_block *xblk; 2850 2851 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2852 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2853 if (ret < 0) { 2854 mlog_errno(ret); 2855 goto end; 2856 } 2857 2858 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2859 &suballoc_loc, &suballoc_bit_start, 2860 &num_got, &first_blkno); 2861 if 
(ret < 0) { 2862 mlog_errno(ret); 2863 goto end; 2864 } 2865 2866 new_bh = sb_getblk(inode->i_sb, first_blkno); 2867 if (!new_bh) { 2868 ret = -ENOMEM; 2869 mlog_errno(ret); 2870 goto end; 2871 } 2872 2873 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2874 2875 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2876 new_bh, 2877 OCFS2_JOURNAL_ACCESS_CREATE); 2878 if (ret < 0) { 2879 mlog_errno(ret); 2880 goto end; 2881 } 2882 2883 /* Initialize ocfs2_xattr_block */ 2884 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2885 memset(xblk, 0, inode->i_sb->s_blocksize); 2886 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2887 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2888 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2889 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2890 xblk->xb_fs_generation = 2891 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2892 xblk->xb_blkno = cpu_to_le64(first_blkno); 2893 if (indexed) { 2894 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2895 xr->xt_clusters = cpu_to_le32(1); 2896 xr->xt_last_eb_blk = 0; 2897 xr->xt_list.l_tree_depth = 0; 2898 xr->xt_list.l_count = cpu_to_le16( 2899 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2900 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2901 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2902 } 2903 ocfs2_journal_dirty(ctxt->handle, new_bh); 2904 2905 /* Add it to the inode */ 2906 di->i_xattr_loc = cpu_to_le64(first_blkno); 2907 2908 spin_lock(&OCFS2_I(inode)->ip_lock); 2909 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2910 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2911 spin_unlock(&OCFS2_I(inode)->ip_lock); 2912 2913 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2914 2915 *ret_bh = new_bh; 2916 new_bh = NULL; 2917 2918 end: 2919 brelse(new_bh); 2920 return ret; 2921 } 2922 2923 /* 2924 * ocfs2_xattr_block_set() 2925 * 2926 * Set, replace or remove an extended attribute into 
external block.
 *
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct buffer_head *created_bh = NULL;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xa_loc loc;
	int status;

	if (!xs->xattr_bh) {
		/* First external xattr: create a flat (non-indexed) block. */
		status = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
						  0, &created_bh);
		if (status) {
			mlog_errno(status);
			return status;
		}

		xs->xattr_bh = created_bh;
		xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xb + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	} else {
		xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	}

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		status = ocfs2_xa_set(&loc, xi, ctxt);
		if (!status) {
			xs->here = loc.xl_entry;
		} else {
			/* Only a clean -ENOSPC may fall back to an index. */
			if ((status != -ENOSPC) || ctxt->set_abort)
				return status;

			status = ocfs2_xattr_create_index_block(inode, xs,
								ctxt);
			if (status)
				return status;
		}
	}

	/* The flags are re-read: the block may just have become indexed. */
	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
		status = ocfs2_xattr_set_entry_index_block(inode, xi, xs,
							   ctxt);

	return status;
}

/* Check whether the new xattr can be inserted into the inode.
*/ 2980 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 2981 struct ocfs2_xattr_info *xi, 2982 struct ocfs2_xattr_search *xs) 2983 { 2984 struct ocfs2_xattr_entry *last; 2985 int free, i; 2986 size_t min_offs = xs->end - xs->base; 2987 2988 if (!xs->header) 2989 return 0; 2990 2991 last = xs->header->xh_entries; 2992 2993 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 2994 size_t offs = le16_to_cpu(last->xe_name_offset); 2995 if (offs < min_offs) 2996 min_offs = offs; 2997 last += 1; 2998 } 2999 3000 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3001 if (free < 0) 3002 return 0; 3003 3004 BUG_ON(!xs->not_found); 3005 3006 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3007 return 1; 3008 3009 return 0; 3010 } 3011 3012 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3013 struct ocfs2_dinode *di, 3014 struct ocfs2_xattr_info *xi, 3015 struct ocfs2_xattr_search *xis, 3016 struct ocfs2_xattr_search *xbs, 3017 int *clusters_need, 3018 int *meta_need, 3019 int *credits_need) 3020 { 3021 int ret = 0, old_in_xb = 0; 3022 int clusters_add = 0, meta_add = 0, credits = 0; 3023 struct buffer_head *bh = NULL; 3024 struct ocfs2_xattr_block *xb = NULL; 3025 struct ocfs2_xattr_entry *xe = NULL; 3026 struct ocfs2_xattr_value_root *xv = NULL; 3027 char *base = NULL; 3028 int name_offset, name_len = 0; 3029 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3030 xi->xi_value_len); 3031 u64 value_size; 3032 3033 /* 3034 * Calculate the clusters we need to write. 3035 * No matter whether we replace an old one or add a new one, 3036 * we need this for writing. 
3037 */ 3038 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3039 credits += new_clusters * 3040 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3041 3042 if (xis->not_found && xbs->not_found) { 3043 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3044 3045 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3046 clusters_add += new_clusters; 3047 credits += ocfs2_calc_extend_credits(inode->i_sb, 3048 &def_xv.xv.xr_list); 3049 } 3050 3051 goto meta_guess; 3052 } 3053 3054 if (!xis->not_found) { 3055 xe = xis->here; 3056 name_offset = le16_to_cpu(xe->xe_name_offset); 3057 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3058 base = xis->base; 3059 credits += OCFS2_INODE_UPDATE_CREDITS; 3060 } else { 3061 int i, block_off = 0; 3062 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3063 xe = xbs->here; 3064 name_offset = le16_to_cpu(xe->xe_name_offset); 3065 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3066 i = xbs->here - xbs->header->xh_entries; 3067 old_in_xb = 1; 3068 3069 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3070 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3071 bucket_xh(xbs->bucket), 3072 i, &block_off, 3073 &name_offset); 3074 base = bucket_block(xbs->bucket, block_off); 3075 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3076 } else { 3077 base = xbs->base; 3078 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3079 } 3080 } 3081 3082 /* 3083 * delete a xattr doesn't need metadata and cluster allocation. 3084 * so just calculate the credits and return. 3085 * 3086 * The credits for removing the value tree will be extended 3087 * by ocfs2_remove_extent itself. 3088 */ 3089 if (!xi->xi_value) { 3090 if (!ocfs2_xattr_is_local(xe)) 3091 credits += ocfs2_remove_extent_credits(inode->i_sb); 3092 3093 goto out; 3094 } 3095 3096 /* do cluster allocation guess first. 
*/ 3097 value_size = le64_to_cpu(xe->xe_value_size); 3098 3099 if (old_in_xb) { 3100 /* 3101 * In xattr set, we always try to set the xe in inode first, 3102 * so if it can be inserted into inode successfully, the old 3103 * one will be removed from the xattr block, and this xattr 3104 * will be inserted into inode as a new xattr in inode. 3105 */ 3106 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3107 clusters_add += new_clusters; 3108 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3109 OCFS2_INODE_UPDATE_CREDITS; 3110 if (!ocfs2_xattr_is_local(xe)) 3111 credits += ocfs2_calc_extend_credits( 3112 inode->i_sb, 3113 &def_xv.xv.xr_list); 3114 goto out; 3115 } 3116 } 3117 3118 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3119 /* the new values will be stored outside. */ 3120 u32 old_clusters = 0; 3121 3122 if (!ocfs2_xattr_is_local(xe)) { 3123 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3124 value_size); 3125 xv = (struct ocfs2_xattr_value_root *) 3126 (base + name_offset + name_len); 3127 value_size = OCFS2_XATTR_ROOT_SIZE; 3128 } else 3129 xv = &def_xv.xv; 3130 3131 if (old_clusters >= new_clusters) { 3132 credits += ocfs2_remove_extent_credits(inode->i_sb); 3133 goto out; 3134 } else { 3135 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3136 clusters_add += new_clusters - old_clusters; 3137 credits += ocfs2_calc_extend_credits(inode->i_sb, 3138 &xv->xr_list); 3139 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3140 goto out; 3141 } 3142 } else { 3143 /* 3144 * Now the new value will be stored inside. So if the new 3145 * value is smaller than the size of value root or the old 3146 * value, we don't need any allocation, otherwise we have 3147 * to guess metadata allocation. 3148 */ 3149 if ((ocfs2_xattr_is_local(xe) && 3150 (value_size >= xi->xi_value_len)) || 3151 (!ocfs2_xattr_is_local(xe) && 3152 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3153 goto out; 3154 } 3155 3156 meta_guess: 3157 /* calculate metadata allocation. 
*/ 3158 if (di->i_xattr_loc) { 3159 if (!xbs->xattr_bh) { 3160 ret = ocfs2_read_xattr_block(inode, 3161 le64_to_cpu(di->i_xattr_loc), 3162 &bh); 3163 if (ret) { 3164 mlog_errno(ret); 3165 goto out; 3166 } 3167 3168 xb = (struct ocfs2_xattr_block *)bh->b_data; 3169 } else 3170 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3171 3172 /* 3173 * If there is already an xattr tree, good, we can calculate 3174 * like other b-trees. Otherwise we may have the chance of 3175 * create a tree, the credit calculation is borrowed from 3176 * ocfs2_calc_extend_credits with root_el = NULL. And the 3177 * new tree will be cluster based, so no meta is needed. 3178 */ 3179 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3180 struct ocfs2_extent_list *el = 3181 &xb->xb_attrs.xb_root.xt_list; 3182 meta_add += ocfs2_extend_meta_needed(el); 3183 credits += ocfs2_calc_extend_credits(inode->i_sb, 3184 el); 3185 } else 3186 credits += OCFS2_SUBALLOC_ALLOC + 1; 3187 3188 /* 3189 * This cluster will be used either for new bucket or for 3190 * new xattr block. 3191 * If the cluster size is the same as the bucket size, one 3192 * more is needed since we may need to extend the bucket 3193 * also. 
3194 */ 3195 clusters_add += 1; 3196 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3197 if (OCFS2_XATTR_BUCKET_SIZE == 3198 OCFS2_SB(inode->i_sb)->s_clustersize) { 3199 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3200 clusters_add += 1; 3201 } 3202 } else { 3203 meta_add += 1; 3204 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3205 } 3206 out: 3207 if (clusters_need) 3208 *clusters_need = clusters_add; 3209 if (meta_need) 3210 *meta_need = meta_add; 3211 if (credits_need) 3212 *credits_need = credits; 3213 brelse(bh); 3214 return ret; 3215 } 3216 3217 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3218 struct ocfs2_dinode *di, 3219 struct ocfs2_xattr_info *xi, 3220 struct ocfs2_xattr_search *xis, 3221 struct ocfs2_xattr_search *xbs, 3222 struct ocfs2_xattr_set_ctxt *ctxt, 3223 int extra_meta, 3224 int *credits) 3225 { 3226 int clusters_add, meta_add, ret; 3227 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3228 3229 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3230 3231 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3232 3233 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3234 &clusters_add, &meta_add, credits); 3235 if (ret) { 3236 mlog_errno(ret); 3237 return ret; 3238 } 3239 3240 meta_add += extra_meta; 3241 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3242 clusters_add, *credits); 3243 3244 if (meta_add) { 3245 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3246 &ctxt->meta_ac); 3247 if (ret) { 3248 mlog_errno(ret); 3249 goto out; 3250 } 3251 } 3252 3253 if (clusters_add) { 3254 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3255 if (ret) 3256 mlog_errno(ret); 3257 } 3258 out: 3259 if (ret) { 3260 if (ctxt->meta_ac) { 3261 ocfs2_free_alloc_context(ctxt->meta_ac); 3262 ctxt->meta_ac = NULL; 3263 } 3264 3265 /* 3266 * We cannot have an error and a non null ctxt->data_ac. 
3267 */ 3268 } 3269 3270 return ret; 3271 } 3272 3273 static int __ocfs2_xattr_set_handle(struct inode *inode, 3274 struct ocfs2_dinode *di, 3275 struct ocfs2_xattr_info *xi, 3276 struct ocfs2_xattr_search *xis, 3277 struct ocfs2_xattr_search *xbs, 3278 struct ocfs2_xattr_set_ctxt *ctxt) 3279 { 3280 int ret = 0, credits, old_found; 3281 3282 if (!xi->xi_value) { 3283 /* Remove existing extended attribute */ 3284 if (!xis->not_found) 3285 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3286 else if (!xbs->not_found) 3287 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3288 } else { 3289 /* We always try to set extended attribute into inode first*/ 3290 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3291 if (!ret && !xbs->not_found) { 3292 /* 3293 * If succeed and that extended attribute existing in 3294 * external block, then we will remove it. 3295 */ 3296 xi->xi_value = NULL; 3297 xi->xi_value_len = 0; 3298 3299 old_found = xis->not_found; 3300 xis->not_found = -ENODATA; 3301 ret = ocfs2_calc_xattr_set_need(inode, 3302 di, 3303 xi, 3304 xis, 3305 xbs, 3306 NULL, 3307 NULL, 3308 &credits); 3309 xis->not_found = old_found; 3310 if (ret) { 3311 mlog_errno(ret); 3312 goto out; 3313 } 3314 3315 ret = ocfs2_extend_trans(ctxt->handle, credits); 3316 if (ret) { 3317 mlog_errno(ret); 3318 goto out; 3319 } 3320 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3321 } else if ((ret == -ENOSPC) && !ctxt->set_abort) { 3322 if (di->i_xattr_loc && !xbs->xattr_bh) { 3323 ret = ocfs2_xattr_block_find(inode, 3324 xi->xi_name_index, 3325 xi->xi_name, xbs); 3326 if (ret) 3327 goto out; 3328 3329 old_found = xis->not_found; 3330 xis->not_found = -ENODATA; 3331 ret = ocfs2_calc_xattr_set_need(inode, 3332 di, 3333 xi, 3334 xis, 3335 xbs, 3336 NULL, 3337 NULL, 3338 &credits); 3339 xis->not_found = old_found; 3340 if (ret) { 3341 mlog_errno(ret); 3342 goto out; 3343 } 3344 3345 ret = ocfs2_extend_trans(ctxt->handle, credits); 3346 if (ret) { 3347 mlog_errno(ret); 3348 goto out; 
3349 } 3350 } 3351 /* 3352 * If no space in inode, we will set extended attribute 3353 * into external block. 3354 */ 3355 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3356 if (ret) 3357 goto out; 3358 if (!xis->not_found) { 3359 /* 3360 * If succeed and that extended attribute 3361 * existing in inode, we will remove it. 3362 */ 3363 xi->xi_value = NULL; 3364 xi->xi_value_len = 0; 3365 xbs->not_found = -ENODATA; 3366 ret = ocfs2_calc_xattr_set_need(inode, 3367 di, 3368 xi, 3369 xis, 3370 xbs, 3371 NULL, 3372 NULL, 3373 &credits); 3374 if (ret) { 3375 mlog_errno(ret); 3376 goto out; 3377 } 3378 3379 ret = ocfs2_extend_trans(ctxt->handle, credits); 3380 if (ret) { 3381 mlog_errno(ret); 3382 goto out; 3383 } 3384 ret = ocfs2_xattr_ibody_set(inode, xi, 3385 xis, ctxt); 3386 } 3387 } 3388 } 3389 3390 if (!ret) { 3391 /* Update inode ctime. */ 3392 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 3393 xis->inode_bh, 3394 OCFS2_JOURNAL_ACCESS_WRITE); 3395 if (ret) { 3396 mlog_errno(ret); 3397 goto out; 3398 } 3399 3400 inode->i_ctime = CURRENT_TIME; 3401 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 3402 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 3403 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); 3404 } 3405 out: 3406 return ret; 3407 } 3408 3409 /* 3410 * This function only called duing creating inode 3411 * for init security/acl xattrs of the new inode. 3412 * All transanction credits have been reserved in mknod. 
3413 */ 3414 int ocfs2_xattr_set_handle(handle_t *handle, 3415 struct inode *inode, 3416 struct buffer_head *di_bh, 3417 int name_index, 3418 const char *name, 3419 const void *value, 3420 size_t value_len, 3421 int flags, 3422 struct ocfs2_alloc_context *meta_ac, 3423 struct ocfs2_alloc_context *data_ac) 3424 { 3425 struct ocfs2_dinode *di; 3426 int ret; 3427 3428 struct ocfs2_xattr_info xi = { 3429 .xi_name_index = name_index, 3430 .xi_name = name, 3431 .xi_name_len = strlen(name), 3432 .xi_value = value, 3433 .xi_value_len = value_len, 3434 }; 3435 3436 struct ocfs2_xattr_search xis = { 3437 .not_found = -ENODATA, 3438 }; 3439 3440 struct ocfs2_xattr_search xbs = { 3441 .not_found = -ENODATA, 3442 }; 3443 3444 struct ocfs2_xattr_set_ctxt ctxt = { 3445 .handle = handle, 3446 .meta_ac = meta_ac, 3447 .data_ac = data_ac, 3448 }; 3449 3450 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3451 return -EOPNOTSUPP; 3452 3453 /* 3454 * In extreme situation, may need xattr bucket when 3455 * block size is too small. And we have already reserved 3456 * the credits for bucket in mknod. 
3457 */ 3458 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3459 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3460 if (!xbs.bucket) { 3461 mlog_errno(-ENOMEM); 3462 return -ENOMEM; 3463 } 3464 } 3465 3466 xis.inode_bh = xbs.inode_bh = di_bh; 3467 di = (struct ocfs2_dinode *)di_bh->b_data; 3468 3469 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3470 3471 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3472 if (ret) 3473 goto cleanup; 3474 if (xis.not_found) { 3475 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3476 if (ret) 3477 goto cleanup; 3478 } 3479 3480 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3481 3482 cleanup: 3483 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3484 brelse(xbs.xattr_bh); 3485 ocfs2_xattr_bucket_free(xbs.bucket); 3486 3487 return ret; 3488 } 3489 3490 /* 3491 * ocfs2_xattr_set() 3492 * 3493 * Set, replace or remove an extended attribute for this inode. 3494 * value is NULL to remove an existing extended attribute, else either 3495 * create or replace an extended attribute. 
3496 */ 3497 int ocfs2_xattr_set(struct inode *inode, 3498 int name_index, 3499 const char *name, 3500 const void *value, 3501 size_t value_len, 3502 int flags) 3503 { 3504 struct buffer_head *di_bh = NULL; 3505 struct ocfs2_dinode *di; 3506 int ret, credits, ref_meta = 0, ref_credits = 0; 3507 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3508 struct inode *tl_inode = osb->osb_tl_inode; 3509 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3510 struct ocfs2_refcount_tree *ref_tree = NULL; 3511 3512 struct ocfs2_xattr_info xi = { 3513 .xi_name_index = name_index, 3514 .xi_name = name, 3515 .xi_name_len = strlen(name), 3516 .xi_value = value, 3517 .xi_value_len = value_len, 3518 }; 3519 3520 struct ocfs2_xattr_search xis = { 3521 .not_found = -ENODATA, 3522 }; 3523 3524 struct ocfs2_xattr_search xbs = { 3525 .not_found = -ENODATA, 3526 }; 3527 3528 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3529 return -EOPNOTSUPP; 3530 3531 /* 3532 * Only xbs will be used on indexed trees. xis doesn't need a 3533 * bucket. 3534 */ 3535 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3536 if (!xbs.bucket) { 3537 mlog_errno(-ENOMEM); 3538 return -ENOMEM; 3539 } 3540 3541 ret = ocfs2_inode_lock(inode, &di_bh, 1); 3542 if (ret < 0) { 3543 mlog_errno(ret); 3544 goto cleanup_nolock; 3545 } 3546 xis.inode_bh = xbs.inode_bh = di_bh; 3547 di = (struct ocfs2_dinode *)di_bh->b_data; 3548 3549 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3550 /* 3551 * Scan inode and external block to find the same name 3552 * extended attribute and collect search information. 
3553 */ 3554 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3555 if (ret) 3556 goto cleanup; 3557 if (xis.not_found) { 3558 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3559 if (ret) 3560 goto cleanup; 3561 } 3562 3563 if (xis.not_found && xbs.not_found) { 3564 ret = -ENODATA; 3565 if (flags & XATTR_REPLACE) 3566 goto cleanup; 3567 ret = 0; 3568 if (!value) 3569 goto cleanup; 3570 } else { 3571 ret = -EEXIST; 3572 if (flags & XATTR_CREATE) 3573 goto cleanup; 3574 } 3575 3576 /* Check whether the value is refcounted and do some preparation. */ 3577 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3578 (!xis.not_found || !xbs.not_found)) { 3579 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3580 &xis, &xbs, &ref_tree, 3581 &ref_meta, &ref_credits); 3582 if (ret) { 3583 mlog_errno(ret); 3584 goto cleanup; 3585 } 3586 } 3587 3588 mutex_lock(&tl_inode->i_mutex); 3589 3590 if (ocfs2_truncate_log_needs_flush(osb)) { 3591 ret = __ocfs2_flush_truncate_log(osb); 3592 if (ret < 0) { 3593 mutex_unlock(&tl_inode->i_mutex); 3594 mlog_errno(ret); 3595 goto cleanup; 3596 } 3597 } 3598 mutex_unlock(&tl_inode->i_mutex); 3599 3600 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3601 &xbs, &ctxt, ref_meta, &credits); 3602 if (ret) { 3603 mlog_errno(ret); 3604 goto cleanup; 3605 } 3606 3607 /* we need to update inode's ctime field, so add credit for it. 
*/ 3608 credits += OCFS2_INODE_UPDATE_CREDITS; 3609 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3610 if (IS_ERR(ctxt.handle)) { 3611 ret = PTR_ERR(ctxt.handle); 3612 mlog_errno(ret); 3613 goto out_free_ac; 3614 } 3615 3616 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3617 3618 ocfs2_commit_trans(osb, ctxt.handle); 3619 3620 out_free_ac: 3621 if (ctxt.data_ac) 3622 ocfs2_free_alloc_context(ctxt.data_ac); 3623 if (ctxt.meta_ac) 3624 ocfs2_free_alloc_context(ctxt.meta_ac); 3625 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3626 ocfs2_schedule_truncate_log_flush(osb, 1); 3627 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3628 3629 cleanup: 3630 if (ref_tree) 3631 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3632 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3633 if (!value && !ret) { 3634 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3635 if (ret) 3636 mlog_errno(ret); 3637 } 3638 ocfs2_inode_unlock(inode, 1); 3639 cleanup_nolock: 3640 brelse(di_bh); 3641 brelse(xbs.xattr_bh); 3642 ocfs2_xattr_bucket_free(xbs.bucket); 3643 3644 return ret; 3645 } 3646 3647 /* 3648 * Find the xattr extent rec which may contains name_hash. 3649 * e_cpos will be the first name hash of the xattr rec. 3650 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3651 */ 3652 static int ocfs2_xattr_get_rec(struct inode *inode, 3653 u32 name_hash, 3654 u64 *p_blkno, 3655 u32 *e_cpos, 3656 u32 *num_clusters, 3657 struct ocfs2_extent_list *el) 3658 { 3659 int ret = 0, i; 3660 struct buffer_head *eb_bh = NULL; 3661 struct ocfs2_extent_block *eb; 3662 struct ocfs2_extent_rec *rec = NULL; 3663 u64 e_blkno = 0; 3664 3665 if (el->l_tree_depth) { 3666 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3667 &eb_bh); 3668 if (ret) { 3669 mlog_errno(ret); 3670 goto out; 3671 } 3672 3673 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3674 el = &eb->h_list; 3675 3676 if (el->l_tree_depth) { 3677 ocfs2_error(inode->i_sb, 3678 "Inode %lu has non zero tree depth in " 3679 "xattr tree block %llu\n", inode->i_ino, 3680 (unsigned long long)eb_bh->b_blocknr); 3681 ret = -EROFS; 3682 goto out; 3683 } 3684 } 3685 3686 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3687 rec = &el->l_recs[i]; 3688 3689 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3690 e_blkno = le64_to_cpu(rec->e_blkno); 3691 break; 3692 } 3693 } 3694 3695 if (!e_blkno) { 3696 ocfs2_error(inode->i_sb, "Inode %lu has bad extent " 3697 "record (%u, %u, 0) in xattr", inode->i_ino, 3698 le32_to_cpu(rec->e_cpos), 3699 ocfs2_rec_clusters(el, rec)); 3700 ret = -EROFS; 3701 goto out; 3702 } 3703 3704 *p_blkno = le64_to_cpu(rec->e_blkno); 3705 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3706 if (e_cpos) 3707 *e_cpos = le32_to_cpu(rec->e_cpos); 3708 out: 3709 brelse(eb_bh); 3710 return ret; 3711 } 3712 3713 typedef int (xattr_bucket_func)(struct inode *inode, 3714 struct ocfs2_xattr_bucket *bucket, 3715 void *para); 3716 3717 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3718 struct ocfs2_xattr_bucket *bucket, 3719 int name_index, 3720 const char *name, 3721 u32 name_hash, 3722 u16 *xe_index, 3723 int *found) 3724 { 3725 int i, ret = 0, cmp = 1, block_off, new_offset; 3726 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3727 size_t name_len = 
strlen(name); 3728 struct ocfs2_xattr_entry *xe = NULL; 3729 char *xe_name; 3730 3731 /* 3732 * We don't use binary search in the bucket because there 3733 * may be multiple entries with the same name hash. 3734 */ 3735 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3736 xe = &xh->xh_entries[i]; 3737 3738 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3739 continue; 3740 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3741 break; 3742 3743 cmp = name_index - ocfs2_xattr_get_type(xe); 3744 if (!cmp) 3745 cmp = name_len - xe->xe_name_len; 3746 if (cmp) 3747 continue; 3748 3749 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3750 xh, 3751 i, 3752 &block_off, 3753 &new_offset); 3754 if (ret) { 3755 mlog_errno(ret); 3756 break; 3757 } 3758 3759 3760 xe_name = bucket_block(bucket, block_off) + new_offset; 3761 if (!memcmp(name, xe_name, name_len)) { 3762 *xe_index = i; 3763 *found = 1; 3764 ret = 0; 3765 break; 3766 } 3767 } 3768 3769 return ret; 3770 } 3771 3772 /* 3773 * Find the specified xattr entry in a series of buckets. 3774 * This series start from p_blkno and last for num_clusters. 3775 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3776 * the num of the valid buckets. 3777 * 3778 * Return the buffer_head this xattr should reside in. And if the xattr's 3779 * hash is in the gap of 2 buckets, return the lower bucket. 
3780 */ 3781 static int ocfs2_xattr_bucket_find(struct inode *inode, 3782 int name_index, 3783 const char *name, 3784 u32 name_hash, 3785 u64 p_blkno, 3786 u32 first_hash, 3787 u32 num_clusters, 3788 struct ocfs2_xattr_search *xs) 3789 { 3790 int ret, found = 0; 3791 struct ocfs2_xattr_header *xh = NULL; 3792 struct ocfs2_xattr_entry *xe = NULL; 3793 u16 index = 0; 3794 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3795 int low_bucket = 0, bucket, high_bucket; 3796 struct ocfs2_xattr_bucket *search; 3797 u32 last_hash; 3798 u64 blkno, lower_blkno = 0; 3799 3800 search = ocfs2_xattr_bucket_new(inode); 3801 if (!search) { 3802 ret = -ENOMEM; 3803 mlog_errno(ret); 3804 goto out; 3805 } 3806 3807 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3808 if (ret) { 3809 mlog_errno(ret); 3810 goto out; 3811 } 3812 3813 xh = bucket_xh(search); 3814 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3815 while (low_bucket <= high_bucket) { 3816 ocfs2_xattr_bucket_relse(search); 3817 3818 bucket = (low_bucket + high_bucket) / 2; 3819 blkno = p_blkno + bucket * blk_per_bucket; 3820 ret = ocfs2_read_xattr_bucket(search, blkno); 3821 if (ret) { 3822 mlog_errno(ret); 3823 goto out; 3824 } 3825 3826 xh = bucket_xh(search); 3827 xe = &xh->xh_entries[0]; 3828 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3829 high_bucket = bucket - 1; 3830 continue; 3831 } 3832 3833 /* 3834 * Check whether the hash of the last entry in our 3835 * bucket is larger than the search one. for an empty 3836 * bucket, the last one is also the first one. 3837 */ 3838 if (xh->xh_count) 3839 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3840 3841 last_hash = le32_to_cpu(xe->xe_name_hash); 3842 3843 /* record lower_blkno which may be the insert place. */ 3844 lower_blkno = blkno; 3845 3846 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3847 low_bucket = bucket + 1; 3848 continue; 3849 } 3850 3851 /* the searched xattr should reside in this bucket if exists. 
*/ 3852 ret = ocfs2_find_xe_in_bucket(inode, search, 3853 name_index, name, name_hash, 3854 &index, &found); 3855 if (ret) { 3856 mlog_errno(ret); 3857 goto out; 3858 } 3859 break; 3860 } 3861 3862 /* 3863 * Record the bucket we have found. 3864 * When the xattr's hash value is in the gap of 2 buckets, we will 3865 * always set it to the previous bucket. 3866 */ 3867 if (!lower_blkno) 3868 lower_blkno = p_blkno; 3869 3870 /* This should be in cache - we just read it during the search */ 3871 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3872 if (ret) { 3873 mlog_errno(ret); 3874 goto out; 3875 } 3876 3877 xs->header = bucket_xh(xs->bucket); 3878 xs->base = bucket_block(xs->bucket, 0); 3879 xs->end = xs->base + inode->i_sb->s_blocksize; 3880 3881 if (found) { 3882 xs->here = &xs->header->xh_entries[index]; 3883 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3884 name, name_index, name_hash, 3885 (unsigned long long)bucket_blkno(xs->bucket), 3886 index); 3887 } else 3888 ret = -ENODATA; 3889 3890 out: 3891 ocfs2_xattr_bucket_free(search); 3892 return ret; 3893 } 3894 3895 static int ocfs2_xattr_index_block_find(struct inode *inode, 3896 struct buffer_head *root_bh, 3897 int name_index, 3898 const char *name, 3899 struct ocfs2_xattr_search *xs) 3900 { 3901 int ret; 3902 struct ocfs2_xattr_block *xb = 3903 (struct ocfs2_xattr_block *)root_bh->b_data; 3904 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3905 struct ocfs2_extent_list *el = &xb_root->xt_list; 3906 u64 p_blkno = 0; 3907 u32 first_hash, num_clusters = 0; 3908 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3909 3910 if (le16_to_cpu(el->l_next_free_rec) == 0) 3911 return -ENODATA; 3912 3913 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3914 name, name_index, name_hash, 3915 (unsigned long long)root_bh->b_blocknr, 3916 -1); 3917 3918 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3919 &num_clusters, el); 3920 if (ret) { 
3921 mlog_errno(ret); 3922 goto out; 3923 } 3924 3925 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3926 3927 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3928 name, name_index, first_hash, 3929 (unsigned long long)p_blkno, 3930 num_clusters); 3931 3932 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3933 p_blkno, first_hash, num_clusters, xs); 3934 3935 out: 3936 return ret; 3937 } 3938 3939 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3940 u64 blkno, 3941 u32 clusters, 3942 xattr_bucket_func *func, 3943 void *para) 3944 { 3945 int i, ret = 0; 3946 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3947 u32 num_buckets = clusters * bpc; 3948 struct ocfs2_xattr_bucket *bucket; 3949 3950 bucket = ocfs2_xattr_bucket_new(inode); 3951 if (!bucket) { 3952 mlog_errno(-ENOMEM); 3953 return -ENOMEM; 3954 } 3955 3956 trace_ocfs2_iterate_xattr_buckets( 3957 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3958 (unsigned long long)blkno, clusters); 3959 3960 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3961 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3962 if (ret) { 3963 mlog_errno(ret); 3964 break; 3965 } 3966 3967 /* 3968 * The real bucket num in this series of blocks is stored 3969 * in the 1st bucket. 
3970 */ 3971 if (i == 0) 3972 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3973 3974 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 3975 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3976 if (func) { 3977 ret = func(inode, bucket, para); 3978 if (ret && ret != -ERANGE) 3979 mlog_errno(ret); 3980 /* Fall through to bucket_relse() */ 3981 } 3982 3983 ocfs2_xattr_bucket_relse(bucket); 3984 if (ret) 3985 break; 3986 } 3987 3988 ocfs2_xattr_bucket_free(bucket); 3989 return ret; 3990 } 3991 3992 struct ocfs2_xattr_tree_list { 3993 char *buffer; 3994 size_t buffer_size; 3995 size_t result; 3996 }; 3997 3998 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 3999 struct ocfs2_xattr_header *xh, 4000 int index, 4001 int *block_off, 4002 int *new_offset) 4003 { 4004 u16 name_offset; 4005 4006 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4007 return -EINVAL; 4008 4009 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4010 4011 *block_off = name_offset >> sb->s_blocksize_bits; 4012 *new_offset = name_offset % sb->s_blocksize; 4013 4014 return 0; 4015 } 4016 4017 static int ocfs2_list_xattr_bucket(struct inode *inode, 4018 struct ocfs2_xattr_bucket *bucket, 4019 void *para) 4020 { 4021 int ret = 0, type; 4022 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4023 int i, block_off, new_offset; 4024 const char *prefix, *name; 4025 4026 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4027 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4028 type = ocfs2_xattr_get_type(entry); 4029 prefix = ocfs2_xattr_prefix(type); 4030 4031 if (prefix) { 4032 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4033 bucket_xh(bucket), 4034 i, 4035 &block_off, 4036 &new_offset); 4037 if (ret) 4038 break; 4039 4040 name = (const char *)bucket_block(bucket, block_off) + 4041 new_offset; 4042 ret = ocfs2_xattr_list_entry(xl->buffer, 4043 xl->buffer_size, 
4044 &xl->result, 4045 prefix, name, 4046 entry->xe_name_len); 4047 if (ret) 4048 break; 4049 } 4050 } 4051 4052 return ret; 4053 } 4054 4055 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4056 struct buffer_head *blk_bh, 4057 xattr_tree_rec_func *rec_func, 4058 void *para) 4059 { 4060 struct ocfs2_xattr_block *xb = 4061 (struct ocfs2_xattr_block *)blk_bh->b_data; 4062 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4063 int ret = 0; 4064 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4065 u64 p_blkno = 0; 4066 4067 if (!el->l_next_free_rec || !rec_func) 4068 return 0; 4069 4070 while (name_hash > 0) { 4071 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4072 &e_cpos, &num_clusters, el); 4073 if (ret) { 4074 mlog_errno(ret); 4075 break; 4076 } 4077 4078 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4079 num_clusters, para); 4080 if (ret) { 4081 if (ret != -ERANGE) 4082 mlog_errno(ret); 4083 break; 4084 } 4085 4086 if (e_cpos == 0) 4087 break; 4088 4089 name_hash = e_cpos - 1; 4090 } 4091 4092 return ret; 4093 4094 } 4095 4096 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4097 struct buffer_head *root_bh, 4098 u64 blkno, u32 cpos, u32 len, void *para) 4099 { 4100 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4101 ocfs2_list_xattr_bucket, para); 4102 } 4103 4104 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4105 struct buffer_head *blk_bh, 4106 char *buffer, 4107 size_t buffer_size) 4108 { 4109 int ret; 4110 struct ocfs2_xattr_tree_list xl = { 4111 .buffer = buffer, 4112 .buffer_size = buffer_size, 4113 .result = 0, 4114 }; 4115 4116 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4117 ocfs2_list_xattr_tree_rec, &xl); 4118 if (ret) { 4119 mlog_errno(ret); 4120 goto out; 4121 } 4122 4123 ret = xl.result; 4124 out: 4125 return ret; 4126 } 4127 4128 static int cmp_xe(const void *a, const void *b) 4129 { 4130 const struct ocfs2_xattr_entry *l = a, *r = b; 4131 u32 l_hash = 
le32_to_cpu(l->xe_name_hash); 4132 u32 r_hash = le32_to_cpu(r->xe_name_hash); 4133 4134 if (l_hash > r_hash) 4135 return 1; 4136 if (l_hash < r_hash) 4137 return -1; 4138 return 0; 4139 } 4140 4141 static void swap_xe(void *a, void *b, int size) 4142 { 4143 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4144 4145 tmp = *l; 4146 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4147 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4148 } 4149 4150 /* 4151 * When the ocfs2_xattr_block is filled up, new bucket will be created 4152 * and all the xattr entries will be moved to the new bucket. 4153 * The header goes at the start of the bucket, and the names+values are 4154 * filled from the end. This is why *target starts as the last buffer. 4155 * Note: we need to sort the entries since they are not saved in order 4156 * in the ocfs2_xattr_block. 4157 */ 4158 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4159 struct buffer_head *xb_bh, 4160 struct ocfs2_xattr_bucket *bucket) 4161 { 4162 int i, blocksize = inode->i_sb->s_blocksize; 4163 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4164 u16 offset, size, off_change; 4165 struct ocfs2_xattr_entry *xe; 4166 struct ocfs2_xattr_block *xb = 4167 (struct ocfs2_xattr_block *)xb_bh->b_data; 4168 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4169 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4170 u16 count = le16_to_cpu(xb_xh->xh_count); 4171 char *src = xb_bh->b_data; 4172 char *target = bucket_block(bucket, blks - 1); 4173 4174 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4175 (unsigned long long)xb_bh->b_blocknr, 4176 (unsigned long long)bucket_blkno(bucket)); 4177 4178 for (i = 0; i < blks; i++) 4179 memset(bucket_block(bucket, i), 0, blocksize); 4180 4181 /* 4182 * Since the xe_name_offset is based on ocfs2_xattr_header, 4183 * there is a offset change corresponding to the change of 4184 * ocfs2_xattr_header's position. 
4185 */ 4186 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4187 xe = &xb_xh->xh_entries[count - 1]; 4188 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4189 size = blocksize - offset; 4190 4191 /* copy all the names and values. */ 4192 memcpy(target + offset, src + offset, size); 4193 4194 /* Init new header now. */ 4195 xh->xh_count = xb_xh->xh_count; 4196 xh->xh_num_buckets = cpu_to_le16(1); 4197 xh->xh_name_value_len = cpu_to_le16(size); 4198 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4199 4200 /* copy all the entries. */ 4201 target = bucket_block(bucket, 0); 4202 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4203 size = count * sizeof(struct ocfs2_xattr_entry); 4204 memcpy(target + offset, (char *)xb_xh + offset, size); 4205 4206 /* Change the xe offset for all the xe because of the move. */ 4207 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4208 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4209 for (i = 0; i < count; i++) 4210 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4211 4212 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4213 4214 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4215 cmp_xe, swap_xe); 4216 } 4217 4218 /* 4219 * After we move xattr from block to index btree, we have to 4220 * update ocfs2_xattr_search to the new xe and base. 4221 * 4222 * When the entry is in xattr block, xattr_bh indicates the storage place. 4223 * While if the entry is in index b-tree, "bucket" indicates the 4224 * real place of the xattr. 
4225 */ 4226 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4227 struct ocfs2_xattr_search *xs, 4228 struct buffer_head *old_bh) 4229 { 4230 char *buf = old_bh->b_data; 4231 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4232 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4233 int i; 4234 4235 xs->header = bucket_xh(xs->bucket); 4236 xs->base = bucket_block(xs->bucket, 0); 4237 xs->end = xs->base + inode->i_sb->s_blocksize; 4238 4239 if (xs->not_found) 4240 return; 4241 4242 i = xs->here - old_xh->xh_entries; 4243 xs->here = &xs->header->xh_entries[i]; 4244 } 4245 4246 static int ocfs2_xattr_create_index_block(struct inode *inode, 4247 struct ocfs2_xattr_search *xs, 4248 struct ocfs2_xattr_set_ctxt *ctxt) 4249 { 4250 int ret; 4251 u32 bit_off, len; 4252 u64 blkno; 4253 handle_t *handle = ctxt->handle; 4254 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4255 struct buffer_head *xb_bh = xs->xattr_bh; 4256 struct ocfs2_xattr_block *xb = 4257 (struct ocfs2_xattr_block *)xb_bh->b_data; 4258 struct ocfs2_xattr_tree_root *xr; 4259 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4260 4261 trace_ocfs2_xattr_create_index_block_begin( 4262 (unsigned long long)xb_bh->b_blocknr); 4263 4264 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4265 BUG_ON(!xs->bucket); 4266 4267 /* 4268 * XXX: 4269 * We can use this lock for now, and maybe move to a dedicated mutex 4270 * if performance becomes a problem later. 
4271 */ 4272 down_write(&oi->ip_alloc_sem); 4273 4274 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4275 OCFS2_JOURNAL_ACCESS_WRITE); 4276 if (ret) { 4277 mlog_errno(ret); 4278 goto out; 4279 } 4280 4281 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4282 1, 1, &bit_off, &len); 4283 if (ret) { 4284 mlog_errno(ret); 4285 goto out; 4286 } 4287 4288 /* 4289 * The bucket may spread in many blocks, and 4290 * we will only touch the 1st block and the last block 4291 * in the whole bucket(one for entry and one for data). 4292 */ 4293 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4294 4295 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4296 4297 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); 4298 if (ret) { 4299 mlog_errno(ret); 4300 goto out; 4301 } 4302 4303 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4304 OCFS2_JOURNAL_ACCESS_CREATE); 4305 if (ret) { 4306 mlog_errno(ret); 4307 goto out; 4308 } 4309 4310 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4311 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4312 4313 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4314 4315 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4316 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4317 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4318 4319 xr = &xb->xb_attrs.xb_root; 4320 xr->xt_clusters = cpu_to_le32(1); 4321 xr->xt_last_eb_blk = 0; 4322 xr->xt_list.l_tree_depth = 0; 4323 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4324 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4325 4326 xr->xt_list.l_recs[0].e_cpos = 0; 4327 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4328 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4329 4330 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4331 4332 ocfs2_journal_dirty(handle, xb_bh); 4333 4334 out: 4335 up_write(&oi->ip_alloc_sem); 4336 4337 return ret; 4338 } 4339 4340 static int 
cmp_xe_offset(const void *a, const void *b) 4341 { 4342 const struct ocfs2_xattr_entry *l = a, *r = b; 4343 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4344 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4345 4346 if (l_name_offset < r_name_offset) 4347 return 1; 4348 if (l_name_offset > r_name_offset) 4349 return -1; 4350 return 0; 4351 } 4352 4353 /* 4354 * defrag a xattr bucket if we find that the bucket has some 4355 * holes beteen name/value pairs. 4356 * We will move all the name/value pairs to the end of the bucket 4357 * so that we can spare some space for insertion. 4358 */ 4359 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4360 handle_t *handle, 4361 struct ocfs2_xattr_bucket *bucket) 4362 { 4363 int ret, i; 4364 size_t end, offset, len; 4365 struct ocfs2_xattr_header *xh; 4366 char *entries, *buf, *bucket_buf = NULL; 4367 u64 blkno = bucket_blkno(bucket); 4368 u16 xh_free_start; 4369 size_t blocksize = inode->i_sb->s_blocksize; 4370 struct ocfs2_xattr_entry *xe; 4371 4372 /* 4373 * In order to make the operation more efficient and generic, 4374 * we copy all the blocks into a contiguous memory and do the 4375 * defragment there, so if anything is error, we will not touch 4376 * the real block. 
4377 */ 4378 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4379 if (!bucket_buf) { 4380 ret = -EIO; 4381 goto out; 4382 } 4383 4384 buf = bucket_buf; 4385 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4386 memcpy(buf, bucket_block(bucket, i), blocksize); 4387 4388 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4389 OCFS2_JOURNAL_ACCESS_WRITE); 4390 if (ret < 0) { 4391 mlog_errno(ret); 4392 goto out; 4393 } 4394 4395 xh = (struct ocfs2_xattr_header *)bucket_buf; 4396 entries = (char *)xh->xh_entries; 4397 xh_free_start = le16_to_cpu(xh->xh_free_start); 4398 4399 trace_ocfs2_defrag_xattr_bucket( 4400 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4401 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4402 4403 /* 4404 * sort all the entries by their offset. 4405 * the largest will be the first, so that we can 4406 * move them to the end one by one. 4407 */ 4408 sort(entries, le16_to_cpu(xh->xh_count), 4409 sizeof(struct ocfs2_xattr_entry), 4410 cmp_xe_offset, swap_xe); 4411 4412 /* Move all name/values to the end of the bucket. */ 4413 xe = xh->xh_entries; 4414 end = OCFS2_XATTR_BUCKET_SIZE; 4415 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4416 offset = le16_to_cpu(xe->xe_name_offset); 4417 len = namevalue_size_xe(xe); 4418 4419 /* 4420 * We must make sure that the name/value pair 4421 * exist in the same block. So adjust end to 4422 * the previous block end if needed. 
4423 */ 4424 if (((end - len) / blocksize != 4425 (end - 1) / blocksize)) 4426 end = end - end % blocksize; 4427 4428 if (end > offset + len) { 4429 memmove(bucket_buf + end - len, 4430 bucket_buf + offset, len); 4431 xe->xe_name_offset = cpu_to_le16(end - len); 4432 } 4433 4434 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4435 "bucket %llu\n", (unsigned long long)blkno); 4436 4437 end -= len; 4438 } 4439 4440 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4441 "bucket %llu\n", (unsigned long long)blkno); 4442 4443 if (xh_free_start == end) 4444 goto out; 4445 4446 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4447 xh->xh_free_start = cpu_to_le16(end); 4448 4449 /* sort the entries by their name_hash. */ 4450 sort(entries, le16_to_cpu(xh->xh_count), 4451 sizeof(struct ocfs2_xattr_entry), 4452 cmp_xe, swap_xe); 4453 4454 buf = bucket_buf; 4455 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4456 memcpy(bucket_block(bucket, i), buf, blocksize); 4457 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4458 4459 out: 4460 kfree(bucket_buf); 4461 return ret; 4462 } 4463 4464 /* 4465 * prev_blkno points to the start of an existing extent. new_blkno 4466 * points to a newly allocated extent. Because we know each of our 4467 * clusters contains more than bucket, we can easily split one cluster 4468 * at a bucket boundary. So we take the last cluster of the existing 4469 * extent and split it down the middle. We move the last half of the 4470 * buckets in the last cluster of the existing extent over to the new 4471 * extent. 4472 * 4473 * first_bh is the buffer at prev_blkno so we can update the existing 4474 * extent's bucket count. header_bh is the bucket were we were hoping 4475 * to insert our xattr. If the bucket move places the target in the new 4476 * extent, we'll update first_bh and header_bh after modifying the old 4477 * extent. 
4478 * 4479 * first_hash will be set as the 1st xe's name_hash in the new extent. 4480 */ 4481 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4482 handle_t *handle, 4483 struct ocfs2_xattr_bucket *first, 4484 struct ocfs2_xattr_bucket *target, 4485 u64 new_blkno, 4486 u32 num_clusters, 4487 u32 *first_hash) 4488 { 4489 int ret; 4490 struct super_block *sb = inode->i_sb; 4491 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4492 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4493 int to_move = num_buckets / 2; 4494 u64 src_blkno; 4495 u64 last_cluster_blkno = bucket_blkno(first) + 4496 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4497 4498 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4499 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4500 4501 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4502 (unsigned long long)last_cluster_blkno, 4503 (unsigned long long)new_blkno); 4504 4505 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4506 last_cluster_blkno, new_blkno, 4507 to_move, first_hash); 4508 if (ret) { 4509 mlog_errno(ret); 4510 goto out; 4511 } 4512 4513 /* This is the first bucket that got moved */ 4514 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4515 4516 /* 4517 * If the target bucket was part of the moved buckets, we need to 4518 * update first and target. 4519 */ 4520 if (bucket_blkno(target) >= src_blkno) { 4521 /* Find the block for the new target bucket */ 4522 src_blkno = new_blkno + 4523 (bucket_blkno(target) - src_blkno); 4524 4525 ocfs2_xattr_bucket_relse(first); 4526 ocfs2_xattr_bucket_relse(target); 4527 4528 /* 4529 * These shouldn't fail - the buffers are in the 4530 * journal from ocfs2_cp_xattr_bucket(). 
4531 */ 4532 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4533 if (ret) { 4534 mlog_errno(ret); 4535 goto out; 4536 } 4537 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4538 if (ret) 4539 mlog_errno(ret); 4540 4541 } 4542 4543 out: 4544 return ret; 4545 } 4546 4547 /* 4548 * Find the suitable pos when we divide a bucket into 2. 4549 * We have to make sure the xattrs with the same hash value exist 4550 * in the same bucket. 4551 * 4552 * If this ocfs2_xattr_header covers more than one hash value, find a 4553 * place where the hash value changes. Try to find the most even split. 4554 * The most common case is that all entries have different hash values, 4555 * and the first check we make will find a place to split. 4556 */ 4557 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4558 { 4559 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4560 int count = le16_to_cpu(xh->xh_count); 4561 int delta, middle = count / 2; 4562 4563 /* 4564 * We start at the middle. Each step gets farther away in both 4565 * directions. We therefore hit the change in hash value 4566 * nearest to the middle. Note that this loop does not execute for 4567 * count < 2. 4568 */ 4569 for (delta = 0; delta < middle; delta++) { 4570 /* Let's check delta earlier than middle */ 4571 if (cmp_xe(&entries[middle - delta - 1], 4572 &entries[middle - delta])) 4573 return middle - delta; 4574 4575 /* For even counts, don't walk off the end */ 4576 if ((middle + delta + 1) == count) 4577 continue; 4578 4579 /* Now try delta past middle */ 4580 if (cmp_xe(&entries[middle + delta], 4581 &entries[middle + delta + 1])) 4582 return middle + delta + 1; 4583 } 4584 4585 /* Every entry had the same hash */ 4586 return count; 4587 } 4588 4589 /* 4590 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4591 * first_hash will record the 1st hash of the new bucket. 4592 * 4593 * Normally half of the xattrs will be moved. 
But we have to make
 * sure that the xattrs with the same hash value are stored in the
 * same bucket. If all the xattrs in this bucket have the same hash
 * value, the new bucket will be initialized as an empty one and the
 * first_hash will be initialized as (hash_value+1).
 *
 * new_bucket_head selects the journal access mode for the new bucket
 * (ACCESS_CREATE when set, ACCESS_WRITE otherwise) and the value stored
 * in its xh_num_buckets (1 when set, 0 otherwise).  first_hash may be
 * NULL.  Returns 0 on success or a negative error.
 */
static int ocfs2_divide_xattr_bucket(struct inode *inode,
				    handle_t *handle,
				    u64 blk,
				    u64 new_blk,
				    u32 *first_hash,
				    int new_bucket_head)
{
	struct ocfs2_xattr_bucket *src = NULL, *dst = NULL;
	struct ocfs2_xattr_header *src_xh, *dst_xh;
	struct ocfs2_xattr_entry *xe;
	int blocksize = inode->i_sb->s_blocksize;
	/* Name/value bytes and lowest name offset of the entries kept in src */
	int kept_len = 0, kept_offset = 0;
	int total, split, nbytes;
	int status, i;

	trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
					      (unsigned long long)new_blk);

	src = ocfs2_xattr_bucket_new(inode);
	dst = ocfs2_xattr_bucket_new(inode);
	if (!src || !dst) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_read_xattr_bucket(src, blk);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_xattr_bucket_journal_access(handle, src,
						   OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	/*
	 * Even if !new_bucket_head, we're overwriting dst.  Thus,
	 * there's no need to read it.
	 */
	status = ocfs2_init_xattr_bucket(dst, new_blk);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	/*
	 * Hey, if we're overwriting dst, what difference does
	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
	 * same part of ocfs2_cp_xattr_bucket().
	 */
	status = ocfs2_xattr_bucket_journal_access(handle, dst,
						new_bucket_head ?
						OCFS2_JOURNAL_ACCESS_CREATE :
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	src_xh = bucket_xh(src);
	total = le16_to_cpu(src_xh->xh_count);
	split = ocfs2_xattr_find_divide_pos(src_xh);

	if (split == total) {
		/*
		 * No split point: initialize a new empty bucket here.
		 * The hash value is set as one larger than that of the
		 * last entry in the previous bucket.
		 */
		xe = &src_xh->xh_entries[split - 1];

		for (i = 0; i < dst->bu_blocks; i++)
			memset(bucket_block(dst, i), 0, blocksize);

		dst_xh = bucket_xh(dst);
		dst_xh->xh_free_start = cpu_to_le16(blocksize);
		dst_xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
		le32_add_cpu(&dst_xh->xh_entries[0].xe_name_hash, 1);
	} else {
		/* Start from a full copy of the old bucket. */
		ocfs2_xattr_bucket_copy_data(dst, src);
		dst_xh = bucket_xh(dst);

		/*
		 * Calculate the total name/value len and xh_free_start
		 * for the old bucket first, from the entries that stay
		 * in it ([0, split)).
		 */
		kept_offset = OCFS2_XATTR_BUCKET_SIZE;
		kept_len = 0;
		for (i = 0; i < split; i++) {
			xe = &dst_xh->xh_entries[i];
			kept_len += namevalue_size_xe(xe);
			if (le16_to_cpu(xe->xe_name_offset) < kept_offset)
				kept_offset = le16_to_cpu(xe->xe_name_offset);
		}

		/*
		 * Now begin the modification to the new bucket.
		 *
		 * In the new bucket, we just move the xattr entries to
		 * the beginning and don't touch the name/value.  So
		 * there will be some holes in the bucket, and they will
		 * be removed when ocfs2_defrag_xattr_bucket is called.
		 */
		xe = &dst_xh->xh_entries[split];
		nbytes = sizeof(struct ocfs2_xattr_entry) * (total - split);
		trace_ocfs2_divide_xattr_bucket_move(nbytes,
			(int)((char *)xe - (char *)dst_xh),
			(int)((char *)dst_xh->xh_entries - (char *)dst_xh));
		memmove((char *)dst_xh->xh_entries, (char *)xe, nbytes);

		/* Clear the entry slots left behind after the moved ones. */
		xe = &dst_xh->xh_entries[total - split];
		nbytes = sizeof(struct ocfs2_xattr_entry) * split;
		memset((char *)xe, 0, nbytes);

		le16_add_cpu(&dst_xh->xh_count, -split);
		le16_add_cpu(&dst_xh->xh_name_value_len, -kept_len);

		/* Calculate xh_free_start for the new bucket. */
		dst_xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
		for (i = 0; i < le16_to_cpu(dst_xh->xh_count); i++) {
			xe = &dst_xh->xh_entries[i];
			if (le16_to_cpu(xe->xe_name_offset) <
			    le16_to_cpu(dst_xh->xh_free_start))
				dst_xh->xh_free_start = xe->xe_name_offset;
		}
	}

	/* set xh->xh_num_buckets for the new xh. */
	if (new_bucket_head)
		dst_xh->xh_num_buckets = cpu_to_le16(1);
	else
		dst_xh->xh_num_buckets = 0;

	ocfs2_xattr_bucket_journal_dirty(handle, dst);

	/* store the first_hash of the new bucket. */
	if (first_hash)
		*first_hash = le32_to_cpu(dst_xh->xh_entries[0].xe_name_hash);

	/*
	 * Now only update the 1st block of the old bucket.  If we
	 * just added a new empty bucket, there is no need to modify
	 * it.
	 */
	if (split != total) {
		memset(&src_xh->xh_entries[split], 0,
		       sizeof(struct ocfs2_xattr_entry) * (total - split));
		src_xh->xh_count = cpu_to_le16(split);
		src_xh->xh_free_start = cpu_to_le16(kept_offset);
		src_xh->xh_name_value_len = cpu_to_le16(kept_len);

		ocfs2_xattr_bucket_journal_dirty(handle, src);
	}

bail:
	ocfs2_xattr_bucket_free(src);
	ocfs2_xattr_bucket_free(dst);

	return status;
}

/*
 * Copy xattr from one bucket to another bucket.
4772 * 4773 * The caller must make sure that the journal transaction 4774 * has enough space for journaling. 4775 */ 4776 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4777 handle_t *handle, 4778 u64 s_blkno, 4779 u64 t_blkno, 4780 int t_is_new) 4781 { 4782 int ret; 4783 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4784 4785 BUG_ON(s_blkno == t_blkno); 4786 4787 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4788 (unsigned long long)t_blkno, 4789 t_is_new); 4790 4791 s_bucket = ocfs2_xattr_bucket_new(inode); 4792 t_bucket = ocfs2_xattr_bucket_new(inode); 4793 if (!s_bucket || !t_bucket) { 4794 ret = -ENOMEM; 4795 mlog_errno(ret); 4796 goto out; 4797 } 4798 4799 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4800 if (ret) 4801 goto out; 4802 4803 /* 4804 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4805 * there's no need to read it. 4806 */ 4807 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno); 4808 if (ret) 4809 goto out; 4810 4811 /* 4812 * Hey, if we're overwriting t_bucket, what difference does 4813 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4814 * cluster to fill, we came here from 4815 * ocfs2_mv_xattr_buckets(), and it is really new - 4816 * ACCESS_CREATE is required. But we also might have moved data 4817 * out of t_bucket before extending back into it. 4818 * ocfs2_add_new_xattr_bucket() can do this - its call to 4819 * ocfs2_add_new_xattr_cluster() may have created a new extent 4820 * and copied out the end of the old extent. Then it re-extends 4821 * the old extent back to create space for new xattrs. That's 4822 * how we get here, and the bucket isn't really new. 4823 */ 4824 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4825 t_is_new ? 
4826 OCFS2_JOURNAL_ACCESS_CREATE : 4827 OCFS2_JOURNAL_ACCESS_WRITE); 4828 if (ret) 4829 goto out; 4830 4831 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4832 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4833 4834 out: 4835 ocfs2_xattr_bucket_free(t_bucket); 4836 ocfs2_xattr_bucket_free(s_bucket); 4837 4838 return ret; 4839 } 4840 4841 /* 4842 * src_blk points to the start of an existing extent. last_blk points to 4843 * last cluster in that extent. to_blk points to a newly allocated 4844 * extent. We copy the buckets from the cluster at last_blk to the new 4845 * extent. If start_bucket is non-zero, we skip that many buckets before 4846 * we start copying. The new extent's xh_num_buckets gets set to the 4847 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4848 * by the same amount. 4849 */ 4850 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4851 u64 src_blk, u64 last_blk, u64 to_blk, 4852 unsigned int start_bucket, 4853 u32 *first_hash) 4854 { 4855 int i, ret, credits; 4856 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4857 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4858 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4859 struct ocfs2_xattr_bucket *old_first, *new_first; 4860 4861 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4862 (unsigned long long)to_blk); 4863 4864 BUG_ON(start_bucket >= num_buckets); 4865 if (start_bucket) { 4866 num_buckets -= start_bucket; 4867 last_blk += (start_bucket * blks_per_bucket); 4868 } 4869 4870 /* The first bucket of the original extent */ 4871 old_first = ocfs2_xattr_bucket_new(inode); 4872 /* The first bucket of the new extent */ 4873 new_first = ocfs2_xattr_bucket_new(inode); 4874 if (!old_first || !new_first) { 4875 ret = -ENOMEM; 4876 mlog_errno(ret); 4877 goto out; 4878 } 4879 4880 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4881 if (ret) { 4882 mlog_errno(ret); 4883 goto out; 4884 } 4885 4886 /* 4887 * We 
need to update the first bucket of the old extent and all 4888 * the buckets going to the new extent. 4889 */ 4890 credits = ((num_buckets + 1) * blks_per_bucket); 4891 ret = ocfs2_extend_trans(handle, credits); 4892 if (ret) { 4893 mlog_errno(ret); 4894 goto out; 4895 } 4896 4897 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4898 OCFS2_JOURNAL_ACCESS_WRITE); 4899 if (ret) { 4900 mlog_errno(ret); 4901 goto out; 4902 } 4903 4904 for (i = 0; i < num_buckets; i++) { 4905 ret = ocfs2_cp_xattr_bucket(inode, handle, 4906 last_blk + (i * blks_per_bucket), 4907 to_blk + (i * blks_per_bucket), 4908 1); 4909 if (ret) { 4910 mlog_errno(ret); 4911 goto out; 4912 } 4913 } 4914 4915 /* 4916 * Get the new bucket ready before we dirty anything 4917 * (This actually shouldn't fail, because we already dirtied 4918 * it once in ocfs2_cp_xattr_bucket()). 4919 */ 4920 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4921 if (ret) { 4922 mlog_errno(ret); 4923 goto out; 4924 } 4925 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4926 OCFS2_JOURNAL_ACCESS_WRITE); 4927 if (ret) { 4928 mlog_errno(ret); 4929 goto out; 4930 } 4931 4932 /* Now update the headers */ 4933 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4934 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4935 4936 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4937 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4938 4939 if (first_hash) 4940 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4941 4942 out: 4943 ocfs2_xattr_bucket_free(new_first); 4944 ocfs2_xattr_bucket_free(old_first); 4945 return ret; 4946 } 4947 4948 /* 4949 * Move some xattrs in this cluster to the new cluster. 4950 * This function should only be called when bucket size == cluster size. 4951 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4952 */ 4953 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4954 handle_t *handle, 4955 u64 prev_blk, 4956 u64 new_blk, 4957 u32 *first_hash) 4958 { 4959 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4960 int ret, credits = 2 * blk_per_bucket; 4961 4962 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4963 4964 ret = ocfs2_extend_trans(handle, credits); 4965 if (ret) { 4966 mlog_errno(ret); 4967 return ret; 4968 } 4969 4970 /* Move half of the xattr in start_blk to the next bucket. */ 4971 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4972 new_blk, first_hash, 1); 4973 } 4974 4975 /* 4976 * Move some xattrs from the old cluster to the new one since they are not 4977 * contiguous in ocfs2 xattr tree. 4978 * 4979 * new_blk starts a new separate cluster, and we will move some xattrs from 4980 * prev_blk to it. v_start will be set as the first name hash value in this 4981 * new cluster so that it can be used as e_cpos during tree insertion and 4982 * don't collide with our original b-tree operations. first_bh and header_bh 4983 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 4984 * to extend the insert bucket. 4985 * 4986 * The problem is how much xattr should we move to the new one and when should 4987 * we update first_bh and header_bh? 4988 * 1. If cluster size > bucket size, that means the previous cluster has more 4989 * than 1 bucket, so just move half nums of bucket into the new cluster and 4990 * update the first_bh and header_bh if the insert bucket has been moved 4991 * to the new cluster. 4992 * 2. If cluster_size == bucket_size: 4993 * a) If the previous extent rec has more than one cluster and the insert 4994 * place isn't in the last cluster, copy the entire last cluster to the 4995 * new one. This time, we don't need to upate the first_bh and header_bh 4996 * since they will not be moved into the new cluster. 
4997 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 4998 * the new one. And we set the extend flag to zero if the insert place is 4999 * moved into the new allocated cluster since no extend is needed. 5000 */ 5001 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5002 handle_t *handle, 5003 struct ocfs2_xattr_bucket *first, 5004 struct ocfs2_xattr_bucket *target, 5005 u64 new_blk, 5006 u32 prev_clusters, 5007 u32 *v_start, 5008 int *extend) 5009 { 5010 int ret; 5011 5012 trace_ocfs2_adjust_xattr_cross_cluster( 5013 (unsigned long long)bucket_blkno(first), 5014 (unsigned long long)new_blk, prev_clusters); 5015 5016 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5017 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5018 handle, 5019 first, target, 5020 new_blk, 5021 prev_clusters, 5022 v_start); 5023 if (ret) 5024 mlog_errno(ret); 5025 } else { 5026 /* The start of the last cluster in the first extent */ 5027 u64 last_blk = bucket_blkno(first) + 5028 ((prev_clusters - 1) * 5029 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5030 5031 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5032 ret = ocfs2_mv_xattr_buckets(inode, handle, 5033 bucket_blkno(first), 5034 last_blk, new_blk, 0, 5035 v_start); 5036 if (ret) 5037 mlog_errno(ret); 5038 } else { 5039 ret = ocfs2_divide_xattr_cluster(inode, handle, 5040 last_blk, new_blk, 5041 v_start); 5042 if (ret) 5043 mlog_errno(ret); 5044 5045 if ((bucket_blkno(target) == last_blk) && extend) 5046 *extend = 0; 5047 } 5048 } 5049 5050 return ret; 5051 } 5052 5053 /* 5054 * Add a new cluster for xattr storage. 5055 * 5056 * If the new cluster is contiguous with the previous one, it will be 5057 * appended to the same extent record, and num_clusters will be updated. 5058 * If not, we will insert a new extent for it and move some xattrs in 5059 * the last cluster into the new allocated one. 
5060 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5061 * lose the benefits of hashing because we'll have to search large leaves. 5062 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5063 * if it's bigger). 5064 * 5065 * first_bh is the first block of the previous extent rec and header_bh 5066 * indicates the bucket we will insert the new xattrs. They will be updated 5067 * when the header_bh is moved into the new cluster. 5068 */ 5069 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5070 struct buffer_head *root_bh, 5071 struct ocfs2_xattr_bucket *first, 5072 struct ocfs2_xattr_bucket *target, 5073 u32 *num_clusters, 5074 u32 prev_cpos, 5075 int *extend, 5076 struct ocfs2_xattr_set_ctxt *ctxt) 5077 { 5078 int ret; 5079 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5080 u32 prev_clusters = *num_clusters; 5081 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5082 u64 block; 5083 handle_t *handle = ctxt->handle; 5084 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5085 struct ocfs2_extent_tree et; 5086 5087 trace_ocfs2_add_new_xattr_cluster_begin( 5088 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5089 (unsigned long long)bucket_blkno(first), 5090 prev_cpos, prev_clusters); 5091 5092 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5093 5094 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5095 OCFS2_JOURNAL_ACCESS_WRITE); 5096 if (ret < 0) { 5097 mlog_errno(ret); 5098 goto leave; 5099 } 5100 5101 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5102 clusters_to_add, &bit_off, &num_bits); 5103 if (ret < 0) { 5104 if (ret != -ENOSPC) 5105 mlog_errno(ret); 5106 goto leave; 5107 } 5108 5109 BUG_ON(num_bits > clusters_to_add); 5110 5111 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5112 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5113 5114 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5115 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5116 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5117 /* 5118 * If this cluster is contiguous with the old one and 5119 * adding this new cluster, we don't surpass the limit of 5120 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5121 * initialized and used like other buckets in the previous 5122 * cluster. 5123 * So add it as a contiguous one. The caller will handle 5124 * its init process. 5125 */ 5126 v_start = prev_cpos + prev_clusters; 5127 *num_clusters = prev_clusters + num_bits; 5128 } else { 5129 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5130 handle, 5131 first, 5132 target, 5133 block, 5134 prev_clusters, 5135 &v_start, 5136 extend); 5137 if (ret) { 5138 mlog_errno(ret); 5139 goto leave; 5140 } 5141 } 5142 5143 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5144 v_start, num_bits); 5145 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5146 num_bits, 0, ctxt->meta_ac); 5147 if (ret < 0) { 5148 mlog_errno(ret); 5149 goto leave; 5150 } 5151 5152 ocfs2_journal_dirty(handle, root_bh); 5153 5154 leave: 5155 return ret; 5156 } 5157 5158 /* 5159 * We are given an extent. 'first' is the bucket at the very front of 5160 * the extent. The extent has space for an additional bucket past 5161 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5162 * of the target bucket. We wish to shift every bucket past the target 5163 * down one, filling in that additional space. When we get back to the 5164 * target, we split the target between itself and the now-empty bucket 5165 * at target+1 (aka, target_blkno + blks_per_bucket). 
5166 */ 5167 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5168 handle_t *handle, 5169 struct ocfs2_xattr_bucket *first, 5170 u64 target_blk, 5171 u32 num_clusters) 5172 { 5173 int ret, credits; 5174 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5175 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5176 u64 end_blk; 5177 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5178 5179 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5180 (unsigned long long)bucket_blkno(first), 5181 num_clusters, new_bucket); 5182 5183 /* The extent must have room for an additional bucket */ 5184 BUG_ON(new_bucket >= 5185 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5186 5187 /* end_blk points to the last existing bucket */ 5188 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5189 5190 /* 5191 * end_blk is the start of the last existing bucket. 5192 * Thus, (end_blk - target_blk) covers the target bucket and 5193 * every bucket after it up to, but not including, the last 5194 * existing bucket. Then we add the last existing bucket, the 5195 * new bucket, and the first bucket (3 * blk_per_bucket). 5196 */ 5197 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5198 ret = ocfs2_extend_trans(handle, credits); 5199 if (ret) { 5200 mlog_errno(ret); 5201 goto out; 5202 } 5203 5204 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5205 OCFS2_JOURNAL_ACCESS_WRITE); 5206 if (ret) { 5207 mlog_errno(ret); 5208 goto out; 5209 } 5210 5211 while (end_blk != target_blk) { 5212 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5213 end_blk + blk_per_bucket, 0); 5214 if (ret) 5215 goto out; 5216 end_blk -= blk_per_bucket; 5217 } 5218 5219 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5220 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5221 target_blk + blk_per_bucket, NULL, 0); 5222 5223 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5224 ocfs2_xattr_bucket_journal_dirty(handle, first); 5225 5226 out: 5227 return ret; 5228 } 5229 5230 /* 5231 * Add new xattr bucket in an extent record and adjust the buckets 5232 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5233 * bucket we want to insert into. 5234 * 5235 * In the easy case, we will move all the buckets after target down by 5236 * one. Half of target's xattrs will be moved to the next bucket. 5237 * 5238 * If current cluster is full, we'll allocate a new one. This may not 5239 * be contiguous. The underlying calls will make sure that there is 5240 * space for the insert, shifting buckets around if necessary. 5241 * 'target' may be moved by those calls. 5242 */ 5243 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5244 struct buffer_head *xb_bh, 5245 struct ocfs2_xattr_bucket *target, 5246 struct ocfs2_xattr_set_ctxt *ctxt) 5247 { 5248 struct ocfs2_xattr_block *xb = 5249 (struct ocfs2_xattr_block *)xb_bh->b_data; 5250 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5251 struct ocfs2_extent_list *el = &xb_root->xt_list; 5252 u32 name_hash = 5253 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5254 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5255 int ret, num_buckets, extend = 1; 5256 u64 p_blkno; 5257 u32 e_cpos, num_clusters; 5258 /* The bucket at the front of the extent */ 5259 struct ocfs2_xattr_bucket *first; 5260 5261 trace_ocfs2_add_new_xattr_bucket( 5262 (unsigned long long)bucket_blkno(target)); 5263 5264 /* The first bucket of the original extent */ 5265 first = ocfs2_xattr_bucket_new(inode); 5266 if (!first) { 5267 ret = -ENOMEM; 5268 mlog_errno(ret); 5269 goto out; 5270 } 5271 5272 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5273 &num_clusters, el); 5274 if (ret) { 5275 mlog_errno(ret); 
5276 goto out; 5277 } 5278 5279 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5280 if (ret) { 5281 mlog_errno(ret); 5282 goto out; 5283 } 5284 5285 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5286 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5287 /* 5288 * This can move first+target if the target bucket moves 5289 * to the new extent. 5290 */ 5291 ret = ocfs2_add_new_xattr_cluster(inode, 5292 xb_bh, 5293 first, 5294 target, 5295 &num_clusters, 5296 e_cpos, 5297 &extend, 5298 ctxt); 5299 if (ret) { 5300 mlog_errno(ret); 5301 goto out; 5302 } 5303 } 5304 5305 if (extend) { 5306 ret = ocfs2_extend_xattr_bucket(inode, 5307 ctxt->handle, 5308 first, 5309 bucket_blkno(target), 5310 num_clusters); 5311 if (ret) 5312 mlog_errno(ret); 5313 } 5314 5315 out: 5316 ocfs2_xattr_bucket_free(first); 5317 5318 return ret; 5319 } 5320 5321 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, 5322 struct ocfs2_xattr_bucket *bucket, 5323 int offs) 5324 { 5325 int block_off = offs >> inode->i_sb->s_blocksize_bits; 5326 5327 offs = offs % inode->i_sb->s_blocksize; 5328 return bucket_block(bucket, block_off) + offs; 5329 } 5330 5331 /* 5332 * Truncate the specified xe_off entry in xattr bucket. 5333 * bucket is indicated by header_bh and len is the new length. 5334 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5335 * 5336 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 
5337 */ 5338 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5339 struct ocfs2_xattr_bucket *bucket, 5340 int xe_off, 5341 int len, 5342 struct ocfs2_xattr_set_ctxt *ctxt) 5343 { 5344 int ret, offset; 5345 u64 value_blk; 5346 struct ocfs2_xattr_entry *xe; 5347 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5348 size_t blocksize = inode->i_sb->s_blocksize; 5349 struct ocfs2_xattr_value_buf vb = { 5350 .vb_access = ocfs2_journal_access, 5351 }; 5352 5353 xe = &xh->xh_entries[xe_off]; 5354 5355 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5356 5357 offset = le16_to_cpu(xe->xe_name_offset) + 5358 OCFS2_XATTR_SIZE(xe->xe_name_len); 5359 5360 value_blk = offset / blocksize; 5361 5362 /* We don't allow ocfs2_xattr_value to be stored in different block. */ 5363 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5364 5365 vb.vb_bh = bucket->bu_bhs[value_blk]; 5366 BUG_ON(!vb.vb_bh); 5367 5368 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5369 (vb.vb_bh->b_data + offset % blocksize); 5370 5371 /* 5372 * From here on out we have to dirty the bucket. The generic 5373 * value calls only modify one of the bucket's bhs, but we need 5374 * to send the bucket at once. So if they error, they *could* have 5375 * modified something. We have to assume they did, and dirty 5376 * the whole bucket. This leaves us in a consistent state. 
5377 */ 5378 trace_ocfs2_xattr_bucket_value_truncate( 5379 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5380 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5381 if (ret) { 5382 mlog_errno(ret); 5383 goto out; 5384 } 5385 5386 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5387 OCFS2_JOURNAL_ACCESS_WRITE); 5388 if (ret) { 5389 mlog_errno(ret); 5390 goto out; 5391 } 5392 5393 xe->xe_value_size = cpu_to_le64(len); 5394 5395 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5396 5397 out: 5398 return ret; 5399 } 5400 5401 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5402 struct buffer_head *root_bh, 5403 u64 blkno, 5404 u32 cpos, 5405 u32 len, 5406 void *para) 5407 { 5408 int ret; 5409 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5410 struct inode *tl_inode = osb->osb_tl_inode; 5411 handle_t *handle; 5412 struct ocfs2_xattr_block *xb = 5413 (struct ocfs2_xattr_block *)root_bh->b_data; 5414 struct ocfs2_alloc_context *meta_ac = NULL; 5415 struct ocfs2_cached_dealloc_ctxt dealloc; 5416 struct ocfs2_extent_tree et; 5417 5418 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5419 ocfs2_delete_xattr_in_bucket, para); 5420 if (ret) { 5421 mlog_errno(ret); 5422 return ret; 5423 } 5424 5425 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5426 5427 ocfs2_init_dealloc_ctxt(&dealloc); 5428 5429 trace_ocfs2_rm_xattr_cluster( 5430 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5431 (unsigned long long)blkno, cpos, len); 5432 5433 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5434 len); 5435 5436 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5437 if (ret) { 5438 mlog_errno(ret); 5439 return ret; 5440 } 5441 5442 mutex_lock(&tl_inode->i_mutex); 5443 5444 if (ocfs2_truncate_log_needs_flush(osb)) { 5445 ret = __ocfs2_flush_truncate_log(osb); 5446 if (ret < 0) { 5447 mlog_errno(ret); 5448 goto out; 5449 } 5450 } 5451 5452 handle = ocfs2_start_trans(osb, 
ocfs2_remove_extent_credits(osb->sb)); 5453 if (IS_ERR(handle)) { 5454 ret = -ENOMEM; 5455 mlog_errno(ret); 5456 goto out; 5457 } 5458 5459 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5460 OCFS2_JOURNAL_ACCESS_WRITE); 5461 if (ret) { 5462 mlog_errno(ret); 5463 goto out_commit; 5464 } 5465 5466 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5467 &dealloc); 5468 if (ret) { 5469 mlog_errno(ret); 5470 goto out_commit; 5471 } 5472 5473 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5474 ocfs2_journal_dirty(handle, root_bh); 5475 5476 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5477 if (ret) 5478 mlog_errno(ret); 5479 5480 out_commit: 5481 ocfs2_commit_trans(osb, handle); 5482 out: 5483 ocfs2_schedule_truncate_log_flush(osb, 1); 5484 5485 mutex_unlock(&tl_inode->i_mutex); 5486 5487 if (meta_ac) 5488 ocfs2_free_alloc_context(meta_ac); 5489 5490 ocfs2_run_deallocs(osb, &dealloc); 5491 5492 return ret; 5493 } 5494 5495 /* 5496 * check whether the xattr bucket is filled up with the same hash value. 5497 * If we want to insert the xattr with the same hash, return -ENOSPC. 5498 * If we want to insert a xattr with different hash value, go ahead 5499 * and ocfs2_divide_xattr_bucket will handle this. 
5500 */ 5501 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5502 struct ocfs2_xattr_bucket *bucket, 5503 const char *name) 5504 { 5505 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5506 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5507 5508 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5509 return 0; 5510 5511 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5512 xh->xh_entries[0].xe_name_hash) { 5513 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5514 "hash = %u\n", 5515 (unsigned long long)bucket_blkno(bucket), 5516 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5517 return -ENOSPC; 5518 } 5519 5520 return 0; 5521 } 5522 5523 /* 5524 * Try to set the entry in the current bucket. If we fail, the caller 5525 * will handle getting us another bucket. 5526 */ 5527 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5528 struct ocfs2_xattr_info *xi, 5529 struct ocfs2_xattr_search *xs, 5530 struct ocfs2_xattr_set_ctxt *ctxt) 5531 { 5532 int ret; 5533 struct ocfs2_xa_loc loc; 5534 5535 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5536 5537 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5538 xs->not_found ? NULL : xs->here); 5539 ret = ocfs2_xa_set(&loc, xi, ctxt); 5540 if (!ret) { 5541 xs->here = loc.xl_entry; 5542 goto out; 5543 } 5544 if (ret != -ENOSPC) { 5545 mlog_errno(ret); 5546 goto out; 5547 } 5548 5549 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5550 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5551 xs->bucket); 5552 if (ret) { 5553 mlog_errno(ret); 5554 goto out; 5555 } 5556 5557 ret = ocfs2_xa_set(&loc, xi, ctxt); 5558 if (!ret) { 5559 xs->here = loc.xl_entry; 5560 goto out; 5561 } 5562 if (ret != -ENOSPC) 5563 mlog_errno(ret); 5564 5565 5566 out: 5567 return ret; 5568 } 5569 5570 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5571 struct ocfs2_xattr_info *xi, 5572 struct ocfs2_xattr_search *xs, 5573 struct ocfs2_xattr_set_ctxt *ctxt) 5574 { 5575 int ret; 5576 5577 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5578 5579 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5580 if (!ret) 5581 goto out; 5582 if (ret != -ENOSPC) { 5583 mlog_errno(ret); 5584 goto out; 5585 } 5586 5587 /* Ack, need more space. Let's try to get another bucket! */ 5588 5589 /* 5590 * We do not allow for overlapping ranges between buckets. And 5591 * the maximum number of collisions we will allow for then is 5592 * one bucket's worth, so check it here whether we need to 5593 * add a new bucket for the insert. 5594 */ 5595 ret = ocfs2_check_xattr_bucket_collision(inode, 5596 xs->bucket, 5597 xi->xi_name); 5598 if (ret) { 5599 mlog_errno(ret); 5600 goto out; 5601 } 5602 5603 ret = ocfs2_add_new_xattr_bucket(inode, 5604 xs->xattr_bh, 5605 xs->bucket, 5606 ctxt); 5607 if (ret) { 5608 mlog_errno(ret); 5609 goto out; 5610 } 5611 5612 /* 5613 * ocfs2_add_new_xattr_bucket() will have updated 5614 * xs->bucket if it moved, but it will not have updated 5615 * any of the other search fields. Thus, we drop it and 5616 * re-search. Everything should be cached, so it'll be 5617 * quick. 
5618 */ 5619 ocfs2_xattr_bucket_relse(xs->bucket); 5620 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5621 xi->xi_name_index, 5622 xi->xi_name, xs); 5623 if (ret && ret != -ENODATA) 5624 goto out; 5625 xs->not_found = ret; 5626 5627 /* Ok, we have a new bucket, let's try again */ 5628 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5629 if (ret && (ret != -ENOSPC)) 5630 mlog_errno(ret); 5631 5632 out: 5633 return ret; 5634 } 5635 5636 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5637 struct ocfs2_xattr_bucket *bucket, 5638 void *para) 5639 { 5640 int ret = 0, ref_credits; 5641 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5642 u16 i; 5643 struct ocfs2_xattr_entry *xe; 5644 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5645 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5646 int credits = ocfs2_remove_extent_credits(osb->sb) + 5647 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5648 struct ocfs2_xattr_value_root *xv; 5649 struct ocfs2_rm_xattr_bucket_para *args = 5650 (struct ocfs2_rm_xattr_bucket_para *)para; 5651 5652 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5653 5654 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5655 xe = &xh->xh_entries[i]; 5656 if (ocfs2_xattr_is_local(xe)) 5657 continue; 5658 5659 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5660 i, &xv, NULL); 5661 5662 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5663 args->ref_ci, 5664 args->ref_root_bh, 5665 &ctxt.meta_ac, 5666 &ref_credits); 5667 5668 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5669 if (IS_ERR(ctxt.handle)) { 5670 ret = PTR_ERR(ctxt.handle); 5671 mlog_errno(ret); 5672 break; 5673 } 5674 5675 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5676 i, 0, &ctxt); 5677 5678 ocfs2_commit_trans(osb, ctxt.handle); 5679 if (ctxt.meta_ac) { 5680 ocfs2_free_alloc_context(ctxt.meta_ac); 5681 ctxt.meta_ac = NULL; 5682 } 5683 if (ret) { 5684 mlog_errno(ret); 5685 break; 5686 } 5687 } 5688 5689 if (ctxt.meta_ac) 
5690 ocfs2_free_alloc_context(ctxt.meta_ac); 5691 ocfs2_schedule_truncate_log_flush(osb, 1); 5692 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5693 return ret; 5694 } 5695 5696 /* 5697 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5698 * or change the extent record flag), we need to recalculate 5699 * the metaecc for the whole bucket. So it is done here. 5700 * 5701 * Note: 5702 * We have to give the extra credits for the caller. 5703 */ 5704 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5705 handle_t *handle, 5706 void *para) 5707 { 5708 int ret; 5709 struct ocfs2_xattr_bucket *bucket = 5710 (struct ocfs2_xattr_bucket *)para; 5711 5712 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5713 OCFS2_JOURNAL_ACCESS_WRITE); 5714 if (ret) { 5715 mlog_errno(ret); 5716 return ret; 5717 } 5718 5719 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5720 5721 return 0; 5722 } 5723 5724 /* 5725 * Special action we need if the xattr value is refcounted. 5726 * 5727 * 1. If the xattr is refcounted, lock the tree. 5728 * 2. CoW the xattr if we are setting the new value and the value 5729 * will be stored outside. 5730 * 3. In other case, decrease_refcount will work for us, so just 5731 * lock the refcount tree, calculate the meta and credits is OK. 5732 * 5733 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5734 * currently CoW is a completed transaction, while this function 5735 * will also lock the allocators and let us deadlock. So we will 5736 * CoW the whole xattr value. 
5737 */ 5738 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5739 struct ocfs2_dinode *di, 5740 struct ocfs2_xattr_info *xi, 5741 struct ocfs2_xattr_search *xis, 5742 struct ocfs2_xattr_search *xbs, 5743 struct ocfs2_refcount_tree **ref_tree, 5744 int *meta_add, 5745 int *credits) 5746 { 5747 int ret = 0; 5748 struct ocfs2_xattr_block *xb; 5749 struct ocfs2_xattr_entry *xe; 5750 char *base; 5751 u32 p_cluster, num_clusters; 5752 unsigned int ext_flags; 5753 int name_offset, name_len; 5754 struct ocfs2_xattr_value_buf vb; 5755 struct ocfs2_xattr_bucket *bucket = NULL; 5756 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5757 struct ocfs2_post_refcount refcount; 5758 struct ocfs2_post_refcount *p = NULL; 5759 struct buffer_head *ref_root_bh = NULL; 5760 5761 if (!xis->not_found) { 5762 xe = xis->here; 5763 name_offset = le16_to_cpu(xe->xe_name_offset); 5764 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5765 base = xis->base; 5766 vb.vb_bh = xis->inode_bh; 5767 vb.vb_access = ocfs2_journal_access_di; 5768 } else { 5769 int i, block_off = 0; 5770 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5771 xe = xbs->here; 5772 name_offset = le16_to_cpu(xe->xe_name_offset); 5773 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5774 i = xbs->here - xbs->header->xh_entries; 5775 5776 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5777 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5778 bucket_xh(xbs->bucket), 5779 i, &block_off, 5780 &name_offset); 5781 if (ret) { 5782 mlog_errno(ret); 5783 goto out; 5784 } 5785 base = bucket_block(xbs->bucket, block_off); 5786 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5787 vb.vb_access = ocfs2_journal_access; 5788 5789 if (ocfs2_meta_ecc(osb)) { 5790 /*create parameters for ocfs2_post_refcount. 
*/ 5791 bucket = xbs->bucket; 5792 refcount.credits = bucket->bu_blocks; 5793 refcount.para = bucket; 5794 refcount.func = 5795 ocfs2_xattr_bucket_post_refcount; 5796 p = &refcount; 5797 } 5798 } else { 5799 base = xbs->base; 5800 vb.vb_bh = xbs->xattr_bh; 5801 vb.vb_access = ocfs2_journal_access_xb; 5802 } 5803 } 5804 5805 if (ocfs2_xattr_is_local(xe)) 5806 goto out; 5807 5808 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5809 (base + name_offset + name_len); 5810 5811 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5812 &num_clusters, &vb.vb_xv->xr_list, 5813 &ext_flags); 5814 if (ret) { 5815 mlog_errno(ret); 5816 goto out; 5817 } 5818 5819 /* 5820 * We just need to check the 1st extent record, since we always 5821 * CoW the whole xattr. So there shouldn't be a xattr with 5822 * some REFCOUNT extent recs after the 1st one. 5823 */ 5824 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5825 goto out; 5826 5827 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5828 1, ref_tree, &ref_root_bh); 5829 if (ret) { 5830 mlog_errno(ret); 5831 goto out; 5832 } 5833 5834 /* 5835 * If we are deleting the xattr or the new size will be stored inside, 5836 * cool, leave it there, the xattr truncate process will remove them 5837 * for us(it still needs the refcount tree lock and the meta, credits). 5838 * And the worse case is that every cluster truncate will split the 5839 * refcount tree, and make the original extent become 3. So we will need 5840 * 2 * cluster more extent recs at most. 
5841 */ 5842 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5843 5844 ret = ocfs2_refcounted_xattr_delete_need(inode, 5845 &(*ref_tree)->rf_ci, 5846 ref_root_bh, vb.vb_xv, 5847 meta_add, credits); 5848 if (ret) 5849 mlog_errno(ret); 5850 goto out; 5851 } 5852 5853 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5854 *ref_tree, ref_root_bh, 0, 5855 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5856 if (ret) 5857 mlog_errno(ret); 5858 5859 out: 5860 brelse(ref_root_bh); 5861 return ret; 5862 } 5863 5864 /* 5865 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5866 * The physical clusters will be added to refcount tree. 5867 */ 5868 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5869 struct ocfs2_xattr_value_root *xv, 5870 struct ocfs2_extent_tree *value_et, 5871 struct ocfs2_caching_info *ref_ci, 5872 struct buffer_head *ref_root_bh, 5873 struct ocfs2_cached_dealloc_ctxt *dealloc, 5874 struct ocfs2_post_refcount *refcount) 5875 { 5876 int ret = 0; 5877 u32 clusters = le32_to_cpu(xv->xr_clusters); 5878 u32 cpos, p_cluster, num_clusters; 5879 struct ocfs2_extent_list *el = &xv->xr_list; 5880 unsigned int ext_flags; 5881 5882 cpos = 0; 5883 while (cpos < clusters) { 5884 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5885 &num_clusters, el, &ext_flags); 5886 if (ret) { 5887 mlog_errno(ret); 5888 break; 5889 } 5890 5891 cpos += num_clusters; 5892 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5893 continue; 5894 5895 BUG_ON(!p_cluster); 5896 5897 ret = ocfs2_add_refcount_flag(inode, value_et, 5898 ref_ci, ref_root_bh, 5899 cpos - num_clusters, 5900 p_cluster, num_clusters, 5901 dealloc, refcount); 5902 if (ret) { 5903 mlog_errno(ret); 5904 break; 5905 } 5906 } 5907 5908 return ret; 5909 } 5910 5911 /* 5912 * Given a normal ocfs2_xattr_header, refcount all the entries which 5913 * have value stored outside. 5914 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5915 */ 5916 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5917 struct ocfs2_xattr_value_buf *vb, 5918 struct ocfs2_xattr_header *header, 5919 struct ocfs2_caching_info *ref_ci, 5920 struct buffer_head *ref_root_bh, 5921 struct ocfs2_cached_dealloc_ctxt *dealloc) 5922 { 5923 5924 struct ocfs2_xattr_entry *xe; 5925 struct ocfs2_xattr_value_root *xv; 5926 struct ocfs2_extent_tree et; 5927 int i, ret = 0; 5928 5929 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5930 xe = &header->xh_entries[i]; 5931 5932 if (ocfs2_xattr_is_local(xe)) 5933 continue; 5934 5935 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5936 le16_to_cpu(xe->xe_name_offset) + 5937 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5938 5939 vb->vb_xv = xv; 5940 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5941 5942 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5943 ref_ci, ref_root_bh, 5944 dealloc, NULL); 5945 if (ret) { 5946 mlog_errno(ret); 5947 break; 5948 } 5949 } 5950 5951 return ret; 5952 } 5953 5954 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5955 struct buffer_head *fe_bh, 5956 struct ocfs2_caching_info *ref_ci, 5957 struct buffer_head *ref_root_bh, 5958 struct ocfs2_cached_dealloc_ctxt *dealloc) 5959 { 5960 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5961 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5962 (fe_bh->b_data + inode->i_sb->s_blocksize - 5963 le16_to_cpu(di->i_xattr_inline_size)); 5964 struct ocfs2_xattr_value_buf vb = { 5965 .vb_bh = fe_bh, 5966 .vb_access = ocfs2_journal_access_di, 5967 }; 5968 5969 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5970 ref_ci, ref_root_bh, dealloc); 5971 } 5972 5973 struct ocfs2_xattr_tree_value_refcount_para { 5974 struct ocfs2_caching_info *ref_ci; 5975 struct buffer_head *ref_root_bh; 5976 struct ocfs2_cached_dealloc_ctxt *dealloc; 5977 }; 5978 5979 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
5980 struct ocfs2_xattr_bucket *bucket, 5981 int offset, 5982 struct ocfs2_xattr_value_root **xv, 5983 struct buffer_head **bh) 5984 { 5985 int ret, block_off, name_offset; 5986 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5987 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 5988 void *base; 5989 5990 ret = ocfs2_xattr_bucket_get_name_value(sb, 5991 bucket_xh(bucket), 5992 offset, 5993 &block_off, 5994 &name_offset); 5995 if (ret) { 5996 mlog_errno(ret); 5997 goto out; 5998 } 5999 6000 base = bucket_block(bucket, block_off); 6001 6002 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6003 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6004 6005 if (bh) 6006 *bh = bucket->bu_bhs[block_off]; 6007 out: 6008 return ret; 6009 } 6010 6011 /* 6012 * For a given xattr bucket, refcount all the entries which 6013 * have value stored outside. 6014 */ 6015 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6016 struct ocfs2_xattr_bucket *bucket, 6017 void *para) 6018 { 6019 int i, ret = 0; 6020 struct ocfs2_extent_tree et; 6021 struct ocfs2_xattr_tree_value_refcount_para *ref = 6022 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6023 struct ocfs2_xattr_header *xh = 6024 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6025 struct ocfs2_xattr_entry *xe; 6026 struct ocfs2_xattr_value_buf vb = { 6027 .vb_access = ocfs2_journal_access, 6028 }; 6029 struct ocfs2_post_refcount refcount = { 6030 .credits = bucket->bu_blocks, 6031 .para = bucket, 6032 .func = ocfs2_xattr_bucket_post_refcount, 6033 }; 6034 struct ocfs2_post_refcount *p = NULL; 6035 6036 /* We only need post_refcount if we support metaecc. 
*/ 6037 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6038 p = &refcount; 6039 6040 trace_ocfs2_xattr_bucket_value_refcount( 6041 (unsigned long long)bucket_blkno(bucket), 6042 le16_to_cpu(xh->xh_count)); 6043 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6044 xe = &xh->xh_entries[i]; 6045 6046 if (ocfs2_xattr_is_local(xe)) 6047 continue; 6048 6049 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6050 &vb.vb_xv, &vb.vb_bh); 6051 if (ret) { 6052 mlog_errno(ret); 6053 break; 6054 } 6055 6056 ocfs2_init_xattr_value_extent_tree(&et, 6057 INODE_CACHE(inode), &vb); 6058 6059 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6060 &et, ref->ref_ci, 6061 ref->ref_root_bh, 6062 ref->dealloc, p); 6063 if (ret) { 6064 mlog_errno(ret); 6065 break; 6066 } 6067 } 6068 6069 return ret; 6070 6071 } 6072 6073 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6074 struct buffer_head *root_bh, 6075 u64 blkno, u32 cpos, u32 len, void *para) 6076 { 6077 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6078 ocfs2_xattr_bucket_value_refcount, 6079 para); 6080 } 6081 6082 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6083 struct buffer_head *blk_bh, 6084 struct ocfs2_caching_info *ref_ci, 6085 struct buffer_head *ref_root_bh, 6086 struct ocfs2_cached_dealloc_ctxt *dealloc) 6087 { 6088 int ret = 0; 6089 struct ocfs2_xattr_block *xb = 6090 (struct ocfs2_xattr_block *)blk_bh->b_data; 6091 6092 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6093 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6094 struct ocfs2_xattr_value_buf vb = { 6095 .vb_bh = blk_bh, 6096 .vb_access = ocfs2_journal_access_xb, 6097 }; 6098 6099 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6100 ref_ci, ref_root_bh, 6101 dealloc); 6102 } else { 6103 struct ocfs2_xattr_tree_value_refcount_para para = { 6104 .ref_ci = ref_ci, 6105 .ref_root_bh = ref_root_bh, 6106 .dealloc = dealloc, 6107 }; 6108 6109 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6110 ocfs2_refcount_xattr_tree_rec, 6111 ¶); 6112 } 6113 6114 return ret; 6115 } 6116 6117 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6118 struct buffer_head *fe_bh, 6119 struct ocfs2_caching_info *ref_ci, 6120 struct buffer_head *ref_root_bh, 6121 struct ocfs2_cached_dealloc_ctxt *dealloc) 6122 { 6123 int ret = 0; 6124 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6125 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6126 struct buffer_head *blk_bh = NULL; 6127 6128 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6129 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6130 ref_ci, ref_root_bh, 6131 dealloc); 6132 if (ret) { 6133 mlog_errno(ret); 6134 goto out; 6135 } 6136 } 6137 6138 if (!di->i_xattr_loc) 6139 goto out; 6140 6141 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6142 &blk_bh); 6143 if (ret < 0) { 6144 mlog_errno(ret); 6145 goto out; 6146 } 6147 6148 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6149 ref_root_bh, dealloc); 6150 if (ret) 6151 mlog_errno(ret); 6152 6153 brelse(blk_bh); 6154 out: 6155 6156 return ret; 6157 } 6158 6159 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6160 /* 6161 * Store the information we need in xattr reflink. 6162 * old_bh and new_bh are inode bh for the old and new inode. 6163 */ 6164 struct ocfs2_xattr_reflink { 6165 struct inode *old_inode; 6166 struct inode *new_inode; 6167 struct buffer_head *old_bh; 6168 struct buffer_head *new_bh; 6169 struct ocfs2_caching_info *ref_ci; 6170 struct buffer_head *ref_root_bh; 6171 struct ocfs2_cached_dealloc_ctxt *dealloc; 6172 should_xattr_reflinked *xattr_reflinked; 6173 }; 6174 6175 /* 6176 * Given a xattr header and xe offset, 6177 * return the proper xv and the corresponding bh. 6178 * xattr in inode, block and xattr tree have different implementaions. 
6179 */ 6180 typedef int (get_xattr_value_root)(struct super_block *sb, 6181 struct buffer_head *bh, 6182 struct ocfs2_xattr_header *xh, 6183 int offset, 6184 struct ocfs2_xattr_value_root **xv, 6185 struct buffer_head **ret_bh, 6186 void *para); 6187 6188 /* 6189 * Calculate all the xattr value root metadata stored in this xattr header and 6190 * credits we need if we create them from the scratch. 6191 * We use get_xattr_value_root so that all types of xattr container can use it. 6192 */ 6193 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6194 struct buffer_head *bh, 6195 struct ocfs2_xattr_header *xh, 6196 int *metas, int *credits, 6197 int *num_recs, 6198 get_xattr_value_root *func, 6199 void *para) 6200 { 6201 int i, ret = 0; 6202 struct ocfs2_xattr_value_root *xv; 6203 struct ocfs2_xattr_entry *xe; 6204 6205 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6206 xe = &xh->xh_entries[i]; 6207 if (ocfs2_xattr_is_local(xe)) 6208 continue; 6209 6210 ret = func(sb, bh, xh, i, &xv, NULL, para); 6211 if (ret) { 6212 mlog_errno(ret); 6213 break; 6214 } 6215 6216 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6217 le16_to_cpu(xv->xr_list.l_next_free_rec); 6218 6219 *credits += ocfs2_calc_extend_credits(sb, 6220 &def_xv.xv.xr_list); 6221 6222 /* 6223 * If the value is a tree with depth > 1, We don't go deep 6224 * to the extent block, so just calculate a maximum record num. 6225 */ 6226 if (!xv->xr_list.l_tree_depth) 6227 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6228 else 6229 *num_recs += ocfs2_clusters_for_bytes(sb, 6230 XATTR_SIZE_MAX); 6231 } 6232 6233 return ret; 6234 } 6235 6236 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6237 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6238 struct buffer_head *bh, 6239 struct ocfs2_xattr_header *xh, 6240 int offset, 6241 struct ocfs2_xattr_value_root **xv, 6242 struct buffer_head **ret_bh, 6243 void *para) 6244 { 6245 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6246 6247 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6248 le16_to_cpu(xe->xe_name_offset) + 6249 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6250 6251 if (ret_bh) 6252 *ret_bh = bh; 6253 6254 return 0; 6255 } 6256 6257 /* 6258 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6259 * It is only used for inline xattr and xattr block. 6260 */ 6261 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6262 struct ocfs2_xattr_header *xh, 6263 struct buffer_head *ref_root_bh, 6264 int *credits, 6265 struct ocfs2_alloc_context **meta_ac) 6266 { 6267 int ret, meta_add = 0, num_recs = 0; 6268 struct ocfs2_refcount_block *rb = 6269 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6270 6271 *credits = 0; 6272 6273 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6274 &meta_add, credits, &num_recs, 6275 ocfs2_get_xattr_value_root, 6276 NULL); 6277 if (ret) { 6278 mlog_errno(ret); 6279 goto out; 6280 } 6281 6282 /* 6283 * We need to add/modify num_recs in refcount tree, so just calculate 6284 * an approximate number we need for refcount tree change. 6285 * Sometimes we need to split the tree, and after split, half recs 6286 * will be moved to the new block, and a new block can only provide 6287 * half number of recs. So we multiple new blocks by 2. 
6288 */ 6289 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6290 meta_add += num_recs; 6291 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6292 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6293 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6294 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6295 else 6296 *credits += 1; 6297 6298 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6299 if (ret) 6300 mlog_errno(ret); 6301 6302 out: 6303 return ret; 6304 } 6305 6306 /* 6307 * Given a xattr header, reflink all the xattrs in this container. 6308 * It can be used for inode, block and bucket. 6309 * 6310 * NOTE: 6311 * Before we call this function, the caller has memcpy the xattr in 6312 * old_xh to the new_xh. 6313 * 6314 * If args.xattr_reflinked is set, call it to decide whether the xe should 6315 * be reflinked or not. If not, remove it from the new xattr header. 6316 */ 6317 static int ocfs2_reflink_xattr_header(handle_t *handle, 6318 struct ocfs2_xattr_reflink *args, 6319 struct buffer_head *old_bh, 6320 struct ocfs2_xattr_header *xh, 6321 struct buffer_head *new_bh, 6322 struct ocfs2_xattr_header *new_xh, 6323 struct ocfs2_xattr_value_buf *vb, 6324 struct ocfs2_alloc_context *meta_ac, 6325 get_xattr_value_root *func, 6326 void *para) 6327 { 6328 int ret = 0, i, j; 6329 struct super_block *sb = args->old_inode->i_sb; 6330 struct buffer_head *value_bh; 6331 struct ocfs2_xattr_entry *xe, *last; 6332 struct ocfs2_xattr_value_root *xv, *new_xv; 6333 struct ocfs2_extent_tree data_et; 6334 u32 clusters, cpos, p_cluster, num_clusters; 6335 unsigned int ext_flags = 0; 6336 6337 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6338 le16_to_cpu(xh->xh_count)); 6339 6340 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6341 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6342 xe = &xh->xh_entries[i]; 6343 6344 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6345 xe = &new_xh->xh_entries[j]; 6346 6347 le16_add_cpu(&new_xh->xh_count, -1); 6348 if (new_xh->xh_count) { 6349 memmove(xe, xe + 1, 6350 (void *)last - (void *)xe); 6351 memset(last, 0, 6352 sizeof(struct ocfs2_xattr_entry)); 6353 } 6354 6355 /* 6356 * We don't want j to increase in the next round since 6357 * it is already moved ahead. 6358 */ 6359 j--; 6360 continue; 6361 } 6362 6363 if (ocfs2_xattr_is_local(xe)) 6364 continue; 6365 6366 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6367 if (ret) { 6368 mlog_errno(ret); 6369 break; 6370 } 6371 6372 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6373 if (ret) { 6374 mlog_errno(ret); 6375 break; 6376 } 6377 6378 /* 6379 * For the xattr which has l_tree_depth = 0, all the extent 6380 * recs have already be copied to the new xh with the 6381 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6382 * increase the refount count int the refcount tree. 6383 * 6384 * For the xattr which has l_tree_depth > 0, we need 6385 * to initialize it to the empty default value root, 6386 * and then insert the extents one by one. 
6387 */ 6388 if (xv->xr_list.l_tree_depth) { 6389 memcpy(new_xv, &def_xv, sizeof(def_xv)); 6390 vb->vb_xv = new_xv; 6391 vb->vb_bh = value_bh; 6392 ocfs2_init_xattr_value_extent_tree(&data_et, 6393 INODE_CACHE(args->new_inode), vb); 6394 } 6395 6396 clusters = le32_to_cpu(xv->xr_clusters); 6397 cpos = 0; 6398 while (cpos < clusters) { 6399 ret = ocfs2_xattr_get_clusters(args->old_inode, 6400 cpos, 6401 &p_cluster, 6402 &num_clusters, 6403 &xv->xr_list, 6404 &ext_flags); 6405 if (ret) { 6406 mlog_errno(ret); 6407 goto out; 6408 } 6409 6410 BUG_ON(!p_cluster); 6411 6412 if (xv->xr_list.l_tree_depth) { 6413 ret = ocfs2_insert_extent(handle, 6414 &data_et, cpos, 6415 ocfs2_clusters_to_blocks( 6416 args->old_inode->i_sb, 6417 p_cluster), 6418 num_clusters, ext_flags, 6419 meta_ac); 6420 if (ret) { 6421 mlog_errno(ret); 6422 goto out; 6423 } 6424 } 6425 6426 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6427 args->ref_root_bh, 6428 p_cluster, num_clusters, 6429 meta_ac, args->dealloc); 6430 if (ret) { 6431 mlog_errno(ret); 6432 goto out; 6433 } 6434 6435 cpos += num_clusters; 6436 } 6437 } 6438 6439 out: 6440 return ret; 6441 } 6442 6443 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6444 { 6445 int ret = 0, credits = 0; 6446 handle_t *handle; 6447 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6448 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6449 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6450 int header_off = osb->sb->s_blocksize - inline_size; 6451 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6452 (args->old_bh->b_data + header_off); 6453 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6454 (args->new_bh->b_data + header_off); 6455 struct ocfs2_alloc_context *meta_ac = NULL; 6456 struct ocfs2_inode_info *new_oi; 6457 struct ocfs2_dinode *new_di; 6458 struct ocfs2_xattr_value_buf vb = { 6459 .vb_bh = args->new_bh, 6460 .vb_access = 
ocfs2_journal_access_di, 6461 }; 6462 6463 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6464 &credits, &meta_ac); 6465 if (ret) { 6466 mlog_errno(ret); 6467 goto out; 6468 } 6469 6470 handle = ocfs2_start_trans(osb, credits); 6471 if (IS_ERR(handle)) { 6472 ret = PTR_ERR(handle); 6473 mlog_errno(ret); 6474 goto out; 6475 } 6476 6477 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6478 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6479 if (ret) { 6480 mlog_errno(ret); 6481 goto out_commit; 6482 } 6483 6484 memcpy(args->new_bh->b_data + header_off, 6485 args->old_bh->b_data + header_off, inline_size); 6486 6487 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6488 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6489 6490 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6491 args->new_bh, new_xh, &vb, meta_ac, 6492 ocfs2_get_xattr_value_root, NULL); 6493 if (ret) { 6494 mlog_errno(ret); 6495 goto out_commit; 6496 } 6497 6498 new_oi = OCFS2_I(args->new_inode); 6499 /* 6500 * Adjust extent record count to reserve space for extended attribute. 6501 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6502 */ 6503 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6504 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6505 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6506 le16_add_cpu(&el->l_count, -(inline_size / 6507 sizeof(struct ocfs2_extent_rec))); 6508 } 6509 spin_lock(&new_oi->ip_lock); 6510 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6511 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6512 spin_unlock(&new_oi->ip_lock); 6513 6514 ocfs2_journal_dirty(handle, args->new_bh); 6515 6516 out_commit: 6517 ocfs2_commit_trans(osb, handle); 6518 6519 out: 6520 if (meta_ac) 6521 ocfs2_free_alloc_context(meta_ac); 6522 return ret; 6523 } 6524 6525 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6526 struct buffer_head *fe_bh, 6527 struct buffer_head **ret_bh, 6528 int indexed) 6529 { 6530 int ret; 6531 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6532 struct ocfs2_xattr_set_ctxt ctxt; 6533 6534 memset(&ctxt, 0, sizeof(ctxt)); 6535 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6536 if (ret < 0) { 6537 mlog_errno(ret); 6538 return ret; 6539 } 6540 6541 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6542 if (IS_ERR(ctxt.handle)) { 6543 ret = PTR_ERR(ctxt.handle); 6544 mlog_errno(ret); 6545 goto out; 6546 } 6547 6548 trace_ocfs2_create_empty_xattr_block( 6549 (unsigned long long)fe_bh->b_blocknr, indexed); 6550 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6551 ret_bh); 6552 if (ret) 6553 mlog_errno(ret); 6554 6555 ocfs2_commit_trans(osb, ctxt.handle); 6556 out: 6557 ocfs2_free_alloc_context(ctxt.meta_ac); 6558 return ret; 6559 } 6560 6561 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6562 struct buffer_head *blk_bh, 6563 struct buffer_head *new_blk_bh) 6564 { 6565 int ret = 0, credits = 0; 6566 handle_t *handle; 6567 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6568 struct ocfs2_dinode *new_di; 
6569 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6570 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6571 struct ocfs2_xattr_block *xb = 6572 (struct ocfs2_xattr_block *)blk_bh->b_data; 6573 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6574 struct ocfs2_xattr_block *new_xb = 6575 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6576 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6577 struct ocfs2_alloc_context *meta_ac; 6578 struct ocfs2_xattr_value_buf vb = { 6579 .vb_bh = new_blk_bh, 6580 .vb_access = ocfs2_journal_access_xb, 6581 }; 6582 6583 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6584 &credits, &meta_ac); 6585 if (ret) { 6586 mlog_errno(ret); 6587 return ret; 6588 } 6589 6590 /* One more credits in case we need to add xattr flags in new inode. */ 6591 handle = ocfs2_start_trans(osb, credits + 1); 6592 if (IS_ERR(handle)) { 6593 ret = PTR_ERR(handle); 6594 mlog_errno(ret); 6595 goto out; 6596 } 6597 6598 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6599 ret = ocfs2_journal_access_di(handle, 6600 INODE_CACHE(args->new_inode), 6601 args->new_bh, 6602 OCFS2_JOURNAL_ACCESS_WRITE); 6603 if (ret) { 6604 mlog_errno(ret); 6605 goto out_commit; 6606 } 6607 } 6608 6609 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6610 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6611 if (ret) { 6612 mlog_errno(ret); 6613 goto out_commit; 6614 } 6615 6616 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6617 osb->sb->s_blocksize - header_off); 6618 6619 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6620 new_blk_bh, new_xh, &vb, meta_ac, 6621 ocfs2_get_xattr_value_root, NULL); 6622 if (ret) { 6623 mlog_errno(ret); 6624 goto out_commit; 6625 } 6626 6627 ocfs2_journal_dirty(handle, new_blk_bh); 6628 6629 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6630 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6631 
spin_lock(&new_oi->ip_lock); 6632 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6633 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6634 spin_unlock(&new_oi->ip_lock); 6635 6636 ocfs2_journal_dirty(handle, args->new_bh); 6637 } 6638 6639 out_commit: 6640 ocfs2_commit_trans(osb, handle); 6641 6642 out: 6643 ocfs2_free_alloc_context(meta_ac); 6644 return ret; 6645 } 6646 6647 struct ocfs2_reflink_xattr_tree_args { 6648 struct ocfs2_xattr_reflink *reflink; 6649 struct buffer_head *old_blk_bh; 6650 struct buffer_head *new_blk_bh; 6651 struct ocfs2_xattr_bucket *old_bucket; 6652 struct ocfs2_xattr_bucket *new_bucket; 6653 }; 6654 6655 /* 6656 * NOTE: 6657 * We have to handle the case that both old bucket and new bucket 6658 * will call this function to get the right ret_bh. 6659 * So The caller must give us the right bh. 6660 */ 6661 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6662 struct buffer_head *bh, 6663 struct ocfs2_xattr_header *xh, 6664 int offset, 6665 struct ocfs2_xattr_value_root **xv, 6666 struct buffer_head **ret_bh, 6667 void *para) 6668 { 6669 struct ocfs2_reflink_xattr_tree_args *args = 6670 (struct ocfs2_reflink_xattr_tree_args *)para; 6671 struct ocfs2_xattr_bucket *bucket; 6672 6673 if (bh == args->old_bucket->bu_bhs[0]) 6674 bucket = args->old_bucket; 6675 else 6676 bucket = args->new_bucket; 6677 6678 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6679 xv, ret_bh); 6680 } 6681 6682 struct ocfs2_value_tree_metas { 6683 int num_metas; 6684 int credits; 6685 int num_recs; 6686 }; 6687 6688 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6689 struct buffer_head *bh, 6690 struct ocfs2_xattr_header *xh, 6691 int offset, 6692 struct ocfs2_xattr_value_root **xv, 6693 struct buffer_head **ret_bh, 6694 void *para) 6695 { 6696 struct ocfs2_xattr_bucket *bucket = 6697 (struct ocfs2_xattr_bucket *)para; 6698 6699 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6700 xv, 
ret_bh); 6701 } 6702 6703 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6704 struct ocfs2_xattr_bucket *bucket, 6705 void *para) 6706 { 6707 struct ocfs2_value_tree_metas *metas = 6708 (struct ocfs2_value_tree_metas *)para; 6709 struct ocfs2_xattr_header *xh = 6710 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6711 6712 /* Add the credits for this bucket first. */ 6713 metas->credits += bucket->bu_blocks; 6714 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6715 xh, &metas->num_metas, 6716 &metas->credits, &metas->num_recs, 6717 ocfs2_value_tree_metas_in_bucket, 6718 bucket); 6719 } 6720 6721 /* 6722 * Given a xattr extent rec starting from blkno and having len clusters, 6723 * iterate all the buckets calculate how much metadata we need for reflinking 6724 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6725 */ 6726 static int ocfs2_lock_reflink_xattr_rec_allocators( 6727 struct ocfs2_reflink_xattr_tree_args *args, 6728 struct ocfs2_extent_tree *xt_et, 6729 u64 blkno, u32 len, int *credits, 6730 struct ocfs2_alloc_context **meta_ac, 6731 struct ocfs2_alloc_context **data_ac) 6732 { 6733 int ret, num_free_extents; 6734 struct ocfs2_value_tree_metas metas; 6735 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6736 struct ocfs2_refcount_block *rb; 6737 6738 memset(&metas, 0, sizeof(metas)); 6739 6740 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6741 ocfs2_calc_value_tree_metas, &metas); 6742 if (ret) { 6743 mlog_errno(ret); 6744 goto out; 6745 } 6746 6747 *credits = metas.credits; 6748 6749 /* 6750 * Calculate we need for refcount tree change. 6751 * 6752 * We need to add/modify num_recs in refcount tree, so just calculate 6753 * an approximate number we need for refcount tree change. 6754 * Sometimes we need to split the tree, and after split, half recs 6755 * will be moved to the new block, and a new block can only provide 6756 * half number of recs. 
So we multiple new blocks by 2. 6757 * In the end, we have to add credits for modifying the already 6758 * existed refcount block. 6759 */ 6760 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6761 metas.num_recs = 6762 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6763 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6764 metas.num_metas += metas.num_recs; 6765 *credits += metas.num_recs + 6766 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6767 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6768 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6769 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6770 else 6771 *credits += 1; 6772 6773 /* count in the xattr tree change. */ 6774 num_free_extents = ocfs2_num_free_extents(osb, xt_et); 6775 if (num_free_extents < 0) { 6776 ret = num_free_extents; 6777 mlog_errno(ret); 6778 goto out; 6779 } 6780 6781 if (num_free_extents < len) 6782 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6783 6784 *credits += ocfs2_calc_extend_credits(osb->sb, 6785 xt_et->et_root_el); 6786 6787 if (metas.num_metas) { 6788 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6789 meta_ac); 6790 if (ret) { 6791 mlog_errno(ret); 6792 goto out; 6793 } 6794 } 6795 6796 if (len) { 6797 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6798 if (ret) 6799 mlog_errno(ret); 6800 } 6801 out: 6802 if (ret) { 6803 if (*meta_ac) { 6804 ocfs2_free_alloc_context(*meta_ac); 6805 *meta_ac = NULL; 6806 } 6807 } 6808 6809 return ret; 6810 } 6811 6812 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6813 u64 blkno, u64 new_blkno, u32 clusters, 6814 u32 *cpos, int num_buckets, 6815 struct ocfs2_alloc_context *meta_ac, 6816 struct ocfs2_alloc_context *data_ac, 6817 struct ocfs2_reflink_xattr_tree_args *args) 6818 { 6819 int i, j, ret = 0; 6820 struct super_block *sb = args->reflink->old_inode->i_sb; 6821 int bpb = args->old_bucket->bu_blocks; 6822 struct ocfs2_xattr_value_buf vb = { 6823 
.vb_access = ocfs2_journal_access, 6824 }; 6825 6826 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6827 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6828 if (ret) { 6829 mlog_errno(ret); 6830 break; 6831 } 6832 6833 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno); 6834 if (ret) { 6835 mlog_errno(ret); 6836 break; 6837 } 6838 6839 ret = ocfs2_xattr_bucket_journal_access(handle, 6840 args->new_bucket, 6841 OCFS2_JOURNAL_ACCESS_CREATE); 6842 if (ret) { 6843 mlog_errno(ret); 6844 break; 6845 } 6846 6847 for (j = 0; j < bpb; j++) 6848 memcpy(bucket_block(args->new_bucket, j), 6849 bucket_block(args->old_bucket, j), 6850 sb->s_blocksize); 6851 6852 /* 6853 * Record the start cpos so that we can use it to initialize 6854 * our xattr tree we also set the xh_num_bucket for the new 6855 * bucket. 6856 */ 6857 if (i == 0) { 6858 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6859 xh_entries[0].xe_name_hash); 6860 bucket_xh(args->new_bucket)->xh_num_buckets = 6861 cpu_to_le16(num_buckets); 6862 } 6863 6864 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6865 6866 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6867 args->old_bucket->bu_bhs[0], 6868 bucket_xh(args->old_bucket), 6869 args->new_bucket->bu_bhs[0], 6870 bucket_xh(args->new_bucket), 6871 &vb, meta_ac, 6872 ocfs2_get_reflink_xattr_value_root, 6873 args); 6874 if (ret) { 6875 mlog_errno(ret); 6876 break; 6877 } 6878 6879 /* 6880 * Re-access and dirty the bucket to calculate metaecc. 6881 * Because we may extend the transaction in reflink_xattr_header 6882 * which will let the already accessed block gone. 
6883 */ 6884 ret = ocfs2_xattr_bucket_journal_access(handle, 6885 args->new_bucket, 6886 OCFS2_JOURNAL_ACCESS_WRITE); 6887 if (ret) { 6888 mlog_errno(ret); 6889 break; 6890 } 6891 6892 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6893 6894 ocfs2_xattr_bucket_relse(args->old_bucket); 6895 ocfs2_xattr_bucket_relse(args->new_bucket); 6896 } 6897 6898 ocfs2_xattr_bucket_relse(args->old_bucket); 6899 ocfs2_xattr_bucket_relse(args->new_bucket); 6900 return ret; 6901 } 6902 6903 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6904 struct inode *inode, 6905 struct ocfs2_reflink_xattr_tree_args *args, 6906 struct ocfs2_extent_tree *et, 6907 struct ocfs2_alloc_context *meta_ac, 6908 struct ocfs2_alloc_context *data_ac, 6909 u64 blkno, u32 cpos, u32 len) 6910 { 6911 int ret, first_inserted = 0; 6912 u32 p_cluster, num_clusters, reflink_cpos = 0; 6913 u64 new_blkno; 6914 unsigned int num_buckets, reflink_buckets; 6915 unsigned int bpc = 6916 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6917 6918 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6919 if (ret) { 6920 mlog_errno(ret); 6921 goto out; 6922 } 6923 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6924 ocfs2_xattr_bucket_relse(args->old_bucket); 6925 6926 while (len && num_buckets) { 6927 ret = ocfs2_claim_clusters(handle, data_ac, 6928 1, &p_cluster, &num_clusters); 6929 if (ret) { 6930 mlog_errno(ret); 6931 goto out; 6932 } 6933 6934 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6935 reflink_buckets = min(num_buckets, bpc * num_clusters); 6936 6937 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6938 new_blkno, num_clusters, 6939 &reflink_cpos, reflink_buckets, 6940 meta_ac, data_ac, args); 6941 if (ret) { 6942 mlog_errno(ret); 6943 goto out; 6944 } 6945 6946 /* 6947 * For the 1st allocated cluster, we make it use the same cpos 6948 * so that the xattr tree looks the same as the original one 6949 * in the most case. 
6950 */ 6951 if (!first_inserted) { 6952 reflink_cpos = cpos; 6953 first_inserted = 1; 6954 } 6955 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6956 num_clusters, 0, meta_ac); 6957 if (ret) 6958 mlog_errno(ret); 6959 6960 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6961 num_clusters, reflink_cpos); 6962 6963 len -= num_clusters; 6964 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6965 num_buckets -= reflink_buckets; 6966 } 6967 out: 6968 return ret; 6969 } 6970 6971 /* 6972 * Create the same xattr extent record in the new inode's xattr tree. 6973 */ 6974 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6975 struct buffer_head *root_bh, 6976 u64 blkno, 6977 u32 cpos, 6978 u32 len, 6979 void *para) 6980 { 6981 int ret, credits = 0; 6982 handle_t *handle; 6983 struct ocfs2_reflink_xattr_tree_args *args = 6984 (struct ocfs2_reflink_xattr_tree_args *)para; 6985 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6986 struct ocfs2_alloc_context *meta_ac = NULL; 6987 struct ocfs2_alloc_context *data_ac = NULL; 6988 struct ocfs2_extent_tree et; 6989 6990 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 6991 6992 ocfs2_init_xattr_tree_extent_tree(&et, 6993 INODE_CACHE(args->reflink->new_inode), 6994 args->new_blk_bh); 6995 6996 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 6997 len, &credits, 6998 &meta_ac, &data_ac); 6999 if (ret) { 7000 mlog_errno(ret); 7001 goto out; 7002 } 7003 7004 handle = ocfs2_start_trans(osb, credits); 7005 if (IS_ERR(handle)) { 7006 ret = PTR_ERR(handle); 7007 mlog_errno(ret); 7008 goto out; 7009 } 7010 7011 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7012 meta_ac, data_ac, 7013 blkno, cpos, len); 7014 if (ret) 7015 mlog_errno(ret); 7016 7017 ocfs2_commit_trans(osb, handle); 7018 7019 out: 7020 if (meta_ac) 7021 ocfs2_free_alloc_context(meta_ac); 7022 if (data_ac) 7023 ocfs2_free_alloc_context(data_ac); 7024 return ret; 7025 } 7026 7027 /* 
7028 * Create reflinked xattr buckets. 7029 * We will add bucket one by one, and refcount all the xattrs in the bucket 7030 * if they are stored outside. 7031 */ 7032 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7033 struct buffer_head *blk_bh, 7034 struct buffer_head *new_blk_bh) 7035 { 7036 int ret; 7037 struct ocfs2_reflink_xattr_tree_args para; 7038 7039 memset(¶, 0, sizeof(para)); 7040 para.reflink = args; 7041 para.old_blk_bh = blk_bh; 7042 para.new_blk_bh = new_blk_bh; 7043 7044 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7045 if (!para.old_bucket) { 7046 mlog_errno(-ENOMEM); 7047 return -ENOMEM; 7048 } 7049 7050 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7051 if (!para.new_bucket) { 7052 ret = -ENOMEM; 7053 mlog_errno(ret); 7054 goto out; 7055 } 7056 7057 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7058 ocfs2_reflink_xattr_rec, 7059 ¶); 7060 if (ret) 7061 mlog_errno(ret); 7062 7063 out: 7064 ocfs2_xattr_bucket_free(para.old_bucket); 7065 ocfs2_xattr_bucket_free(para.new_bucket); 7066 return ret; 7067 } 7068 7069 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7070 struct buffer_head *blk_bh) 7071 { 7072 int ret, indexed = 0; 7073 struct buffer_head *new_blk_bh = NULL; 7074 struct ocfs2_xattr_block *xb = 7075 (struct ocfs2_xattr_block *)blk_bh->b_data; 7076 7077 7078 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7079 indexed = 1; 7080 7081 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7082 &new_blk_bh, indexed); 7083 if (ret) { 7084 mlog_errno(ret); 7085 goto out; 7086 } 7087 7088 if (!indexed) 7089 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7090 else 7091 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7092 if (ret) 7093 mlog_errno(ret); 7094 7095 out: 7096 brelse(new_blk_bh); 7097 return ret; 7098 } 7099 7100 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7101 { 7102 int type = 
ocfs2_xattr_get_type(xe); 7103 7104 return type != OCFS2_XATTR_INDEX_SECURITY && 7105 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7106 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7107 } 7108 7109 int ocfs2_reflink_xattrs(struct inode *old_inode, 7110 struct buffer_head *old_bh, 7111 struct inode *new_inode, 7112 struct buffer_head *new_bh, 7113 bool preserve_security) 7114 { 7115 int ret; 7116 struct ocfs2_xattr_reflink args; 7117 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7118 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7119 struct buffer_head *blk_bh = NULL; 7120 struct ocfs2_cached_dealloc_ctxt dealloc; 7121 struct ocfs2_refcount_tree *ref_tree; 7122 struct buffer_head *ref_root_bh = NULL; 7123 7124 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7125 le64_to_cpu(di->i_refcount_loc), 7126 1, &ref_tree, &ref_root_bh); 7127 if (ret) { 7128 mlog_errno(ret); 7129 goto out; 7130 } 7131 7132 ocfs2_init_dealloc_ctxt(&dealloc); 7133 7134 args.old_inode = old_inode; 7135 args.new_inode = new_inode; 7136 args.old_bh = old_bh; 7137 args.new_bh = new_bh; 7138 args.ref_ci = &ref_tree->rf_ci; 7139 args.ref_root_bh = ref_root_bh; 7140 args.dealloc = &dealloc; 7141 if (preserve_security) 7142 args.xattr_reflinked = NULL; 7143 else 7144 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7145 7146 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7147 ret = ocfs2_reflink_xattr_inline(&args); 7148 if (ret) { 7149 mlog_errno(ret); 7150 goto out_unlock; 7151 } 7152 } 7153 7154 if (!di->i_xattr_loc) 7155 goto out_unlock; 7156 7157 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7158 &blk_bh); 7159 if (ret < 0) { 7160 mlog_errno(ret); 7161 goto out_unlock; 7162 } 7163 7164 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7165 if (ret) 7166 mlog_errno(ret); 7167 7168 brelse(blk_bh); 7169 7170 out_unlock: 7171 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7172 ref_tree, 1); 7173 brelse(ref_root_bh); 7174 7175 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7176 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7177 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7178 } 7179 7180 out: 7181 return ret; 7182 } 7183 7184 /* 7185 * Initialize security and acl for a already created inode. 7186 * Used for reflink a non-preserve-security file. 7187 * 7188 * It uses common api like ocfs2_xattr_set, so the caller 7189 * must not hold any lock expect i_mutex. 7190 */ 7191 int ocfs2_init_security_and_acl(struct inode *dir, 7192 struct inode *inode, 7193 const struct qstr *qstr) 7194 { 7195 int ret = 0; 7196 struct buffer_head *dir_bh = NULL; 7197 7198 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7199 if (ret) { 7200 mlog_errno(ret); 7201 goto leave; 7202 } 7203 7204 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7205 if (ret) { 7206 mlog_errno(ret); 7207 goto leave; 7208 } 7209 7210 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7211 if (ret) 7212 mlog_errno(ret); 7213 7214 ocfs2_inode_unlock(dir, 0); 7215 brelse(dir_bh); 7216 leave: 7217 return ret; 7218 } 7219 /* 7220 * 'security' attributes support 7221 */ 7222 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, 7223 size_t list_size, const char *name, 7224 size_t name_len, int type) 7225 { 7226 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7227 const size_t total_len = prefix_len + name_len + 1; 7228 7229 if (list && total_len <= list_size) { 7230 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); 7231 memcpy(list + prefix_len, name, name_len); 7232 list[prefix_len + name_len] = '\0'; 7233 } 7234 return total_len; 7235 } 7236 7237 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, 7238 void *buffer, size_t size, int type) 7239 { 7240 if (strcmp(name, "") == 0) 7241 return -EINVAL; 7242 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY, 7243 name, buffer, size); 7244 } 7245 7246 static int 
ocfs2_xattr_security_set(struct dentry *dentry, const char *name, 7247 const void *value, size_t size, int flags, int type) 7248 { 7249 if (strcmp(name, "") == 0) 7250 return -EINVAL; 7251 7252 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY, 7253 name, value, size, flags); 7254 } 7255 7256 int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7257 void *fs_info) 7258 { 7259 const struct xattr *xattr; 7260 int err = 0; 7261 7262 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7263 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7264 xattr->name, xattr->value, 7265 xattr->value_len, XATTR_CREATE); 7266 if (err) 7267 break; 7268 } 7269 return err; 7270 } 7271 7272 int ocfs2_init_security_get(struct inode *inode, 7273 struct inode *dir, 7274 const struct qstr *qstr, 7275 struct ocfs2_security_xattr_info *si) 7276 { 7277 /* check whether ocfs2 support feature xattr */ 7278 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7279 return -EOPNOTSUPP; 7280 if (si) 7281 return security_old_inode_init_security(inode, dir, qstr, 7282 &si->name, &si->value, 7283 &si->value_len); 7284 7285 return security_inode_init_security(inode, dir, qstr, 7286 &ocfs2_initxattrs, NULL); 7287 } 7288 7289 int ocfs2_init_security_set(handle_t *handle, 7290 struct inode *inode, 7291 struct buffer_head *di_bh, 7292 struct ocfs2_security_xattr_info *si, 7293 struct ocfs2_alloc_context *xattr_ac, 7294 struct ocfs2_alloc_context *data_ac) 7295 { 7296 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7297 OCFS2_XATTR_INDEX_SECURITY, 7298 si->name, si->value, si->value_len, 0, 7299 xattr_ac, data_ac); 7300 } 7301 7302 const struct xattr_handler ocfs2_xattr_security_handler = { 7303 .prefix = XATTR_SECURITY_PREFIX, 7304 .list = ocfs2_xattr_security_list, 7305 .get = ocfs2_xattr_security_get, 7306 .set = ocfs2_xattr_security_set, 7307 }; 7308 7309 /* 7310 * 'trusted' attributes support 7311 */ 7312 static size_t ocfs2_xattr_trusted_list(struct 
dentry *dentry, char *list, 7313 size_t list_size, const char *name, 7314 size_t name_len, int type) 7315 { 7316 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7317 const size_t total_len = prefix_len + name_len + 1; 7318 7319 if (list && total_len <= list_size) { 7320 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); 7321 memcpy(list + prefix_len, name, name_len); 7322 list[prefix_len + name_len] = '\0'; 7323 } 7324 return total_len; 7325 } 7326 7327 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, 7328 void *buffer, size_t size, int type) 7329 { 7330 if (strcmp(name, "") == 0) 7331 return -EINVAL; 7332 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED, 7333 name, buffer, size); 7334 } 7335 7336 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, 7337 const void *value, size_t size, int flags, int type) 7338 { 7339 if (strcmp(name, "") == 0) 7340 return -EINVAL; 7341 7342 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED, 7343 name, value, size, flags); 7344 } 7345 7346 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7347 .prefix = XATTR_TRUSTED_PREFIX, 7348 .list = ocfs2_xattr_trusted_list, 7349 .get = ocfs2_xattr_trusted_get, 7350 .set = ocfs2_xattr_trusted_set, 7351 }; 7352 7353 /* 7354 * 'user' attributes support 7355 */ 7356 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, 7357 size_t list_size, const char *name, 7358 size_t name_len, int type) 7359 { 7360 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7361 const size_t total_len = prefix_len + name_len + 1; 7362 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7363 7364 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7365 return 0; 7366 7367 if (list && total_len <= list_size) { 7368 memcpy(list, XATTR_USER_PREFIX, prefix_len); 7369 memcpy(list + prefix_len, name, name_len); 7370 list[prefix_len + name_len] = '\0'; 7371 } 7372 return total_len; 7373 } 7374 7375 static int 
ocfs2_xattr_user_get(struct dentry *dentry, const char *name, 7376 void *buffer, size_t size, int type) 7377 { 7378 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7379 7380 if (strcmp(name, "") == 0) 7381 return -EINVAL; 7382 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7383 return -EOPNOTSUPP; 7384 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name, 7385 buffer, size); 7386 } 7387 7388 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, 7389 const void *value, size_t size, int flags, int type) 7390 { 7391 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7392 7393 if (strcmp(name, "") == 0) 7394 return -EINVAL; 7395 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7396 return -EOPNOTSUPP; 7397 7398 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER, 7399 name, value, size, flags); 7400 } 7401 7402 const struct xattr_handler ocfs2_xattr_user_handler = { 7403 .prefix = XATTR_USER_PREFIX, 7404 .list = ocfs2_xattr_user_list, 7405 .get = ocfs2_xattr_user_get, 7406 .set = ocfs2_xattr_user_set, 7407 }; 7408