1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 * Portions Copyright 2011 iXsystems, Inc 25 * Copyright (c) 2013 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 27 */ 28 29 #include <sys/zfs_context.h> 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/sysmacros.h> 34 #include <sys/dmu.h> 35 #include <sys/dmu_impl.h> 36 #include <sys/dmu_objset.h> 37 #include <sys/dbuf.h> 38 #include <sys/dnode.h> 39 #include <sys/zap.h> 40 #include <sys/sa.h> 41 #include <sys/sunddi.h> 42 #include <sys/sa_impl.h> 43 #include <sys/dnode.h> 44 #include <sys/errno.h> 45 #include <sys/zfs_context.h> 46 47 /* 48 * ZFS System attributes: 49 * 50 * A generic mechanism to allow for arbitrary attributes 51 * to be stored in a dnode. The data will be stored in the bonus buffer of 52 * the dnode and if necessary a special "spill" block will be used to handle 53 * overflow situations. The spill block will be sized to fit the data 54 * from 512 - 128K. 
 * When a spill block is used the BP (blkptr_t) for the
 * spill block is stored at the end of the current bonus buffer. Any
 * attributes that would be in the way of the blkptr_t will be relocated
 * into the spill block.
 *
 * Attribute registration:
 *
 * Stored persistently on a per dataset basis
 * a mapping between attribute "string" names and their actual attribute
 * numeric values, length, and byteswap function. The names are only used
 * during registration. All attributes are known by their unique attribute
 * id value. If an attribute can have a variable size then the value
 * 0 will be used to indicate this.
 *
 * Attribute Layout:
 *
 * Attribute layouts are a way to compactly store multiple attributes, but
 * without taking the overhead associated with managing each attribute
 * individually. Since you will typically have the same set of attributes
 * stored in the same order a single table will be used to represent that
 * layout. The ZPL for example will usually have only about 10 different
 * layouts (regular files, device files, symlinks,
 * regular files + scanstamp, files/dir with extended attributes, and then
 * you have the possibility of all of those minus ACL, because it would
 * be kicked out into the spill block)
 *
 * Layouts are simply an array of the attributes and their
 * ordering i.e. [0, 1, 4, 5, 2]
 *
 * Each distinct layout is given a unique layout number and that is what is
 * stored in the header at the beginning of the SA data buffer.
 *
 * A layout only covers a single dbuf (bonus or spill). If a set of
 * attributes is split up between the bonus buffer and a spill buffer then
 * two different layouts will be used. This allows us to byteswap the
 * spill without looking at the bonus buffer and keeps the on disk format of
 * the bonus and spill buffer the same.
 *
 * Adding a single attribute will cause the entire set of attributes to
 * be rewritten and could result in a new layout number being constructed
 * as part of the rewrite if no such layout exists for the new set of
 * attributes. The new attribute will be appended to the end of the already
 * existing attributes.
 *
 * Both the attribute registration and attribute layout information are
 * stored in normal ZAP attributes. There should be a small number of
 * known layouts and the set of attributes is assumed to typically be quite
 * small.
 *
 * The registered attributes and layout "table" information is maintained
 * in core and a special "sa_os_t" is attached to the objset_t.
 *
 * A special interface is provided to allow for quickly applying
 * a large set of attributes at once. sa_replace_all_by_template() is
 * used to set an array of attributes. This is used by the ZPL when
 * creating a brand new file. The template that is passed into the function
 * specifies the attribute, size for variable length attributes, location of
 * data and special "data locator" function if the data isn't in a contiguous
 * location.
 *
 * Byteswap implications:
 *
 * Since the SA attributes are not entirely self describing we can't do
 * the normal byteswap processing. The special ZAP layout attribute and
 * attribute registration attributes define the byteswap function and the
 * size of the attributes, unless it is variable sized.
 * The normal ZFS byteswapping infrastructure assumes you don't need
 * to read any objects in order to do the necessary byteswapping. Whereas
 * SA attributes can only be properly byteswapped if the dataset is opened
 * and the layout/attribute ZAP attributes are available. Because of this
 * the SA attributes will be byteswapped when they are first accessed by
 * the SA code that will read the SA data.
126 */ 127 128 typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t, 129 uint16_t length, int length_idx, boolean_t, void *userp); 130 131 static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype); 132 static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab); 133 static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, 134 void *data); 135 static void sa_idx_tab_rele(objset_t *os, void *arg); 136 static void sa_copy_data(sa_data_locator_t *func, void *start, void *target, 137 int buflen); 138 static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 139 sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 140 uint16_t buflen, dmu_tx_t *tx); 141 142 arc_byteswap_func_t *sa_bswap_table[] = { 143 byteswap_uint64_array, 144 byteswap_uint32_array, 145 byteswap_uint16_array, 146 byteswap_uint8_array, 147 zfs_acl_byteswap, 148 }; 149 150 #define SA_COPY_DATA(f, s, t, l) \ 151 { \ 152 if (f == NULL) { \ 153 if (l == 8) { \ 154 *(uint64_t *)t = *(uint64_t *)s; \ 155 } else if (l == 16) { \ 156 *(uint64_t *)t = *(uint64_t *)s; \ 157 *(uint64_t *)((uintptr_t)t + 8) = \ 158 *(uint64_t *)((uintptr_t)s + 8); \ 159 } else { \ 160 bcopy(s, t, l); \ 161 } \ 162 } else \ 163 sa_copy_data(f, s, t, l); \ 164 } 165 166 /* 167 * This table is fixed and cannot be changed. Its purpose is to 168 * allow the SA code to work with both old/new ZPL file systems. 169 * It contains the list of legacy attributes. These attributes aren't 170 * stored in the "attribute" registry zap objects, since older ZPL file systems 171 * won't have the registry. Only objsets of type ZFS_TYPE_FILESYSTEM will 172 * use this static table. 
173 */ 174 sa_attr_reg_t sa_legacy_attrs[] = { 175 {"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, 176 {"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1}, 177 {"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2}, 178 {"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3}, 179 {"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4}, 180 {"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5}, 181 {"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6}, 182 {"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, 183 {"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, 184 {"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, 185 {"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, 186 {"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11}, 187 {"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12}, 188 {"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13}, 189 {"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14}, 190 {"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15}, 191 }; 192 193 /* 194 * This is only used for objects of type DMU_OT_ZNODE 195 */ 196 sa_attr_type_t sa_legacy_zpl_layout[] = { 197 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 198 }; 199 200 /* 201 * Special dummy layout used for buffers with no attributes. 
202 */ 203 sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; 204 205 static int sa_legacy_attr_count = 16; 206 static kmem_cache_t *sa_cache = NULL; 207 208 /*ARGSUSED*/ 209 static int 210 sa_cache_constructor(void *buf, void *unused, int kmflag) 211 { 212 sa_handle_t *hdl = buf; 213 214 hdl->sa_dbu.dbu_evict_func = NULL; 215 hdl->sa_bonus_tab = NULL; 216 hdl->sa_spill_tab = NULL; 217 hdl->sa_os = NULL; 218 hdl->sa_userp = NULL; 219 hdl->sa_bonus = NULL; 220 hdl->sa_spill = NULL; 221 mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL); 222 return (0); 223 } 224 225 /*ARGSUSED*/ 226 static void 227 sa_cache_destructor(void *buf, void *unused) 228 { 229 sa_handle_t *hdl = buf; 230 hdl->sa_dbu.dbu_evict_func = NULL; 231 mutex_destroy(&hdl->sa_lock); 232 } 233 234 void 235 sa_cache_init(void) 236 { 237 sa_cache = kmem_cache_create("sa_cache", 238 sizeof (sa_handle_t), 0, sa_cache_constructor, 239 sa_cache_destructor, NULL, NULL, NULL, 0); 240 } 241 242 void 243 sa_cache_fini(void) 244 { 245 if (sa_cache) 246 kmem_cache_destroy(sa_cache); 247 } 248 249 static int 250 layout_num_compare(const void *arg1, const void *arg2) 251 { 252 const sa_lot_t *node1 = arg1; 253 const sa_lot_t *node2 = arg2; 254 255 if (node1->lot_num > node2->lot_num) 256 return (1); 257 else if (node1->lot_num < node2->lot_num) 258 return (-1); 259 return (0); 260 } 261 262 static int 263 layout_hash_compare(const void *arg1, const void *arg2) 264 { 265 const sa_lot_t *node1 = arg1; 266 const sa_lot_t *node2 = arg2; 267 268 if (node1->lot_hash > node2->lot_hash) 269 return (1); 270 if (node1->lot_hash < node2->lot_hash) 271 return (-1); 272 if (node1->lot_instance > node2->lot_instance) 273 return (1); 274 if (node1->lot_instance < node2->lot_instance) 275 return (-1); 276 return (0); 277 } 278 279 boolean_t 280 sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count) 281 { 282 int i; 283 284 if (count != tbf->lot_attr_count) 285 return (1); 286 287 for (i = 0; i != count; i++) { 288 if 
(attrs[i] != tbf->lot_attrs[i]) 289 return (1); 290 } 291 return (0); 292 } 293 294 #define SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ attr) & 0xFF]) 295 296 static uint64_t 297 sa_layout_info_hash(sa_attr_type_t *attrs, int attr_count) 298 { 299 int i; 300 uint64_t crc = -1ULL; 301 302 for (i = 0; i != attr_count; i++) 303 crc ^= SA_ATTR_HASH(attrs[i]); 304 305 return (crc); 306 } 307 308 static int 309 sa_get_spill(sa_handle_t *hdl) 310 { 311 int rc; 312 if (hdl->sa_spill == NULL) { 313 if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL, 314 &hdl->sa_spill)) == 0) 315 VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 316 } else { 317 rc = 0; 318 } 319 320 return (rc); 321 } 322 323 /* 324 * Main attribute lookup/update function 325 * returns 0 for success or non zero for failures 326 * 327 * Operates on bulk array, first failure will abort further processing 328 */ 329 int 330 sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 331 sa_data_op_t data_op, dmu_tx_t *tx) 332 { 333 sa_os_t *sa = hdl->sa_os->os_sa; 334 int i; 335 int error = 0; 336 sa_buf_type_t buftypes; 337 338 buftypes = 0; 339 340 ASSERT(count > 0); 341 for (i = 0; i != count; i++) { 342 ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs); 343 344 bulk[i].sa_addr = NULL; 345 /* First check the bonus buffer */ 346 347 if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT( 348 hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) { 349 SA_ATTR_INFO(sa, hdl->sa_bonus_tab, 350 SA_GET_HDR(hdl, SA_BONUS), 351 bulk[i].sa_attr, bulk[i], SA_BONUS, hdl); 352 if (tx && !(buftypes & SA_BONUS)) { 353 dmu_buf_will_dirty(hdl->sa_bonus, tx); 354 buftypes |= SA_BONUS; 355 } 356 } 357 if (bulk[i].sa_addr == NULL && 358 ((error = sa_get_spill(hdl)) == 0)) { 359 if (TOC_ATTR_PRESENT( 360 hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) { 361 SA_ATTR_INFO(sa, hdl->sa_spill_tab, 362 SA_GET_HDR(hdl, SA_SPILL), 363 bulk[i].sa_attr, bulk[i], SA_SPILL, hdl); 364 if (tx && !(buftypes & SA_SPILL) && 365 bulk[i].sa_size == 
bulk[i].sa_length) { 366 dmu_buf_will_dirty(hdl->sa_spill, tx); 367 buftypes |= SA_SPILL; 368 } 369 } 370 } 371 if (error && error != ENOENT) { 372 return ((error == ECKSUM) ? EIO : error); 373 } 374 375 switch (data_op) { 376 case SA_LOOKUP: 377 if (bulk[i].sa_addr == NULL) 378 return (SET_ERROR(ENOENT)); 379 if (bulk[i].sa_data) { 380 SA_COPY_DATA(bulk[i].sa_data_func, 381 bulk[i].sa_addr, bulk[i].sa_data, 382 bulk[i].sa_size); 383 } 384 continue; 385 386 case SA_UPDATE: 387 /* existing rewrite of attr */ 388 if (bulk[i].sa_addr && 389 bulk[i].sa_size == bulk[i].sa_length) { 390 SA_COPY_DATA(bulk[i].sa_data_func, 391 bulk[i].sa_data, bulk[i].sa_addr, 392 bulk[i].sa_length); 393 continue; 394 } else if (bulk[i].sa_addr) { /* attr size change */ 395 error = sa_modify_attrs(hdl, bulk[i].sa_attr, 396 SA_REPLACE, bulk[i].sa_data_func, 397 bulk[i].sa_data, bulk[i].sa_length, tx); 398 } else { /* adding new attribute */ 399 error = sa_modify_attrs(hdl, bulk[i].sa_attr, 400 SA_ADD, bulk[i].sa_data_func, 401 bulk[i].sa_data, bulk[i].sa_length, tx); 402 } 403 if (error) 404 return (error); 405 break; 406 } 407 } 408 return (error); 409 } 410 411 static sa_lot_t * 412 sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, 413 uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx) 414 { 415 sa_os_t *sa = os->os_sa; 416 sa_lot_t *tb, *findtb; 417 int i; 418 avl_index_t loc; 419 420 ASSERT(MUTEX_HELD(&sa->sa_lock)); 421 tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP); 422 tb->lot_attr_count = attr_count; 423 tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 424 KM_SLEEP); 425 bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count); 426 tb->lot_num = lot_num; 427 tb->lot_hash = hash; 428 tb->lot_instance = 0; 429 430 if (zapadd) { 431 char attr_name[8]; 432 433 if (sa->sa_layout_attr_obj == 0) { 434 sa->sa_layout_attr_obj = zap_create_link(os, 435 DMU_OT_SA_ATTR_LAYOUTS, 436 sa->sa_master_obj, SA_LAYOUTS, tx); 437 } 438 439 
(void) snprintf(attr_name, sizeof (attr_name), 440 "%d", (int)lot_num); 441 VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj, 442 attr_name, 2, attr_count, attrs, tx)); 443 } 444 445 list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t), 446 offsetof(sa_idx_tab_t, sa_next)); 447 448 for (i = 0; i != attr_count; i++) { 449 if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0) 450 tb->lot_var_sizes++; 451 } 452 453 avl_add(&sa->sa_layout_num_tree, tb); 454 455 /* verify we don't have a hash collision */ 456 if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) { 457 for (; findtb && findtb->lot_hash == hash; 458 findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) { 459 if (findtb->lot_instance != tb->lot_instance) 460 break; 461 tb->lot_instance++; 462 } 463 } 464 avl_add(&sa->sa_layout_hash_tree, tb); 465 return (tb); 466 } 467 468 static void 469 sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs, 470 int count, dmu_tx_t *tx, sa_lot_t **lot) 471 { 472 sa_lot_t *tb, tbsearch; 473 avl_index_t loc; 474 sa_os_t *sa = os->os_sa; 475 boolean_t found = B_FALSE; 476 477 mutex_enter(&sa->sa_lock); 478 tbsearch.lot_hash = hash; 479 tbsearch.lot_instance = 0; 480 tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc); 481 if (tb) { 482 for (; tb && tb->lot_hash == hash; 483 tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) { 484 if (sa_layout_equal(tb, attrs, count) == 0) { 485 found = B_TRUE; 486 break; 487 } 488 } 489 } 490 if (!found) { 491 tb = sa_add_layout_entry(os, attrs, count, 492 avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx); 493 } 494 mutex_exit(&sa->sa_lock); 495 *lot = tb; 496 } 497 498 static int 499 sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) 500 { 501 int error; 502 uint32_t blocksize; 503 504 if (size == 0) { 505 blocksize = SPA_MINBLOCKSIZE; 506 } else if (size > SPA_OLD_MAXBLOCKSIZE) { 507 ASSERT(0); 508 return (SET_ERROR(EFBIG)); 509 } else { 510 blocksize = P2ROUNDUP_TYPED(size, 
SPA_MINBLOCKSIZE, uint32_t); 511 } 512 513 error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx); 514 ASSERT(error == 0); 515 return (error); 516 } 517 518 static void 519 sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen) 520 { 521 if (func == NULL) { 522 bcopy(datastart, target, buflen); 523 } else { 524 boolean_t start; 525 int bytes; 526 void *dataptr; 527 void *saptr = target; 528 uint32_t length; 529 530 start = B_TRUE; 531 bytes = 0; 532 while (bytes < buflen) { 533 func(&dataptr, &length, buflen, start, datastart); 534 bcopy(dataptr, saptr, length); 535 saptr = (void *)((caddr_t)saptr + length); 536 bytes += length; 537 start = B_FALSE; 538 } 539 } 540 } 541 542 /* 543 * Determine several different sizes 544 * first the sa header size 545 * the number of bytes to be stored 546 * if spill would occur the index in the attribute array is returned 547 * 548 * the boolean will_spill will be set when spilling is necessary. It 549 * is only set when the buftype is SA_BONUS 550 */ 551 static int 552 sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count, 553 dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total, 554 boolean_t *will_spill) 555 { 556 int var_size = 0; 557 int i; 558 int j = -1; 559 int full_space; 560 int hdrsize; 561 boolean_t done = B_FALSE; 562 563 if (buftype == SA_BONUS && sa->sa_force_spill) { 564 *total = 0; 565 *index = 0; 566 *will_spill = B_TRUE; 567 return (0); 568 } 569 570 *index = -1; 571 *total = 0; 572 573 if (buftype == SA_BONUS) 574 *will_spill = B_FALSE; 575 576 hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 : 577 sizeof (sa_hdr_phys_t); 578 579 full_space = (buftype == SA_BONUS) ? 
DN_MAX_BONUSLEN : db->db_size; 580 ASSERT(IS_P2ALIGNED(full_space, 8)); 581 582 for (i = 0; i != attr_count; i++) { 583 boolean_t is_var_sz; 584 585 *total = P2ROUNDUP(*total, 8); 586 *total += attr_desc[i].sa_length; 587 if (done) 588 goto next; 589 590 is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0); 591 if (is_var_sz) { 592 var_size++; 593 } 594 595 if (is_var_sz && var_size > 1) { 596 if (P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) + 597 *total < full_space) { 598 /* 599 * Account for header space used by array of 600 * optional sizes of variable-length attributes. 601 * Record the index in case this increase needs 602 * to be reversed due to spill-over. 603 */ 604 hdrsize += sizeof (uint16_t); 605 j = i; 606 } else { 607 done = B_TRUE; 608 *index = i; 609 if (buftype == SA_BONUS) 610 *will_spill = B_TRUE; 611 continue; 612 } 613 } 614 615 /* 616 * find index of where spill *could* occur. 617 * Then continue to count of remainder attribute 618 * space. The sum is used later for sizing bonus 619 * and spill buffer. 620 */ 621 if (buftype == SA_BONUS && *index == -1 && 622 *total + P2ROUNDUP(hdrsize, 8) > 623 (full_space - sizeof (blkptr_t))) { 624 *index = i; 625 done = B_TRUE; 626 } 627 628 next: 629 if (*total + P2ROUNDUP(hdrsize, 8) > full_space && 630 buftype == SA_BONUS) 631 *will_spill = B_TRUE; 632 } 633 634 /* 635 * j holds the index of the last variable-sized attribute for 636 * which hdrsize was increased. Reverse the increase if that 637 * attribute will be relocated to the spill block. 638 */ 639 if (*will_spill && j == *index) 640 hdrsize -= sizeof (uint16_t); 641 642 hdrsize = P2ROUNDUP(hdrsize, 8); 643 return (hdrsize); 644 } 645 646 #define BUF_SPACE_NEEDED(total, header) (total + header) 647 648 /* 649 * Find layout that corresponds to ordering of attributes 650 * If not found a new layout number is created and added to 651 * persistent layout tables. 
652 */ 653 static int 654 sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, 655 dmu_tx_t *tx) 656 { 657 sa_os_t *sa = hdl->sa_os->os_sa; 658 uint64_t hash; 659 sa_buf_type_t buftype; 660 sa_hdr_phys_t *sahdr; 661 void *data_start; 662 int buf_space; 663 sa_attr_type_t *attrs, *attrs_start; 664 int i, lot_count; 665 int hdrsize; 666 int spillhdrsize = 0; 667 int used; 668 dmu_object_type_t bonustype; 669 sa_lot_t *lot; 670 int len_idx; 671 int spill_used; 672 boolean_t spilling; 673 674 dmu_buf_will_dirty(hdl->sa_bonus, tx); 675 bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus); 676 677 /* first determine bonus header size and sum of all attributes */ 678 hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus, 679 SA_BONUS, &i, &used, &spilling); 680 681 if (used > SPA_OLD_MAXBLOCKSIZE) 682 return (SET_ERROR(EFBIG)); 683 684 VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? 685 MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) : 686 used + hdrsize, tx)); 687 688 ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) || 689 bonustype == DMU_OT_SA); 690 691 /* setup and size spill buffer when needed */ 692 if (spilling) { 693 boolean_t dummy; 694 695 if (hdl->sa_spill == NULL) { 696 VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL, 697 &hdl->sa_spill) == 0); 698 } 699 dmu_buf_will_dirty(hdl->sa_spill, tx); 700 701 spillhdrsize = sa_find_sizes(sa, &attr_desc[i], 702 attr_count - i, hdl->sa_spill, SA_SPILL, &i, 703 &spill_used, &dummy); 704 705 if (spill_used > SPA_OLD_MAXBLOCKSIZE) 706 return (SET_ERROR(EFBIG)); 707 708 buf_space = hdl->sa_spill->db_size - spillhdrsize; 709 if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > 710 hdl->sa_spill->db_size) 711 VERIFY(0 == sa_resize_spill(hdl, 712 BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx)); 713 } 714 715 /* setup starting pointers to lay down data */ 716 data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize); 717 sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data; 718 
buftype = SA_BONUS; 719 720 if (spilling) 721 buf_space = (sa->sa_force_spill) ? 722 0 : SA_BLKPTR_SPACE - hdrsize; 723 else 724 buf_space = hdl->sa_bonus->db_size - hdrsize; 725 726 attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 727 KM_SLEEP); 728 lot_count = 0; 729 730 for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) { 731 uint16_t length; 732 733 ASSERT(IS_P2ALIGNED(data_start, 8)); 734 ASSERT(IS_P2ALIGNED(buf_space, 8)); 735 attrs[i] = attr_desc[i].sa_attr; 736 length = SA_REGISTERED_LEN(sa, attrs[i]); 737 if (length == 0) 738 length = attr_desc[i].sa_length; 739 740 if (buf_space < length) { /* switch to spill buffer */ 741 VERIFY(spilling); 742 VERIFY(bonustype == DMU_OT_SA); 743 if (buftype == SA_BONUS && !sa->sa_force_spill) { 744 sa_find_layout(hdl->sa_os, hash, attrs_start, 745 lot_count, tx, &lot); 746 SA_SET_HDR(sahdr, lot->lot_num, hdrsize); 747 } 748 749 buftype = SA_SPILL; 750 hash = -1ULL; 751 len_idx = 0; 752 753 sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data; 754 sahdr->sa_magic = SA_MAGIC; 755 data_start = (void *)((uintptr_t)sahdr + 756 spillhdrsize); 757 attrs_start = &attrs[i]; 758 buf_space = hdl->sa_spill->db_size - spillhdrsize; 759 lot_count = 0; 760 } 761 hash ^= SA_ATTR_HASH(attrs[i]); 762 attr_desc[i].sa_addr = data_start; 763 attr_desc[i].sa_size = length; 764 SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data, 765 data_start, length); 766 if (sa->sa_attr_table[attrs[i]].sa_length == 0) { 767 sahdr->sa_lengths[len_idx++] = length; 768 } 769 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + 770 length), 8); 771 buf_space -= P2ROUNDUP(length, 8); 772 lot_count++; 773 } 774 775 sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot); 776 777 /* 778 * Verify that old znodes always have layout number 0. 
779 * Must be DMU_OT_SA for arbitrary layouts 780 */ 781 VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) || 782 (bonustype == DMU_OT_SA && lot->lot_num > 1)); 783 784 if (bonustype == DMU_OT_SA) { 785 SA_SET_HDR(sahdr, lot->lot_num, 786 buftype == SA_BONUS ? hdrsize : spillhdrsize); 787 } 788 789 kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count); 790 if (hdl->sa_bonus_tab) { 791 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 792 hdl->sa_bonus_tab = NULL; 793 } 794 if (!sa->sa_force_spill) 795 VERIFY(0 == sa_build_index(hdl, SA_BONUS)); 796 if (hdl->sa_spill) { 797 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 798 if (!spilling) { 799 /* 800 * remove spill block that is no longer needed. 801 */ 802 dmu_buf_rele(hdl->sa_spill, NULL); 803 hdl->sa_spill = NULL; 804 hdl->sa_spill_tab = NULL; 805 VERIFY(0 == dmu_rm_spill(hdl->sa_os, 806 sa_handle_object(hdl), tx)); 807 } else { 808 VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 809 } 810 } 811 812 return (0); 813 } 814 815 static void 816 sa_free_attr_table(sa_os_t *sa) 817 { 818 int i; 819 820 if (sa->sa_attr_table == NULL) 821 return; 822 823 for (i = 0; i != sa->sa_num_attrs; i++) { 824 if (sa->sa_attr_table[i].sa_name) 825 kmem_free(sa->sa_attr_table[i].sa_name, 826 strlen(sa->sa_attr_table[i].sa_name) + 1); 827 } 828 829 kmem_free(sa->sa_attr_table, 830 sizeof (sa_attr_table_t) * sa->sa_num_attrs); 831 832 sa->sa_attr_table = NULL; 833 } 834 835 static int 836 sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) 837 { 838 sa_os_t *sa = os->os_sa; 839 uint64_t sa_attr_count = 0; 840 uint64_t sa_reg_count = 0; 841 int error = 0; 842 uint64_t attr_value; 843 sa_attr_table_t *tb; 844 zap_cursor_t zc; 845 zap_attribute_t za; 846 int registered_count = 0; 847 int i; 848 dmu_objset_type_t ostype = dmu_objset_type(os); 849 850 sa->sa_user_table = 851 kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP); 852 sa->sa_user_table_sz = count * sizeof (sa_attr_type_t); 853 854 if 
(sa->sa_reg_attr_obj != 0) { 855 error = zap_count(os, sa->sa_reg_attr_obj, 856 &sa_attr_count); 857 858 /* 859 * Make sure we retrieved a count and that it isn't zero 860 */ 861 if (error || (error == 0 && sa_attr_count == 0)) { 862 if (error == 0) 863 error = SET_ERROR(EINVAL); 864 goto bail; 865 } 866 sa_reg_count = sa_attr_count; 867 } 868 869 if (ostype == DMU_OST_ZFS && sa_attr_count == 0) 870 sa_attr_count += sa_legacy_attr_count; 871 872 /* Allocate attribute numbers for attributes that aren't registered */ 873 for (i = 0; i != count; i++) { 874 boolean_t found = B_FALSE; 875 int j; 876 877 if (ostype == DMU_OST_ZFS) { 878 for (j = 0; j != sa_legacy_attr_count; j++) { 879 if (strcmp(reg_attrs[i].sa_name, 880 sa_legacy_attrs[j].sa_name) == 0) { 881 sa->sa_user_table[i] = 882 sa_legacy_attrs[j].sa_attr; 883 found = B_TRUE; 884 } 885 } 886 } 887 if (found) 888 continue; 889 890 if (sa->sa_reg_attr_obj) 891 error = zap_lookup(os, sa->sa_reg_attr_obj, 892 reg_attrs[i].sa_name, 8, 1, &attr_value); 893 else 894 error = SET_ERROR(ENOENT); 895 switch (error) { 896 case ENOENT: 897 sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count; 898 sa_attr_count++; 899 break; 900 case 0: 901 sa->sa_user_table[i] = ATTR_NUM(attr_value); 902 break; 903 default: 904 goto bail; 905 } 906 } 907 908 sa->sa_num_attrs = sa_attr_count; 909 tb = sa->sa_attr_table = 910 kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP); 911 912 /* 913 * Attribute table is constructed from requested attribute list, 914 * previously foreign registered attributes, and also the legacy 915 * ZPL set of attributes. 
916 */ 917 918 if (sa->sa_reg_attr_obj) { 919 for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj); 920 (error = zap_cursor_retrieve(&zc, &za)) == 0; 921 zap_cursor_advance(&zc)) { 922 uint64_t value; 923 value = za.za_first_integer; 924 925 registered_count++; 926 tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value); 927 tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value); 928 tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value); 929 tb[ATTR_NUM(value)].sa_registered = B_TRUE; 930 931 if (tb[ATTR_NUM(value)].sa_name) { 932 continue; 933 } 934 tb[ATTR_NUM(value)].sa_name = 935 kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP); 936 (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, 937 strlen(za.za_name) +1); 938 } 939 zap_cursor_fini(&zc); 940 /* 941 * Make sure we processed the correct number of registered 942 * attributes 943 */ 944 if (registered_count != sa_reg_count) { 945 ASSERT(error != 0); 946 goto bail; 947 } 948 949 } 950 951 if (ostype == DMU_OST_ZFS) { 952 for (i = 0; i != sa_legacy_attr_count; i++) { 953 if (tb[i].sa_name) 954 continue; 955 tb[i].sa_attr = sa_legacy_attrs[i].sa_attr; 956 tb[i].sa_length = sa_legacy_attrs[i].sa_length; 957 tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap; 958 tb[i].sa_registered = B_FALSE; 959 tb[i].sa_name = 960 kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1, 961 KM_SLEEP); 962 (void) strlcpy(tb[i].sa_name, 963 sa_legacy_attrs[i].sa_name, 964 strlen(sa_legacy_attrs[i].sa_name) + 1); 965 } 966 } 967 968 for (i = 0; i != count; i++) { 969 sa_attr_type_t attr_id; 970 971 attr_id = sa->sa_user_table[i]; 972 if (tb[attr_id].sa_name) 973 continue; 974 975 tb[attr_id].sa_length = reg_attrs[i].sa_length; 976 tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap; 977 tb[attr_id].sa_attr = attr_id; 978 tb[attr_id].sa_name = 979 kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP); 980 (void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name, 981 strlen(reg_attrs[i].sa_name) + 1); 982 } 983 984 sa->sa_need_attr_registration = 985 
(sa_attr_count != registered_count); 986 987 return (0); 988 bail: 989 kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t)); 990 sa->sa_user_table = NULL; 991 sa_free_attr_table(sa); 992 return ((error != 0) ? error : EINVAL); 993 } 994 995 int 996 sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, 997 sa_attr_type_t **user_table) 998 { 999 zap_cursor_t zc; 1000 zap_attribute_t za; 1001 sa_os_t *sa; 1002 dmu_objset_type_t ostype = dmu_objset_type(os); 1003 sa_attr_type_t *tb; 1004 int error; 1005 1006 mutex_enter(&os->os_user_ptr_lock); 1007 if (os->os_sa) { 1008 mutex_enter(&os->os_sa->sa_lock); 1009 mutex_exit(&os->os_user_ptr_lock); 1010 tb = os->os_sa->sa_user_table; 1011 mutex_exit(&os->os_sa->sa_lock); 1012 *user_table = tb; 1013 return (0); 1014 } 1015 1016 sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); 1017 mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); 1018 sa->sa_master_obj = sa_obj; 1019 1020 os->os_sa = sa; 1021 mutex_enter(&sa->sa_lock); 1022 mutex_exit(&os->os_user_ptr_lock); 1023 avl_create(&sa->sa_layout_num_tree, layout_num_compare, 1024 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node)); 1025 avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, 1026 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node)); 1027 1028 if (sa_obj) { 1029 error = zap_lookup(os, sa_obj, SA_LAYOUTS, 1030 8, 1, &sa->sa_layout_attr_obj); 1031 if (error != 0 && error != ENOENT) 1032 goto fail; 1033 error = zap_lookup(os, sa_obj, SA_REGISTRY, 1034 8, 1, &sa->sa_reg_attr_obj); 1035 if (error != 0 && error != ENOENT) 1036 goto fail; 1037 } 1038 1039 if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0) 1040 goto fail; 1041 1042 if (sa->sa_layout_attr_obj != 0) { 1043 uint64_t layout_count; 1044 1045 error = zap_count(os, sa->sa_layout_attr_obj, 1046 &layout_count); 1047 1048 /* 1049 * Layout number count should be > 0 1050 */ 1051 if (error || (error == 0 && layout_count == 0)) { 1052 if (error == 0) 1053 error = 
SET_ERROR(EINVAL); 1054 goto fail; 1055 } 1056 1057 for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj); 1058 (error = zap_cursor_retrieve(&zc, &za)) == 0; 1059 zap_cursor_advance(&zc)) { 1060 sa_attr_type_t *lot_attrs; 1061 uint64_t lot_num; 1062 1063 lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * 1064 za.za_num_integers, KM_SLEEP); 1065 1066 if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, 1067 za.za_name, 2, za.za_num_integers, 1068 lot_attrs))) != 0) { 1069 kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1070 za.za_num_integers); 1071 break; 1072 } 1073 VERIFY(ddi_strtoull(za.za_name, NULL, 10, 1074 (unsigned long long *)&lot_num) == 0); 1075 1076 (void) sa_add_layout_entry(os, lot_attrs, 1077 za.za_num_integers, lot_num, 1078 sa_layout_info_hash(lot_attrs, 1079 za.za_num_integers), B_FALSE, NULL); 1080 kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1081 za.za_num_integers); 1082 } 1083 zap_cursor_fini(&zc); 1084 1085 /* 1086 * Make sure layout count matches number of entries added 1087 * to AVL tree 1088 */ 1089 if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) { 1090 ASSERT(error != 0); 1091 goto fail; 1092 } 1093 } 1094 1095 /* Add special layout number for old ZNODES */ 1096 if (ostype == DMU_OST_ZFS) { 1097 (void) sa_add_layout_entry(os, sa_legacy_zpl_layout, 1098 sa_legacy_attr_count, 0, 1099 sa_layout_info_hash(sa_legacy_zpl_layout, 1100 sa_legacy_attr_count), B_FALSE, NULL); 1101 1102 (void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1, 1103 0, B_FALSE, NULL); 1104 } 1105 *user_table = os->os_sa->sa_user_table; 1106 mutex_exit(&sa->sa_lock); 1107 return (0); 1108 fail: 1109 os->os_sa = NULL; 1110 sa_free_attr_table(sa); 1111 if (sa->sa_user_table) 1112 kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1113 mutex_exit(&sa->sa_lock); 1114 avl_destroy(&sa->sa_layout_hash_tree); 1115 avl_destroy(&sa->sa_layout_num_tree); 1116 mutex_destroy(&sa->sa_lock); 1117 kmem_free(sa, sizeof (sa_os_t)); 1118 return ((error == ECKSUM) ? 
EIO : error); 1119 } 1120 1121 void 1122 sa_tear_down(objset_t *os) 1123 { 1124 sa_os_t *sa = os->os_sa; 1125 sa_lot_t *layout; 1126 void *cookie; 1127 1128 kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1129 1130 /* Free up attr table */ 1131 1132 sa_free_attr_table(sa); 1133 1134 cookie = NULL; 1135 while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) { 1136 sa_idx_tab_t *tab; 1137 while (tab = list_head(&layout->lot_idx_tab)) { 1138 ASSERT(refcount_count(&tab->sa_refcount)); 1139 sa_idx_tab_rele(os, tab); 1140 } 1141 } 1142 1143 cookie = NULL; 1144 while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) { 1145 kmem_free(layout->lot_attrs, 1146 sizeof (sa_attr_type_t) * layout->lot_attr_count); 1147 kmem_free(layout, sizeof (sa_lot_t)); 1148 } 1149 1150 avl_destroy(&sa->sa_layout_hash_tree); 1151 avl_destroy(&sa->sa_layout_num_tree); 1152 mutex_destroy(&sa->sa_lock); 1153 1154 kmem_free(sa, sizeof (sa_os_t)); 1155 os->os_sa = NULL; 1156 } 1157 1158 void 1159 sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr, 1160 uint16_t length, int length_idx, boolean_t var_length, void *userp) 1161 { 1162 sa_idx_tab_t *idx_tab = userp; 1163 1164 if (var_length) { 1165 ASSERT(idx_tab->sa_variable_lengths); 1166 idx_tab->sa_variable_lengths[length_idx] = length; 1167 } 1168 TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx, 1169 (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr)); 1170 } 1171 1172 static void 1173 sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type, 1174 sa_iterfunc_t func, sa_lot_t *tab, void *userp) 1175 { 1176 void *data_start; 1177 sa_lot_t *tb = tab; 1178 sa_lot_t search; 1179 avl_index_t loc; 1180 sa_os_t *sa = os->os_sa; 1181 int i; 1182 uint16_t *length_start = NULL; 1183 uint8_t length_idx = 0; 1184 1185 if (tab == NULL) { 1186 search.lot_num = SA_LAYOUT_NUM(hdr, type); 1187 tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); 1188 ASSERT(tb); 1189 } 1190 1191 if 
(IS_SA_BONUSTYPE(type)) { 1192 data_start = (void *)P2ROUNDUP(((uintptr_t)hdr + 1193 offsetof(sa_hdr_phys_t, sa_lengths) + 1194 (sizeof (uint16_t) * tb->lot_var_sizes)), 8); 1195 length_start = hdr->sa_lengths; 1196 } else { 1197 data_start = hdr; 1198 } 1199 1200 for (i = 0; i != tb->lot_attr_count; i++) { 1201 int attr_length, reg_length; 1202 uint8_t idx_len; 1203 1204 reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length; 1205 if (reg_length) { 1206 attr_length = reg_length; 1207 idx_len = 0; 1208 } else { 1209 attr_length = length_start[length_idx]; 1210 idx_len = length_idx++; 1211 } 1212 1213 func(hdr, data_start, tb->lot_attrs[i], attr_length, 1214 idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp); 1215 1216 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + 1217 attr_length), 8); 1218 } 1219 } 1220 1221 /*ARGSUSED*/ 1222 void 1223 sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr, 1224 uint16_t length, int length_idx, boolean_t variable_length, void *userp) 1225 { 1226 sa_handle_t *hdl = userp; 1227 sa_os_t *sa = hdl->sa_os->os_sa; 1228 1229 sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length); 1230 } 1231 1232 void 1233 sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype) 1234 { 1235 sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype); 1236 dmu_buf_impl_t *db; 1237 sa_os_t *sa = hdl->sa_os->os_sa; 1238 int num_lengths = 1; 1239 int i; 1240 1241 ASSERT(MUTEX_HELD(&sa->sa_lock)); 1242 if (sa_hdr_phys->sa_magic == SA_MAGIC) 1243 return; 1244 1245 db = SA_GET_DB(hdl, buftype); 1246 1247 if (buftype == SA_SPILL) { 1248 arc_release(db->db_buf, NULL); 1249 arc_buf_thaw(db->db_buf); 1250 } 1251 1252 sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic); 1253 sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info); 1254 1255 /* 1256 * Determine number of variable lenghts in header 1257 * The standard 8 byte header has one for free and a 1258 * 16 byte header would have 4 + 1; 1259 */ 1260 if 
(SA_HDR_SIZE(sa_hdr_phys) > 8) 1261 num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1; 1262 for (i = 0; i != num_lengths; i++) 1263 sa_hdr_phys->sa_lengths[i] = 1264 BSWAP_16(sa_hdr_phys->sa_lengths[i]); 1265 1266 sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA, 1267 sa_byteswap_cb, NULL, hdl); 1268 1269 if (buftype == SA_SPILL) 1270 arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf); 1271 } 1272 1273 static int 1274 sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype) 1275 { 1276 sa_hdr_phys_t *sa_hdr_phys; 1277 dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype); 1278 dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db); 1279 sa_os_t *sa = hdl->sa_os->os_sa; 1280 sa_idx_tab_t *idx_tab; 1281 1282 sa_hdr_phys = SA_GET_HDR(hdl, buftype); 1283 1284 mutex_enter(&sa->sa_lock); 1285 1286 /* Do we need to byteswap? */ 1287 1288 /* only check if not old znode */ 1289 if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC && 1290 sa_hdr_phys->sa_magic != 0) { 1291 VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC); 1292 sa_byteswap(hdl, buftype); 1293 } 1294 1295 idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys); 1296 1297 if (buftype == SA_BONUS) 1298 hdl->sa_bonus_tab = idx_tab; 1299 else 1300 hdl->sa_spill_tab = idx_tab; 1301 1302 mutex_exit(&sa->sa_lock); 1303 return (0); 1304 } 1305 1306 /*ARGSUSED*/ 1307 static void 1308 sa_evict(void *dbu) 1309 { 1310 panic("evicting sa dbuf\n"); 1311 } 1312 1313 static void 1314 sa_idx_tab_rele(objset_t *os, void *arg) 1315 { 1316 sa_os_t *sa = os->os_sa; 1317 sa_idx_tab_t *idx_tab = arg; 1318 1319 if (idx_tab == NULL) 1320 return; 1321 1322 mutex_enter(&sa->sa_lock); 1323 if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) { 1324 list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab); 1325 if (idx_tab->sa_variable_lengths) 1326 kmem_free(idx_tab->sa_variable_lengths, 1327 sizeof (uint16_t) * 1328 idx_tab->sa_layout->lot_var_sizes); 1329 refcount_destroy(&idx_tab->sa_refcount); 1330 
kmem_free(idx_tab->sa_idx_tab, 1331 sizeof (uint32_t) * sa->sa_num_attrs); 1332 kmem_free(idx_tab, sizeof (sa_idx_tab_t)); 1333 } 1334 mutex_exit(&sa->sa_lock); 1335 } 1336 1337 static void 1338 sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab) 1339 { 1340 sa_os_t *sa = os->os_sa; 1341 1342 ASSERT(MUTEX_HELD(&sa->sa_lock)); 1343 (void) refcount_add(&idx_tab->sa_refcount, NULL); 1344 } 1345 1346 void 1347 sa_handle_destroy(sa_handle_t *hdl) 1348 { 1349 dmu_buf_t *db = hdl->sa_bonus; 1350 1351 mutex_enter(&hdl->sa_lock); 1352 (void) dmu_buf_remove_user(db, &hdl->sa_dbu); 1353 1354 if (hdl->sa_bonus_tab) { 1355 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 1356 hdl->sa_bonus_tab = NULL; 1357 } 1358 if (hdl->sa_spill_tab) { 1359 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 1360 hdl->sa_spill_tab = NULL; 1361 } 1362 1363 dmu_buf_rele(hdl->sa_bonus, NULL); 1364 1365 if (hdl->sa_spill) 1366 dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); 1367 mutex_exit(&hdl->sa_lock); 1368 1369 kmem_cache_free(sa_cache, hdl); 1370 } 1371 1372 int 1373 sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp, 1374 sa_handle_type_t hdl_type, sa_handle_t **handlepp) 1375 { 1376 int error = 0; 1377 dmu_object_info_t doi; 1378 sa_handle_t *handle = NULL; 1379 1380 #ifdef ZFS_DEBUG 1381 dmu_object_info_from_db(db, &doi); 1382 ASSERT(doi.doi_bonus_type == DMU_OT_SA || 1383 doi.doi_bonus_type == DMU_OT_ZNODE); 1384 #endif 1385 /* find handle, if it exists */ 1386 /* if one doesn't exist then create a new one, and initialize it */ 1387 1388 if (hdl_type == SA_HDL_SHARED) 1389 handle = dmu_buf_get_user(db); 1390 1391 if (handle == NULL) { 1392 sa_handle_t *winner = NULL; 1393 1394 handle = kmem_cache_alloc(sa_cache, KM_SLEEP); 1395 handle->sa_userp = userp; 1396 handle->sa_bonus = db; 1397 handle->sa_os = os; 1398 handle->sa_spill = NULL; 1399 1400 error = sa_build_index(handle, SA_BONUS); 1401 1402 if (hdl_type == SA_HDL_SHARED) { 1403 dmu_buf_init_user(&handle->sa_dbu, sa_evict, 
NULL); 1404 winner = dmu_buf_set_user_ie(db, &handle->sa_dbu); 1405 } 1406 1407 if (winner != NULL) { 1408 kmem_cache_free(sa_cache, handle); 1409 handle = winner; 1410 } 1411 } 1412 *handlepp = handle; 1413 1414 return (error); 1415 } 1416 1417 int 1418 sa_handle_get(objset_t *objset, uint64_t objid, void *userp, 1419 sa_handle_type_t hdl_type, sa_handle_t **handlepp) 1420 { 1421 dmu_buf_t *db; 1422 int error; 1423 1424 if (error = dmu_bonus_hold(objset, objid, NULL, &db)) 1425 return (error); 1426 1427 return (sa_handle_get_from_db(objset, db, userp, hdl_type, 1428 handlepp)); 1429 } 1430 1431 int 1432 sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db) 1433 { 1434 return (dmu_bonus_hold(objset, obj_num, tag, db)); 1435 } 1436 1437 void 1438 sa_buf_rele(dmu_buf_t *db, void *tag) 1439 { 1440 dmu_buf_rele(db, tag); 1441 } 1442 1443 int 1444 sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count) 1445 { 1446 ASSERT(hdl); 1447 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1448 return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL)); 1449 } 1450 1451 int 1452 sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) 1453 { 1454 int error; 1455 sa_bulk_attr_t bulk; 1456 1457 bulk.sa_attr = attr; 1458 bulk.sa_data = buf; 1459 bulk.sa_length = buflen; 1460 bulk.sa_data_func = NULL; 1461 1462 ASSERT(hdl); 1463 mutex_enter(&hdl->sa_lock); 1464 error = sa_lookup_impl(hdl, &bulk, 1); 1465 mutex_exit(&hdl->sa_lock); 1466 return (error); 1467 } 1468 1469 #ifdef _KERNEL 1470 int 1471 sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) 1472 { 1473 int error; 1474 sa_bulk_attr_t bulk; 1475 1476 bulk.sa_data = NULL; 1477 bulk.sa_attr = attr; 1478 bulk.sa_data_func = NULL; 1479 1480 ASSERT(hdl); 1481 1482 mutex_enter(&hdl->sa_lock); 1483 if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) { 1484 error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size, 1485 uio->uio_resid), UIO_READ, uio); 1486 } 1487 
mutex_exit(&hdl->sa_lock); 1488 return (error); 1489 1490 } 1491 #endif 1492 1493 void * 1494 sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data) 1495 { 1496 sa_idx_tab_t *idx_tab; 1497 sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data; 1498 sa_os_t *sa = os->os_sa; 1499 sa_lot_t *tb, search; 1500 avl_index_t loc; 1501 1502 /* 1503 * Deterimine layout number. If SA node and header == 0 then 1504 * force the index table to the dummy "1" empty layout. 1505 * 1506 * The layout number would only be zero for a newly created file 1507 * that has not added any attributes yet, or with crypto enabled which 1508 * doesn't write any attributes to the bonus buffer. 1509 */ 1510 1511 search.lot_num = SA_LAYOUT_NUM(hdr, bonustype); 1512 1513 tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); 1514 1515 /* Verify header size is consistent with layout information */ 1516 ASSERT(tb); 1517 ASSERT(IS_SA_BONUSTYPE(bonustype) && 1518 SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) || 1519 (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0)); 1520 1521 /* 1522 * See if any of the already existing TOC entries can be reused? 
1523 */ 1524 1525 for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab; 1526 idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) { 1527 boolean_t valid_idx = B_TRUE; 1528 int i; 1529 1530 if (tb->lot_var_sizes != 0 && 1531 idx_tab->sa_variable_lengths != NULL) { 1532 for (i = 0; i != tb->lot_var_sizes; i++) { 1533 if (hdr->sa_lengths[i] != 1534 idx_tab->sa_variable_lengths[i]) { 1535 valid_idx = B_FALSE; 1536 break; 1537 } 1538 } 1539 } 1540 if (valid_idx) { 1541 sa_idx_tab_hold(os, idx_tab); 1542 return (idx_tab); 1543 } 1544 } 1545 1546 /* No such luck, create a new entry */ 1547 idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP); 1548 idx_tab->sa_idx_tab = 1549 kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP); 1550 idx_tab->sa_layout = tb; 1551 refcount_create(&idx_tab->sa_refcount); 1552 if (tb->lot_var_sizes) 1553 idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) * 1554 tb->lot_var_sizes, KM_SLEEP); 1555 1556 sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab, 1557 tb, idx_tab); 1558 sa_idx_tab_hold(os, idx_tab); /* one hold for consumer */ 1559 sa_idx_tab_hold(os, idx_tab); /* one for layout */ 1560 list_insert_tail(&tb->lot_idx_tab, idx_tab); 1561 return (idx_tab); 1562 } 1563 1564 void 1565 sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len, 1566 boolean_t start, void *userdata) 1567 { 1568 ASSERT(start); 1569 1570 *dataptr = userdata; 1571 *len = total_len; 1572 } 1573 1574 static void 1575 sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) 1576 { 1577 uint64_t attr_value = 0; 1578 sa_os_t *sa = hdl->sa_os->os_sa; 1579 sa_attr_table_t *tb = sa->sa_attr_table; 1580 int i; 1581 1582 mutex_enter(&sa->sa_lock); 1583 1584 if (!sa->sa_need_attr_registration || sa->sa_master_obj == NULL) { 1585 mutex_exit(&sa->sa_lock); 1586 return; 1587 } 1588 1589 if (sa->sa_reg_attr_obj == NULL) { 1590 sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os, 1591 DMU_OT_SA_ATTR_REGISTRATION, 1592 sa->sa_master_obj, SA_REGISTRY, tx); 1593 } 1594 
for (i = 0; i != sa->sa_num_attrs; i++) { 1595 if (sa->sa_attr_table[i].sa_registered) 1596 continue; 1597 ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length, 1598 tb[i].sa_byteswap); 1599 VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj, 1600 tb[i].sa_name, 8, 1, &attr_value, tx)); 1601 tb[i].sa_registered = B_TRUE; 1602 } 1603 sa->sa_need_attr_registration = B_FALSE; 1604 mutex_exit(&sa->sa_lock); 1605 } 1606 1607 /* 1608 * Replace all attributes with attributes specified in template. 1609 * If dnode had a spill buffer then those attributes will be 1610 * also be replaced, possibly with just an empty spill block 1611 * 1612 * This interface is intended to only be used for bulk adding of 1613 * attributes for a new file. It will also be used by the ZPL 1614 * when converting and old formatted znode to native SA support. 1615 */ 1616 int 1617 sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, 1618 int attr_count, dmu_tx_t *tx) 1619 { 1620 sa_os_t *sa = hdl->sa_os->os_sa; 1621 1622 if (sa->sa_need_attr_registration) 1623 sa_attr_register_sync(hdl, tx); 1624 return (sa_build_layouts(hdl, attr_desc, attr_count, tx)); 1625 } 1626 1627 int 1628 sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, 1629 int attr_count, dmu_tx_t *tx) 1630 { 1631 int error; 1632 1633 mutex_enter(&hdl->sa_lock); 1634 error = sa_replace_all_by_template_locked(hdl, attr_desc, 1635 attr_count, tx); 1636 mutex_exit(&hdl->sa_lock); 1637 return (error); 1638 } 1639 1640 /* 1641 * add/remove/replace a single attribute and then rewrite the entire set 1642 * of attributes. 
1643 */ 1644 static int 1645 sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 1646 sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 1647 uint16_t buflen, dmu_tx_t *tx) 1648 { 1649 sa_os_t *sa = hdl->sa_os->os_sa; 1650 dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; 1651 dnode_t *dn; 1652 sa_bulk_attr_t *attr_desc; 1653 void *old_data[2]; 1654 int bonus_attr_count = 0; 1655 int bonus_data_size = 0; 1656 int spill_data_size = 0; 1657 int spill_attr_count = 0; 1658 int error; 1659 uint16_t length; 1660 int i, j, k, length_idx; 1661 sa_hdr_phys_t *hdr; 1662 sa_idx_tab_t *idx_tab; 1663 int attr_count; 1664 int count; 1665 1666 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1667 1668 /* First make of copy of the old data */ 1669 1670 DB_DNODE_ENTER(db); 1671 dn = DB_DNODE(db); 1672 if (dn->dn_bonuslen != 0) { 1673 bonus_data_size = hdl->sa_bonus->db_size; 1674 old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); 1675 bcopy(hdl->sa_bonus->db_data, old_data[0], 1676 hdl->sa_bonus->db_size); 1677 bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count; 1678 } else { 1679 old_data[0] = NULL; 1680 } 1681 DB_DNODE_EXIT(db); 1682 1683 /* Bring spill buffer online if it isn't currently */ 1684 1685 if ((error = sa_get_spill(hdl)) == 0) { 1686 spill_data_size = hdl->sa_spill->db_size; 1687 old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP); 1688 bcopy(hdl->sa_spill->db_data, old_data[1], 1689 hdl->sa_spill->db_size); 1690 spill_attr_count = 1691 hdl->sa_spill_tab->sa_layout->lot_attr_count; 1692 } else if (error && error != ENOENT) { 1693 if (old_data[0]) 1694 kmem_free(old_data[0], bonus_data_size); 1695 return (error); 1696 } else { 1697 old_data[1] = NULL; 1698 } 1699 1700 /* build descriptor of all attributes */ 1701 1702 attr_count = bonus_attr_count + spill_attr_count; 1703 if (action == SA_ADD) 1704 attr_count++; 1705 else if (action == SA_REMOVE) 1706 attr_count--; 1707 1708 attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, 
KM_SLEEP); 1709 1710 /* 1711 * loop through bonus and spill buffer if it exists, and 1712 * build up new attr_descriptor to reset the attributes 1713 */ 1714 k = j = 0; 1715 count = bonus_attr_count; 1716 hdr = SA_GET_HDR(hdl, SA_BONUS); 1717 idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS); 1718 for (; k != 2; k++) { 1719 /* iterate over each attribute in layout */ 1720 for (i = 0, length_idx = 0; i != count; i++) { 1721 sa_attr_type_t attr; 1722 1723 attr = idx_tab->sa_layout->lot_attrs[i]; 1724 if (attr == newattr) { 1725 if (action == SA_REMOVE) { 1726 j++; 1727 continue; 1728 } 1729 ASSERT(SA_REGISTERED_LEN(sa, attr) == 0); 1730 ASSERT(action == SA_REPLACE); 1731 SA_ADD_BULK_ATTR(attr_desc, j, attr, 1732 locator, datastart, buflen); 1733 } else { 1734 length = SA_REGISTERED_LEN(sa, attr); 1735 if (length == 0) { 1736 length = hdr->sa_lengths[length_idx++]; 1737 } 1738 1739 SA_ADD_BULK_ATTR(attr_desc, j, attr, 1740 NULL, (void *) 1741 (TOC_OFF(idx_tab->sa_idx_tab[attr]) + 1742 (uintptr_t)old_data[k]), length); 1743 } 1744 } 1745 if (k == 0 && hdl->sa_spill) { 1746 hdr = SA_GET_HDR(hdl, SA_SPILL); 1747 idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL); 1748 count = spill_attr_count; 1749 } else { 1750 break; 1751 } 1752 } 1753 if (action == SA_ADD) { 1754 length = SA_REGISTERED_LEN(sa, newattr); 1755 if (length == 0) { 1756 length = buflen; 1757 } 1758 SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator, 1759 datastart, buflen); 1760 } 1761 1762 error = sa_build_layouts(hdl, attr_desc, attr_count, tx); 1763 1764 if (old_data[0]) 1765 kmem_free(old_data[0], bonus_data_size); 1766 if (old_data[1]) 1767 kmem_free(old_data[1], spill_data_size); 1768 kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); 1769 1770 return (error); 1771 } 1772 1773 static int 1774 sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 1775 dmu_tx_t *tx) 1776 { 1777 int error; 1778 sa_os_t *sa = hdl->sa_os->os_sa; 1779 dmu_object_type_t bonustype; 1780 1781 bonustype = 
SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS)); 1782 1783 ASSERT(hdl); 1784 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1785 1786 /* sync out registration table if necessary */ 1787 if (sa->sa_need_attr_registration) 1788 sa_attr_register_sync(hdl, tx); 1789 1790 error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx); 1791 if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb) 1792 sa->sa_update_cb(hdl, tx); 1793 1794 return (error); 1795 } 1796 1797 /* 1798 * update or add new attribute 1799 */ 1800 int 1801 sa_update(sa_handle_t *hdl, sa_attr_type_t type, 1802 void *buf, uint32_t buflen, dmu_tx_t *tx) 1803 { 1804 int error; 1805 sa_bulk_attr_t bulk; 1806 1807 bulk.sa_attr = type; 1808 bulk.sa_data_func = NULL; 1809 bulk.sa_length = buflen; 1810 bulk.sa_data = buf; 1811 1812 mutex_enter(&hdl->sa_lock); 1813 error = sa_bulk_update_impl(hdl, &bulk, 1, tx); 1814 mutex_exit(&hdl->sa_lock); 1815 return (error); 1816 } 1817 1818 int 1819 sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr, 1820 uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx) 1821 { 1822 int error; 1823 sa_bulk_attr_t bulk; 1824 1825 bulk.sa_attr = attr; 1826 bulk.sa_data = userdata; 1827 bulk.sa_data_func = locator; 1828 bulk.sa_length = buflen; 1829 1830 mutex_enter(&hdl->sa_lock); 1831 error = sa_bulk_update_impl(hdl, &bulk, 1, tx); 1832 mutex_exit(&hdl->sa_lock); 1833 return (error); 1834 } 1835 1836 /* 1837 * Return size of an attribute 1838 */ 1839 1840 int 1841 sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size) 1842 { 1843 sa_bulk_attr_t bulk; 1844 int error; 1845 1846 bulk.sa_data = NULL; 1847 bulk.sa_attr = attr; 1848 bulk.sa_data_func = NULL; 1849 1850 ASSERT(hdl); 1851 mutex_enter(&hdl->sa_lock); 1852 if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) { 1853 mutex_exit(&hdl->sa_lock); 1854 return (error); 1855 } 1856 *size = bulk.sa_size; 1857 1858 mutex_exit(&hdl->sa_lock); 1859 return (0); 1860 } 1861 1862 int 1863 
sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) 1864 { 1865 ASSERT(hdl); 1866 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1867 return (sa_lookup_impl(hdl, attrs, count)); 1868 } 1869 1870 int 1871 sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) 1872 { 1873 int error; 1874 1875 ASSERT(hdl); 1876 mutex_enter(&hdl->sa_lock); 1877 error = sa_bulk_lookup_locked(hdl, attrs, count); 1878 mutex_exit(&hdl->sa_lock); 1879 return (error); 1880 } 1881 1882 int 1883 sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx) 1884 { 1885 int error; 1886 1887 ASSERT(hdl); 1888 mutex_enter(&hdl->sa_lock); 1889 error = sa_bulk_update_impl(hdl, attrs, count, tx); 1890 mutex_exit(&hdl->sa_lock); 1891 return (error); 1892 } 1893 1894 int 1895 sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx) 1896 { 1897 int error; 1898 1899 mutex_enter(&hdl->sa_lock); 1900 error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL, 1901 NULL, 0, tx); 1902 mutex_exit(&hdl->sa_lock); 1903 return (error); 1904 } 1905 1906 void 1907 sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi) 1908 { 1909 dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); 1910 } 1911 1912 void 1913 sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) 1914 { 1915 dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, 1916 blksize, nblocks); 1917 } 1918 1919 void 1920 sa_set_userp(sa_handle_t *hdl, void *ptr) 1921 { 1922 hdl->sa_userp = ptr; 1923 } 1924 1925 dmu_buf_t * 1926 sa_get_db(sa_handle_t *hdl) 1927 { 1928 return ((dmu_buf_t *)hdl->sa_bonus); 1929 } 1930 1931 void * 1932 sa_get_userdata(sa_handle_t *hdl) 1933 { 1934 return (hdl->sa_userp); 1935 } 1936 1937 void 1938 sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func) 1939 { 1940 ASSERT(MUTEX_HELD(&os->os_sa->sa_lock)); 1941 os->os_sa->sa_update_cb = func; 1942 } 1943 1944 void 1945 sa_register_update_callback(objset_t *os, sa_update_cb_t *func) 1946 { 1947 1948 
mutex_enter(&os->os_sa->sa_lock); 1949 sa_register_update_callback_locked(os, func); 1950 mutex_exit(&os->os_sa->sa_lock); 1951 } 1952 1953 uint64_t 1954 sa_handle_object(sa_handle_t *hdl) 1955 { 1956 return (hdl->sa_bonus->db_object); 1957 } 1958 1959 boolean_t 1960 sa_enabled(objset_t *os) 1961 { 1962 return (os->os_sa == NULL); 1963 } 1964 1965 int 1966 sa_set_sa_object(objset_t *os, uint64_t sa_object) 1967 { 1968 sa_os_t *sa = os->os_sa; 1969 1970 if (sa->sa_master_obj) 1971 return (1); 1972 1973 sa->sa_master_obj = sa_object; 1974 1975 return (0); 1976 } 1977 1978 int 1979 sa_hdrsize(void *arg) 1980 { 1981 sa_hdr_phys_t *hdr = arg; 1982 1983 return (SA_HDR_SIZE(hdr)); 1984 } 1985 1986 void 1987 sa_handle_lock(sa_handle_t *hdl) 1988 { 1989 ASSERT(hdl); 1990 mutex_enter(&hdl->sa_lock); 1991 } 1992 1993 void 1994 sa_handle_unlock(sa_handle_t *hdl) 1995 { 1996 ASSERT(hdl); 1997 mutex_exit(&hdl->sa_lock); 1998 } 1999