1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 * Portions Copyright 2011 iXsystems, Inc 24 */ 25 26 #include <sys/zfs_context.h> 27 #include <sys/types.h> 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/sysmacros.h> 31 #include <sys/dmu.h> 32 #include <sys/dmu_impl.h> 33 #include <sys/dmu_objset.h> 34 #include <sys/dbuf.h> 35 #include <sys/dnode.h> 36 #include <sys/zap.h> 37 #include <sys/sa.h> 38 #include <sys/sunddi.h> 39 #include <sys/sa_impl.h> 40 #include <sys/dnode.h> 41 #include <sys/errno.h> 42 #include <sys/zfs_context.h> 43 44 /* 45 * ZFS System attributes: 46 * 47 * A generic mechanism to allow for arbitrary attributes 48 * to be stored in a dnode. The data will be stored in the bonus buffer of 49 * the dnode and if necessary a special "spill" block will be used to handle 50 * overflow situations. The spill block will be sized to fit the data 51 * from 512 - 128K. When a spill block is used the BP (blkptr_t) for the 52 * spill block is stored at the end of the current bonus buffer. 
Any 53 * attributes that would be in the way of the blkptr_t will be relocated 54 * into the spill block. 55 * 56 * Attribute registration: 57 * 58 * Stored persistently on a per dataset basis 59 * a mapping between attribute "string" names and their actual attribute 60 * numeric values, length, and byteswap function. The names are only used 61 * during registration. All attributes are known by their unique attribute 62 * id value. If an attribute can have a variable size then the value 63 * 0 will be used to indicate this. 64 * 65 * Attribute Layout: 66 * 67 * Attribute layouts are a way to compactly store multiple attributes, but 68 * without taking the overhead associated with managing each attribute 69 * individually. Since you will typically have the same set of attributes 70 * stored in the same order a single table will be used to represent that 71 * layout. The ZPL for example will usually have only about 10 different 72 * layouts (regular files, device files, symlinks, 73 * regular files + scanstamp, files/dir with extended attributes, and then 74 * you have the possibility of all of those minus ACL, because it would 75 * be kicked out into the spill block) 76 * 77 * Layouts are simply an array of the attributes and their 78 * ordering i.e. [0, 1, 4, 5, 2] 79 * 80 * Each distinct layout is given a unique layout number and that is what is 81 * stored in the header at the beginning of the SA data buffer. 82 * 83 * A layout only covers a single dbuf (bonus or spill). If a set of 84 * attributes is split up between the bonus buffer and a spill buffer then 85 * two different layouts will be used. This allows us to byteswap the 86 * spill without looking at the bonus buffer and keeps the on disk format of 87 * the bonus and spill buffer the same. 
88 * 89 * Adding a single attribute will cause the entire set of attributes to 90 * be rewritten and could result in a new layout number being constructed 91 * as part of the rewrite if no such layout exists for the new set of 92 * attributes. The new attribute will be appended to the end of the already 93 * existing attributes. 94 * 95 * Both the attribute registration and attribute layout information are 96 * stored in normal ZAP attributes. There should be a small number of 97 * known layouts and the set of attributes is assumed to typically be quite 98 * small. 99 * 100 * The registered attributes and layout "table" information is maintained 101 * in core and a special "sa_os_t" is attached to the objset_t. 102 * 103 * A special interface is provided to allow for quickly applying 104 * a large set of attributes at once. sa_replace_all_by_template() is 105 * used to set an array of attributes. This is used by the ZPL when 106 * creating a brand new file. The template that is passed into the function 107 * specifies the attribute, size for variable length attributes, location of 108 * data and special "data locator" function if the data isn't in a contiguous 109 * location. 110 * 111 * Byteswap implications: 112 * Since the SA attributes are not entirely self describing we can't do 113 * the normal byteswap processing. The special ZAP layout attribute and 114 * attribute registration attributes define the byteswap function and the 115 * size of the attributes, unless it is variable sized. 116 * The normal ZFS byteswapping infrastructure assumes you don't need 117 * to read any objects in order to do the necessary byteswapping. Whereas 118 * SA attributes can only be properly byteswapped if the dataset is opened 119 * and the layout/attribute ZAP attributes are available. Because of this 120 * the SA attributes will be byteswapped when they are first accessed by 121 * the SA code that will read the SA data. 
122 */ 123 124 typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t, 125 uint16_t length, int length_idx, boolean_t, void *userp); 126 127 static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype); 128 static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab); 129 static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, 130 void *data); 131 static void sa_idx_tab_rele(objset_t *os, void *arg); 132 static void sa_copy_data(sa_data_locator_t *func, void *start, void *target, 133 int buflen); 134 static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 135 sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 136 uint16_t buflen, dmu_tx_t *tx); 137 138 arc_byteswap_func_t *sa_bswap_table[] = { 139 byteswap_uint64_array, 140 byteswap_uint32_array, 141 byteswap_uint16_array, 142 byteswap_uint8_array, 143 zfs_acl_byteswap, 144 }; 145 146 #define SA_COPY_DATA(f, s, t, l) \ 147 { \ 148 if (f == NULL) { \ 149 if (l == 8) { \ 150 *(uint64_t *)t = *(uint64_t *)s; \ 151 } else if (l == 16) { \ 152 *(uint64_t *)t = *(uint64_t *)s; \ 153 *(uint64_t *)((uintptr_t)t + 8) = \ 154 *(uint64_t *)((uintptr_t)s + 8); \ 155 } else { \ 156 bcopy(s, t, l); \ 157 } \ 158 } else \ 159 sa_copy_data(f, s, t, l); \ 160 } 161 162 /* 163 * This table is fixed and cannot be changed. Its purpose is to 164 * allow the SA code to work with both old/new ZPL file systems. 165 * It contains the list of legacy attributes. These attributes aren't 166 * stored in the "attribute" registry zap objects, since older ZPL file systems 167 * won't have the registry. Only objsets of type ZFS_TYPE_FILESYSTEM will 168 * use this static table. 
*/
/*
 * Each initializer is positional.  The rest of this file reads these
 * entries through the fields sa_name, sa_length, sa_byteswap and sa_attr;
 * the initializers appear to follow that order (name, length in bytes,
 * byteswap selector, attribute id) -- TODO confirm against the
 * sa_attr_reg_t declaration.
 */
sa_attr_reg_t sa_legacy_attrs[] = {
	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
};

/*
 * ZPL legacy layout
 * This is only used for objects of type DMU_OT_ZNODE
 *
 * The entries are the attribute ids of sa_legacy_attrs[] above, in table
 * order; sa_setup() registers this array as layout number 0.
 */
sa_attr_type_t sa_legacy_zpl_layout[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

/*
 * Special dummy layout used for buffers with no attributes.
199 */ 200 201 sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; 202 203 static int sa_legacy_attr_count = 16; 204 static kmem_cache_t *sa_cache = NULL; 205 206 /*ARGSUSED*/ 207 static int 208 sa_cache_constructor(void *buf, void *unused, int kmflag) 209 { 210 sa_handle_t *hdl = buf; 211 212 hdl->sa_bonus_tab = NULL; 213 hdl->sa_spill_tab = NULL; 214 hdl->sa_os = NULL; 215 hdl->sa_userp = NULL; 216 hdl->sa_bonus = NULL; 217 hdl->sa_spill = NULL; 218 mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL); 219 return (0); 220 } 221 222 /*ARGSUSED*/ 223 static void 224 sa_cache_destructor(void *buf, void *unused) 225 { 226 sa_handle_t *hdl = buf; 227 mutex_destroy(&hdl->sa_lock); 228 } 229 230 void 231 sa_cache_init(void) 232 { 233 sa_cache = kmem_cache_create("sa_cache", 234 sizeof (sa_handle_t), 0, sa_cache_constructor, 235 sa_cache_destructor, NULL, NULL, NULL, 0); 236 } 237 238 void 239 sa_cache_fini(void) 240 { 241 if (sa_cache) 242 kmem_cache_destroy(sa_cache); 243 } 244 245 static int 246 layout_num_compare(const void *arg1, const void *arg2) 247 { 248 const sa_lot_t *node1 = arg1; 249 const sa_lot_t *node2 = arg2; 250 251 if (node1->lot_num > node2->lot_num) 252 return (1); 253 else if (node1->lot_num < node2->lot_num) 254 return (-1); 255 return (0); 256 } 257 258 static int 259 layout_hash_compare(const void *arg1, const void *arg2) 260 { 261 const sa_lot_t *node1 = arg1; 262 const sa_lot_t *node2 = arg2; 263 264 if (node1->lot_hash > node2->lot_hash) 265 return (1); 266 if (node1->lot_hash < node2->lot_hash) 267 return (-1); 268 if (node1->lot_instance > node2->lot_instance) 269 return (1); 270 if (node1->lot_instance < node2->lot_instance) 271 return (-1); 272 return (0); 273 } 274 275 boolean_t 276 sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count) 277 { 278 int i; 279 280 if (count != tbf->lot_attr_count) 281 return (1); 282 283 for (i = 0; i != count; i++) { 284 if (attrs[i] != tbf->lot_attrs[i]) 285 return (1); 286 } 287 return (0); 288 } 289 
290 #define SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ attr) & 0xFF]) 291 292 static uint64_t 293 sa_layout_info_hash(sa_attr_type_t *attrs, int attr_count) 294 { 295 int i; 296 uint64_t crc = -1ULL; 297 298 for (i = 0; i != attr_count; i++) 299 crc ^= SA_ATTR_HASH(attrs[i]); 300 301 return (crc); 302 } 303 304 static int 305 sa_get_spill(sa_handle_t *hdl) 306 { 307 int rc; 308 if (hdl->sa_spill == NULL) { 309 if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL, 310 &hdl->sa_spill)) == 0) 311 VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 312 } else { 313 rc = 0; 314 } 315 316 return (rc); 317 } 318 319 /* 320 * Main attribute lookup/update function 321 * returns 0 for success or non zero for failures 322 * 323 * Operates on bulk array, first failure will abort further processing 324 */ 325 int 326 sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 327 sa_data_op_t data_op, dmu_tx_t *tx) 328 { 329 sa_os_t *sa = hdl->sa_os->os_sa; 330 int i; 331 int error = 0; 332 sa_buf_type_t buftypes; 333 334 buftypes = 0; 335 336 ASSERT(count > 0); 337 for (i = 0; i != count; i++) { 338 ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs); 339 340 bulk[i].sa_addr = NULL; 341 /* First check the bonus buffer */ 342 343 if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT( 344 hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) { 345 SA_ATTR_INFO(sa, hdl->sa_bonus_tab, 346 SA_GET_HDR(hdl, SA_BONUS), 347 bulk[i].sa_attr, bulk[i], SA_BONUS, hdl); 348 if (tx && !(buftypes & SA_BONUS)) { 349 dmu_buf_will_dirty(hdl->sa_bonus, tx); 350 buftypes |= SA_BONUS; 351 } 352 } 353 if (bulk[i].sa_addr == NULL && 354 ((error = sa_get_spill(hdl)) == 0)) { 355 if (TOC_ATTR_PRESENT( 356 hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) { 357 SA_ATTR_INFO(sa, hdl->sa_spill_tab, 358 SA_GET_HDR(hdl, SA_SPILL), 359 bulk[i].sa_attr, bulk[i], SA_SPILL, hdl); 360 if (tx && !(buftypes & SA_SPILL) && 361 bulk[i].sa_size == bulk[i].sa_length) { 362 dmu_buf_will_dirty(hdl->sa_spill, tx); 363 buftypes |= 
SA_SPILL; 364 } 365 } 366 } 367 if (error && error != ENOENT) { 368 return ((error == ECKSUM) ? EIO : error); 369 } 370 371 switch (data_op) { 372 case SA_LOOKUP: 373 if (bulk[i].sa_addr == NULL) 374 return (ENOENT); 375 if (bulk[i].sa_data) { 376 SA_COPY_DATA(bulk[i].sa_data_func, 377 bulk[i].sa_addr, bulk[i].sa_data, 378 bulk[i].sa_size); 379 } 380 continue; 381 382 case SA_UPDATE: 383 /* existing rewrite of attr */ 384 if (bulk[i].sa_addr && 385 bulk[i].sa_size == bulk[i].sa_length) { 386 SA_COPY_DATA(bulk[i].sa_data_func, 387 bulk[i].sa_data, bulk[i].sa_addr, 388 bulk[i].sa_length); 389 continue; 390 } else if (bulk[i].sa_addr) { /* attr size change */ 391 error = sa_modify_attrs(hdl, bulk[i].sa_attr, 392 SA_REPLACE, bulk[i].sa_data_func, 393 bulk[i].sa_data, bulk[i].sa_length, tx); 394 } else { /* adding new attribute */ 395 error = sa_modify_attrs(hdl, bulk[i].sa_attr, 396 SA_ADD, bulk[i].sa_data_func, 397 bulk[i].sa_data, bulk[i].sa_length, tx); 398 } 399 if (error) 400 return (error); 401 break; 402 } 403 } 404 return (error); 405 } 406 407 static sa_lot_t * 408 sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, 409 uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx) 410 { 411 sa_os_t *sa = os->os_sa; 412 sa_lot_t *tb, *findtb; 413 int i; 414 avl_index_t loc; 415 416 ASSERT(MUTEX_HELD(&sa->sa_lock)); 417 tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP); 418 tb->lot_attr_count = attr_count; 419 tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 420 KM_SLEEP); 421 bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count); 422 tb->lot_num = lot_num; 423 tb->lot_hash = hash; 424 tb->lot_instance = 0; 425 426 if (zapadd) { 427 char attr_name[8]; 428 429 if (sa->sa_layout_attr_obj == 0) { 430 sa->sa_layout_attr_obj = zap_create(os, 431 DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx); 432 VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1, 433 &sa->sa_layout_attr_obj, tx) == 0); 434 } 435 436 (void) 
snprintf(attr_name, sizeof (attr_name), 437 "%d", (int)lot_num); 438 VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj, 439 attr_name, 2, attr_count, attrs, tx)); 440 } 441 442 list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t), 443 offsetof(sa_idx_tab_t, sa_next)); 444 445 for (i = 0; i != attr_count; i++) { 446 if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0) 447 tb->lot_var_sizes++; 448 } 449 450 avl_add(&sa->sa_layout_num_tree, tb); 451 452 /* verify we don't have a hash collision */ 453 if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) { 454 for (; findtb && findtb->lot_hash == hash; 455 findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) { 456 if (findtb->lot_instance != tb->lot_instance) 457 break; 458 tb->lot_instance++; 459 } 460 } 461 avl_add(&sa->sa_layout_hash_tree, tb); 462 return (tb); 463 } 464 465 static void 466 sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs, 467 int count, dmu_tx_t *tx, sa_lot_t **lot) 468 { 469 sa_lot_t *tb, tbsearch; 470 avl_index_t loc; 471 sa_os_t *sa = os->os_sa; 472 boolean_t found = B_FALSE; 473 474 mutex_enter(&sa->sa_lock); 475 tbsearch.lot_hash = hash; 476 tbsearch.lot_instance = 0; 477 tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc); 478 if (tb) { 479 for (; tb && tb->lot_hash == hash; 480 tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) { 481 if (sa_layout_equal(tb, attrs, count) == 0) { 482 found = B_TRUE; 483 break; 484 } 485 } 486 } 487 if (!found) { 488 tb = sa_add_layout_entry(os, attrs, count, 489 avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx); 490 } 491 mutex_exit(&sa->sa_lock); 492 *lot = tb; 493 } 494 495 static int 496 sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) 497 { 498 int error; 499 uint32_t blocksize; 500 501 if (size == 0) { 502 blocksize = SPA_MINBLOCKSIZE; 503 } else if (size > SPA_MAXBLOCKSIZE) { 504 ASSERT(0); 505 return (EFBIG); 506 } else { 507 blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t); 508 
} 509 510 error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx); 511 ASSERT(error == 0); 512 return (error); 513 } 514 515 static void 516 sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen) 517 { 518 if (func == NULL) { 519 bcopy(datastart, target, buflen); 520 } else { 521 boolean_t start; 522 int bytes; 523 void *dataptr; 524 void *saptr = target; 525 uint32_t length; 526 527 start = B_TRUE; 528 bytes = 0; 529 while (bytes < buflen) { 530 func(&dataptr, &length, buflen, start, datastart); 531 bcopy(dataptr, saptr, length); 532 saptr = (void *)((caddr_t)saptr + length); 533 bytes += length; 534 start = B_FALSE; 535 } 536 } 537 } 538 539 /* 540 * Determine several different sizes 541 * first the sa header size 542 * the number of bytes to be stored 543 * if spill would occur the index in the attribute array is returned 544 * 545 * the boolean will_spill will be set when spilling is necessary. It 546 * is only set when the buftype is SA_BONUS 547 */ 548 static int 549 sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count, 550 dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total, 551 boolean_t *will_spill) 552 { 553 int var_size = 0; 554 int i; 555 int full_space; 556 int hdrsize; 557 boolean_t done = B_FALSE; 558 559 if (buftype == SA_BONUS && sa->sa_force_spill) { 560 *total = 0; 561 *index = 0; 562 *will_spill = B_TRUE; 563 return (0); 564 } 565 566 *index = -1; 567 *total = 0; 568 569 if (buftype == SA_BONUS) 570 *will_spill = B_FALSE; 571 572 hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 : 573 sizeof (sa_hdr_phys_t); 574 575 full_space = (buftype == SA_BONUS) ? 
DN_MAX_BONUSLEN : db->db_size; 576 577 for (i = 0; i != attr_count; i++) { 578 boolean_t is_var_sz; 579 580 *total += attr_desc[i].sa_length; 581 if (done) 582 goto next; 583 584 is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0); 585 if (is_var_sz) { 586 var_size++; 587 } 588 589 if (is_var_sz && var_size > 1) { 590 if (P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) + 591 *total < full_space) { 592 hdrsize += sizeof (uint16_t); 593 } else { 594 done = B_TRUE; 595 *index = i; 596 if (buftype == SA_BONUS) 597 *will_spill = B_TRUE; 598 continue; 599 } 600 } 601 602 /* 603 * find index of where spill *could* occur. 604 * Then continue to count of remainder attribute 605 * space. The sum is used later for sizing bonus 606 * and spill buffer. 607 */ 608 if (buftype == SA_BONUS && *index == -1 && 609 *total + P2ROUNDUP(hdrsize, 8) > 610 (full_space - sizeof (blkptr_t))) { 611 *index = i; 612 done = B_TRUE; 613 } 614 615 next: 616 if (*total + P2ROUNDUP(hdrsize, 8) > full_space && 617 buftype == SA_BONUS) 618 *will_spill = B_TRUE; 619 } 620 621 hdrsize = P2ROUNDUP(hdrsize, 8); 622 return (hdrsize); 623 } 624 625 #define BUF_SPACE_NEEDED(total, header) (total + header) 626 627 /* 628 * Find layout that corresponds to ordering of attributes 629 * If not found a new layout number is created and added to 630 * persistent layout tables. 
631 */ 632 static int 633 sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, 634 dmu_tx_t *tx) 635 { 636 sa_os_t *sa = hdl->sa_os->os_sa; 637 uint64_t hash; 638 sa_buf_type_t buftype; 639 sa_hdr_phys_t *sahdr; 640 void *data_start; 641 int buf_space; 642 sa_attr_type_t *attrs, *attrs_start; 643 int i, lot_count; 644 int hdrsize, spillhdrsize; 645 int used; 646 dmu_object_type_t bonustype; 647 sa_lot_t *lot; 648 int len_idx; 649 int spill_used; 650 boolean_t spilling; 651 652 dmu_buf_will_dirty(hdl->sa_bonus, tx); 653 bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus); 654 655 /* first determine bonus header size and sum of all attributes */ 656 hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus, 657 SA_BONUS, &i, &used, &spilling); 658 659 if (used > SPA_MAXBLOCKSIZE) 660 return (EFBIG); 661 662 VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? 663 MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) : 664 used + hdrsize, tx)); 665 666 ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) || 667 bonustype == DMU_OT_SA); 668 669 /* setup and size spill buffer when needed */ 670 if (spilling) { 671 boolean_t dummy; 672 673 if (hdl->sa_spill == NULL) { 674 VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL, 675 &hdl->sa_spill) == 0); 676 } 677 dmu_buf_will_dirty(hdl->sa_spill, tx); 678 679 spillhdrsize = sa_find_sizes(sa, &attr_desc[i], 680 attr_count - i, hdl->sa_spill, SA_SPILL, &i, 681 &spill_used, &dummy); 682 683 if (spill_used > SPA_MAXBLOCKSIZE) 684 return (EFBIG); 685 686 buf_space = hdl->sa_spill->db_size - spillhdrsize; 687 if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > 688 hdl->sa_spill->db_size) 689 VERIFY(0 == sa_resize_spill(hdl, 690 BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx)); 691 } 692 693 /* setup starting pointers to lay down data */ 694 data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize); 695 sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data; 696 buftype = SA_BONUS; 697 698 if (spilling) 
699 buf_space = (sa->sa_force_spill) ? 700 0 : SA_BLKPTR_SPACE - hdrsize; 701 else 702 buf_space = hdl->sa_bonus->db_size - hdrsize; 703 704 attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 705 KM_SLEEP); 706 lot_count = 0; 707 708 for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) { 709 uint16_t length; 710 711 attrs[i] = attr_desc[i].sa_attr; 712 length = SA_REGISTERED_LEN(sa, attrs[i]); 713 if (length == 0) 714 length = attr_desc[i].sa_length; 715 716 if (buf_space < length) { /* switch to spill buffer */ 717 VERIFY(bonustype == DMU_OT_SA); 718 if (buftype == SA_BONUS && !sa->sa_force_spill) { 719 sa_find_layout(hdl->sa_os, hash, attrs_start, 720 lot_count, tx, &lot); 721 SA_SET_HDR(sahdr, lot->lot_num, hdrsize); 722 } 723 724 buftype = SA_SPILL; 725 hash = -1ULL; 726 len_idx = 0; 727 728 sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data; 729 sahdr->sa_magic = SA_MAGIC; 730 data_start = (void *)((uintptr_t)sahdr + 731 spillhdrsize); 732 attrs_start = &attrs[i]; 733 buf_space = hdl->sa_spill->db_size - spillhdrsize; 734 lot_count = 0; 735 } 736 hash ^= SA_ATTR_HASH(attrs[i]); 737 attr_desc[i].sa_addr = data_start; 738 attr_desc[i].sa_size = length; 739 SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data, 740 data_start, length); 741 if (sa->sa_attr_table[attrs[i]].sa_length == 0) { 742 sahdr->sa_lengths[len_idx++] = length; 743 } 744 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + 745 length), 8); 746 buf_space -= P2ROUNDUP(length, 8); 747 lot_count++; 748 } 749 750 sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot); 751 752 /* 753 * Verify that old znodes always have layout number 0. 754 * Must be DMU_OT_SA for arbitrary layouts 755 */ 756 VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) || 757 (bonustype == DMU_OT_SA && lot->lot_num > 1)); 758 759 if (bonustype == DMU_OT_SA) { 760 SA_SET_HDR(sahdr, lot->lot_num, 761 buftype == SA_BONUS ? 
hdrsize : spillhdrsize); 762 } 763 764 kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count); 765 if (hdl->sa_bonus_tab) { 766 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 767 hdl->sa_bonus_tab = NULL; 768 } 769 if (!sa->sa_force_spill) 770 VERIFY(0 == sa_build_index(hdl, SA_BONUS)); 771 if (hdl->sa_spill) { 772 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 773 if (!spilling) { 774 /* 775 * remove spill block that is no longer needed. 776 */ 777 dmu_buf_rele(hdl->sa_spill, NULL); 778 hdl->sa_spill = NULL; 779 hdl->sa_spill_tab = NULL; 780 VERIFY(0 == dmu_rm_spill(hdl->sa_os, 781 sa_handle_object(hdl), tx)); 782 } else { 783 VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 784 } 785 } 786 787 return (0); 788 } 789 790 static void 791 sa_free_attr_table(sa_os_t *sa) 792 { 793 int i; 794 795 if (sa->sa_attr_table == NULL) 796 return; 797 798 for (i = 0; i != sa->sa_num_attrs; i++) { 799 if (sa->sa_attr_table[i].sa_name) 800 kmem_free(sa->sa_attr_table[i].sa_name, 801 strlen(sa->sa_attr_table[i].sa_name) + 1); 802 } 803 804 kmem_free(sa->sa_attr_table, 805 sizeof (sa_attr_table_t) * sa->sa_num_attrs); 806 807 sa->sa_attr_table = NULL; 808 } 809 810 static int 811 sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) 812 { 813 sa_os_t *sa = os->os_sa; 814 uint64_t sa_attr_count = 0; 815 uint64_t sa_reg_count; 816 int error = 0; 817 uint64_t attr_value; 818 sa_attr_table_t *tb; 819 zap_cursor_t zc; 820 zap_attribute_t za; 821 int registered_count = 0; 822 int i; 823 dmu_objset_type_t ostype = dmu_objset_type(os); 824 825 sa->sa_user_table = 826 kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP); 827 sa->sa_user_table_sz = count * sizeof (sa_attr_type_t); 828 829 if (sa->sa_reg_attr_obj != 0) { 830 error = zap_count(os, sa->sa_reg_attr_obj, 831 &sa_attr_count); 832 833 /* 834 * Make sure we retrieved a count and that it isn't zero 835 */ 836 if (error || (error == 0 && sa_attr_count == 0)) { 837 if (error == 0) 838 error = EINVAL; 839 goto bail; 840 
} 841 sa_reg_count = sa_attr_count; 842 } 843 844 if (ostype == DMU_OST_ZFS && sa_attr_count == 0) 845 sa_attr_count += sa_legacy_attr_count; 846 847 /* Allocate attribute numbers for attributes that aren't registered */ 848 for (i = 0; i != count; i++) { 849 boolean_t found = B_FALSE; 850 int j; 851 852 if (ostype == DMU_OST_ZFS) { 853 for (j = 0; j != sa_legacy_attr_count; j++) { 854 if (strcmp(reg_attrs[i].sa_name, 855 sa_legacy_attrs[j].sa_name) == 0) { 856 sa->sa_user_table[i] = 857 sa_legacy_attrs[j].sa_attr; 858 found = B_TRUE; 859 } 860 } 861 } 862 if (found) 863 continue; 864 865 if (sa->sa_reg_attr_obj) 866 error = zap_lookup(os, sa->sa_reg_attr_obj, 867 reg_attrs[i].sa_name, 8, 1, &attr_value); 868 else 869 error = ENOENT; 870 switch (error) { 871 case ENOENT: 872 sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count; 873 sa_attr_count++; 874 break; 875 case 0: 876 sa->sa_user_table[i] = ATTR_NUM(attr_value); 877 break; 878 default: 879 goto bail; 880 } 881 } 882 883 sa->sa_num_attrs = sa_attr_count; 884 tb = sa->sa_attr_table = 885 kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP); 886 887 /* 888 * Attribute table is constructed from requested attribute list, 889 * previously foreign registered attributes, and also the legacy 890 * ZPL set of attributes. 
891 */ 892 893 if (sa->sa_reg_attr_obj) { 894 for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj); 895 (error = zap_cursor_retrieve(&zc, &za)) == 0; 896 zap_cursor_advance(&zc)) { 897 uint64_t value; 898 value = za.za_first_integer; 899 900 registered_count++; 901 tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value); 902 tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value); 903 tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value); 904 tb[ATTR_NUM(value)].sa_registered = B_TRUE; 905 906 if (tb[ATTR_NUM(value)].sa_name) { 907 continue; 908 } 909 tb[ATTR_NUM(value)].sa_name = 910 kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP); 911 (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, 912 strlen(za.za_name) +1); 913 } 914 zap_cursor_fini(&zc); 915 /* 916 * Make sure we processed the correct number of registered 917 * attributes 918 */ 919 if (registered_count != sa_reg_count) { 920 ASSERT(error != 0); 921 goto bail; 922 } 923 924 } 925 926 if (ostype == DMU_OST_ZFS) { 927 for (i = 0; i != sa_legacy_attr_count; i++) { 928 if (tb[i].sa_name) 929 continue; 930 tb[i].sa_attr = sa_legacy_attrs[i].sa_attr; 931 tb[i].sa_length = sa_legacy_attrs[i].sa_length; 932 tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap; 933 tb[i].sa_registered = B_FALSE; 934 tb[i].sa_name = 935 kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1, 936 KM_SLEEP); 937 (void) strlcpy(tb[i].sa_name, 938 sa_legacy_attrs[i].sa_name, 939 strlen(sa_legacy_attrs[i].sa_name) + 1); 940 } 941 } 942 943 for (i = 0; i != count; i++) { 944 sa_attr_type_t attr_id; 945 946 attr_id = sa->sa_user_table[i]; 947 if (tb[attr_id].sa_name) 948 continue; 949 950 tb[attr_id].sa_length = reg_attrs[i].sa_length; 951 tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap; 952 tb[attr_id].sa_attr = attr_id; 953 tb[attr_id].sa_name = 954 kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP); 955 (void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name, 956 strlen(reg_attrs[i].sa_name) + 1); 957 } 958 959 sa->sa_need_attr_registration = 960 
(sa_attr_count != registered_count); 961 962 return (0); 963 bail: 964 kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t)); 965 sa->sa_user_table = NULL; 966 sa_free_attr_table(sa); 967 return ((error != 0) ? error : EINVAL); 968 } 969 970 int 971 sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, 972 sa_attr_type_t **user_table) 973 { 974 zap_cursor_t zc; 975 zap_attribute_t za; 976 sa_os_t *sa; 977 dmu_objset_type_t ostype = dmu_objset_type(os); 978 sa_attr_type_t *tb; 979 int error; 980 981 mutex_enter(&os->os_lock); 982 if (os->os_sa) { 983 mutex_enter(&os->os_sa->sa_lock); 984 mutex_exit(&os->os_lock); 985 tb = os->os_sa->sa_user_table; 986 mutex_exit(&os->os_sa->sa_lock); 987 *user_table = tb; 988 return (0); 989 } 990 991 sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); 992 mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); 993 sa->sa_master_obj = sa_obj; 994 995 os->os_sa = sa; 996 mutex_enter(&sa->sa_lock); 997 mutex_exit(&os->os_lock); 998 avl_create(&sa->sa_layout_num_tree, layout_num_compare, 999 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node)); 1000 avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, 1001 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node)); 1002 1003 if (sa_obj) { 1004 error = zap_lookup(os, sa_obj, SA_LAYOUTS, 1005 8, 1, &sa->sa_layout_attr_obj); 1006 if (error != 0 && error != ENOENT) 1007 goto fail; 1008 error = zap_lookup(os, sa_obj, SA_REGISTRY, 1009 8, 1, &sa->sa_reg_attr_obj); 1010 if (error != 0 && error != ENOENT) 1011 goto fail; 1012 } 1013 1014 if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0) 1015 goto fail; 1016 1017 if (sa->sa_layout_attr_obj != 0) { 1018 uint64_t layout_count; 1019 1020 error = zap_count(os, sa->sa_layout_attr_obj, 1021 &layout_count); 1022 1023 /* 1024 * Layout number count should be > 0 1025 */ 1026 if (error || (error == 0 && layout_count == 0)) { 1027 if (error == 0) 1028 error = EINVAL; 1029 goto fail; 1030 } 1031 1032 for 
(zap_cursor_init(&zc, os, sa->sa_layout_attr_obj); 1033 (error = zap_cursor_retrieve(&zc, &za)) == 0; 1034 zap_cursor_advance(&zc)) { 1035 sa_attr_type_t *lot_attrs; 1036 uint64_t lot_num; 1037 1038 lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * 1039 za.za_num_integers, KM_SLEEP); 1040 1041 if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, 1042 za.za_name, 2, za.za_num_integers, 1043 lot_attrs))) != 0) { 1044 kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1045 za.za_num_integers); 1046 break; 1047 } 1048 VERIFY(ddi_strtoull(za.za_name, NULL, 10, 1049 (unsigned long long *)&lot_num) == 0); 1050 1051 (void) sa_add_layout_entry(os, lot_attrs, 1052 za.za_num_integers, lot_num, 1053 sa_layout_info_hash(lot_attrs, 1054 za.za_num_integers), B_FALSE, NULL); 1055 kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1056 za.za_num_integers); 1057 } 1058 zap_cursor_fini(&zc); 1059 1060 /* 1061 * Make sure layout count matches number of entries added 1062 * to AVL tree 1063 */ 1064 if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) { 1065 ASSERT(error != 0); 1066 goto fail; 1067 } 1068 } 1069 1070 /* Add special layout number for old ZNODES */ 1071 if (ostype == DMU_OST_ZFS) { 1072 (void) sa_add_layout_entry(os, sa_legacy_zpl_layout, 1073 sa_legacy_attr_count, 0, 1074 sa_layout_info_hash(sa_legacy_zpl_layout, 1075 sa_legacy_attr_count), B_FALSE, NULL); 1076 1077 (void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1, 1078 0, B_FALSE, NULL); 1079 } 1080 *user_table = os->os_sa->sa_user_table; 1081 mutex_exit(&sa->sa_lock); 1082 return (0); 1083 fail: 1084 os->os_sa = NULL; 1085 sa_free_attr_table(sa); 1086 if (sa->sa_user_table) 1087 kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1088 mutex_exit(&sa->sa_lock); 1089 kmem_free(sa, sizeof (sa_os_t)); 1090 return ((error == ECKSUM) ? 
EIO : error); 1091 } 1092 1093 void 1094 sa_tear_down(objset_t *os) 1095 { 1096 sa_os_t *sa = os->os_sa; 1097 sa_lot_t *layout; 1098 void *cookie; 1099 1100 kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1101 1102 /* Free up attr table */ 1103 1104 sa_free_attr_table(sa); 1105 1106 cookie = NULL; 1107 while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) { 1108 sa_idx_tab_t *tab; 1109 while (tab = list_head(&layout->lot_idx_tab)) { 1110 ASSERT(refcount_count(&tab->sa_refcount)); 1111 sa_idx_tab_rele(os, tab); 1112 } 1113 } 1114 1115 cookie = NULL; 1116 while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) { 1117 kmem_free(layout->lot_attrs, 1118 sizeof (sa_attr_type_t) * layout->lot_attr_count); 1119 kmem_free(layout, sizeof (sa_lot_t)); 1120 } 1121 1122 avl_destroy(&sa->sa_layout_hash_tree); 1123 avl_destroy(&sa->sa_layout_num_tree); 1124 1125 kmem_free(sa, sizeof (sa_os_t)); 1126 os->os_sa = NULL; 1127 } 1128 1129 void 1130 sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr, 1131 uint16_t length, int length_idx, boolean_t var_length, void *userp) 1132 { 1133 sa_idx_tab_t *idx_tab = userp; 1134 1135 if (var_length) { 1136 ASSERT(idx_tab->sa_variable_lengths); 1137 idx_tab->sa_variable_lengths[length_idx] = length; 1138 } 1139 TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx, 1140 (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr)); 1141 } 1142 1143 static void 1144 sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type, 1145 sa_iterfunc_t func, sa_lot_t *tab, void *userp) 1146 { 1147 void *data_start; 1148 sa_lot_t *tb = tab; 1149 sa_lot_t search; 1150 avl_index_t loc; 1151 sa_os_t *sa = os->os_sa; 1152 int i; 1153 uint16_t *length_start = NULL; 1154 uint8_t length_idx = 0; 1155 1156 if (tab == NULL) { 1157 search.lot_num = SA_LAYOUT_NUM(hdr, type); 1158 tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); 1159 ASSERT(tb); 1160 } 1161 1162 if (IS_SA_BONUSTYPE(type)) { 1163 data_start = (void 
*)P2ROUNDUP(((uintptr_t)hdr +
		    offsetof(sa_hdr_phys_t, sa_lengths) +
		    (sizeof (uint16_t) * tb->lot_var_sizes)), 8);
		length_start = hdr->sa_lengths;
	} else {
		/* Non-SA bonus type: attribute data starts at hdr itself. */
		data_start = hdr;
	}

	for (i = 0; i != tb->lot_attr_count; i++) {
		int attr_length, reg_length;
		uint8_t idx_len;

		/*
		 * A registered length of 0 marks a variable-length
		 * attribute whose size comes from the header's length
		 * array.
		 * NOTE(review): length_start stays NULL for non-SA bonus
		 * types, so this presumably relies on such layouts holding
		 * only fixed-length attributes -- confirm.
		 */
		reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length;
		if (reg_length) {
			attr_length = reg_length;
			idx_len = 0;
		} else {
			attr_length = length_start[length_idx];
			idx_len = length_idx++;
		}

		func(hdr, data_start, tb->lot_attrs[i], attr_length,
		    idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp);

		/* Each attribute starts on an 8-byte boundary. */
		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
		    attr_length), 8);
	}
}

/*
 * sa_attr_iter() callback used by sa_byteswap(): byteswaps one attribute
 * in place using the byteswap function registered for it.  userp is the
 * sa_handle_t being swapped.
 */
/*ARGSUSED*/
void
sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr,
    uint16_t length, int length_idx, boolean_t variable_length, void *userp)
{
	sa_handle_t *hdl = userp;
	sa_os_t *sa = hdl->sa_os->os_sa;

	sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length);
}

/*
 * Byteswap the SA header and every attribute in the bonus or spill
 * buffer of hdl.  Does nothing when the header magic already reads as
 * SA_MAGIC.  The caller must hold the objset's sa_lock.
 */
void
sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype)
{
	sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype);
	dmu_buf_impl_t *db;
	sa_os_t *sa = hdl->sa_os->os_sa;
	int num_lengths = 1;
	int i;

	ASSERT(MUTEX_HELD(&sa->sa_lock));
	if (sa_hdr_phys->sa_magic == SA_MAGIC)
		return;

	db = SA_GET_DB(hdl, buftype);

	/* The spill buffer is released and thawed before it is modified. */
	if (buftype == SA_SPILL) {
		arc_release(db->db_buf, NULL);
		arc_buf_thaw(db->db_buf);
	}

	sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic);
	sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info);

	/*
	 * Determine number of variable lengths in header
	 * The standard 8 byte header has one for free and a
	 * 16 byte header would have 4 + 1;
	 */
	if (SA_HDR_SIZE(sa_hdr_phys) > 8)
		num_lengths +=
(SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1;
	for (i = 0; i != num_lengths; i++)
		sa_hdr_phys->sa_lengths[i] =
		    BSWAP_16(sa_hdr_phys->sa_lengths[i]);

	/* With the header in native order, swap each attribute's data. */
	sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA,
	    sa_byteswap_cb, NULL, hdl);

	if (buftype == SA_SPILL)
		arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf);
}

/*
 * Attach an index table to hdl for its bonus or spill buffer, byteswapping
 * the buffer first when its header magic is in the opposite byte order.
 * The table is stored in hdl->sa_bonus_tab or hdl->sa_spill_tab.
 * Takes and drops the objset's sa_lock; always returns 0.
 */
static int
sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
{
	sa_hdr_phys_t *sa_hdr_phys;
	dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype);
	dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db);
	sa_os_t *sa = hdl->sa_os->os_sa;
	sa_idx_tab_t *idx_tab;

	sa_hdr_phys = SA_GET_HDR(hdl, buftype);

	mutex_enter(&sa->sa_lock);

	/* Do we need to byteswap? */

	/* only check if not old znode */
	if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC &&
	    sa_hdr_phys->sa_magic != 0) {
		/* Any other nonzero magic must be SA_MAGIC byteswapped. */
		VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC);
		sa_byteswap(hdl, buftype);
	}

	idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys);

	if (buftype == SA_BONUS)
		hdl->sa_bonus_tab = idx_tab;
	else
		hdl->sa_spill_tab = idx_tab;

	mutex_exit(&sa->sa_lock);
	return (0);
}

/*
 * Eviction callback registered on SA dbufs.  It unconditionally panics,
 * so reaching it is treated as a fatal error.
 */
/*ARGSUSED*/
void
sa_evict(dmu_buf_t *db, void *sap)
{
	panic("evicting sa dbuf %p\n", (void *)db);
}

/*
 * Drop one reference on an index table (arg is an sa_idx_tab_t pointer;
 * NULL is a no-op).  When the count reaches zero the table is unlinked
 * from its layout's list and freed.  Takes the objset's sa_lock.
 */
static void
sa_idx_tab_rele(objset_t *os, void *arg)
{
	sa_os_t *sa = os->os_sa;
	sa_idx_tab_t *idx_tab = arg;

	if (idx_tab == NULL)
		return;

	mutex_enter(&sa->sa_lock);
	if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) {
		list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab);
		if (idx_tab->sa_variable_lengths)
			kmem_free(idx_tab->sa_variable_lengths,
			    sizeof (uint16_t) *
			    idx_tab->sa_layout->lot_var_sizes);
		refcount_destroy(&idx_tab->sa_refcount);
		kmem_free(idx_tab->sa_idx_tab,
		    sizeof
(uint32_t) * sa->sa_num_attrs);
		kmem_free(idx_tab, sizeof (sa_idx_tab_t));
	}
	mutex_exit(&sa->sa_lock);
}

/*
 * Take one reference on an index table.  The caller must already hold
 * the objset's sa_lock.
 */
static void
sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab)
{
	sa_os_t *sa = os->os_sa;

	ASSERT(MUTEX_HELD(&sa->sa_lock));
	(void) refcount_add(&idx_tab->sa_refcount, NULL);
}

/*
 * Destroy an SA handle: replace the bonus dbuf's user data for hdl with
 * NULL, release the bonus and spill index tables, release the bonus dbuf
 * (and the spill dbuf if one is attached), and return the handle to
 * sa_cache.
 */
void
sa_handle_destroy(sa_handle_t *hdl)
{
	mutex_enter(&hdl->sa_lock);
	(void) dmu_buf_update_user((dmu_buf_t *)hdl->sa_bonus, hdl,
	    NULL, NULL, NULL);

	if (hdl->sa_bonus_tab) {
		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
		hdl->sa_bonus_tab = NULL;
	}
	if (hdl->sa_spill_tab) {
		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
		hdl->sa_spill_tab = NULL;
	}

	dmu_buf_rele(hdl->sa_bonus, NULL);

	if (hdl->sa_spill)
		dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL);
	mutex_exit(&hdl->sa_lock);

	kmem_cache_free(sa_cache, hdl);
}

/*
 * Return, in *handlepp, an SA handle for the bonus dbuf db.
 *
 * For SA_HDL_SHARED the handle already attached to db as its user data is
 * reused when there is one.  Otherwise a handle is allocated from
 * sa_cache, initialized with userp/db/os, and indexed with
 * sa_build_index(); for SA_HDL_SHARED it is then installed as db's user
 * data.  Returns the result of sa_build_index(), or 0 when an existing
 * handle was reused.
 */
int
sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
{
	int error = 0;
	dmu_object_info_t doi;
	sa_handle_t *handle;

#ifdef ZFS_DEBUG
	/* Only SA and legacy znode bonus buffers are expected here. */
	dmu_object_info_from_db(db, &doi);
	ASSERT(doi.doi_bonus_type == DMU_OT_SA ||
	    doi.doi_bonus_type == DMU_OT_ZNODE);
#endif
	/* find handle, if it exists */
	/* if one doesn't exist then create a new one, and initialize it */

	handle = (hdl_type == SA_HDL_SHARED) ? dmu_buf_get_user(db) : NULL;
	if (handle == NULL) {
		sa_handle_t *newhandle;
		handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
		handle->sa_userp = userp;
		handle->sa_bonus = db;
		handle->sa_os = os;
		handle->sa_spill = NULL;

		error = sa_build_index(handle, SA_BONUS);
		newhandle = (hdl_type == SA_HDL_SHARED) ?
dmu_buf_set_user_ie(db, handle,
		    NULL, sa_evict) : NULL;

		/*
		 * A non-NULL return means another handle is already
		 * installed on db; use it and discard ours.
		 * NOTE(review): the index-table hold taken by
		 * sa_build_index() on the discarded handle does not appear
		 * to be released here -- confirm.
		 */
		if (newhandle != NULL) {
			kmem_cache_free(sa_cache, handle);
			handle = newhandle;
		}
	}
	*handlepp = handle;

	return (error);
}

/*
 * Hold the bonus buffer of object objid and return an SA handle for it
 * in *handlepp.  Returns the dmu_bonus_hold() error if the hold fails,
 * otherwise the result of sa_handle_get_from_db().
 */
int
sa_handle_get(objset_t *objset, uint64_t objid, void *userp,
    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
{
	dmu_buf_t *db;
	int error;

	if (error = dmu_bonus_hold(objset, objid, NULL, &db))
		return (error);

	return (sa_handle_get_from_db(objset, db, userp, hdl_type,
	    handlepp));
}

/*
 * Thin wrapper around dmu_bonus_hold(): hold the bonus buffer of obj_num
 * with the given tag and return it in *db.
 */
int
sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db)
{
	return (dmu_bonus_hold(objset, obj_num, tag, db));
}

/*
 * Thin wrapper around dmu_buf_rele(): release a buffer obtained with
 * sa_buf_hold() using the same tag.
 */
void
sa_buf_rele(dmu_buf_t *db, void *tag)
{
	dmu_buf_rele(db, tag);
}

/*
 * Look up 'count' attributes described by bulk.  The caller must hold
 * hdl->sa_lock.  Returns the result of sa_attr_op().
 */
int
sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count)
{
	ASSERT(hdl);
	ASSERT(MUTEX_HELD(&hdl->sa_lock));
	return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL));
}

/*
 * Look up a single attribute, passing buf/buflen as the destination.
 * Takes hdl->sa_lock for the duration of the lookup.
 */
int
sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
{
	int error;
	sa_bulk_attr_t bulk;

	bulk.sa_attr = attr;
	bulk.sa_data = buf;
	bulk.sa_length = buflen;
	bulk.sa_data_func = NULL;

	ASSERT(hdl);
	mutex_enter(&hdl->sa_lock);
	error = sa_lookup_impl(hdl, &bulk, 1);
	mutex_exit(&hdl->sa_lock);
	return (error);
}

#ifdef _KERNEL
/*
 * Look up a single attribute and move its data into uio.  At most
 * uio->uio_resid bytes are moved.  Takes hdl->sa_lock.
 */
int
sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
{
	int error;
	sa_bulk_attr_t bulk;

	/* No destination buffer: the lookup only locates the attribute. */
	bulk.sa_data = NULL;
	bulk.sa_attr = attr;
	bulk.sa_data_func = NULL;

	ASSERT(hdl);

	mutex_enter(&hdl->sa_lock);
	if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) {
		error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size,
		    uio->uio_resid), UIO_READ, uio);
	}
mutex_exit(&hdl->sa_lock); 1453 return (error); 1454 1455 } 1456 #endif 1457 1458 void * 1459 sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data) 1460 { 1461 sa_idx_tab_t *idx_tab; 1462 sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data; 1463 sa_os_t *sa = os->os_sa; 1464 sa_lot_t *tb, search; 1465 avl_index_t loc; 1466 1467 /* 1468 * Deterimine layout number. If SA node and header == 0 then 1469 * force the index table to the dummy "1" empty layout. 1470 * 1471 * The layout number would only be zero for a newly created file 1472 * that has not added any attributes yet, or with crypto enabled which 1473 * doesn't write any attributes to the bonus buffer. 1474 */ 1475 1476 search.lot_num = SA_LAYOUT_NUM(hdr, bonustype); 1477 1478 tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); 1479 1480 /* Verify header size is consistent with layout information */ 1481 ASSERT(tb); 1482 ASSERT(IS_SA_BONUSTYPE(bonustype) && 1483 SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) || 1484 (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0)); 1485 1486 /* 1487 * See if any of the already existing TOC entries can be reused? 
1488 */ 1489 1490 for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab; 1491 idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) { 1492 boolean_t valid_idx = B_TRUE; 1493 int i; 1494 1495 if (tb->lot_var_sizes != 0 && 1496 idx_tab->sa_variable_lengths != NULL) { 1497 for (i = 0; i != tb->lot_var_sizes; i++) { 1498 if (hdr->sa_lengths[i] != 1499 idx_tab->sa_variable_lengths[i]) { 1500 valid_idx = B_FALSE; 1501 break; 1502 } 1503 } 1504 } 1505 if (valid_idx) { 1506 sa_idx_tab_hold(os, idx_tab); 1507 return (idx_tab); 1508 } 1509 } 1510 1511 /* No such luck, create a new entry */ 1512 idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP); 1513 idx_tab->sa_idx_tab = 1514 kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP); 1515 idx_tab->sa_layout = tb; 1516 refcount_create(&idx_tab->sa_refcount); 1517 if (tb->lot_var_sizes) 1518 idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) * 1519 tb->lot_var_sizes, KM_SLEEP); 1520 1521 sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab, 1522 tb, idx_tab); 1523 sa_idx_tab_hold(os, idx_tab); /* one hold for consumer */ 1524 sa_idx_tab_hold(os, idx_tab); /* one for layout */ 1525 list_insert_tail(&tb->lot_idx_tab, idx_tab); 1526 return (idx_tab); 1527 } 1528 1529 void 1530 sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len, 1531 boolean_t start, void *userdata) 1532 { 1533 ASSERT(start); 1534 1535 *dataptr = userdata; 1536 *len = total_len; 1537 } 1538 1539 static void 1540 sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) 1541 { 1542 uint64_t attr_value = 0; 1543 sa_os_t *sa = hdl->sa_os->os_sa; 1544 sa_attr_table_t *tb = sa->sa_attr_table; 1545 int i; 1546 1547 mutex_enter(&sa->sa_lock); 1548 1549 if (!sa->sa_need_attr_registration || sa->sa_master_obj == NULL) { 1550 mutex_exit(&sa->sa_lock); 1551 return; 1552 } 1553 1554 if (sa->sa_reg_attr_obj == NULL) { 1555 sa->sa_reg_attr_obj = zap_create(hdl->sa_os, 1556 DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx); 1557 VERIFY(zap_add(hdl->sa_os, 
sa->sa_master_obj,
		    SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0);
	}
	for (i = 0; i != sa->sa_num_attrs; i++) {
		if (sa->sa_attr_table[i].sa_registered)
			continue;
		/*
		 * Pack id, length and byteswap index into one 64-bit value
		 * stored under the attribute's name.
		 */
		ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length,
		    tb[i].sa_byteswap);
		VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
		    tb[i].sa_name, 8, 1, &attr_value, tx));
		tb[i].sa_registered = B_TRUE;
	}
	sa->sa_need_attr_registration = B_FALSE;
	mutex_exit(&sa->sa_lock);
}

/*
 * Replace all attributes with attributes specified in template.
 * If the dnode had a spill buffer then those attributes will
 * also be replaced, possibly with just an empty spill block.
 *
 * This interface is intended to only be used for bulk adding of
 * attributes for a new file.  It will also be used by the ZPL
 * when converting an old formatted znode to native SA support.
 *
 * This variant does not take hdl->sa_lock itself; see
 * sa_replace_all_by_template() for the locking wrapper.
 */
int
sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
    int attr_count, dmu_tx_t *tx)
{
	sa_os_t *sa = hdl->sa_os->os_sa;

	/* Flush any pending attribute registrations first. */
	if (sa->sa_need_attr_registration)
		sa_attr_register_sync(hdl, tx);
	return (sa_build_layouts(hdl, attr_desc, attr_count, tx));
}

/*
 * Locking wrapper: runs sa_replace_all_by_template_locked() with
 * hdl->sa_lock held and returns its result.
 */
int
sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
    int attr_count, dmu_tx_t *tx)
{
	int error;

	mutex_enter(&hdl->sa_lock);
	error = sa_replace_all_by_template_locked(hdl, attr_desc,
	    attr_count, tx);
	mutex_exit(&hdl->sa_lock);
	return (error);
}

/*
 * add/remove/replace a single attribute and then rewrite the entire set
 * of attributes.
1609 */ 1610 static int 1611 sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 1612 sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 1613 uint16_t buflen, dmu_tx_t *tx) 1614 { 1615 sa_os_t *sa = hdl->sa_os->os_sa; 1616 dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; 1617 dnode_t *dn; 1618 sa_bulk_attr_t *attr_desc; 1619 void *old_data[2]; 1620 int bonus_attr_count = 0; 1621 int bonus_data_size, spill_data_size; 1622 int spill_attr_count = 0; 1623 int error; 1624 uint16_t length; 1625 int i, j, k, length_idx; 1626 sa_hdr_phys_t *hdr; 1627 sa_idx_tab_t *idx_tab; 1628 int attr_count; 1629 int count; 1630 1631 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1632 1633 /* First make of copy of the old data */ 1634 1635 DB_DNODE_ENTER(db); 1636 dn = DB_DNODE(db); 1637 if (dn->dn_bonuslen != 0) { 1638 bonus_data_size = hdl->sa_bonus->db_size; 1639 old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); 1640 bcopy(hdl->sa_bonus->db_data, old_data[0], 1641 hdl->sa_bonus->db_size); 1642 bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count; 1643 } else { 1644 old_data[0] = NULL; 1645 } 1646 DB_DNODE_EXIT(db); 1647 1648 /* Bring spill buffer online if it isn't currently */ 1649 1650 if ((error = sa_get_spill(hdl)) == 0) { 1651 spill_data_size = hdl->sa_spill->db_size; 1652 old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP); 1653 bcopy(hdl->sa_spill->db_data, old_data[1], 1654 hdl->sa_spill->db_size); 1655 spill_attr_count = 1656 hdl->sa_spill_tab->sa_layout->lot_attr_count; 1657 } else if (error && error != ENOENT) { 1658 if (old_data[0]) 1659 kmem_free(old_data[0], bonus_data_size); 1660 return (error); 1661 } else { 1662 old_data[1] = NULL; 1663 } 1664 1665 /* build descriptor of all attributes */ 1666 1667 attr_count = bonus_attr_count + spill_attr_count; 1668 if (action == SA_ADD) 1669 attr_count++; 1670 else if (action == SA_REMOVE) 1671 attr_count--; 1672 1673 attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP); 1674 1675 /* 
1676 * loop through bonus and spill buffer if it exists, and 1677 * build up new attr_descriptor to reset the attributes 1678 */ 1679 k = j = 0; 1680 count = bonus_attr_count; 1681 hdr = SA_GET_HDR(hdl, SA_BONUS); 1682 idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS); 1683 for (; k != 2; k++) { 1684 /* iterate over each attribute in layout */ 1685 for (i = 0, length_idx = 0; i != count; i++) { 1686 sa_attr_type_t attr; 1687 1688 attr = idx_tab->sa_layout->lot_attrs[i]; 1689 if (attr == newattr) { 1690 if (action == SA_REMOVE) { 1691 j++; 1692 continue; 1693 } 1694 ASSERT(SA_REGISTERED_LEN(sa, attr) == 0); 1695 ASSERT(action == SA_REPLACE); 1696 SA_ADD_BULK_ATTR(attr_desc, j, attr, 1697 locator, datastart, buflen); 1698 } else { 1699 length = SA_REGISTERED_LEN(sa, attr); 1700 if (length == 0) { 1701 length = hdr->sa_lengths[length_idx++]; 1702 } 1703 1704 SA_ADD_BULK_ATTR(attr_desc, j, attr, 1705 NULL, (void *) 1706 (TOC_OFF(idx_tab->sa_idx_tab[attr]) + 1707 (uintptr_t)old_data[k]), length); 1708 } 1709 } 1710 if (k == 0 && hdl->sa_spill) { 1711 hdr = SA_GET_HDR(hdl, SA_SPILL); 1712 idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL); 1713 count = spill_attr_count; 1714 } else { 1715 break; 1716 } 1717 } 1718 if (action == SA_ADD) { 1719 length = SA_REGISTERED_LEN(sa, newattr); 1720 if (length == 0) { 1721 length = buflen; 1722 } 1723 SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator, 1724 datastart, buflen); 1725 } 1726 1727 error = sa_build_layouts(hdl, attr_desc, attr_count, tx); 1728 1729 if (old_data[0]) 1730 kmem_free(old_data[0], bonus_data_size); 1731 if (old_data[1]) 1732 kmem_free(old_data[1], spill_data_size); 1733 kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); 1734 1735 return (error); 1736 } 1737 1738 static int 1739 sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 1740 dmu_tx_t *tx) 1741 { 1742 int error; 1743 sa_os_t *sa = hdl->sa_os->os_sa; 1744 dmu_object_type_t bonustype; 1745 1746 bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, 
SA_BONUS));

	ASSERT(hdl);
	ASSERT(MUTEX_HELD(&hdl->sa_lock));

	/* sync out registration table if necessary */
	if (sa->sa_need_attr_registration)
		sa_attr_register_sync(hdl, tx);

	error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx);
	/*
	 * After a successful update of a non-SA bonus type, invoke the
	 * registered update callback, if any.
	 */
	if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb)
		sa->sa_update_cb(hdl, tx);

	return (error);
}

/*
 * update or add new attribute
 *
 * Updates a single attribute 'type' from buf/buflen with hdl->sa_lock
 * held.  Returns the result of sa_bulk_update_impl().
 */
int
sa_update(sa_handle_t *hdl, sa_attr_type_t type,
    void *buf, uint32_t buflen, dmu_tx_t *tx)
{
	int error;
	sa_bulk_attr_t bulk;

	bulk.sa_attr = type;
	bulk.sa_data_func = NULL;
	bulk.sa_length = buflen;
	bulk.sa_data = buf;

	mutex_enter(&hdl->sa_lock);
	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
	mutex_exit(&hdl->sa_lock);
	return (error);
}

/*
 * Like sa_update(), but the data is supplied through a locator callback:
 * 'locator' is stored as the descriptor's data function and 'userdata'
 * as its data pointer.
 */
int
sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr,
    uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx)
{
	int error;
	sa_bulk_attr_t bulk;

	bulk.sa_attr = attr;
	bulk.sa_data = userdata;
	bulk.sa_data_func = locator;
	bulk.sa_length = buflen;

	mutex_enter(&hdl->sa_lock);
	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
	mutex_exit(&hdl->sa_lock);
	return (error);
}

/*
 * Return size of an attribute
 *
 * Looks the attribute up without a destination buffer and stores the
 * size reported by the lookup in *size.  Returns 0 on success or the
 * sa_attr_op() error, in which case *size is left untouched.
 */

int
sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size)
{
	sa_bulk_attr_t bulk;
	int error;

	bulk.sa_data = NULL;
	bulk.sa_attr = attr;
	bulk.sa_data_func = NULL;

	ASSERT(hdl);
	mutex_enter(&hdl->sa_lock);
	if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) {
		mutex_exit(&hdl->sa_lock);
		return (error);
	}
	*size = bulk.sa_size;

	mutex_exit(&hdl->sa_lock);
	return (0);
}

/*
 * Bulk lookup for callers that already hold hdl->sa_lock.
 */
int
sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs,
int count)
{
	ASSERT(hdl);
	ASSERT(MUTEX_HELD(&hdl->sa_lock));
	return (sa_lookup_impl(hdl, attrs, count));
}

/*
 * Look up 'count' attributes described by attrs, taking hdl->sa_lock
 * around the operation.
 */
int
sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
{
	int error;

	ASSERT(hdl);
	mutex_enter(&hdl->sa_lock);
	error = sa_bulk_lookup_locked(hdl, attrs, count);
	mutex_exit(&hdl->sa_lock);
	return (error);
}

/*
 * Update 'count' attributes described by attrs, taking hdl->sa_lock
 * around the operation.
 */
int
sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx)
{
	int error;

	ASSERT(hdl);
	mutex_enter(&hdl->sa_lock);
	error = sa_bulk_update_impl(hdl, attrs, count, tx);
	mutex_exit(&hdl->sa_lock);
	return (error);
}

/*
 * Remove a single attribute by rewriting the attribute set without it
 * (sa_modify_attrs() with SA_REMOVE).  Takes hdl->sa_lock.
 */
int
sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx)
{
	int error;

	mutex_enter(&hdl->sa_lock);
	error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL,
	    NULL, 0, tx);
	mutex_exit(&hdl->sa_lock);
	return (error);
}

/*
 * Fill in *doi with object info derived from the handle's bonus dbuf.
 */
void
sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi)
{
	dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi);
}

/*
 * Return the object's block size and block count, as reported by
 * dmu_object_size_from_db() for the handle's bonus dbuf.
 */
void
sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks)
{
	dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus,
	    blksize, nblocks);
}

/*
 * Swap the bonus dbuf's user data from oldhdl to newhdl (with sa_evict
 * as the eviction callback) and clear oldhdl's bonus pointer.
 */
void
sa_update_user(sa_handle_t *newhdl, sa_handle_t *oldhdl)
{
	(void) dmu_buf_update_user((dmu_buf_t *)newhdl->sa_bonus,
	    oldhdl, newhdl, NULL, sa_evict);
	oldhdl->sa_bonus = NULL;
}

/*
 * Set the opaque user pointer stored in the handle.
 */
void
sa_set_userp(sa_handle_t *hdl, void *ptr)
{
	hdl->sa_userp = ptr;
}

/*
 * Return the handle's bonus dbuf.
 */
dmu_buf_t *
sa_get_db(sa_handle_t *hdl)
{
	return ((dmu_buf_t *)hdl->sa_bonus);
}

/*
 * Return the opaque user pointer stored in the handle.
 */
void *
sa_get_userdata(sa_handle_t *hdl)
{
	return (hdl->sa_userp);
}

/*
 * Install func as the objset's SA update callback.  The caller must hold
 * os->os_sa->sa_lock.
 */
void
sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func)
{
ASSERT(MUTEX_HELD(&os->os_sa->sa_lock));
	os->os_sa->sa_update_cb = func;
}

/*
 * Install func as the objset's SA update callback, taking
 * os->os_sa->sa_lock around the assignment.
 */
void
sa_register_update_callback(objset_t *os, sa_update_cb_t *func)
{

	mutex_enter(&os->os_sa->sa_lock);
	sa_register_update_callback_locked(os, func);
	mutex_exit(&os->os_sa->sa_lock);
}

/*
 * Return the object number of the handle's bonus dbuf.
 */
uint64_t
sa_handle_object(sa_handle_t *hdl)
{
	return (hdl->sa_bonus->db_object);
}

/*
 * NOTE(review): as written this returns true when os->os_sa is NULL,
 * i.e. when SA state has NOT been set up, which looks inverted relative
 * to the function's name.  Left unchanged because callers are not visible
 * here -- confirm the intended sense before relying on it.
 */
boolean_t
sa_enabled(objset_t *os)
{
	return (os->os_sa == NULL);
}

/*
 * Record sa_object as the objset's SA master object.  Returns 1 without
 * changing anything if a master object is already set, 0 otherwise.
 * os->os_sa must already exist.
 */
int
sa_set_sa_object(objset_t *os, uint64_t sa_object)
{
	sa_os_t *sa = os->os_sa;

	if (sa->sa_master_obj)
		return (1);

	sa->sa_master_obj = sa_object;

	return (0);
}

/*
 * Return the size of the SA header at arg (an sa_hdr_phys_t pointer), as
 * computed by SA_HDR_SIZE().
 */
int
sa_hdrsize(void *arg)
{
	sa_hdr_phys_t *hdr = arg;

	return (SA_HDR_SIZE(hdr));
}

/*
 * Acquire the handle's lock.
 */
void
sa_handle_lock(sa_handle_t *hdl)
{
	ASSERT(hdl);
	mutex_enter(&hdl->sa_lock);
}

/*
 * Release the handle's lock.
 */
void
sa_handle_unlock(sa_handle_t *hdl)
{
	ASSERT(hdl);
	mutex_exit(&hdl->sa_lock);
}