/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * ARC buffer data (ABD).
 *
 * ABDs are an abstract data structure for the ARC which can use two
 * different ways of storing the underlying data:
 *
 * (a) Linear buffer. In this case, all the data in the ABD is stored in one
 *     contiguous buffer in memory (from a zio_[data_]buf_* kmem cache).
 *
 *    +-------------------+
 *    | ABD (linear)      |
 *    | abd_flags = ...   |
 *    | abd_size = ...    |     +-----------------------------+
 *    | abd_buf --------------->| raw buffer of size abd_size |
 *    +-------------------+     +-----------------------------+
 *      no abd_chunks
 *
 * (b) Scattered buffer. In this case, the data in the ABD is split into
 *     equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers
 *     to the chunks recorded in an array at the end of the ABD structure.
 *
 *    +-------------------+
 *    | ABD (scattered)   |
 *    | abd_flags = ...   |
 *    | abd_size = ...    |
 *    | abd_offset = 0    |                     +-----------+
 *    | abd_chunks[0] ------------------------->| chunk 0   |
 *    | abd_chunks[1] ------------------+       +-----------+
 *    | ...               |             |       +-----------+
 *    | abd_chunks[N-1] ------+         +------>| chunk 1   |
 *    +-------------------+   |                 +-----------+
 *                            |                     ...
 *                            |                 +-----------+
 *                            +---------------->| chunk N-1 |
 *                                              +-----------+
 *
 * Using a large proportion of scattered ABDs decreases ARC fragmentation:
 * when we are at the limit of allocatable space, equal-size chunks allow us
 * to quickly reclaim enough space for a new large allocation (assuming it is
 * also scattered).
 *
 * In addition to directly allocating a linear or scattered ABD, it is also
 * possible to create an ABD by requesting the "sub-ABD" starting at an offset
 * within an existing ABD. In linear buffers this is simple (set abd_buf of
 * the new ABD to the starting point within the original raw buffer), but
 * scattered ABDs are a little more complex. The new ABD makes a copy of the
 * relevant abd_chunks pointers (but not the underlying data). However, to
 * provide arbitrary rather than only chunk-aligned starting offsets, it also
 * tracks an abd_offset field which represents the starting point of the data
 * within the first chunk in abd_chunks. For both linear and scattered ABDs,
 * creating an offset ABD marks the original ABD as the offset ABD's parent,
 * and the original ABD's abd_children refcount is incremented. This data
 * allows us to ensure the root ABD isn't deleted before its children.
 *
 * Most consumers should never need to know what type of ABD they're using --
 * the ABD public API ensures that it's possible to transparently switch from
 * using a linear ABD to a scattered one when doing so would be beneficial.
 *
 * If you need to use the data within an ABD directly, and you know it's
 * linear (because you allocated it), you can use abd_to_buf() to access the
 * underlying raw buffer. Otherwise, you should use one of the abd_borrow_buf*
 * functions, which will allocate a raw buffer if necessary. Use the
 * abd_return_buf* functions to return any raw buffers that are no longer
 * necessary when you're done using them.
 *
 * There are a variety of ABD APIs that implement basic buffer operations:
 * compare, copy, read, write, and fill with zeroes. If you need a custom
 * function which progressively accesses the whole ABD, use the abd_iterate_*
 * functions.
 */

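/*
 * Example (a minimal sketch; example_checksum() and the 'size' value are
 * hypothetical, only the abd_* calls below are implemented in this file):
 * a consumer that needs temporary linear access to an ABD's contents would
 * typically pair the borrow/return calls like this:
 *
 *	abd_t *abd = abd_alloc(size, B_FALSE);
 *	void *buf = abd_borrow_buf_copy(abd, size);
 *	example_checksum(buf, size);
 *	abd_return_buf(abd, buf, size);
 *	abd_free(abd);
 *
 * abd_return_buf() is used (rather than abd_return_buf_copy()) because the
 * consumer above only reads the borrowed buffer.
 */
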
#include <sys/abd.h>
#include <sys/param.h>
#include <sys/zio.h>
#include <sys/zfs_context.h>
#include <sys/zfs_znode.h>

typedef struct abd_stats {
	kstat_named_t abdstat_struct_size;
	kstat_named_t abdstat_scatter_cnt;
	kstat_named_t abdstat_scatter_data_size;
	kstat_named_t abdstat_scatter_chunk_waste;
	kstat_named_t abdstat_linear_cnt;
	kstat_named_t abdstat_linear_data_size;
} abd_stats_t;

static abd_stats_t abd_stats = {
	/* Amount of memory occupied by all of the abd_t struct allocations */
	{ "struct_size",		KSTAT_DATA_UINT64 },
	/*
	 * The number of scatter ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset()).
	 */
	{ "scatter_cnt",		KSTAT_DATA_UINT64 },
	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
	{ "scatter_data_size",		KSTAT_DATA_UINT64 },
	/*
	 * The amount of space wasted at the end of the last chunk across all
	 * scatter ABDs tracked by scatter_cnt.
	 */
	{ "scatter_chunk_waste",	KSTAT_DATA_UINT64 },
	/*
	 * The number of linear ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
	 * ABD takes ownership of its buf then it will become tracked.
	 */
	{ "linear_cnt",			KSTAT_DATA_UINT64 },
	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
	{ "linear_data_size",		KSTAT_DATA_UINT64 },
};

#define	ABDSTAT(stat)		(abd_stats.stat.value.ui64)
#define	ABDSTAT_INCR(stat, val) \
	atomic_add_64(&abd_stats.stat.value.ui64, (val))
#define	ABDSTAT_BUMP(stat)	ABDSTAT_INCR(stat, 1)
#define	ABDSTAT_BUMPDOWN(stat)	ABDSTAT_INCR(stat, -1)

/*
 * It is possible to make all future ABDs be linear by setting this to B_FALSE.
 * Otherwise, ABDs are allocated scattered by default unless the caller uses
 * abd_alloc_linear().
 */
boolean_t zfs_abd_scatter_enabled = B_TRUE;

/*
 * The size of the chunks ABD allocates. Because the sizes allocated from the
 * kmem_cache can't change, this tunable can only be modified at boot. Changing
 * it at runtime would cause ABD iteration to work incorrectly for ABDs which
 * were allocated with the old size, so a safeguard has been put in place which
 * will cause the machine to panic if you change it and try to access the data
 * within a scattered ABD.
 */
size_t zfs_abd_chunk_size = 4096;

#ifdef _KERNEL
extern vmem_t *zio_alloc_arena;
#endif

kmem_cache_t *abd_chunk_cache;
static kstat_t *abd_ksp;

static void *
abd_alloc_chunk()
{
	void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
	ASSERT3P(c, !=, NULL);
	return (c);
}

static void
abd_free_chunk(void *c)
{
	kmem_cache_free(abd_chunk_cache, c);
}

void
abd_init(void)
{
	vmem_t *data_alloc_arena = NULL;

#ifdef _KERNEL
	data_alloc_arena = zio_alloc_arena;
#endif

	/*
	 * Since ABD chunks do not appear in crash dumps, we pass KMC_NOTOUCH
	 * so that no allocator metadata is stored with the buffers.
	 */
	abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
	    NULL, NULL, NULL, NULL, data_alloc_arena, KMC_NOTOUCH);

	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
	if (abd_ksp != NULL) {
		abd_ksp->ks_data = &abd_stats;
		kstat_install(abd_ksp);
	}
}

void
abd_fini(void)
{
	if (abd_ksp != NULL) {
		kstat_delete(abd_ksp);
		abd_ksp = NULL;
	}

	kmem_cache_destroy(abd_chunk_cache);
	abd_chunk_cache = NULL;
}

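/*
 * Worked example for the helpers below (hypothetical size, assuming the
 * default 4096-byte chunk): a 10000-byte scatter ABD rounds up to
 * P2ROUNDUP(10000, 4096) == 12288 bytes, i.e. 3 chunks, leaving 2288 unused
 * bytes at the end of the last chunk; that slack is what
 * abdstat_scatter_chunk_waste accounts for.
 */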
static inline size_t
abd_chunkcnt_for_bytes(size_t size)
{
	return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size);
}

static inline size_t
abd_scatter_chunkcnt(abd_t *abd)
{
	ASSERT(!abd_is_linear(abd));
	return (abd_chunkcnt_for_bytes(
	    abd->abd_u.abd_scatter.abd_offset + abd->abd_size));
}

static inline void
abd_verify(abd_t *abd)
{
	ASSERT3U(abd->abd_size, >, 0);
	ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
	ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
	    ABD_FLAG_OWNER | ABD_FLAG_META));
	IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
	IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
	if (abd_is_linear(abd)) {
		ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL);
	} else {
		ASSERT3U(abd->abd_u.abd_scatter.abd_offset, <,
		    zfs_abd_chunk_size);
		size_t n = abd_scatter_chunkcnt(abd);
		for (int i = 0; i < n; i++) {
			ASSERT3P(
			    abd->abd_u.abd_scatter.abd_chunks[i], !=, NULL);
		}
	}
}

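/*
 * Sizing sketch (illustrative numbers only): abd_t ends in a flexible array
 * of chunk pointers, so abd_alloc_struct(3) allocates
 * offsetof(abd_t, abd_u.abd_scatter.abd_chunks[3]) bytes -- the fixed part
 * of the struct plus room for three chunk pointers -- while a linear ABD
 * passes chunkcnt == 0 and gets no chunk array at all.
 */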
static inline abd_t *
abd_alloc_struct(size_t chunkcnt)
{
	size_t size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
	abd_t *abd = kmem_alloc(size, KM_PUSHPAGE);
	ASSERT3P(abd, !=, NULL);
	ABDSTAT_INCR(abdstat_struct_size, size);

	return (abd);
}

static inline void
abd_free_struct(abd_t *abd)
{
	size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd);
	int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
	kmem_free(abd, size);
	ABDSTAT_INCR(abdstat_struct_size, -size);
}

/*
 * Allocate an ABD, along with its own underlying data buffers. Use this if you
 * don't care whether the ABD is linear or not.
 */
abd_t *
abd_alloc(size_t size, boolean_t is_metadata)
{
	if (!zfs_abd_scatter_enabled)
		return (abd_alloc_linear(size, is_metadata));

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	size_t n = abd_chunkcnt_for_bytes(size);
	abd_t *abd = abd_alloc_struct(n);

	abd->abd_flags = ABD_FLAG_OWNER;
	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}
	abd->abd_size = size;
	abd->abd_parent = NULL;
	refcount_create(&abd->abd_children);

	abd->abd_u.abd_scatter.abd_offset = 0;
	abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size;

	for (int i = 0; i < n; i++) {
		void *c = abd_alloc_chunk();
		ASSERT3P(c, !=, NULL);
		abd->abd_u.abd_scatter.abd_chunks[i] = c;
	}

	ABDSTAT_BUMP(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, size);
	ABDSTAT_INCR(abdstat_scatter_chunk_waste,
	    n * zfs_abd_chunk_size - size);

	return (abd);
}

static void
abd_free_scatter(abd_t *abd)
{
	size_t n = abd_scatter_chunkcnt(abd);
	for (int i = 0; i < n; i++) {
		abd_free_chunk(abd->abd_u.abd_scatter.abd_chunks[i]);
	}

	refcount_destroy(&abd->abd_children);
	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
	ABDSTAT_INCR(abdstat_scatter_chunk_waste,
	    abd->abd_size - n * zfs_abd_chunk_size);

	abd_free_struct(abd);
}

/*
 * Allocate an ABD that must be linear, along with its own underlying data
 * buffer. Only use this when it would be very annoying to write your ABD
 * consumer with a scattered ABD.
 */
abd_t *
abd_alloc_linear(size_t size, boolean_t is_metadata)
{
	abd_t *abd = abd_alloc_struct(0);

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}
	abd->abd_size = size;
	abd->abd_parent = NULL;
	refcount_create(&abd->abd_children);

	if (is_metadata) {
		abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size);
	} else {
		abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size);
	}

	ABDSTAT_BUMP(abdstat_linear_cnt);
	ABDSTAT_INCR(abdstat_linear_data_size, size);

	return (abd);
}

static void
abd_free_linear(abd_t *abd)
{
	if (abd->abd_flags & ABD_FLAG_META) {
		zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
	} else {
		zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
	}

	refcount_destroy(&abd->abd_children);
	ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
	ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);

	abd_free_struct(abd);
}

/*
 * Free an ABD. Only use this on ABDs allocated with abd_alloc() or
 * abd_alloc_linear().
 */
void
abd_free(abd_t *abd)
{
	abd_verify(abd);
	ASSERT3P(abd->abd_parent, ==, NULL);
	ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
	if (abd_is_linear(abd))
		abd_free_linear(abd);
	else
		abd_free_scatter(abd);
}

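/*
 * Example (a minimal sketch; example_device_write() and 'size' are
 * hypothetical): a consumer that must hand a contiguous buffer to another
 * subsystem can ask for a linear ABD up front and use abd_to_buf() directly:
 *
 *	abd_t *abd = abd_alloc_linear(size, B_FALSE);
 *	example_device_write(abd_to_buf(abd), size);
 *	abd_free(abd);
 */
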
/*
 * Allocate an ABD of the same format (same metadata flag, same scatterize
 * setting) as another ABD.
 */
abd_t *
abd_alloc_sametype(abd_t *sabd, size_t size)
{
	boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0;
	if (abd_is_linear(sabd)) {
		return (abd_alloc_linear(size, is_metadata));
	} else {
		return (abd_alloc(size, is_metadata));
	}
}

/*
 * If we're going to use this ABD for doing I/O using the block layer, the
 * consumer of the ABD data doesn't care if it's scattered or not, and we don't
 * plan to store this ABD in memory for a long period of time, we should
 * allocate the ABD type that requires the least data copying to do the I/O.
 *
 * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os
 * using a scatter/gather list we should switch to that and replace this call
 * with vanilla abd_alloc().
 */
abd_t *
abd_alloc_for_io(size_t size, boolean_t is_metadata)
{
	return (abd_alloc_linear(size, is_metadata));
}

/*
 * Allocate a new ABD to point to offset off of sabd. It shares the underlying
 * buffer data with sabd. Use abd_put() to free. sabd must not be freed while
 * any derived ABDs exist.
 */
abd_t *
abd_get_offset(abd_t *sabd, size_t off)
{
	abd_t *abd;

	abd_verify(sabd);
	ASSERT3U(off, <=, sabd->abd_size);

	if (abd_is_linear(sabd)) {
		abd = abd_alloc_struct(0);

		/*
		 * Even if this buf is filesystem metadata, we only track that
		 * if we own the underlying data buffer, which is not true in
		 * this case. Therefore, we don't ever use ABD_FLAG_META here.
		 */
		abd->abd_flags = ABD_FLAG_LINEAR;

		abd->abd_u.abd_linear.abd_buf =
		    (char *)sabd->abd_u.abd_linear.abd_buf + off;
	} else {
		size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off;
		size_t chunkcnt = abd_scatter_chunkcnt(sabd) -
		    (new_offset / zfs_abd_chunk_size);

		abd = abd_alloc_struct(chunkcnt);

		/*
		 * Even if this buf is filesystem metadata, we only track that
		 * if we own the underlying data buffer, which is not true in
		 * this case. Therefore, we don't ever use ABD_FLAG_META here.
		 */
		abd->abd_flags = 0;

		abd->abd_u.abd_scatter.abd_offset =
		    new_offset % zfs_abd_chunk_size;
		abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size;

		/* Copy the scatterlist starting at the correct offset */
		(void) memcpy(&abd->abd_u.abd_scatter.abd_chunks,
		    &sabd->abd_u.abd_scatter.abd_chunks[new_offset /
		    zfs_abd_chunk_size],
		    chunkcnt * sizeof (void *));
	}

	abd->abd_size = sabd->abd_size - off;
	abd->abd_parent = sabd;
	refcount_create(&abd->abd_children);
	(void) refcount_add_many(&sabd->abd_children, abd->abd_size, abd);

	return (abd);
}

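/*
 * Worked example for abd_get_offset() above (hypothetical values, assuming
 * 4096-byte chunks): abd_get_offset(sabd, 6000) on a scatter ABD whose
 * abd_offset is 0 computes new_offset == 6000, so the child skips
 * 6000 / 4096 == 1 leading chunk, copies the remaining chunk pointers, and
 * records abd_offset == 6000 % 4096 == 1904 within its new first chunk.
 */
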
/*
 * Allocate a linear ABD structure for buf. You must free this with abd_put()
 * since the resulting ABD doesn't own its own buffer.
 */
abd_t *
abd_get_from_buf(void *buf, size_t size)
{
	abd_t *abd = abd_alloc_struct(0);

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	/*
	 * Even if this buf is filesystem metadata, we only track that if we
	 * own the underlying data buffer, which is not true in this case.
	 * Therefore, we don't ever use ABD_FLAG_META here.
	 */
	abd->abd_flags = ABD_FLAG_LINEAR;
	abd->abd_size = size;
	abd->abd_parent = NULL;
	refcount_create(&abd->abd_children);

	abd->abd_u.abd_linear.abd_buf = buf;

	return (abd);
}

/*
 * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
 * free the underlying scatterlist or buffer.
 */
void
abd_put(abd_t *abd)
{
	abd_verify(abd);
	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));

	if (abd->abd_parent != NULL) {
		(void) refcount_remove_many(&abd->abd_parent->abd_children,
		    abd->abd_size, abd);
	}

	refcount_destroy(&abd->abd_children);
	abd_free_struct(abd);
}

/*
 * Get the raw buffer associated with a linear ABD.
 */
void *
abd_to_buf(abd_t *abd)
{
	ASSERT(abd_is_linear(abd));
	abd_verify(abd);
	return (abd->abd_u.abd_linear.abd_buf);
}

/*
 * Borrow a raw buffer from an ABD without copying the contents of the ABD
 * into the buffer. If the ABD is scattered, this will allocate a raw buffer
 * whose contents are undefined. To copy over the existing data in the ABD, use
 * abd_borrow_buf_copy() instead.
 */
void *
abd_borrow_buf(abd_t *abd, size_t n)
{
	void *buf;
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, n);
	if (abd_is_linear(abd)) {
		buf = abd_to_buf(abd);
	} else {
		buf = zio_buf_alloc(n);
	}
	(void) refcount_add_many(&abd->abd_children, n, buf);

	return (buf);
}

void *
abd_borrow_buf_copy(abd_t *abd, size_t n)
{
	void *buf = abd_borrow_buf(abd, n);
	if (!abd_is_linear(abd)) {
		abd_copy_to_buf(buf, abd, n);
	}
	return (buf);
}

/*
 * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
 * not change the contents of the ABD and will ASSERT that you didn't modify
 * the buffer since it was borrowed. If you want any changes you made to buf to
 * be copied back to abd, use abd_return_buf_copy() instead.
 */
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
{
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, n);
	if (abd_is_linear(abd)) {
		ASSERT3P(buf, ==, abd_to_buf(abd));
	} else {
		ASSERT0(abd_cmp_buf(abd, buf, n));
		zio_buf_free(buf, n);
	}
	(void) refcount_remove_many(&abd->abd_children, n, buf);
}

void
abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
{
	if (!abd_is_linear(abd)) {
		abd_copy_from_buf(abd, buf, n);
	}
	abd_return_buf(abd, buf, n);
}

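/*
 * Example (a minimal sketch; 'size' is hypothetical): wrapping a pre-existing
 * raw buffer and later handing its lifetime over to the ABD might look like
 * this:
 *
 *	void *buf = zio_data_buf_alloc(size);
 *	abd_t *abd = abd_get_from_buf(buf, size);
 *	...
 *	abd_take_ownership_of_buf(abd, B_FALSE);
 *	abd_free(abd);
 *
 * Once ownership has been taken, abd_free() releases buf back to the
 * zio_data_buf_* cache, so the caller must not free it separately.
 */
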
/*
 * Give this ABD ownership of the buffer that it's storing. Can only be used on
 * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
 * with abd_alloc_linear() which subsequently released ownership of their buf
 * with abd_release_ownership_of_buf().
 */
void
abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
{
	ASSERT(abd_is_linear(abd));
	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
	abd_verify(abd);

	abd->abd_flags |= ABD_FLAG_OWNER;
	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}

	ABDSTAT_BUMP(abdstat_linear_cnt);
	ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
}

void
abd_release_ownership_of_buf(abd_t *abd)
{
	ASSERT(abd_is_linear(abd));
	ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
	abd_verify(abd);

	abd->abd_flags &= ~ABD_FLAG_OWNER;
	/* Disable this flag since we no longer own the data buffer */
	abd->abd_flags &= ~ABD_FLAG_META;

	ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
	ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
}

struct abd_iter {
	abd_t	*iter_abd;	/* ABD being iterated through */
	size_t	iter_pos;	/* position (relative to abd_offset) */
	void	*iter_mapaddr;	/* addr corresponding to iter_pos */
	size_t	iter_mapsize;	/* length of data valid at mapaddr */
};

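/*
 * Positioning sketch for the helpers below (hypothetical values, assuming
 * 4096-byte chunks): for a scatter ABD with abd_offset == 1904, an iterator
 * at iter_pos == 3000 sits (1904 + 3000) == 4904 bytes into the chunk array,
 * i.e. chunk index 4904 / 4096 == 1 at offset 4904 % 4096 == 808, so
 * abd_iter_map() would expose 4096 - 808 == 3288 contiguous bytes there.
 */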
static inline size_t
abd_iter_scatter_chunk_offset(struct abd_iter *aiter)
{
	ASSERT(!abd_is_linear(aiter->iter_abd));
	return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset +
	    aiter->iter_pos) % zfs_abd_chunk_size);
}

static inline size_t
abd_iter_scatter_chunk_index(struct abd_iter *aiter)
{
	ASSERT(!abd_is_linear(aiter->iter_abd));
	return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset +
	    aiter->iter_pos) / zfs_abd_chunk_size);
}

/*
 * Initialize the abd_iter.
 */
static void
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{
	abd_verify(abd);
	aiter->iter_abd = abd;
	aiter->iter_pos = 0;
	aiter->iter_mapaddr = NULL;
	aiter->iter_mapsize = 0;
}

/*
 * Advance the iterator by a certain amount. Cannot be called when a chunk is
 * in use. This can be safely called when the iterator has already been
 * exhausted, in which case this does nothing.
 */
static void
abd_iter_advance(struct abd_iter *aiter, size_t amount)
{
	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
	ASSERT0(aiter->iter_mapsize);

	/* There's nothing left to advance to, so do nothing */
	if (aiter->iter_pos == aiter->iter_abd->abd_size)
		return;

	aiter->iter_pos += amount;
}

/*
 * Map the current chunk into aiter. This can be safely called when the
 * iterator has already been exhausted, in which case this does nothing.
 */
static void
abd_iter_map(struct abd_iter *aiter)
{
	void *paddr;
	size_t offset = 0;

	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
	ASSERT0(aiter->iter_mapsize);

	/* Panic if someone has changed zfs_abd_chunk_size */
	IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size ==
	    aiter->iter_abd->abd_u.abd_scatter.abd_chunk_size);

	/* There's nothing left to iterate over, so do nothing */
	if (aiter->iter_pos == aiter->iter_abd->abd_size)
		return;

	if (abd_is_linear(aiter->iter_abd)) {
		offset = aiter->iter_pos;
		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
		paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf;
	} else {
		size_t index = abd_iter_scatter_chunk_index(aiter);
		offset = abd_iter_scatter_chunk_offset(aiter);
		aiter->iter_mapsize = zfs_abd_chunk_size - offset;
		paddr = aiter->iter_abd->abd_u.abd_scatter.abd_chunks[index];
	}
	aiter->iter_mapaddr = (char *)paddr + offset;
}

/*
 * Unmap the current chunk from aiter. This can be safely called when the
 * iterator has already been exhausted, in which case this does nothing.
 */
static void
abd_iter_unmap(struct abd_iter *aiter)
{
	/* There's nothing left to unmap, so do nothing */
	if (aiter->iter_pos == aiter->iter_abd->abd_size)
		return;

	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
	ASSERT3U(aiter->iter_mapsize, >, 0);

	aiter->iter_mapaddr = NULL;
	aiter->iter_mapsize = 0;
}

int
abd_iterate_func(abd_t *abd, size_t off, size_t size,
    abd_iter_func_t *func, void *private)
{
	int ret = 0;
	struct abd_iter aiter;

	abd_verify(abd);
	ASSERT3U(off + size, <=, abd->abd_size);

	abd_iter_init(&aiter, abd);
	abd_iter_advance(&aiter, off);

	while (size > 0) {
		abd_iter_map(&aiter);

		size_t len = MIN(aiter.iter_mapsize, size);
		ASSERT3U(len, >, 0);

		ret = func(aiter.iter_mapaddr, len, private);

		abd_iter_unmap(&aiter);

		if (ret != 0)
			break;

		size -= len;
		abd_iter_advance(&aiter, len);
	}

	return (ret);
}

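/*
 * Example (a minimal sketch; example_sum_cb(), 'abd' and 'size' are
 * hypothetical): a consumer-defined callback for abd_iterate_func() handles
 * one mapped region per invocation and returns nonzero to stop early:
 *
 *	static int
 *	example_sum_cb(void *buf, size_t size, void *private)
 *	{
 *		uint64_t *sum = private;
 *		for (size_t i = 0; i < size; i++)
 *			*sum += ((uint8_t *)buf)[i];
 *		return (0);
 *	}
 *
 *	uint64_t sum = 0;
 *	(void) abd_iterate_func(abd, 0, size, example_sum_cb, &sum);
 */
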
struct buf_arg {
	void *arg_buf;
};

static int
abd_copy_to_buf_off_cb(void *buf, size_t size, void *private)
{
	struct buf_arg *ba_ptr = private;

	(void) memcpy(ba_ptr->arg_buf, buf, size);
	ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;

	return (0);
}

/*
 * Copy abd to buf. (off is the offset in abd.)
 */
void
abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size)
{
	struct buf_arg ba_ptr = { buf };

	(void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb,
	    &ba_ptr);
}

static int
abd_cmp_buf_off_cb(void *buf, size_t size, void *private)
{
	int ret;
	struct buf_arg *ba_ptr = private;

	ret = memcmp(buf, ba_ptr->arg_buf, size);
	ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;

	return (ret);
}

/*
 * Compare the contents of abd to buf. (off is the offset in abd.)
 */
int
abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
{
	struct buf_arg ba_ptr = { (void *) buf };

	return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr));
}

static int
abd_copy_from_buf_off_cb(void *buf, size_t size, void *private)
{
	struct buf_arg *ba_ptr = private;

	(void) memcpy(buf, ba_ptr->arg_buf, size);
	ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;

	return (0);
}

/*
 * Copy from buf to abd. (off is the offset in abd.)
 */
void
abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
{
	struct buf_arg ba_ptr = { (void *) buf };

	(void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb,
	    &ba_ptr);
}

/*ARGSUSED*/
static int
abd_zero_off_cb(void *buf, size_t size, void *private)
{
	(void) memset(buf, 0, size);
	return (0);
}

/*
 * Zero out the abd from a particular offset to the end.
 */
void
abd_zero_off(abd_t *abd, size_t off, size_t size)
{
	(void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL);
}

/*
 * Iterate over two ABDs and call func incrementally on the two ABDs' data in
 * equal-sized chunks (passed to func as raw buffers). func could be called many
 * times during this iteration.
 */
int
abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
    size_t size, abd_iter_func2_t *func, void *private)
{
	int ret = 0;
	struct abd_iter daiter, saiter;

	abd_verify(dabd);
	abd_verify(sabd);

	ASSERT3U(doff + size, <=, dabd->abd_size);
	ASSERT3U(soff + size, <=, sabd->abd_size);

	abd_iter_init(&daiter, dabd);
	abd_iter_init(&saiter, sabd);
	abd_iter_advance(&daiter, doff);
	abd_iter_advance(&saiter, soff);

	while (size > 0) {
		abd_iter_map(&daiter);
		abd_iter_map(&saiter);

		size_t dlen = MIN(daiter.iter_mapsize, size);
		size_t slen = MIN(saiter.iter_mapsize, size);
		size_t len = MIN(dlen, slen);
		ASSERT(dlen > 0 || slen > 0);

		ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len,
		    private);

		abd_iter_unmap(&saiter);
		abd_iter_unmap(&daiter);

		if (ret != 0)
			break;

		size -= len;
		abd_iter_advance(&daiter, len);
		abd_iter_advance(&saiter, len);
	}

	return (ret);
}

/*ARGSUSED*/
static int
abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private)
{
	(void) memcpy(dbuf, sbuf, size);
	return (0);
}

/*
 * Copy from sabd to dabd starting from soff and doff.
 */
void
abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size)
{
	(void) abd_iterate_func2(dabd, sabd, doff, soff, size,
	    abd_copy_off_cb, NULL);
}

/*ARGSUSED*/
static int
abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private)
{
	return (memcmp(bufa, bufb, size));
}

/*
 * Compares the first size bytes of two ABDs.
 */
int
abd_cmp(abd_t *dabd, abd_t *sabd, size_t size)
{
	return (abd_iterate_func2(dabd, sabd, 0, 0, size, abd_cmp_cb, NULL));
}
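
/*
 * Example (a minimal sketch; example_xor_cb(), 'dabd', 'sabd' and 'size' are
 * hypothetical): a two-buffer callback for abd_iterate_func2() sees
 * equal-length mapped regions from both ABDs, e.g. to XOR a source into a
 * destination:
 *
 *	static int
 *	example_xor_cb(void *dbuf, void *sbuf, size_t size, void *private)
 *	{
 *		uint8_t *d = dbuf;
 *		uint8_t *s = sbuf;
 *		for (size_t i = 0; i < size; i++)
 *			d[i] ^= s[i];
 *		return (0);
 *	}
 *
 *	(void) abd_iterate_func2(dabd, sabd, 0, 0, size, example_xor_cb, NULL);
 */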