1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2019, Joyent, Inc. 24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25 * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. 26 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 27 * Copyright (c) 2020, George Amanakis. All rights reserved. 28 */ 29 30 #ifndef _SYS_ARC_IMPL_H 31 #define _SYS_ARC_IMPL_H 32 33 #include <sys/arc.h> 34 #include <sys/multilist.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 /* 41 * Note that buffers can be in one of 6 states: 42 * ARC_anon - anonymous (discussed below) 43 * ARC_mru - recently used, currently cached 44 * ARC_mru_ghost - recently used, no longer in cache 45 * ARC_mfu - frequently used, currently cached 46 * ARC_mfu_ghost - frequently used, no longer in cache 47 * ARC_l2c_only - exists in L2ARC but not other states 48 * When there are no active references to the buffer, they are 49 * are linked onto a list in one of these arc states. These are 50 * the only buffers that can be evicted or deleted. Within each 51 * state there are multiple lists, one for meta-data and one for 52 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, 53 * etc.) is tracked separately so that it can be managed more 54 * explicitly: favored over data, limited explicitly. 55 * 56 * Anonymous buffers are buffers that are not associated with 57 * a DVA. These are buffers that hold dirty block copies 58 * before they are written to stable storage. By definition, 59 * they are "ref'd" and are considered part of arc_mru 60 * that cannot be freed. Generally, they will aquire a DVA 61 * as they are written and migrate onto the arc_mru list. 62 * 63 * The ARC_l2c_only state is for buffers that are in the second 64 * level ARC but no longer in any of the ARC_m* lists. The second 65 * level ARC itself may also contain buffers that are in any of 66 * the ARC_m* states - meaning that a buffer can exist in two 67 * places. The reason for the ARC_l2c_only state is to keep the 68 * buffer header in the hash table, so that reads that hit the 69 * second level ARC benefit from these fast lookups. 70 */ 71 72 typedef struct arc_state { 73 /* 74 * list of evictable buffers 75 */ 76 multilist_t *arcs_list[ARC_BUFC_NUMTYPES]; 77 /* 78 * total amount of evictable data in this state 79 */ 80 zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES]; 81 /* 82 * total amount of data in this state; this includes: evictable, 83 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. 84 */ 85 zfs_refcount_t arcs_size; 86 } arc_state_t; 87 88 typedef struct arc_callback arc_callback_t; 89 90 struct arc_callback { 91 void *acb_private; 92 arc_read_done_func_t *acb_done; 93 arc_buf_t *acb_buf; 94 boolean_t acb_encrypted; 95 boolean_t acb_compressed; 96 boolean_t acb_noauth; 97 zbookmark_phys_t acb_zb; 98 zio_t *acb_zio_dummy; 99 zio_t *acb_zio_head; 100 arc_callback_t *acb_next; 101 }; 102 103 typedef struct arc_write_callback arc_write_callback_t; 104 105 struct arc_write_callback { 106 void *awcb_private; 107 arc_write_done_func_t *awcb_ready; 108 arc_write_done_func_t *awcb_children_ready; 109 arc_write_done_func_t *awcb_physdone; 110 arc_write_done_func_t *awcb_done; 111 arc_buf_t *awcb_buf; 112 }; 113 114 /* 115 * ARC buffers are separated into multiple structs as a memory saving measure: 116 * - Common fields struct, always defined, and embedded within it: 117 * - L2-only fields, always allocated but undefined when not in L2ARC 118 * - L1-only fields, only allocated when in L1ARC 119 * 120 * Buffer in L1 Buffer only in L2 121 * +------------------------+ +------------------------+ 122 * | arc_buf_hdr_t | | arc_buf_hdr_t | 123 * | | | | 124 * | | | | 125 * | | | | 126 * +------------------------+ +------------------------+ 127 * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t | 128 * | (undefined if L1-only) | | | 129 * +------------------------+ +------------------------+ 130 * | l1arc_buf_hdr_t | 131 * | | 132 * | | 133 * | | 134 * | | 135 * +------------------------+ 136 * 137 * Because it's possible for the L2ARC to become extremely large, we can wind 138 * up eating a lot of memory in L2ARC buffer headers, so the size of a header 139 * is minimized by only allocating the fields necessary for an L1-cached buffer 140 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and 141 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple 142 * words in pointers. arc_hdr_realloc() is used to switch a header between 143 * these two allocation states. 144 */ 145 typedef struct l1arc_buf_hdr { 146 kmutex_t b_freeze_lock; 147 zio_cksum_t *b_freeze_cksum; 148 #ifdef ZFS_DEBUG 149 /* 150 * Used for debugging with kmem_flags - by allocating and freeing 151 * b_thawed when the buffer is thawed, we get a record of the stack 152 * trace that thawed it. 153 */ 154 void *b_thawed; 155 #endif 156 157 arc_buf_t *b_buf; 158 uint32_t b_bufcnt; 159 /* for waiting on writes to complete */ 160 kcondvar_t b_cv; 161 uint8_t b_byteswap; 162 163 /* protected by arc state mutex */ 164 arc_state_t *b_state; 165 multilist_node_t b_arc_node; 166 167 /* updated atomically */ 168 clock_t b_arc_access; 169 170 /* self protecting */ 171 zfs_refcount_t b_refcnt; 172 173 arc_callback_t *b_acb; 174 abd_t *b_pabd; 175 } l1arc_buf_hdr_t; 176 177 typedef enum l2arc_dev_hdr_flags_t { 178 L2ARC_DEV_HDR_EVICT_FIRST = (1 << 0) /* mirror of l2ad_first */ 179 } l2arc_dev_hdr_flags_t; 180 181 /* 182 * Pointer used in persistent L2ARC (for pointing to log blocks). 183 */ 184 typedef struct l2arc_log_blkptr { 185 /* 186 * Offset of log block within the device, in bytes 187 */ 188 uint64_t lbp_daddr; 189 /* 190 * Aligned payload size (in bytes) of the log block 191 */ 192 uint64_t lbp_payload_asize; 193 /* 194 * Offset in bytes of the first buffer in the payload 195 */ 196 uint64_t lbp_payload_start; 197 /* 198 * lbp_prop has the following format: 199 * * logical size (in bytes) 200 * * aligned (after compression) size (in bytes) 201 * * compression algorithm (we always LZ4-compress l2arc logs) 202 * * checksum algorithm (used for lbp_cksum) 203 */ 204 uint64_t lbp_prop; 205 zio_cksum_t lbp_cksum; /* checksum of log */ 206 } l2arc_log_blkptr_t; 207 208 /* 209 * The persistent L2ARC device header. 210 * Byte order of magic determines whether 64-bit bswap of fields is necessary. 211 */ 212 typedef struct l2arc_dev_hdr_phys { 213 uint64_t dh_magic; /* L2ARC_DEV_HDR_MAGIC */ 214 uint64_t dh_version; /* Persistent L2ARC version */ 215 216 /* 217 * Global L2ARC device state and metadata. 218 */ 219 uint64_t dh_spa_guid; 220 uint64_t dh_vdev_guid; 221 uint64_t dh_log_entries; /* mirror of l2ad_log_entries */ 222 uint64_t dh_evict; /* evicted offset in bytes */ 223 uint64_t dh_flags; /* l2arc_dev_hdr_flags_t */ 224 /* 225 * Used in zdb.c for determining if a log block is valid, in the same 226 * way that l2arc_rebuild() does. 227 */ 228 uint64_t dh_start; /* mirror of l2ad_start */ 229 uint64_t dh_end; /* mirror of l2ad_end */ 230 /* 231 * Start of log block chain. [0] -> newest log, [1] -> one older (used 232 * for initiating prefetch). 233 */ 234 l2arc_log_blkptr_t dh_start_lbps[2]; 235 /* 236 * Aligned size of all log blocks as accounted by vdev_space_update(). 237 */ 238 uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */ 239 uint64_t dh_lb_count; /* mirror of l2ad_lb_count */ 240 const uint64_t dh_pad[32]; /* pad to 512 bytes */ 241 zio_eck_t dh_tail; 242 } l2arc_dev_hdr_phys_t; 243 CTASSERT(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE); 244 245 /* 246 * A single ARC buffer header entry in a l2arc_log_blk_phys_t. 247 */ 248 typedef struct l2arc_log_ent_phys { 249 dva_t le_dva; /* dva of buffer */ 250 uint64_t le_birth; /* birth txg of buffer */ 251 /* 252 * le_prop has the following format: 253 * * logical size (in bytes) 254 * * physical (compressed) size (in bytes) 255 * * compression algorithm 256 * * object type (used to restore arc_buf_contents_t) 257 * * protected status (used for encryption) 258 * * prefetch status (used in l2arc_read_done()) 259 */ 260 uint64_t le_prop; 261 uint64_t le_daddr; /* buf location on l2dev */ 262 /* 263 * We pad the size of each entry to a power of 2 so that the size of 264 * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT, 265 * because of the L2ARC_SET_*SIZE macros. 266 */ 267 const uint64_t le_pad[3]; /* pad to 64 bytes */ 268 } l2arc_log_ent_phys_t; 269 270 #define L2ARC_LOG_BLK_MAX_ENTRIES (1022) 271 272 /* 273 * A log block of up to 1022 ARC buffer log entries, chained into the 274 * persistent L2ARC metadata linked list. Byte order of magic determines 275 * whether 64-bit bswap of fields is necessary. 276 */ 277 typedef struct l2arc_log_blk_phys { 278 uint64_t lb_magic; /* L2ARC_LOG_BLK_MAGIC */ 279 /* 280 * There are 2 chains (headed by dh_start_lbps[2]), and this field 281 * points back to the previous block in this chain. We alternate 282 * which chain we append to, so they are time-wise and offset-wise 283 * interleaved, but that is an optimization rather than for 284 * correctness. 285 */ 286 l2arc_log_blkptr_t lb_prev_lbp; /* pointer to prev log block */ 287 /* 288 * Pad header section to 128 bytes 289 */ 290 uint64_t lb_pad[7]; 291 /* Payload */ 292 l2arc_log_ent_phys_t lb_entries[L2ARC_LOG_BLK_MAX_ENTRIES]; 293 } l2arc_log_blk_phys_t; /* 64K total */ 294 /* 295 * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with 296 * SPA_MINBLOCKSHIFT because of L2BLK_SET_*SIZE macros. 297 */ 298 CTASSERT(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t), 299 1ULL << SPA_MINBLOCKSHIFT)); 300 CTASSERT(sizeof (l2arc_log_blk_phys_t) >= SPA_MINBLOCKSIZE); 301 CTASSERT(sizeof (l2arc_log_blk_phys_t) <= SPA_MAXBLOCKSIZE); 302 303 /* 304 * These structures hold in-flight abd buffers for log blocks as they're being 305 * written to the L2ARC device. 306 */ 307 typedef struct l2arc_lb_abd_buf { 308 abd_t *abd; 309 list_node_t node; 310 } l2arc_lb_abd_buf_t; 311 312 /* 313 * These structures hold pointers to log blocks present on the L2ARC device. 314 */ 315 typedef struct l2arc_lb_ptr_buf { 316 l2arc_log_blkptr_t *lb_ptr; 317 list_node_t node; 318 } l2arc_lb_ptr_buf_t; 319 320 /* Macros for setting fields in le_prop and lbp_prop */ 321 #define L2BLK_GET_LSIZE(field) \ 322 BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1) 323 #define L2BLK_SET_LSIZE(field, x) \ 324 BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) 325 #define L2BLK_GET_PSIZE(field) \ 326 BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1) 327 #define L2BLK_SET_PSIZE(field, x) \ 328 BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) 329 #define L2BLK_GET_COMPRESS(field) \ 330 BF64_GET((field), 32, SPA_COMPRESSBITS) 331 #define L2BLK_SET_COMPRESS(field, x) \ 332 BF64_SET((field), 32, SPA_COMPRESSBITS, x) 333 #define L2BLK_GET_PREFETCH(field) BF64_GET((field), 39, 1) 334 #define L2BLK_SET_PREFETCH(field, x) BF64_SET((field), 39, 1, x) 335 #define L2BLK_GET_CHECKSUM(field) BF64_GET((field), 40, 8) 336 #define L2BLK_SET_CHECKSUM(field, x) BF64_SET((field), 40, 8, x) 337 #define L2BLK_GET_TYPE(field) BF64_GET((field), 48, 8) 338 #define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, x) 339 #define L2BLK_GET_PROTECTED(field) BF64_GET((field), 56, 1) 340 #define L2BLK_SET_PROTECTED(field, x) BF64_SET((field), 56, 1, x) 341 342 #define PTR_SWAP(x, y) \ 343 do { \ 344 void *tmp = (x);\ 345 x = y; \ 346 y = tmp; \ 347 _NOTE(CONSTCOND)\ 348 } while (0) 349 350 #define L2ARC_DEV_HDR_MAGIC 0x5a46534341434845LLU /* ASCII: "ZFSCACHE" */ 351 #define L2ARC_LOG_BLK_MAGIC 0x4c4f47424c4b4844LLU /* ASCII: "LOGBLKHD" */ 352 353 /* 354 * L2ARC Internals 355 */ 356 typedef struct l2arc_dev { 357 vdev_t *l2ad_vdev; /* vdev */ 358 spa_t *l2ad_spa; /* spa */ 359 uint64_t l2ad_hand; /* next write location */ 360 uint64_t l2ad_start; /* first addr on device */ 361 uint64_t l2ad_end; /* last addr on device */ 362 boolean_t l2ad_first; /* first sweep through */ 363 boolean_t l2ad_writing; /* currently writing */ 364 kmutex_t l2ad_mtx; /* lock for buffer list */ 365 list_t l2ad_buflist; /* buffer list */ 366 list_node_t l2ad_node; /* device list node */ 367 zfs_refcount_t l2ad_alloc; /* allocated bytes */ 368 /* 369 * Persistence-related stuff 370 */ 371 l2arc_dev_hdr_phys_t *l2ad_dev_hdr; /* persistent device header */ 372 uint64_t l2ad_dev_hdr_asize; /* aligned hdr size */ 373 l2arc_log_blk_phys_t l2ad_log_blk; /* currently open log block */ 374 int l2ad_log_ent_idx; /* index into cur log blk */ 375 /* Number of bytes in current log block's payload */ 376 uint64_t l2ad_log_blk_payload_asize; 377 /* 378 * Offset (in bytes) of the first buffer in current log block's 379 * payload. 380 */ 381 uint64_t l2ad_log_blk_payload_start; 382 /* Flag indicating whether a rebuild is scheduled or is going on */ 383 boolean_t l2ad_rebuild; 384 boolean_t l2ad_rebuild_cancel; 385 boolean_t l2ad_rebuild_began; 386 uint64_t l2ad_log_entries; /* entries per log blk */ 387 uint64_t l2ad_evict; /* evicted offset in bytes */ 388 /* List of pointers to log blocks present in the L2ARC device */ 389 list_t l2ad_lbptr_list; 390 /* 391 * Aligned size of all log blocks as accounted by vdev_space_update(). 392 */ 393 zfs_refcount_t l2ad_lb_asize; 394 /* 395 * Number of log blocks present on the device. 396 */ 397 zfs_refcount_t l2ad_lb_count; 398 } l2arc_dev_t; 399 400 /* 401 * Encrypted blocks will need to be stored encrypted on the L2ARC 402 * disk as they appear in the main pool. In order for this to work we 403 * need to pass around the encryption parameters so they can be used 404 * to write data to the L2ARC. This struct is only defined in the 405 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED 406 * flag set. 407 */ 408 typedef struct arc_buf_hdr_crypt { 409 abd_t *b_rabd; /* raw encrypted data */ 410 dmu_object_type_t b_ot; /* object type */ 411 uint32_t b_ebufcnt; /* number or encryped buffers */ 412 413 /* dsobj for looking up encryption key for l2arc encryption */ 414 uint64_t b_dsobj; /* for looking up key */ 415 416 /* encryption parameters */ 417 uint8_t b_salt[ZIO_DATA_SALT_LEN]; 418 uint8_t b_iv[ZIO_DATA_IV_LEN]; 419 420 /* 421 * Technically this could be removed since we will always be able to 422 * get the mac from the bp when we need it. However, it is inconvenient 423 * for callers of arc code to have to pass a bp in all the time. This 424 * also allows us to assert that L2ARC data is properly encrypted to 425 * match the data in the main storage pool. 426 */ 427 uint8_t b_mac[ZIO_DATA_MAC_LEN]; 428 } arc_buf_hdr_crypt_t; 429 430 typedef struct l2arc_buf_hdr { 431 /* protected by arc_buf_hdr mutex */ 432 l2arc_dev_t *b_dev; /* L2ARC device */ 433 uint64_t b_daddr; /* disk address, offset byte */ 434 435 list_node_t b_l2node; 436 } l2arc_buf_hdr_t; 437 438 typedef struct l2arc_write_callback { 439 l2arc_dev_t *l2wcb_dev; /* device info */ 440 arc_buf_hdr_t *l2wcb_head; /* head of write buflist */ 441 /* in-flight list of log blocks */ 442 list_t l2wcb_abd_list; 443 } l2arc_write_callback_t; 444 445 struct arc_buf_hdr { 446 /* protected by hash lock */ 447 dva_t b_dva; 448 uint64_t b_birth; 449 450 arc_buf_contents_t b_type; 451 arc_buf_hdr_t *b_hash_next; 452 arc_flags_t b_flags; 453 454 /* 455 * This field stores the size of the data buffer after 456 * compression, and is set in the arc's zio completion handlers. 457 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes). 458 * 459 * While the block pointers can store up to 32MB in their psize 460 * field, we can only store up to 32MB minus 512B. This is due 461 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e. 462 * a field of zeros represents 512B in the bp). We can't use a 463 * bias of 1 since we need to reserve a psize of zero, here, to 464 * represent holes and embedded blocks. 465 * 466 * This isn't a problem in practice, since the maximum size of a 467 * buffer is limited to 16MB, so we never need to store 32MB in 468 * this field. 469 */ 470 uint16_t b_psize; 471 472 /* 473 * This field stores the size of the data buffer before 474 * compression, and cannot change once set. It is in units 475 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes) 476 */ 477 uint16_t b_lsize; /* immutable */ 478 uint64_t b_spa; /* immutable */ 479 480 /* L2ARC fields. Undefined when not in L2ARC. */ 481 l2arc_buf_hdr_t b_l2hdr; 482 /* L1ARC fields. Undefined when in l2arc_only state */ 483 l1arc_buf_hdr_t b_l1hdr; 484 /* 485 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED 486 * is set and the L1 header exists. 487 */ 488 arc_buf_hdr_crypt_t b_crypt_hdr; 489 }; 490 491 typedef struct arc_stats { 492 kstat_named_t arcstat_hits; 493 kstat_named_t arcstat_misses; 494 kstat_named_t arcstat_demand_data_hits; 495 kstat_named_t arcstat_demand_data_misses; 496 kstat_named_t arcstat_demand_metadata_hits; 497 kstat_named_t arcstat_demand_metadata_misses; 498 kstat_named_t arcstat_prefetch_data_hits; 499 kstat_named_t arcstat_prefetch_data_misses; 500 kstat_named_t arcstat_prefetch_metadata_hits; 501 kstat_named_t arcstat_prefetch_metadata_misses; 502 kstat_named_t arcstat_mru_hits; 503 kstat_named_t arcstat_mru_ghost_hits; 504 kstat_named_t arcstat_mfu_hits; 505 kstat_named_t arcstat_mfu_ghost_hits; 506 kstat_named_t arcstat_deleted; 507 /* 508 * Number of buffers that could not be evicted because the hash lock 509 * was held by another thread. The lock may not necessarily be held 510 * by something using the same buffer, since hash locks are shared 511 * by multiple buffers. 512 */ 513 kstat_named_t arcstat_mutex_miss; 514 /* 515 * Number of buffers skipped when updating the access state due to the 516 * header having already been released after acquiring the hash lock. 517 */ 518 kstat_named_t arcstat_access_skip; 519 /* 520 * Number of buffers skipped because they have I/O in progress, are 521 * indirect prefetch buffers that have not lived long enough, or are 522 * not from the spa we're trying to evict from. 523 */ 524 kstat_named_t arcstat_evict_skip; 525 /* 526 * Number of times arc_evict_state() was unable to evict enough 527 * buffers to reach its target amount. 528 */ 529 kstat_named_t arcstat_evict_not_enough; 530 kstat_named_t arcstat_evict_l2_cached; 531 kstat_named_t arcstat_evict_l2_eligible; 532 kstat_named_t arcstat_evict_l2_ineligible; 533 kstat_named_t arcstat_evict_l2_skip; 534 kstat_named_t arcstat_hash_elements; 535 kstat_named_t arcstat_hash_elements_max; 536 kstat_named_t arcstat_hash_collisions; 537 kstat_named_t arcstat_hash_chains; 538 kstat_named_t arcstat_hash_chain_max; 539 kstat_named_t arcstat_p; 540 kstat_named_t arcstat_c; 541 kstat_named_t arcstat_c_min; 542 kstat_named_t arcstat_c_max; 543 /* Not updated directly; only synced in arc_kstat_update. */ 544 kstat_named_t arcstat_size; 545 /* 546 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd. 547 * Note that the compressed bytes may match the uncompressed bytes 548 * if the block is either not compressed or compressed arc is disabled. 549 */ 550 kstat_named_t arcstat_compressed_size; 551 /* 552 * Uncompressed size of the data stored in b_pabd. If compressed 553 * arc is disabled then this value will be identical to the stat 554 * above. 555 */ 556 kstat_named_t arcstat_uncompressed_size; 557 /* 558 * Number of bytes stored in all the arc_buf_t's. This is classified 559 * as "overhead" since this data is typically short-lived and will 560 * be evicted from the arc when it becomes unreferenced unless the 561 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level 562 * values have been set (see comment in dbuf.c for more information). 563 */ 564 kstat_named_t arcstat_overhead_size; 565 /* 566 * Number of bytes consumed by internal ARC structures necessary 567 * for tracking purposes; these structures are not actually 568 * backed by ARC buffers. This includes arc_buf_hdr_t structures 569 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only 570 * caches), and arc_buf_t structures (allocated via arc_buf_t 571 * cache). 572 * Not updated directly; only synced in arc_kstat_update. 573 */ 574 kstat_named_t arcstat_hdr_size; 575 /* 576 * Number of bytes consumed by ARC buffers of type equal to 577 * ARC_BUFC_DATA. This is generally consumed by buffers backing 578 * on disk user data (e.g. plain file contents). 579 * Not updated directly; only synced in arc_kstat_update. 580 */ 581 kstat_named_t arcstat_data_size; 582 /* 583 * Number of bytes consumed by ARC buffers of type equal to 584 * ARC_BUFC_METADATA. This is generally consumed by buffers 585 * backing on disk data that is used for internal ZFS 586 * structures (e.g. ZAP, dnode, indirect blocks, etc). 587 * Not updated directly; only synced in arc_kstat_update. 588 */ 589 kstat_named_t arcstat_metadata_size; 590 /* 591 * Number of bytes consumed by various buffers and structures 592 * not actually backed with ARC buffers. This includes bonus 593 * buffers (allocated directly via zio_buf_* functions), 594 * dmu_buf_impl_t structures (allocated via dmu_buf_impl_t 595 * cache), and dnode_t structures (allocated via dnode_t cache). 596 * Not updated directly; only synced in arc_kstat_update. 597 */ 598 kstat_named_t arcstat_other_size; 599 /* 600 * Total number of bytes consumed by ARC buffers residing in the 601 * arc_anon state. This includes *all* buffers in the arc_anon 602 * state; e.g. data, metadata, evictable, and unevictable buffers 603 * are all included in this value. 604 * Not updated directly; only synced in arc_kstat_update. 605 */ 606 kstat_named_t arcstat_anon_size; 607 /* 608 * Number of bytes consumed by ARC buffers that meet the 609 * following criteria: backing buffers of type ARC_BUFC_DATA, 610 * residing in the arc_anon state, and are eligible for eviction 611 * (e.g. have no outstanding holds on the buffer). 612 * Not updated directly; only synced in arc_kstat_update. 613 */ 614 kstat_named_t arcstat_anon_evictable_data; 615 /* 616 * Number of bytes consumed by ARC buffers that meet the 617 * following criteria: backing buffers of type ARC_BUFC_METADATA, 618 * residing in the arc_anon state, and are eligible for eviction 619 * (e.g. have no outstanding holds on the buffer). 620 * Not updated directly; only synced in arc_kstat_update. 621 */ 622 kstat_named_t arcstat_anon_evictable_metadata; 623 /* 624 * Total number of bytes consumed by ARC buffers residing in the 625 * arc_mru state. This includes *all* buffers in the arc_mru 626 * state; e.g. data, metadata, evictable, and unevictable buffers 627 * are all included in this value. 628 * Not updated directly; only synced in arc_kstat_update. 629 */ 630 kstat_named_t arcstat_mru_size; 631 /* 632 * Number of bytes consumed by ARC buffers that meet the 633 * following criteria: backing buffers of type ARC_BUFC_DATA, 634 * residing in the arc_mru state, and are eligible for eviction 635 * (e.g. have no outstanding holds on the buffer). 636 * Not updated directly; only synced in arc_kstat_update. 637 */ 638 kstat_named_t arcstat_mru_evictable_data; 639 /* 640 * Number of bytes consumed by ARC buffers that meet the 641 * following criteria: backing buffers of type ARC_BUFC_METADATA, 642 * residing in the arc_mru state, and are eligible for eviction 643 * (e.g. have no outstanding holds on the buffer). 644 * Not updated directly; only synced in arc_kstat_update. 645 */ 646 kstat_named_t arcstat_mru_evictable_metadata; 647 /* 648 * Total number of bytes that *would have been* consumed by ARC 649 * buffers in the arc_mru_ghost state. The key thing to note 650 * here, is the fact that this size doesn't actually indicate 651 * RAM consumption. The ghost lists only consist of headers and 652 * don't actually have ARC buffers linked off of these headers. 653 * Thus, *if* the headers had associated ARC buffers, these 654 * buffers *would have* consumed this number of bytes. 655 * Not updated directly; only synced in arc_kstat_update. 656 */ 657 kstat_named_t arcstat_mru_ghost_size; 658 /* 659 * Number of bytes that *would have been* consumed by ARC 660 * buffers that are eligible for eviction, of type 661 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state. 662 * Not updated directly; only synced in arc_kstat_update. 663 */ 664 kstat_named_t arcstat_mru_ghost_evictable_data; 665 /* 666 * Number of bytes that *would have been* consumed by ARC 667 * buffers that are eligible for eviction, of type 668 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. 669 * Not updated directly; only synced in arc_kstat_update. 670 */ 671 kstat_named_t arcstat_mru_ghost_evictable_metadata; 672 /* 673 * Total number of bytes consumed by ARC buffers residing in the 674 * arc_mfu state. This includes *all* buffers in the arc_mfu 675 * state; e.g. data, metadata, evictable, and unevictable buffers 676 * are all included in this value. 677 * Not updated directly; only synced in arc_kstat_update. 678 */ 679 kstat_named_t arcstat_mfu_size; 680 /* 681 * Number of bytes consumed by ARC buffers that are eligible for 682 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu 683 * state. 684 * Not updated directly; only synced in arc_kstat_update. 685 */ 686 kstat_named_t arcstat_mfu_evictable_data; 687 /* 688 * Number of bytes consumed by ARC buffers that are eligible for 689 * eviction, of type ARC_BUFC_METADATA, and reside in the 690 * arc_mfu state. 691 * Not updated directly; only synced in arc_kstat_update. 692 */ 693 kstat_named_t arcstat_mfu_evictable_metadata; 694 /* 695 * Total number of bytes that *would have been* consumed by ARC 696 * buffers in the arc_mfu_ghost state. See the comment above 697 * arcstat_mru_ghost_size for more details. 698 * Not updated directly; only synced in arc_kstat_update. 699 */ 700 kstat_named_t arcstat_mfu_ghost_size; 701 /* 702 * Number of bytes that *would have been* consumed by ARC 703 * buffers that are eligible for eviction, of type 704 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state. 705 * Not updated directly; only synced in arc_kstat_update. 706 */ 707 kstat_named_t arcstat_mfu_ghost_evictable_data; 708 /* 709 * Number of bytes that *would have been* consumed by ARC 710 * buffers that are eligible for eviction, of type 711 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. 712 * Not updated directly; only synced in arc_kstat_update. 713 */ 714 kstat_named_t arcstat_mfu_ghost_evictable_metadata; 715 kstat_named_t arcstat_l2_hits; 716 kstat_named_t arcstat_l2_misses; 717 kstat_named_t arcstat_l2_feeds; 718 kstat_named_t arcstat_l2_rw_clash; 719 kstat_named_t arcstat_l2_read_bytes; 720 kstat_named_t arcstat_l2_write_bytes; 721 kstat_named_t arcstat_l2_writes_sent; 722 kstat_named_t arcstat_l2_writes_done; 723 kstat_named_t arcstat_l2_writes_error; 724 kstat_named_t arcstat_l2_writes_lock_retry; 725 kstat_named_t arcstat_l2_evict_lock_retry; 726 kstat_named_t arcstat_l2_evict_reading; 727 kstat_named_t arcstat_l2_evict_l1cached; 728 kstat_named_t arcstat_l2_free_on_write; 729 kstat_named_t arcstat_l2_abort_lowmem; 730 kstat_named_t arcstat_l2_cksum_bad; 731 kstat_named_t arcstat_l2_io_error; 732 kstat_named_t arcstat_l2_lsize; 733 kstat_named_t arcstat_l2_psize; 734 /* Not updated directly; only synced in arc_kstat_update. */ 735 kstat_named_t arcstat_l2_hdr_size; 736 /* 737 * Number of L2ARC log blocks written. These are used for restoring the 738 * L2ARC. Updated during writing of L2ARC log blocks. 739 */ 740 kstat_named_t arcstat_l2_log_blk_writes; 741 /* 742 * Moving average of the aligned size of the L2ARC log blocks, in 743 * bytes. Updated during L2ARC rebuild and during writing of L2ARC 744 * log blocks. 745 */ 746 kstat_named_t arcstat_l2_log_blk_avg_asize; 747 /* Aligned size of L2ARC log blocks on L2ARC devices. */ 748 kstat_named_t arcstat_l2_log_blk_asize; 749 /* Number of L2ARC log blocks present on L2ARC devices. */ 750 kstat_named_t arcstat_l2_log_blk_count; 751 /* 752 * Moving average of the aligned size of L2ARC restored data, in bytes, 753 * to the aligned size of their metadata in L2ARC, in bytes. 754 * Updated during L2ARC rebuild and during writing of L2ARC log blocks. 755 */ 756 kstat_named_t arcstat_l2_data_to_meta_ratio; 757 /* 758 * Number of times the L2ARC rebuild was successful for an L2ARC device. 759 */ 760 kstat_named_t arcstat_l2_rebuild_success; 761 /* 762 * Number of times the L2ARC rebuild failed because the device header 763 * was in an unsupported format or corrupted. 764 */ 765 kstat_named_t arcstat_l2_rebuild_abort_unsupported; 766 /* 767 * Number of times the L2ARC rebuild failed because of IO errors 768 * while reading a log block. 769 */ 770 kstat_named_t arcstat_l2_rebuild_abort_io_errors; 771 /* 772 * Number of times the L2ARC rebuild failed because of IO errors when 773 * reading the device header. 774 */ 775 kstat_named_t arcstat_l2_rebuild_abort_dh_errors; 776 /* 777 * Number of L2ARC log blocks which failed to be restored due to 778 * checksum errors. 779 */ 780 kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors; 781 /* 782 * Number of times the L2ARC rebuild was aborted due to low system 783 * memory. 784 */ 785 kstat_named_t arcstat_l2_rebuild_abort_lowmem; 786 /* Logical size of L2ARC restored data, in bytes. */ 787 kstat_named_t arcstat_l2_rebuild_size; 788 /* Aligned size of L2ARC restored data, in bytes. */ 789 kstat_named_t arcstat_l2_rebuild_asize; 790 /* 791 * Number of L2ARC log entries (buffers) that were successfully 792 * restored in ARC. 793 */ 794 kstat_named_t arcstat_l2_rebuild_bufs; 795 /* 796 * Number of L2ARC log entries (buffers) already cached in ARC. These 797 * were not restored again. 798 */ 799 kstat_named_t arcstat_l2_rebuild_bufs_precached; 800 /* 801 * Number of L2ARC log blocks that were restored successfully. Each 802 * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers. 803 */ 804 kstat_named_t arcstat_l2_rebuild_log_blks; 805 kstat_named_t arcstat_memory_throttle_count; 806 /* Not updated directly; only synced in arc_kstat_update. */ 807 kstat_named_t arcstat_meta_used; 808 kstat_named_t arcstat_meta_limit; 809 kstat_named_t arcstat_meta_max; 810 kstat_named_t arcstat_meta_min; 811 kstat_named_t arcstat_async_upgrade_sync; 812 kstat_named_t arcstat_demand_hit_predictive_prefetch; 813 kstat_named_t arcstat_demand_hit_prescient_prefetch; 814 } arc_stats_t; 815 816 #define ARCSTAT(stat) (arc_stats.stat.value.ui64) 817 818 #define ARCSTAT_INCR(stat, val) \ 819 atomic_add_64(&arc_stats.stat.value.ui64, (val)) 820 821 #define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) 822 #define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) 823 824 /* 825 * There are several ARC variables that are critical to export as kstats -- 826 * but we don't want to have to grovel around in the kstat whenever we wish to 827 * manipulate them. For these variables, we therefore define them to be in 828 * terms of the statistic variable. This assures that we are not introducing 829 * the possibility of inconsistency by having shadow copies of the variables, 830 * while still allowing the code to be readable. 831 */ 832 #define arc_p ARCSTAT(arcstat_p) /* target size of MRU */ 833 #define arc_c ARCSTAT(arcstat_c) /* target size of cache */ 834 #define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */ 835 #define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */ 836 #define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */ 837 #define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ 838 #define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */ 839 840 /* compressed size of entire arc */ 841 #define arc_compressed_size ARCSTAT(arcstat_compressed_size) 842 /* uncompressed size of entire arc */ 843 #define arc_uncompressed_size ARCSTAT(arcstat_uncompressed_size) 844 /* number of bytes in the arc from arc_buf_t's */ 845 #define arc_overhead_size ARCSTAT(arcstat_overhead_size) 846 847 extern arc_stats_t arc_stats; 848 849 /* used in zdb.c */ 850 boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev, 851 const l2arc_log_blkptr_t *lbp); 852 853 #ifdef __cplusplus 854 } 855 #endif 856 857 #endif /* _SYS_ARC_IMPL_H */ 858