1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2019, Joyent, Inc. 24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25 * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. 26 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 27 * Copyright (c) 2020, George Amanakis. All rights reserved. 28 */ 29 30 #ifndef _SYS_ARC_IMPL_H 31 #define _SYS_ARC_IMPL_H 32 33 #include <sys/arc.h> 34 #include <sys/multilist.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 /* 41 * Note that buffers can be in one of 6 states: 42 * ARC_anon - anonymous (discussed below) 43 * ARC_mru - recently used, currently cached 44 * ARC_mru_ghost - recently used, no longer in cache 45 * ARC_mfu - frequently used, currently cached 46 * ARC_mfu_ghost - frequently used, no longer in cache 47 * ARC_l2c_only - exists in L2ARC but not other states 48 * When there are no active references to the buffer, they are 49 * are linked onto a list in one of these arc states. These are 50 * the only buffers that can be evicted or deleted. Within each 51 * state there are multiple lists, one for meta-data and one for 52 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, 53 * etc.) is tracked separately so that it can be managed more 54 * explicitly: favored over data, limited explicitly. 55 * 56 * Anonymous buffers are buffers that are not associated with 57 * a DVA. These are buffers that hold dirty block copies 58 * before they are written to stable storage. By definition, 59 * they are "ref'd" and are considered part of arc_mru 60 * that cannot be freed. Generally, they will aquire a DVA 61 * as they are written and migrate onto the arc_mru list. 62 * 63 * The ARC_l2c_only state is for buffers that are in the second 64 * level ARC but no longer in any of the ARC_m* lists. The second 65 * level ARC itself may also contain buffers that are in any of 66 * the ARC_m* states - meaning that a buffer can exist in two 67 * places. The reason for the ARC_l2c_only state is to keep the 68 * buffer header in the hash table, so that reads that hit the 69 * second level ARC benefit from these fast lookups. 70 */ 71 72 typedef struct arc_state { 73 /* 74 * list of evictable buffers 75 */ 76 multilist_t *arcs_list[ARC_BUFC_NUMTYPES]; 77 /* 78 * total amount of evictable data in this state 79 */ 80 zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES]; 81 /* 82 * total amount of data in this state; this includes: evictable, 83 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. 84 */ 85 zfs_refcount_t arcs_size; 86 87 arc_state_type_t arcs_state; 88 } arc_state_t; 89 90 typedef struct arc_callback arc_callback_t; 91 92 struct arc_callback { 93 void *acb_private; 94 arc_read_done_func_t *acb_done; 95 arc_buf_t *acb_buf; 96 boolean_t acb_encrypted; 97 boolean_t acb_compressed; 98 boolean_t acb_noauth; 99 zbookmark_phys_t acb_zb; 100 zio_t *acb_zio_dummy; 101 zio_t *acb_zio_head; 102 arc_callback_t *acb_next; 103 }; 104 105 typedef struct arc_write_callback arc_write_callback_t; 106 107 struct arc_write_callback { 108 void *awcb_private; 109 arc_write_done_func_t *awcb_ready; 110 arc_write_done_func_t *awcb_children_ready; 111 arc_write_done_func_t *awcb_physdone; 112 arc_write_done_func_t *awcb_done; 113 arc_buf_t *awcb_buf; 114 }; 115 116 /* 117 * ARC buffers are separated into multiple structs as a memory saving measure: 118 * - Common fields struct, always defined, and embedded within it: 119 * - L2-only fields, always allocated but undefined when not in L2ARC 120 * - L1-only fields, only allocated when in L1ARC 121 * 122 * Buffer in L1 Buffer only in L2 123 * +------------------------+ +------------------------+ 124 * | arc_buf_hdr_t | | arc_buf_hdr_t | 125 * | | | | 126 * | | | | 127 * | | | | 128 * +------------------------+ +------------------------+ 129 * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t | 130 * | (undefined if L1-only) | | | 131 * +------------------------+ +------------------------+ 132 * | l1arc_buf_hdr_t | 133 * | | 134 * | | 135 * | | 136 * | | 137 * +------------------------+ 138 * 139 * Because it's possible for the L2ARC to become extremely large, we can wind 140 * up eating a lot of memory in L2ARC buffer headers, so the size of a header 141 * is minimized by only allocating the fields necessary for an L1-cached buffer 142 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and 143 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple 144 * words in pointers. arc_hdr_realloc() is used to switch a header between 145 * these two allocation states. 146 */ 147 typedef struct l1arc_buf_hdr { 148 kmutex_t b_freeze_lock; 149 zio_cksum_t *b_freeze_cksum; 150 #ifdef ZFS_DEBUG 151 /* 152 * Used for debugging with kmem_flags - by allocating and freeing 153 * b_thawed when the buffer is thawed, we get a record of the stack 154 * trace that thawed it. 155 */ 156 void *b_thawed; 157 #endif 158 159 arc_buf_t *b_buf; 160 uint32_t b_bufcnt; 161 /* for waiting on writes to complete */ 162 kcondvar_t b_cv; 163 uint8_t b_byteswap; 164 165 /* protected by arc state mutex */ 166 arc_state_t *b_state; 167 multilist_node_t b_arc_node; 168 169 /* updated atomically */ 170 clock_t b_arc_access; 171 172 /* self protecting */ 173 zfs_refcount_t b_refcnt; 174 175 arc_callback_t *b_acb; 176 abd_t *b_pabd; 177 } l1arc_buf_hdr_t; 178 179 typedef enum l2arc_dev_hdr_flags_t { 180 L2ARC_DEV_HDR_EVICT_FIRST = (1 << 0) /* mirror of l2ad_first */ 181 } l2arc_dev_hdr_flags_t; 182 183 /* 184 * Pointer used in persistent L2ARC (for pointing to log blocks). 185 */ 186 typedef struct l2arc_log_blkptr { 187 /* 188 * Offset of log block within the device, in bytes 189 */ 190 uint64_t lbp_daddr; 191 /* 192 * Aligned payload size (in bytes) of the log block 193 */ 194 uint64_t lbp_payload_asize; 195 /* 196 * Offset in bytes of the first buffer in the payload 197 */ 198 uint64_t lbp_payload_start; 199 /* 200 * lbp_prop has the following format: 201 * * logical size (in bytes) 202 * * aligned (after compression) size (in bytes) 203 * * compression algorithm (we always LZ4-compress l2arc logs) 204 * * checksum algorithm (used for lbp_cksum) 205 */ 206 uint64_t lbp_prop; 207 zio_cksum_t lbp_cksum; /* checksum of log */ 208 } l2arc_log_blkptr_t; 209 210 /* 211 * The persistent L2ARC device header. 212 * Byte order of magic determines whether 64-bit bswap of fields is necessary. 213 */ 214 typedef struct l2arc_dev_hdr_phys { 215 uint64_t dh_magic; /* L2ARC_DEV_HDR_MAGIC */ 216 uint64_t dh_version; /* Persistent L2ARC version */ 217 218 /* 219 * Global L2ARC device state and metadata. 220 */ 221 uint64_t dh_spa_guid; 222 uint64_t dh_vdev_guid; 223 uint64_t dh_log_entries; /* mirror of l2ad_log_entries */ 224 uint64_t dh_evict; /* evicted offset in bytes */ 225 uint64_t dh_flags; /* l2arc_dev_hdr_flags_t */ 226 /* 227 * Used in zdb.c for determining if a log block is valid, in the same 228 * way that l2arc_rebuild() does. 229 */ 230 uint64_t dh_start; /* mirror of l2ad_start */ 231 uint64_t dh_end; /* mirror of l2ad_end */ 232 /* 233 * Start of log block chain. [0] -> newest log, [1] -> one older (used 234 * for initiating prefetch). 235 */ 236 l2arc_log_blkptr_t dh_start_lbps[2]; 237 /* 238 * Aligned size of all log blocks as accounted by vdev_space_update(). 239 */ 240 uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */ 241 uint64_t dh_lb_count; /* mirror of l2ad_lb_count */ 242 const uint64_t dh_pad[32]; /* pad to 512 bytes */ 243 zio_eck_t dh_tail; 244 } l2arc_dev_hdr_phys_t; 245 CTASSERT(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE); 246 247 /* 248 * A single ARC buffer header entry in a l2arc_log_blk_phys_t. 249 */ 250 typedef struct l2arc_log_ent_phys { 251 dva_t le_dva; /* dva of buffer */ 252 uint64_t le_birth; /* birth txg of buffer */ 253 /* 254 * le_prop has the following format: 255 * * logical size (in bytes) 256 * * physical (compressed) size (in bytes) 257 * * compression algorithm 258 * * object type (used to restore arc_buf_contents_t) 259 * * protected status (used for encryption) 260 * * prefetch status (used in l2arc_read_done()) 261 */ 262 uint64_t le_prop; 263 uint64_t le_daddr; /* buf location on l2dev */ 264 /* 265 * We pad the size of each entry to a power of 2 so that the size of 266 * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT, 267 * because of the L2ARC_SET_*SIZE macros. 268 */ 269 const uint64_t le_pad[3]; /* pad to 64 bytes */ 270 } l2arc_log_ent_phys_t; 271 272 #define L2ARC_LOG_BLK_MAX_ENTRIES (1022) 273 274 /* 275 * A log block of up to 1022 ARC buffer log entries, chained into the 276 * persistent L2ARC metadata linked list. Byte order of magic determines 277 * whether 64-bit bswap of fields is necessary. 278 */ 279 typedef struct l2arc_log_blk_phys { 280 uint64_t lb_magic; /* L2ARC_LOG_BLK_MAGIC */ 281 /* 282 * There are 2 chains (headed by dh_start_lbps[2]), and this field 283 * points back to the previous block in this chain. We alternate 284 * which chain we append to, so they are time-wise and offset-wise 285 * interleaved, but that is an optimization rather than for 286 * correctness. 287 */ 288 l2arc_log_blkptr_t lb_prev_lbp; /* pointer to prev log block */ 289 /* 290 * Pad header section to 128 bytes 291 */ 292 uint64_t lb_pad[7]; 293 /* Payload */ 294 l2arc_log_ent_phys_t lb_entries[L2ARC_LOG_BLK_MAX_ENTRIES]; 295 } l2arc_log_blk_phys_t; /* 64K total */ 296 /* 297 * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with 298 * SPA_MINBLOCKSHIFT because of L2BLK_SET_*SIZE macros. 299 */ 300 CTASSERT(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t), 301 1ULL << SPA_MINBLOCKSHIFT)); 302 CTASSERT(sizeof (l2arc_log_blk_phys_t) >= SPA_MINBLOCKSIZE); 303 CTASSERT(sizeof (l2arc_log_blk_phys_t) <= SPA_MAXBLOCKSIZE); 304 305 /* 306 * These structures hold in-flight abd buffers for log blocks as they're being 307 * written to the L2ARC device. 308 */ 309 typedef struct l2arc_lb_abd_buf { 310 abd_t *abd; 311 list_node_t node; 312 } l2arc_lb_abd_buf_t; 313 314 /* 315 * These structures hold pointers to log blocks present on the L2ARC device. 316 */ 317 typedef struct l2arc_lb_ptr_buf { 318 l2arc_log_blkptr_t *lb_ptr; 319 list_node_t node; 320 } l2arc_lb_ptr_buf_t; 321 322 /* Macros for setting fields in le_prop and lbp_prop */ 323 #define L2BLK_GET_LSIZE(field) \ 324 BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1) 325 #define L2BLK_SET_LSIZE(field, x) \ 326 BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) 327 #define L2BLK_GET_PSIZE(field) \ 328 BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1) 329 #define L2BLK_SET_PSIZE(field, x) \ 330 BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) 331 #define L2BLK_GET_COMPRESS(field) \ 332 BF64_GET((field), 32, SPA_COMPRESSBITS) 333 #define L2BLK_SET_COMPRESS(field, x) \ 334 BF64_SET((field), 32, SPA_COMPRESSBITS, x) 335 #define L2BLK_GET_PREFETCH(field) BF64_GET((field), 39, 1) 336 #define L2BLK_SET_PREFETCH(field, x) BF64_SET((field), 39, 1, x) 337 #define L2BLK_GET_CHECKSUM(field) BF64_GET((field), 40, 8) 338 #define L2BLK_SET_CHECKSUM(field, x) BF64_SET((field), 40, 8, x) 339 #define L2BLK_GET_TYPE(field) BF64_GET((field), 48, 8) 340 #define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, x) 341 #define L2BLK_GET_PROTECTED(field) BF64_GET((field), 56, 1) 342 #define L2BLK_SET_PROTECTED(field, x) BF64_SET((field), 56, 1, x) 343 #define L2BLK_GET_STATE(field) BF64_GET((field), 57, 4) 344 #define L2BLK_SET_STATE(field, x) BF64_SET((field), 57, 4, x) 345 346 #define PTR_SWAP(x, y) \ 347 do { \ 348 void *tmp = (x);\ 349 x = y; \ 350 y = tmp; \ 351 _NOTE(CONSTCOND)\ 352 } while (0) 353 354 #define L2ARC_DEV_HDR_MAGIC 0x5a46534341434845LLU /* ASCII: "ZFSCACHE" */ 355 #define L2ARC_LOG_BLK_MAGIC 0x4c4f47424c4b4844LLU /* ASCII: "LOGBLKHD" */ 356 357 /* 358 * L2ARC Internals 359 */ 360 typedef struct l2arc_dev { 361 vdev_t *l2ad_vdev; /* vdev */ 362 spa_t *l2ad_spa; /* spa */ 363 uint64_t l2ad_hand; /* next write location */ 364 uint64_t l2ad_start; /* first addr on device */ 365 uint64_t l2ad_end; /* last addr on device */ 366 boolean_t l2ad_first; /* first sweep through */ 367 boolean_t l2ad_writing; /* currently writing */ 368 kmutex_t l2ad_mtx; /* lock for buffer list */ 369 list_t l2ad_buflist; /* buffer list */ 370 list_node_t l2ad_node; /* device list node */ 371 zfs_refcount_t l2ad_alloc; /* allocated bytes */ 372 /* 373 * Persistence-related stuff 374 */ 375 l2arc_dev_hdr_phys_t *l2ad_dev_hdr; /* persistent device header */ 376 uint64_t l2ad_dev_hdr_asize; /* aligned hdr size */ 377 l2arc_log_blk_phys_t l2ad_log_blk; /* currently open log block */ 378 int l2ad_log_ent_idx; /* index into cur log blk */ 379 /* Number of bytes in current log block's payload */ 380 uint64_t l2ad_log_blk_payload_asize; 381 /* 382 * Offset (in bytes) of the first buffer in current log block's 383 * payload. 384 */ 385 uint64_t l2ad_log_blk_payload_start; 386 /* Flag indicating whether a rebuild is scheduled or is going on */ 387 boolean_t l2ad_rebuild; 388 boolean_t l2ad_rebuild_cancel; 389 boolean_t l2ad_rebuild_began; 390 uint64_t l2ad_log_entries; /* entries per log blk */ 391 uint64_t l2ad_evict; /* evicted offset in bytes */ 392 /* List of pointers to log blocks present in the L2ARC device */ 393 list_t l2ad_lbptr_list; 394 /* 395 * Aligned size of all log blocks as accounted by vdev_space_update(). 396 */ 397 zfs_refcount_t l2ad_lb_asize; 398 /* 399 * Number of log blocks present on the device. 400 */ 401 zfs_refcount_t l2ad_lb_count; 402 } l2arc_dev_t; 403 404 /* 405 * Encrypted blocks will need to be stored encrypted on the L2ARC 406 * disk as they appear in the main pool. In order for this to work we 407 * need to pass around the encryption parameters so they can be used 408 * to write data to the L2ARC. This struct is only defined in the 409 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED 410 * flag set. 411 */ 412 typedef struct arc_buf_hdr_crypt { 413 abd_t *b_rabd; /* raw encrypted data */ 414 dmu_object_type_t b_ot; /* object type */ 415 uint32_t b_ebufcnt; /* number or encryped buffers */ 416 417 /* dsobj for looking up encryption key for l2arc encryption */ 418 uint64_t b_dsobj; /* for looking up key */ 419 420 /* encryption parameters */ 421 uint8_t b_salt[ZIO_DATA_SALT_LEN]; 422 uint8_t b_iv[ZIO_DATA_IV_LEN]; 423 424 /* 425 * Technically this could be removed since we will always be able to 426 * get the mac from the bp when we need it. However, it is inconvenient 427 * for callers of arc code to have to pass a bp in all the time. This 428 * also allows us to assert that L2ARC data is properly encrypted to 429 * match the data in the main storage pool. 430 */ 431 uint8_t b_mac[ZIO_DATA_MAC_LEN]; 432 } arc_buf_hdr_crypt_t; 433 434 typedef struct l2arc_buf_hdr { 435 /* protected by arc_buf_hdr mutex */ 436 l2arc_dev_t *b_dev; /* L2ARC device */ 437 uint64_t b_daddr; /* disk address, offset byte */ 438 439 arc_state_type_t b_arcs_state; 440 list_node_t b_l2node; 441 } l2arc_buf_hdr_t; 442 443 typedef struct l2arc_write_callback { 444 l2arc_dev_t *l2wcb_dev; /* device info */ 445 arc_buf_hdr_t *l2wcb_head; /* head of write buflist */ 446 /* in-flight list of log blocks */ 447 list_t l2wcb_abd_list; 448 } l2arc_write_callback_t; 449 450 struct arc_buf_hdr { 451 /* protected by hash lock */ 452 dva_t b_dva; 453 uint64_t b_birth; 454 455 arc_buf_contents_t b_type; 456 arc_buf_hdr_t *b_hash_next; 457 arc_flags_t b_flags; 458 459 /* 460 * This field stores the size of the data buffer after 461 * compression, and is set in the arc's zio completion handlers. 462 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes). 463 * 464 * While the block pointers can store up to 32MB in their psize 465 * field, we can only store up to 32MB minus 512B. This is due 466 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e. 467 * a field of zeros represents 512B in the bp). We can't use a 468 * bias of 1 since we need to reserve a psize of zero, here, to 469 * represent holes and embedded blocks. 470 * 471 * This isn't a problem in practice, since the maximum size of a 472 * buffer is limited to 16MB, so we never need to store 32MB in 473 * this field. 474 */ 475 uint16_t b_psize; 476 477 /* 478 * This field stores the size of the data buffer before 479 * compression, and cannot change once set. It is in units 480 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes) 481 */ 482 uint16_t b_lsize; /* immutable */ 483 uint64_t b_spa; /* immutable */ 484 485 /* L2ARC fields. Undefined when not in L2ARC. */ 486 l2arc_buf_hdr_t b_l2hdr; 487 /* L1ARC fields. Undefined when in l2arc_only state */ 488 l1arc_buf_hdr_t b_l1hdr; 489 /* 490 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED 491 * is set and the L1 header exists. 492 */ 493 arc_buf_hdr_crypt_t b_crypt_hdr; 494 }; 495 496 typedef struct arc_stats { 497 kstat_named_t arcstat_hits; 498 kstat_named_t arcstat_misses; 499 kstat_named_t arcstat_demand_data_hits; 500 kstat_named_t arcstat_demand_data_misses; 501 kstat_named_t arcstat_demand_metadata_hits; 502 kstat_named_t arcstat_demand_metadata_misses; 503 kstat_named_t arcstat_prefetch_data_hits; 504 kstat_named_t arcstat_prefetch_data_misses; 505 kstat_named_t arcstat_prefetch_metadata_hits; 506 kstat_named_t arcstat_prefetch_metadata_misses; 507 kstat_named_t arcstat_mru_hits; 508 kstat_named_t arcstat_mru_ghost_hits; 509 kstat_named_t arcstat_mfu_hits; 510 kstat_named_t arcstat_mfu_ghost_hits; 511 kstat_named_t arcstat_deleted; 512 /* 513 * Number of buffers that could not be evicted because the hash lock 514 * was held by another thread. The lock may not necessarily be held 515 * by something using the same buffer, since hash locks are shared 516 * by multiple buffers. 517 */ 518 kstat_named_t arcstat_mutex_miss; 519 /* 520 * Number of buffers skipped when updating the access state due to the 521 * header having already been released after acquiring the hash lock. 522 */ 523 kstat_named_t arcstat_access_skip; 524 /* 525 * Number of buffers skipped because they have I/O in progress, are 526 * indirect prefetch buffers that have not lived long enough, or are 527 * not from the spa we're trying to evict from. 528 */ 529 kstat_named_t arcstat_evict_skip; 530 /* 531 * Number of times arc_evict_state() was unable to evict enough 532 * buffers to reach its target amount. 533 */ 534 kstat_named_t arcstat_evict_not_enough; 535 kstat_named_t arcstat_evict_l2_cached; 536 kstat_named_t arcstat_evict_l2_eligible; 537 kstat_named_t arcstat_evict_l2_eligible_mfu; 538 kstat_named_t arcstat_evict_l2_eligible_mru; 539 kstat_named_t arcstat_evict_l2_ineligible; 540 kstat_named_t arcstat_evict_l2_skip; 541 kstat_named_t arcstat_hash_elements; 542 kstat_named_t arcstat_hash_elements_max; 543 kstat_named_t arcstat_hash_collisions; 544 kstat_named_t arcstat_hash_chains; 545 kstat_named_t arcstat_hash_chain_max; 546 kstat_named_t arcstat_p; 547 kstat_named_t arcstat_c; 548 kstat_named_t arcstat_c_min; 549 kstat_named_t arcstat_c_max; 550 /* Not updated directly; only synced in arc_kstat_update. */ 551 kstat_named_t arcstat_size; 552 /* 553 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd. 554 * Note that the compressed bytes may match the uncompressed bytes 555 * if the block is either not compressed or compressed arc is disabled. 556 */ 557 kstat_named_t arcstat_compressed_size; 558 /* 559 * Uncompressed size of the data stored in b_pabd. If compressed 560 * arc is disabled then this value will be identical to the stat 561 * above. 562 */ 563 kstat_named_t arcstat_uncompressed_size; 564 /* 565 * Number of bytes stored in all the arc_buf_t's. This is classified 566 * as "overhead" since this data is typically short-lived and will 567 * be evicted from the arc when it becomes unreferenced unless the 568 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level 569 * values have been set (see comment in dbuf.c for more information). 570 */ 571 kstat_named_t arcstat_overhead_size; 572 /* 573 * Number of bytes consumed by internal ARC structures necessary 574 * for tracking purposes; these structures are not actually 575 * backed by ARC buffers. This includes arc_buf_hdr_t structures 576 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only 577 * caches), and arc_buf_t structures (allocated via arc_buf_t 578 * cache). 579 * Not updated directly; only synced in arc_kstat_update. 580 */ 581 kstat_named_t arcstat_hdr_size; 582 /* 583 * Number of bytes consumed by ARC buffers of type equal to 584 * ARC_BUFC_DATA. This is generally consumed by buffers backing 585 * on disk user data (e.g. plain file contents). 586 * Not updated directly; only synced in arc_kstat_update. 587 */ 588 kstat_named_t arcstat_data_size; 589 /* 590 * Number of bytes consumed by ARC buffers of type equal to 591 * ARC_BUFC_METADATA. This is generally consumed by buffers 592 * backing on disk data that is used for internal ZFS 593 * structures (e.g. ZAP, dnode, indirect blocks, etc). 594 * Not updated directly; only synced in arc_kstat_update. 595 */ 596 kstat_named_t arcstat_metadata_size; 597 /* 598 * Number of bytes consumed by various buffers and structures 599 * not actually backed with ARC buffers. This includes bonus 600 * buffers (allocated directly via zio_buf_* functions), 601 * dmu_buf_impl_t structures (allocated via dmu_buf_impl_t 602 * cache), and dnode_t structures (allocated via dnode_t cache). 603 * Not updated directly; only synced in arc_kstat_update. 604 */ 605 kstat_named_t arcstat_other_size; 606 /* 607 * Total number of bytes consumed by ARC buffers residing in the 608 * arc_anon state. This includes *all* buffers in the arc_anon 609 * state; e.g. data, metadata, evictable, and unevictable buffers 610 * are all included in this value. 611 * Not updated directly; only synced in arc_kstat_update. 612 */ 613 kstat_named_t arcstat_anon_size; 614 /* 615 * Number of bytes consumed by ARC buffers that meet the 616 * following criteria: backing buffers of type ARC_BUFC_DATA, 617 * residing in the arc_anon state, and are eligible for eviction 618 * (e.g. have no outstanding holds on the buffer). 619 * Not updated directly; only synced in arc_kstat_update. 620 */ 621 kstat_named_t arcstat_anon_evictable_data; 622 /* 623 * Number of bytes consumed by ARC buffers that meet the 624 * following criteria: backing buffers of type ARC_BUFC_METADATA, 625 * residing in the arc_anon state, and are eligible for eviction 626 * (e.g. have no outstanding holds on the buffer). 627 * Not updated directly; only synced in arc_kstat_update. 628 */ 629 kstat_named_t arcstat_anon_evictable_metadata; 630 /* 631 * Total number of bytes consumed by ARC buffers residing in the 632 * arc_mru state. This includes *all* buffers in the arc_mru 633 * state; e.g. data, metadata, evictable, and unevictable buffers 634 * are all included in this value. 635 * Not updated directly; only synced in arc_kstat_update. 636 */ 637 kstat_named_t arcstat_mru_size; 638 /* 639 * Number of bytes consumed by ARC buffers that meet the 640 * following criteria: backing buffers of type ARC_BUFC_DATA, 641 * residing in the arc_mru state, and are eligible for eviction 642 * (e.g. have no outstanding holds on the buffer). 643 * Not updated directly; only synced in arc_kstat_update. 644 */ 645 kstat_named_t arcstat_mru_evictable_data; 646 /* 647 * Number of bytes consumed by ARC buffers that meet the 648 * following criteria: backing buffers of type ARC_BUFC_METADATA, 649 * residing in the arc_mru state, and are eligible for eviction 650 * (e.g. have no outstanding holds on the buffer). 651 * Not updated directly; only synced in arc_kstat_update. 652 */ 653 kstat_named_t arcstat_mru_evictable_metadata; 654 /* 655 * Total number of bytes that *would have been* consumed by ARC 656 * buffers in the arc_mru_ghost state. The key thing to note 657 * here, is the fact that this size doesn't actually indicate 658 * RAM consumption. The ghost lists only consist of headers and 659 * don't actually have ARC buffers linked off of these headers. 660 * Thus, *if* the headers had associated ARC buffers, these 661 * buffers *would have* consumed this number of bytes. 662 * Not updated directly; only synced in arc_kstat_update. 663 */ 664 kstat_named_t arcstat_mru_ghost_size; 665 /* 666 * Number of bytes that *would have been* consumed by ARC 667 * buffers that are eligible for eviction, of type 668 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state. 669 * Not updated directly; only synced in arc_kstat_update. 670 */ 671 kstat_named_t arcstat_mru_ghost_evictable_data; 672 /* 673 * Number of bytes that *would have been* consumed by ARC 674 * buffers that are eligible for eviction, of type 675 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. 676 * Not updated directly; only synced in arc_kstat_update. 677 */ 678 kstat_named_t arcstat_mru_ghost_evictable_metadata; 679 /* 680 * Total number of bytes consumed by ARC buffers residing in the 681 * arc_mfu state. This includes *all* buffers in the arc_mfu 682 * state; e.g. data, metadata, evictable, and unevictable buffers 683 * are all included in this value. 684 * Not updated directly; only synced in arc_kstat_update. 685 */ 686 kstat_named_t arcstat_mfu_size; 687 /* 688 * Number of bytes consumed by ARC buffers that are eligible for 689 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu 690 * state. 691 * Not updated directly; only synced in arc_kstat_update. 692 */ 693 kstat_named_t arcstat_mfu_evictable_data; 694 /* 695 * Number of bytes consumed by ARC buffers that are eligible for 696 * eviction, of type ARC_BUFC_METADATA, and reside in the 697 * arc_mfu state. 698 * Not updated directly; only synced in arc_kstat_update. 699 */ 700 kstat_named_t arcstat_mfu_evictable_metadata; 701 /* 702 * Total number of bytes that *would have been* consumed by ARC 703 * buffers in the arc_mfu_ghost state. See the comment above 704 * arcstat_mru_ghost_size for more details. 705 * Not updated directly; only synced in arc_kstat_update. 706 */ 707 kstat_named_t arcstat_mfu_ghost_size; 708 /* 709 * Number of bytes that *would have been* consumed by ARC 710 * buffers that are eligible for eviction, of type 711 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state. 712 * Not updated directly; only synced in arc_kstat_update. 713 */ 714 kstat_named_t arcstat_mfu_ghost_evictable_data; 715 /* 716 * Number of bytes that *would have been* consumed by ARC 717 * buffers that are eligible for eviction, of type 718 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. 719 * Not updated directly; only synced in arc_kstat_update. 720 */ 721 kstat_named_t arcstat_mfu_ghost_evictable_metadata; 722 kstat_named_t arcstat_l2_hits; 723 kstat_named_t arcstat_l2_misses; 724 /* 725 * Allocated size (in bytes) of L2ARC cached buffers by ARC state. 726 */ 727 kstat_named_t arcstat_l2_prefetch_asize; 728 kstat_named_t arcstat_l2_mru_asize; 729 kstat_named_t arcstat_l2_mfu_asize; 730 /* 731 * Allocated size (in bytes) of L2ARC cached buffers by buffer content 732 * type. 733 */ 734 kstat_named_t arcstat_l2_bufc_data_asize; 735 kstat_named_t arcstat_l2_bufc_metadata_asize; 736 kstat_named_t arcstat_l2_feeds; 737 kstat_named_t arcstat_l2_rw_clash; 738 kstat_named_t arcstat_l2_read_bytes; 739 kstat_named_t arcstat_l2_write_bytes; 740 kstat_named_t arcstat_l2_writes_sent; 741 kstat_named_t arcstat_l2_writes_done; 742 kstat_named_t arcstat_l2_writes_error; 743 kstat_named_t arcstat_l2_writes_lock_retry; 744 kstat_named_t arcstat_l2_evict_lock_retry; 745 kstat_named_t arcstat_l2_evict_reading; 746 kstat_named_t arcstat_l2_evict_l1cached; 747 kstat_named_t arcstat_l2_free_on_write; 748 kstat_named_t arcstat_l2_abort_lowmem; 749 kstat_named_t arcstat_l2_cksum_bad; 750 kstat_named_t arcstat_l2_io_error; 751 kstat_named_t arcstat_l2_lsize; 752 kstat_named_t arcstat_l2_psize; 753 /* Not updated directly; only synced in arc_kstat_update. */ 754 kstat_named_t arcstat_l2_hdr_size; 755 /* 756 * Number of L2ARC log blocks written. These are used for restoring the 757 * L2ARC. Updated during writing of L2ARC log blocks. 758 */ 759 kstat_named_t arcstat_l2_log_blk_writes; 760 /* 761 * Moving average of the aligned size of the L2ARC log blocks, in 762 * bytes. Updated during L2ARC rebuild and during writing of L2ARC 763 * log blocks. 764 */ 765 kstat_named_t arcstat_l2_log_blk_avg_asize; 766 /* Aligned size of L2ARC log blocks on L2ARC devices. */ 767 kstat_named_t arcstat_l2_log_blk_asize; 768 /* Number of L2ARC log blocks present on L2ARC devices. */ 769 kstat_named_t arcstat_l2_log_blk_count; 770 /* 771 * Moving average of the aligned size of L2ARC restored data, in bytes, 772 * to the aligned size of their metadata in L2ARC, in bytes. 773 * Updated during L2ARC rebuild and during writing of L2ARC log blocks. 774 */ 775 kstat_named_t arcstat_l2_data_to_meta_ratio; 776 /* 777 * Number of times the L2ARC rebuild was successful for an L2ARC device. 778 */ 779 kstat_named_t arcstat_l2_rebuild_success; 780 /* 781 * Number of times the L2ARC rebuild failed because the device header 782 * was in an unsupported format or corrupted. 783 */ 784 kstat_named_t arcstat_l2_rebuild_abort_unsupported; 785 /* 786 * Number of times the L2ARC rebuild failed because of IO errors 787 * while reading a log block. 788 */ 789 kstat_named_t arcstat_l2_rebuild_abort_io_errors; 790 /* 791 * Number of times the L2ARC rebuild failed because of IO errors when 792 * reading the device header. 793 */ 794 kstat_named_t arcstat_l2_rebuild_abort_dh_errors; 795 /* 796 * Number of L2ARC log blocks which failed to be restored due to 797 * checksum errors. 798 */ 799 kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors; 800 /* 801 * Number of times the L2ARC rebuild was aborted due to low system 802 * memory. 803 */ 804 kstat_named_t arcstat_l2_rebuild_abort_lowmem; 805 /* Logical size of L2ARC restored data, in bytes. */ 806 kstat_named_t arcstat_l2_rebuild_size; 807 /* Aligned size of L2ARC restored data, in bytes. */ 808 kstat_named_t arcstat_l2_rebuild_asize; 809 /* 810 * Number of L2ARC log entries (buffers) that were successfully 811 * restored in ARC. 812 */ 813 kstat_named_t arcstat_l2_rebuild_bufs; 814 /* 815 * Number of L2ARC log entries (buffers) already cached in ARC. These 816 * were not restored again. 817 */ 818 kstat_named_t arcstat_l2_rebuild_bufs_precached; 819 /* 820 * Number of L2ARC log blocks that were restored successfully. Each 821 * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers. 822 */ 823 kstat_named_t arcstat_l2_rebuild_log_blks; 824 kstat_named_t arcstat_memory_throttle_count; 825 /* Not updated directly; only synced in arc_kstat_update. */ 826 kstat_named_t arcstat_meta_used; 827 kstat_named_t arcstat_meta_limit; 828 kstat_named_t arcstat_meta_max; 829 kstat_named_t arcstat_meta_min; 830 kstat_named_t arcstat_async_upgrade_sync; 831 kstat_named_t arcstat_demand_hit_predictive_prefetch; 832 kstat_named_t arcstat_demand_hit_prescient_prefetch; 833 } arc_stats_t; 834 835 #define ARCSTAT(stat) (arc_stats.stat.value.ui64) 836 837 #define ARCSTAT_INCR(stat, val) \ 838 atomic_add_64(&arc_stats.stat.value.ui64, (val)) 839 840 #define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) 841 #define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) 842 843 /* 844 * There are several ARC variables that are critical to export as kstats -- 845 * but we don't want to have to grovel around in the kstat whenever we wish to 846 * manipulate them. For these variables, we therefore define them to be in 847 * terms of the statistic variable. This assures that we are not introducing 848 * the possibility of inconsistency by having shadow copies of the variables, 849 * while still allowing the code to be readable. 850 */ 851 #define arc_p ARCSTAT(arcstat_p) /* target size of MRU */ 852 #define arc_c ARCSTAT(arcstat_c) /* target size of cache */ 853 #define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */ 854 #define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */ 855 #define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */ 856 #define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ 857 #define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */ 858 859 /* compressed size of entire arc */ 860 #define arc_compressed_size ARCSTAT(arcstat_compressed_size) 861 /* uncompressed size of entire arc */ 862 #define arc_uncompressed_size ARCSTAT(arcstat_uncompressed_size) 863 /* number of bytes in the arc from arc_buf_t's */ 864 #define arc_overhead_size ARCSTAT(arcstat_overhead_size) 865 866 extern arc_stats_t arc_stats; 867 868 /* used in zdb.c */ 869 boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev, 870 const l2arc_log_blkptr_t *lbp); 871 872 #ifdef __cplusplus 873 } 874 #endif 875 876 #endif /* _SYS_ARC_IMPL_H */ 877