1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 25 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 26 * Copyright (c) 2019, Allan Jude 27 * Copyright (c) 2019, Klara Inc. 28 */ 29 30 #ifndef _SYS_ARC_H 31 #define _SYS_ARC_H 32 33 #include <sys/zfs_context.h> 34 35 #ifdef __cplusplus 36 extern "C" { 37 #endif 38 39 #include <sys/zio.h> 40 #include <sys/dmu.h> 41 #include <sys/spa.h> 42 #include <sys/zfs_refcount.h> 43 44 /* 45 * Used by arc_flush() to inform arc_evict_state() that it should evict 46 * all available buffers from the arc state being passed in. 47 */ 48 #define ARC_EVICT_ALL UINT64_MAX 49 50 /* 51 * ZFS gets very unhappy when the maximum ARC size is smaller than the maximum 52 * block size and a larger block is written. To leave some safety margin, we 53 * limit the minimum for zfs_arc_max to the maximium transaction size. 54 */ 55 #define MIN_ARC_MAX DMU_MAX_ACCESS 56 57 #define HDR_SET_LSIZE(hdr, x) do { \ 58 ASSERT(IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT)); \ 59 (hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \ 60 } while (0) 61 62 #define HDR_SET_PSIZE(hdr, x) do { \ 63 ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \ 64 (hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \ 65 } while (0) 66 67 /* The l2size in the header is only used by L2 cache */ 68 #define HDR_SET_L2SIZE(hdr, x) do { \ 69 ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \ 70 (hdr)->b_l2size = ((x) >> SPA_MINBLOCKSHIFT); \ 71 } while (0) 72 73 #define HDR_GET_LSIZE(hdr) ((hdr)->b_lsize << SPA_MINBLOCKSHIFT) 74 #define HDR_GET_PSIZE(hdr) ((hdr)->b_psize << SPA_MINBLOCKSHIFT) 75 #define HDR_GET_L2SIZE(hdr) ((hdr)->b_l2size << SPA_MINBLOCKSHIFT) 76 77 typedef struct arc_buf_hdr arc_buf_hdr_t; 78 typedef struct arc_buf arc_buf_t; 79 typedef struct arc_prune arc_prune_t; 80 81 /* 82 * Because the ARC can store encrypted data, errors (not due to bugs) may arise 83 * while transforming data into its desired format - specifically, when 84 * decrypting, the key may not be present, or the HMAC may not be correct 85 * which signifies deliberate tampering with the on-disk state 86 * (assuming that the checksum was correct). If any error occurs, the "buf" 87 * parameter will be NULL. 88 */ 89 typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb, 90 const blkptr_t *bp, arc_buf_t *buf, void *priv); 91 typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv); 92 typedef void arc_prune_func_t(uint64_t bytes, void *priv); 93 94 /* Shared module parameters */ 95 extern uint_t zfs_arc_average_blocksize; 96 extern int l2arc_exclude_special; 97 98 /* generic arc_done_func_t's which you can use */ 99 arc_read_done_func_t arc_bcopy_func; 100 arc_read_done_func_t arc_getbuf_func; 101 102 /* generic arc_prune_func_t wrapper for callbacks */ 103 struct arc_prune { 104 arc_prune_func_t *p_pfunc; 105 void *p_private; 106 uint64_t p_adjust; 107 list_node_t p_node; 108 zfs_refcount_t p_refcnt; 109 }; 110 111 typedef enum arc_strategy { 112 ARC_STRATEGY_META_ONLY = 0, /* Evict only meta data buffers */ 113 ARC_STRATEGY_META_BALANCED = 1, /* Evict data buffers if needed */ 114 } arc_strategy_t; 115 116 typedef enum arc_flags 117 { 118 /* 119 * Public flags that can be passed into the ARC by external consumers. 120 */ 121 ARC_FLAG_WAIT = 1 << 0, /* perform sync I/O */ 122 ARC_FLAG_NOWAIT = 1 << 1, /* perform async I/O */ 123 ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */ 124 ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */ 125 ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */ 126 ARC_FLAG_UNCACHED = 1 << 5, /* evict after use */ 127 ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */ 128 129 /* 130 * Private ARC flags. These flags are private ARC only flags that 131 * will show up in b_flags in the arc_buf_hdr_t. These flags should 132 * only be set by ARC code. 133 */ 134 ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */ 135 ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */ 136 ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */ 137 ARC_FLAG_INDIRECT = 1 << 10, /* indirect block */ 138 /* Indicates that block was read with ASYNC priority. */ 139 ARC_FLAG_PRIO_ASYNC_READ = 1 << 11, 140 ARC_FLAG_L2_WRITING = 1 << 12, /* write in progress */ 141 ARC_FLAG_L2_EVICTED = 1 << 13, /* evicted during I/O */ 142 ARC_FLAG_L2_WRITE_HEAD = 1 << 14, /* head of write list */ 143 /* 144 * Encrypted or authenticated on disk (may be plaintext in memory). 145 * This header has b_crypt_hdr allocated. Does not include indirect 146 * blocks with checksums of MACs which will also have their X 147 * (encrypted) bit set in the bp. 148 */ 149 ARC_FLAG_PROTECTED = 1 << 15, 150 /* data has not been authenticated yet */ 151 ARC_FLAG_NOAUTH = 1 << 16, 152 /* indicates that the buffer contains metadata (otherwise, data) */ 153 ARC_FLAG_BUFC_METADATA = 1 << 17, 154 155 /* Flags specifying whether optional hdr struct fields are defined */ 156 ARC_FLAG_HAS_L1HDR = 1 << 18, 157 ARC_FLAG_HAS_L2HDR = 1 << 19, 158 159 /* 160 * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data. 161 * This allows the l2arc to use the blkptr's checksum to verify 162 * the data without having to store the checksum in the hdr. 163 */ 164 ARC_FLAG_COMPRESSED_ARC = 1 << 20, 165 ARC_FLAG_SHARED_DATA = 1 << 21, 166 167 /* 168 * Fail this arc_read() (with ENOENT) if the data is not already present 169 * in cache. 170 */ 171 ARC_FLAG_CACHED_ONLY = 1 << 22, 172 173 /* 174 * Don't instantiate an arc_buf_t for arc_read_done. 175 */ 176 ARC_FLAG_NO_BUF = 1 << 23, 177 178 /* 179 * The arc buffer's compression mode is stored in the top 7 bits of the 180 * flags field, so these dummy flags are included so that MDB can 181 * interpret the enum properly. 182 */ 183 ARC_FLAG_COMPRESS_0 = 1 << 24, 184 ARC_FLAG_COMPRESS_1 = 1 << 25, 185 ARC_FLAG_COMPRESS_2 = 1 << 26, 186 ARC_FLAG_COMPRESS_3 = 1 << 27, 187 ARC_FLAG_COMPRESS_4 = 1 << 28, 188 ARC_FLAG_COMPRESS_5 = 1 << 29, 189 ARC_FLAG_COMPRESS_6 = 1 << 30 190 } arc_flags_t; 191 192 typedef enum arc_buf_flags { 193 ARC_BUF_FLAG_SHARED = 1 << 0, 194 ARC_BUF_FLAG_COMPRESSED = 1 << 1, 195 /* 196 * indicates whether this arc_buf_t is encrypted, regardless of 197 * state on-disk 198 */ 199 ARC_BUF_FLAG_ENCRYPTED = 1 << 2 200 } arc_buf_flags_t; 201 202 struct arc_buf { 203 arc_buf_hdr_t *b_hdr; 204 arc_buf_t *b_next; 205 void *b_data; 206 arc_buf_flags_t b_flags; 207 }; 208 209 typedef enum arc_buf_contents { 210 ARC_BUFC_DATA, /* buffer contains data */ 211 ARC_BUFC_METADATA, /* buffer contains metadata */ 212 ARC_BUFC_NUMTYPES 213 } arc_buf_contents_t; 214 215 /* 216 * The following breakdowns of arc_size exist for kstat only. 217 */ 218 typedef enum arc_space_type { 219 ARC_SPACE_DATA, 220 ARC_SPACE_META, 221 ARC_SPACE_HDRS, 222 ARC_SPACE_L2HDRS, 223 ARC_SPACE_DBUF, 224 ARC_SPACE_DNODE, 225 ARC_SPACE_BONUS, 226 ARC_SPACE_ABD_CHUNK_WASTE, 227 ARC_SPACE_NUMTYPES 228 } arc_space_type_t; 229 230 typedef enum arc_state_type { 231 ARC_STATE_ANON, 232 ARC_STATE_MRU, 233 ARC_STATE_MRU_GHOST, 234 ARC_STATE_MFU, 235 ARC_STATE_MFU_GHOST, 236 ARC_STATE_L2C_ONLY, 237 ARC_STATE_UNCACHED, 238 ARC_STATE_NUMTYPES 239 } arc_state_type_t; 240 241 typedef struct arc_buf_info { 242 arc_state_type_t abi_state_type; 243 arc_buf_contents_t abi_state_contents; 244 uint32_t abi_flags; 245 uint32_t abi_bufcnt; 246 uint64_t abi_size; 247 uint64_t abi_spa; 248 uint64_t abi_access; 249 uint32_t abi_mru_hits; 250 uint32_t abi_mru_ghost_hits; 251 uint32_t abi_mfu_hits; 252 uint32_t abi_mfu_ghost_hits; 253 uint32_t abi_l2arc_hits; 254 uint32_t abi_holds; 255 uint64_t abi_l2arc_dattr; 256 uint64_t abi_l2arc_asize; 257 enum zio_compress abi_l2arc_compress; 258 } arc_buf_info_t; 259 260 /* 261 * Flags returned by arc_cached; describes which part of the arc 262 * the block is cached in. 263 */ 264 #define ARC_CACHED_EMBEDDED (1U << 0) 265 #define ARC_CACHED_IN_L1 (1U << 1) 266 #define ARC_CACHED_IN_MRU (1U << 2) 267 #define ARC_CACHED_IN_MFU (1U << 3) 268 #define ARC_CACHED_IN_L2 (1U << 4) 269 270 void arc_space_consume(uint64_t space, arc_space_type_t type); 271 void arc_space_return(uint64_t space, arc_space_type_t type); 272 boolean_t arc_is_metadata(arc_buf_t *buf); 273 boolean_t arc_is_encrypted(arc_buf_t *buf); 274 boolean_t arc_is_unauthenticated(arc_buf_t *buf); 275 enum zio_compress arc_get_compression(arc_buf_t *buf); 276 void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, 277 uint8_t *iv, uint8_t *mac); 278 int arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, 279 boolean_t in_place); 280 void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, 281 dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, 282 const uint8_t *mac); 283 arc_buf_t *arc_alloc_buf(spa_t *spa, const void *tag, arc_buf_contents_t type, 284 int32_t size); 285 arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, const void *tag, 286 uint64_t psize, uint64_t lsize, enum zio_compress compression_type, 287 uint8_t complevel); 288 arc_buf_t *arc_alloc_raw_buf(spa_t *spa, const void *tag, uint64_t dsobj, 289 boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, 290 const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize, 291 enum zio_compress compression_type, uint8_t complevel); 292 uint8_t arc_get_complevel(arc_buf_t *buf); 293 arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size); 294 arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, 295 enum zio_compress compression_type, uint8_t complevel); 296 arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, 297 const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, 298 dmu_object_type_t ot, uint64_t psize, uint64_t lsize, 299 enum zio_compress compression_type, uint8_t complevel); 300 void arc_return_buf(arc_buf_t *buf, const void *tag); 301 void arc_loan_inuse_buf(arc_buf_t *buf, const void *tag); 302 void arc_buf_destroy(arc_buf_t *buf, const void *tag); 303 void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index); 304 uint64_t arc_buf_size(arc_buf_t *buf); 305 uint64_t arc_buf_lsize(arc_buf_t *buf); 306 void arc_buf_access(arc_buf_t *buf); 307 void arc_release(arc_buf_t *buf, const void *tag); 308 int arc_released(arc_buf_t *buf); 309 void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused); 310 void arc_buf_freeze(arc_buf_t *buf); 311 void arc_buf_thaw(arc_buf_t *buf); 312 #ifdef ZFS_DEBUG 313 int arc_referenced(arc_buf_t *buf); 314 #else 315 #define arc_referenced(buf) ((void) sizeof (buf), 0) 316 #endif 317 318 int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, 319 arc_read_done_func_t *done, void *priv, zio_priority_t priority, 320 int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb); 321 zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, 322 arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp, 323 arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, 324 arc_write_done_func_t *done, void *priv, zio_priority_t priority, 325 int zio_flags, const zbookmark_phys_t *zb); 326 327 arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv); 328 void arc_remove_prune_callback(arc_prune_t *p); 329 void arc_freed(spa_t *spa, const blkptr_t *bp); 330 int arc_cached(spa_t *spa, const blkptr_t *bp); 331 332 void arc_flush(spa_t *spa, boolean_t retry); 333 void arc_flush_async(spa_t *spa); 334 void arc_tempreserve_clear(uint64_t reserve); 335 int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg); 336 boolean_t arc_async_flush_guid_inuse(uint64_t load_guid); 337 338 uint64_t arc_all_memory(void); 339 uint64_t arc_default_max(uint64_t min, uint64_t allmem); 340 uint64_t arc_target_bytes(void); 341 void arc_set_limits(uint64_t); 342 void arc_init(void); 343 void arc_fini(void); 344 345 /* 346 * Level 2 ARC 347 */ 348 349 void l2arc_add_vdev(spa_t *spa, vdev_t *vd); 350 void l2arc_remove_vdev(vdev_t *vd); 351 boolean_t l2arc_vdev_present(vdev_t *vd); 352 void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen); 353 boolean_t l2arc_range_check_overlap(uint64_t bottom, uint64_t top, 354 uint64_t check); 355 void l2arc_init(void); 356 void l2arc_fini(void); 357 void l2arc_start(void); 358 void l2arc_stop(void); 359 void l2arc_spa_rebuild_start(spa_t *spa); 360 void l2arc_spa_rebuild_stop(spa_t *spa); 361 362 #ifndef _KERNEL 363 extern boolean_t arc_watch; 364 #endif 365 366 #ifdef __cplusplus 367 } 368 #endif 369 370 #endif /* _SYS_ARC_H */ 371