xref: /freebsd/sys/contrib/openzfs/include/sys/arc.h (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
25  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
26  * Copyright (c) 2019, Allan Jude
27  * Copyright (c) 2019, Klara Inc.
28  */
29 
30 #ifndef	_SYS_ARC_H
31 #define	_SYS_ARC_H
32 
33 #include <sys/zfs_context.h>
34 
35 #ifdef	__cplusplus
36 extern "C" {
37 #endif
38 
39 #include <sys/zio.h>
40 #include <sys/dmu.h>
41 #include <sys/spa.h>
42 #include <sys/zfs_refcount.h>
43 
/*
 * Used by arc_flush() to inform arc_evict_state() that it should evict
 * all available buffers from the arc state being passed in.
 */
#define	ARC_EVICT_ALL	UINT64_MAX
49 
/*
 * ZFS gets very unhappy when the maximum ARC size is smaller than the maximum
 * block size and a larger block is written.  To leave some safety margin, we
 * limit the minimum for zfs_arc_max to the maximum transaction size.
 */
#define	MIN_ARC_MAX	DMU_MAX_ACCESS
56 
/*
 * Store the byte count x into (hdr)->b_lsize, expressed in units of
 * (1 << SPA_MINBLOCKSHIFT) bytes.  x must be a multiple of that unit, which
 * is checked with ASSERT.  x is expanded more than once, so it must not
 * have side effects.
 */
#define	HDR_SET_LSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \
} while (0)
61 
/*
 * Store the byte count x into (hdr)->b_psize, expressed in units of
 * (1 << SPA_MINBLOCKSHIFT) bytes.  x must be a multiple of that unit, which
 * is checked with ASSERT.  x is expanded more than once, so it must not
 * have side effects.
 */
#define	HDR_SET_PSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \
} while (0)
66 
/*
 * The l2size in the header is only used by L2 cache.
 *
 * Store the byte count x into (hdr)->b_l2size, expressed in units of
 * (1 << SPA_MINBLOCKSHIFT) bytes.  x must be a multiple of that unit, which
 * is checked with ASSERT.  x is expanded more than once, so it must not
 * have side effects.
 */
#define	HDR_SET_L2SIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_l2size = ((x) >> SPA_MINBLOCKSHIFT); \
} while (0)
72 
/*
 * Convert the unit counts stored in the header size fields back into bytes
 * by shifting them left by SPA_MINBLOCKSHIFT.
 */
#define	HDR_GET_LSIZE(hdr)	((hdr)->b_lsize << SPA_MINBLOCKSHIFT)
#define	HDR_GET_PSIZE(hdr)	((hdr)->b_psize << SPA_MINBLOCKSHIFT)
#define	HDR_GET_L2SIZE(hdr)	((hdr)->b_l2size << SPA_MINBLOCKSHIFT)
76 
/*
 * Forward declarations.  struct arc_buf and struct arc_prune are completed
 * further down in this header; struct arc_buf_hdr stays opaque here.
 */
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t;
80 
81 /*
82  * Because the ARC can store encrypted data, errors (not due to bugs) may arise
83  * while transforming data into its desired format - specifically, when
84  * decrypting, the key may not be present, or the HMAC may not be correct
85  * which signifies deliberate tampering with the on-disk state
86  * (assuming that the checksum was correct). If any error occurs, the "buf"
87  * parameter will be NULL.
88  */
89 typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
90     const blkptr_t *bp, arc_buf_t *buf, void *priv);
91 typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
92 typedef void arc_prune_func_t(uint64_t bytes, void *priv);
93 
94 /* Shared module parameters */
95 extern uint_t zfs_arc_average_blocksize;
96 extern int l2arc_exclude_special;
97 
98 /* generic arc_done_func_t's which you can use */
99 arc_read_done_func_t arc_bcopy_func;
100 arc_read_done_func_t arc_getbuf_func;
101 
102 /* generic arc_prune_func_t wrapper for callbacks */
103 struct arc_prune {
104 	arc_prune_func_t	*p_pfunc;
105 	void			*p_private;
106 	uint64_t		p_adjust;
107 	list_node_t		p_node;
108 	zfs_refcount_t		p_refcnt;
109 };
110 
/* Selects which kinds of buffers may be evicted. */
typedef enum arc_strategy {
	ARC_STRATEGY_META_ONLY		= 0, /* Evict only meta data buffers */
	ARC_STRATEGY_META_BALANCED	= 1, /* Evict data buffers if needed */
} arc_strategy_t;
115 
/*
 * ARC flag bits.  Every enumerator is a distinct single bit; bits 0 through
 * 30 are all assigned.
 */
typedef enum arc_flags {
	/*
	 * Public flags that can be passed into the ARC by external consumers.
	 */
	ARC_FLAG_WAIT			= 1 << 0,	/* perform sync I/O */
	ARC_FLAG_NOWAIT			= 1 << 1,	/* perform async I/O */
	ARC_FLAG_PREFETCH		= 1 << 2,	/* I/O is a prefetch */
	ARC_FLAG_CACHED			= 1 << 3,	/* I/O was in cache */
	ARC_FLAG_L2CACHE		= 1 << 4,	/* cache in L2ARC */
	ARC_FLAG_UNCACHED		= 1 << 5,	/* evict after use */
	ARC_FLAG_PRESCIENT_PREFETCH	= 1 << 6,	/* long min lifespan */

	/*
	 * Private ARC flags.  These flags are private ARC only flags that
	 * will show up in b_flags in the arc_buf_hdr_t. These flags should
	 * only be set by ARC code.
	 */
	ARC_FLAG_IN_HASH_TABLE		= 1 << 7,	/* buffer is hashed */
	ARC_FLAG_IO_IN_PROGRESS		= 1 << 8,	/* I/O in progress */
	ARC_FLAG_IO_ERROR		= 1 << 9,	/* I/O failed for buf */
	ARC_FLAG_INDIRECT		= 1 << 10,	/* indirect block */
	/* Indicates that block was read with ASYNC priority. */
	ARC_FLAG_PRIO_ASYNC_READ	= 1 << 11,
	ARC_FLAG_L2_WRITING		= 1 << 12,	/* write in progress */
	ARC_FLAG_L2_EVICTED		= 1 << 13,	/* evicted during I/O */
	ARC_FLAG_L2_WRITE_HEAD		= 1 << 14,	/* head of write list */
	/*
	 * Encrypted or authenticated on disk (may be plaintext in memory).
	 * This header has b_crypt_hdr allocated. Does not include indirect
	 * blocks with checksums of MACs which will also have their X
	 * (encrypted) bit set in the bp.
	 */
	ARC_FLAG_PROTECTED		= 1 << 15,
	/* data has not been authenticated yet */
	ARC_FLAG_NOAUTH			= 1 << 16,
	/* indicates that the buffer contains metadata (otherwise, data) */
	ARC_FLAG_BUFC_METADATA		= 1 << 17,

	/* Flags specifying whether optional hdr struct fields are defined */
	ARC_FLAG_HAS_L1HDR		= 1 << 18,
	ARC_FLAG_HAS_L2HDR		= 1 << 19,

	/*
	 * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
	 * This allows the l2arc to use the blkptr's checksum to verify
	 * the data without having to store the checksum in the hdr.
	 */
	ARC_FLAG_COMPRESSED_ARC		= 1 << 20,
	ARC_FLAG_SHARED_DATA		= 1 << 21,

	/*
	 * Fail this arc_read() (with ENOENT) if the data is not already present
	 * in cache.
	 */
	ARC_FLAG_CACHED_ONLY		= 1 << 22,

	/*
	 * Don't instantiate an arc_buf_t for arc_read_done.
	 */
	ARC_FLAG_NO_BUF			= 1 << 23,

	/*
	 * The arc buffer's compression mode is stored in the top 7 bits of the
	 * flags field, so these dummy flags are included so that MDB can
	 * interpret the enum properly.
	 */
	ARC_FLAG_COMPRESS_0		= 1 << 24,
	ARC_FLAG_COMPRESS_1		= 1 << 25,
	ARC_FLAG_COMPRESS_2		= 1 << 26,
	ARC_FLAG_COMPRESS_3		= 1 << 27,
	ARC_FLAG_COMPRESS_4		= 1 << 28,
	ARC_FLAG_COMPRESS_5		= 1 << 29,
	ARC_FLAG_COMPRESS_6		= 1 << 30
} arc_flags_t;
191 
/* Per-arc_buf_t flag bits, stored in arc_buf_t.b_flags. */
typedef enum arc_buf_flags {
	ARC_BUF_FLAG_SHARED		= 1 << 0,
	ARC_BUF_FLAG_COMPRESSED		= 1 << 1,
	/*
	 * indicates whether this arc_buf_t is encrypted, regardless of
	 * state on-disk
	 */
	ARC_BUF_FLAG_ENCRYPTED		= 1 << 2
} arc_buf_flags_t;
201 
202 struct arc_buf {
203 	arc_buf_hdr_t		*b_hdr;
204 	arc_buf_t		*b_next;
205 	void			*b_data;
206 	arc_buf_flags_t		b_flags;
207 };
208 
/* Kind of content held by a buffer; ARC_BUFC_NUMTYPES counts the kinds. */
typedef enum arc_buf_contents {
	ARC_BUFC_DATA,				/* buffer contains data */
	ARC_BUFC_METADATA,			/* buffer contains metadata */
	ARC_BUFC_NUMTYPES
} arc_buf_contents_t;
214 
/*
 * The following breakdowns of arc_size exist for kstat only.
 * ARC_SPACE_NUMTYPES counts the categories.
 */
typedef enum arc_space_type {
	ARC_SPACE_DATA,
	ARC_SPACE_META,
	ARC_SPACE_HDRS,
	ARC_SPACE_L2HDRS,
	ARC_SPACE_DBUF,
	ARC_SPACE_DNODE,
	ARC_SPACE_BONUS,
	ARC_SPACE_ABD_CHUNK_WASTE,
	ARC_SPACE_NUMTYPES
} arc_space_type_t;
229 
/* Identifiers for the ARC states; ARC_STATE_NUMTYPES counts them. */
typedef enum arc_state_type {
	ARC_STATE_ANON,
	ARC_STATE_MRU,
	ARC_STATE_MRU_GHOST,
	ARC_STATE_MFU,
	ARC_STATE_MFU_GHOST,
	ARC_STATE_L2C_ONLY,
	ARC_STATE_UNCACHED,
	ARC_STATE_NUMTYPES
} arc_state_type_t;
240 
241 typedef struct arc_buf_info {
242 	arc_state_type_t	abi_state_type;
243 	arc_buf_contents_t	abi_state_contents;
244 	uint32_t		abi_flags;
245 	uint32_t		abi_bufcnt;
246 	uint64_t		abi_size;
247 	uint64_t		abi_spa;
248 	uint64_t		abi_access;
249 	uint32_t		abi_mru_hits;
250 	uint32_t		abi_mru_ghost_hits;
251 	uint32_t		abi_mfu_hits;
252 	uint32_t		abi_mfu_ghost_hits;
253 	uint32_t		abi_l2arc_hits;
254 	uint32_t		abi_holds;
255 	uint64_t		abi_l2arc_dattr;
256 	uint64_t		abi_l2arc_asize;
257 	enum zio_compress	abi_l2arc_compress;
258 } arc_buf_info_t;
259 
/*
 * Flags returned by arc_cached; describes which part of the arc
 * the block is cached in.  Each flag is a distinct bit, so several may be
 * combined in one return value.
 */
#define	ARC_CACHED_EMBEDDED	(1U << 0)
#define	ARC_CACHED_IN_L1	(1U << 1)
#define	ARC_CACHED_IN_MRU	(1U << 2)
#define	ARC_CACHED_IN_MFU	(1U << 3)
#define	ARC_CACHED_IN_L2	(1U << 4)
269 
270 void arc_space_consume(uint64_t space, arc_space_type_t type);
271 void arc_space_return(uint64_t space, arc_space_type_t type);
272 boolean_t arc_is_metadata(arc_buf_t *buf);
273 boolean_t arc_is_encrypted(arc_buf_t *buf);
274 boolean_t arc_is_unauthenticated(arc_buf_t *buf);
275 enum zio_compress arc_get_compression(arc_buf_t *buf);
276 void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
277     uint8_t *iv, uint8_t *mac);
278 int arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
279     boolean_t in_place);
280 void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
281     dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
282     const uint8_t *mac);
283 arc_buf_t *arc_alloc_buf(spa_t *spa, const void *tag, arc_buf_contents_t type,
284     int32_t size);
285 arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, const void *tag,
286     uint64_t psize, uint64_t lsize, enum zio_compress compression_type,
287     uint8_t complevel);
288 arc_buf_t *arc_alloc_raw_buf(spa_t *spa, const void *tag, uint64_t dsobj,
289     boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
290     const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
291     enum zio_compress compression_type, uint8_t complevel);
292 uint8_t arc_get_complevel(arc_buf_t *buf);
293 arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
294 arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
295     enum zio_compress compression_type, uint8_t complevel);
296 arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
297     const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
298     dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
299     enum zio_compress compression_type, uint8_t complevel);
300 void arc_return_buf(arc_buf_t *buf, const void *tag);
301 void arc_loan_inuse_buf(arc_buf_t *buf, const void *tag);
302 void arc_buf_destroy(arc_buf_t *buf, const void *tag);
303 void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
304 uint64_t arc_buf_size(arc_buf_t *buf);
305 uint64_t arc_buf_lsize(arc_buf_t *buf);
306 void arc_buf_access(arc_buf_t *buf);
307 void arc_release(arc_buf_t *buf, const void *tag);
308 int arc_released(arc_buf_t *buf);
309 void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
310 void arc_buf_freeze(arc_buf_t *buf);
311 void arc_buf_thaw(arc_buf_t *buf);
/*
 * arc_referenced() is a real function only when ZFS_DEBUG is defined.
 * Otherwise it is a macro that always yields 0; the sizeof keeps `buf`
 * syntactically used without evaluating it.
 */
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#else
#define	arc_referenced(buf) ((void) sizeof (buf), 0)
#endif
317 
318 int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
319     arc_read_done_func_t *done, void *priv, zio_priority_t priority,
320     int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
321 zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
322     arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp,
323     arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
324     arc_write_done_func_t *done, void *priv, zio_priority_t priority,
325     int zio_flags, const zbookmark_phys_t *zb);
326 
327 arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv);
328 void arc_remove_prune_callback(arc_prune_t *p);
329 void arc_freed(spa_t *spa, const blkptr_t *bp);
330 int arc_cached(spa_t *spa, const blkptr_t *bp);
331 
332 void arc_flush(spa_t *spa, boolean_t retry);
333 void arc_flush_async(spa_t *spa);
334 void arc_tempreserve_clear(uint64_t reserve);
335 int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
336 boolean_t arc_async_flush_guid_inuse(uint64_t load_guid);
337 
/* Memory sizing, limit configuration and ARC setup/teardown. */
uint64_t arc_all_memory(void);
uint64_t arc_default_max(uint64_t min, uint64_t allmem);
uint64_t arc_target_bytes(void);
void arc_set_limits(uint64_t);
void arc_init(void);
void arc_fini(void);
344 
345 /*
346  * Level 2 ARC
347  */
348 
349 void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
350 void l2arc_remove_vdev(vdev_t *vd);
351 boolean_t l2arc_vdev_present(vdev_t *vd);
352 void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
353 boolean_t l2arc_range_check_overlap(uint64_t bottom, uint64_t top,
354     uint64_t check);
355 void l2arc_init(void);
356 void l2arc_fini(void);
357 void l2arc_start(void);
358 void l2arc_stop(void);
359 void l2arc_spa_rebuild_start(spa_t *spa);
360 void l2arc_spa_rebuild_stop(spa_t *spa);
361 
362 #ifndef _KERNEL
363 extern boolean_t arc_watch;
364 #endif
365 
366 #ifdef	__cplusplus
367 }
368 #endif
369 
370 #endif /* _SYS_ARC_H */
371