xref: /linux/drivers/md/dm-vdo/indexer/indexer.h (revision 5014bebee0cffda14fafae5a2534d08120b7b9e8)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright 2023 Red Hat
4  */
5 
6 #ifndef INDEXER_H
7 #define INDEXER_H
8 
9 #include <linux/mutex.h>
10 #include <linux/sched.h>
11 #include <linux/stddef.h>
12 #include <linux/types.h>
13 #include <linux/wait.h>
14 
15 #include "funnel-queue.h"
16 
17 /*
18  * UDS public API
19  *
20  * The Universal Deduplication System (UDS) is an efficient name-value store. When used for
21  * deduplicating storage, the names are generally hashes of data blocks and the associated data is
22  * where that block is located on the underlying storage medium. The stored names are expected to
23  * be randomly distributed among the space of possible names. If this assumption is violated, the
24  * UDS index will store fewer names than normal but will otherwise continue to work. The data
25  * associated with each name can be any 16-byte value.
26  *
27  * A client must first create an index session to interact with an index. Once created, the session
28  * can be shared among multiple threads or users. When a session is destroyed, it will also close
29  * and save any associated index.
30  *
31  * To make a request, a client must allocate a uds_request structure and set the required fields
32  * before launching it. UDS will invoke the provided callback to complete the request. After the
33  * callback has been called, the uds_request structure can be freed or reused for a new request.
34  * There are five types of requests:
35  *
36  * A UDS_UPDATE request will associate the provided name with the provided data. Any previous data
37  * associated with that name will be discarded.
38  *
39  * A UDS_QUERY request will return the data associated with the provided name, if any. The entry
40  * for the name will also be marked as most recent, as if the data had been updated.
41  *
42  * A UDS_POST request is a combination of UDS_QUERY and UDS_UPDATE. If there is already data
43  * associated with the provided name, that data is returned. If there is no existing association,
44  * the name is associated with the newly provided data. This request is equivalent to a UDS_QUERY
45  * request followed by a UDS_UPDATE request if no data is found, but it is much more efficient.
46  *
47  * A UDS_QUERY_NO_UPDATE request will return the data associated with the provided name, but will
48  * not change the recency of the entry for the name. This request is primarily useful for testing,
49  * to determine whether an entry exists without changing the internal state of the index.
50  *
51  * A UDS_DELETE request removes any data associated with the provided name. This operation is
52  * generally not necessary, because the index will automatically discard its oldest entries once it
53  * becomes full.
54  */
55 
56 /* General UDS constants and structures */
57 
/* The operations a client can request of the index; stored in the type field of a uds_request. */
enum uds_request_type {
	/* Map the name to the supplied data, creating the entry if needed; name becomes most recent. */
	UDS_UPDATE = 0,

	/* Look up the data mapped to a name, and make the name most recent. */
	UDS_QUERY,

	/*
	 * Look up the data mapped to a name. If the name has no current data, map the provided
	 * data to it instead. Either way, make the name most recent.
	 */
	UDS_POST,

	/* Look up the data mapped to a name, leaving the recency of the name untouched. */
	UDS_QUERY_NO_UPDATE,

	/* Drop whatever mapping exists for a name. */
	UDS_DELETE,
} __packed;
78 
/* How uds_open_index() should obtain the index it opens. */
enum uds_open_index_type {
	/* Make a brand new index. */
	UDS_CREATE,

	/* Load an index that already exists, attempting recovery if that turns out to be needed. */
	UDS_LOAD,

	/* Load an index that already exists, but refuse unless it was saved cleanly. */
	UDS_NO_REBUILD,
};
89 
/* Sizes of the two halves of an index record. */
enum {
	/* Number of bytes in a record name */
	UDS_RECORD_NAME_SIZE = 16,
	/* Largest number of bytes of data a record can carry */
	UDS_RECORD_DATA_SIZE = 16,
};
96 
/*
 * The type used for a UDS memory configuration. A value is either a positive whole number of
 * gigabytes, or one of the six special UDS_MEMORY_CONFIG_* constants that denote configurations
 * smaller than one gigabyte.
 */
typedef int uds_memory_config_size_t;
102 
/* Limits and special values for a uds_memory_config_size_t. */
enum {
	/* The largest amount of memory that can be configured */
	UDS_MEMORY_CONFIG_MAX = 1024,
	/* Flag marking an index that has one chapter fewer than usual */
	UDS_MEMORY_CONFIG_REDUCED = 0x1000,
	/* UDS_MEMORY_CONFIG_MAX with the reduced flag added */
	UDS_MEMORY_CONFIG_REDUCED_MAX = UDS_MEMORY_CONFIG_MAX + UDS_MEMORY_CONFIG_REDUCED,
	/* Special values that stand for sizes below 1 GB */
	UDS_MEMORY_CONFIG_256MB = -256,
	UDS_MEMORY_CONFIG_512MB = -512,
	UDS_MEMORY_CONFIG_768MB = -768,
	UDS_MEMORY_CONFIG_REDUCED_256MB = -1280,
	UDS_MEMORY_CONFIG_REDUCED_512MB = -1536,
	UDS_MEMORY_CONFIG_REDUCED_768MB = -1792,
};
117 
118 struct uds_record_name {
119 	unsigned char name[UDS_RECORD_NAME_SIZE];
120 };
121 
122 struct uds_record_data {
123 	unsigned char data[UDS_RECORD_DATA_SIZE];
124 };
125 
126 struct uds_volume_record {
127 	struct uds_record_name name;
128 	struct uds_record_data data;
129 };
130 
131 struct uds_parameters {
132 	/* The block_device used for storage */
133 	struct block_device *bdev;
134 	/* The maximum allowable size of the index on storage */
135 	size_t size;
136 	/* The offset where the index should start */
137 	off_t offset;
138 	/* The maximum memory allocation, in GB */
139 	uds_memory_config_size_t memory_size;
140 	/* Whether the index should include sparse chapters */
141 	bool sparse;
142 	/* A 64-bit nonce to validate the index */
143 	u64 nonce;
144 	/* The number of threads used to process index requests */
145 	unsigned int zone_count;
146 	/* The number of threads used to read volume pages */
147 	unsigned int read_threads;
148 };
149 
150 /*
151  * These statistics capture characteristics of the current index, including resource usage and
152  * requests processed since the index was opened.
153  */
154 struct uds_index_stats {
155 	/* The total number of records stored in the index */
156 	u64 entries_indexed;
157 	/* An estimate of the index's memory usage, in bytes */
158 	u64 memory_used;
159 	/* The number of collisions recorded in the volume index */
160 	u64 collisions;
161 	/* The number of entries discarded from the index since startup */
162 	u64 entries_discarded;
163 	/* The time at which these statistics were fetched */
164 	s64 current_time;
165 	/* The number of post calls that found an existing entry */
166 	u64 posts_found;
167 	/* The number of post calls that added an entry */
168 	u64 posts_not_found;
169 	/*
170 	 * The number of post calls that found an existing entry that is current enough to only
171 	 * exist in memory and not have been committed to disk yet
172 	 */
173 	u64 in_memory_posts_found;
174 	/*
175 	 * The number of post calls that found an existing entry in the dense portion of the index
176 	 */
177 	u64 dense_posts_found;
178 	/*
179 	 * The number of post calls that found an existing entry in the sparse portion of the index
180 	 */
181 	u64 sparse_posts_found;
182 	/* The number of update calls that updated an existing entry */
183 	u64 updates_found;
184 	/* The number of update calls that added a new entry */
185 	u64 updates_not_found;
186 	/* The number of delete requests that deleted an existing entry */
187 	u64 deletions_found;
188 	/* The number of delete requests that did nothing */
189 	u64 deletions_not_found;
190 	/* The number of query calls that found existing entry */
191 	u64 queries_found;
192 	/* The number of query calls that did not find an entry */
193 	u64 queries_not_found;
194 	/* The total number of requests processed */
195 	u64 requests;
196 };
197 
/* What is known so far about where a record lives in the index. */
enum uds_index_region {
	/* Nothing has been determined about the location yet */
	UDS_LOCATION_UNKNOWN = 0,
	/* The entry on the index page has been found */
	UDS_LOCATION_INDEX_PAGE_LOOKUP,
	/* The entry on the record page has been found */
	UDS_LOCATION_RECORD_PAGE_LOOKUP,
	/* The index does not contain the record */
	UDS_LOCATION_UNAVAILABLE,
	/* The open chapter contains the record */
	UDS_LOCATION_IN_OPEN_CHAPTER,
	/* The dense part of the index contains the record */
	UDS_LOCATION_IN_DENSE,
	/* The sparse part of the index contains the record */
	UDS_LOCATION_IN_SPARSE,
} __packed;
214 
/* The kinds of zone message request, which index zones use to communicate with each other. */
enum uds_zone_message_type {
	/* An ordinary request that carries no message */
	UDS_MESSAGE_NONE = 0,
	/* Add a chapter to the cache of sparse chapter indexes */
	UDS_MESSAGE_SPARSE_CACHE_BARRIER,
	/* Close a chapter so that the zone does not fall behind */
	UDS_MESSAGE_ANNOUNCE_CHAPTER_CLOSED,
} __packed;
224 
225 struct uds_zone_message {
226 	/* The type of message, determining how it will be processed */
227 	enum uds_zone_message_type type;
228 	/* The virtual chapter number to which the message applies */
229 	u64 virtual_chapter;
230 } __packed;
231 
/* Forward declarations, so these structures can be referred to by pointer. */
struct uds_index_session;
struct uds_index;
struct uds_request;

/*
 * The type of a request completion callback. After the callback has been invoked, the
 * uds_request structure may be freed or reused.
 */
typedef void (*uds_request_callback_fn)(struct uds_request *request);
238 
239 struct uds_request {
240 	/* These input fields must be set before launching a request. */
241 
242 	/* The name of the record to look up or create */
243 	struct uds_record_name record_name;
244 	/* New data to associate with the record name, if applicable */
245 	struct uds_record_data new_metadata;
246 	/* A callback to invoke when the request is complete */
247 	uds_request_callback_fn callback;
248 	/* The index session that will manage this request */
249 	struct uds_index_session *session;
250 	/* The type of operation to perform, as describe above */
251 	enum uds_request_type type;
252 
253 	/* These output fields are set when a request is complete. */
254 
255 	/* The existing data associated with the request name, if any */
256 	struct uds_record_data old_metadata;
257 	/* True if the record name had an existing entry in the index */
258 	bool found;
259 	/* Either UDS_SUCCESS or an error code for the request */
260 	int status;
261 
262 	/* The remaining fields are used internally and should not be altered by clients. */
263 	struct_group(internal,
264 		     /* The virtual chapter containing the record name, if known */
265 		     u64 virtual_chapter;
266 		     /* The region of the index containing the record name */
267 		     enum uds_index_region location;
268 		     /* If true, process request immediately by waking the worker thread */
269 		     bool unbatched;
270 		     /* If true, continue this request before processing newer requests */
271 		     bool requeued;
272 		     /* Control message for coordinating between zones */
273 		     struct uds_zone_message zone_message;
274 		     /* The number of the zone which will process this request*/
275 		     unsigned int zone_number;
276 		     /* A link for adding a request to a lock-free queue */
277 		     struct funnel_queue_entry queue_link;
278 		     /* A link for adding a request to a standard linked list */
279 		     struct uds_request *next_request;
280 		     /* A pointer to the index processing this request */
281 		     struct uds_index *index;
282 		     );
283 };
284 
285 /* A session is required for most index operations. */
286 int __must_check uds_create_index_session(struct uds_index_session **session);
287 
/* Destroy an index session. The index associated with it is also closed and saved. */
int uds_destroy_index_session(struct uds_index_session *session);
290 
291 /*
292  * Create or open an index with an existing session. This operation fails if the index session is
293  * suspended, or if there is already an open index.
294  */
295 int __must_check uds_open_index(enum uds_open_index_type open_type,
296 				const struct uds_parameters *parameters,
297 				struct uds_index_session *session);
298 
299 /*
300  * Wait until all callbacks for index operations are complete, and prevent new index operations
301  * from starting. New index operations will fail with EBUSY until the session is resumed. Also
302  * optionally saves the index.
303  */
304 int __must_check uds_suspend_index_session(struct uds_index_session *session, bool save);
305 
306 /*
307  * Allow new index operations for an index, whether it was suspended or not. If the index is
308  * suspended and the supplied block device differs from the current backing store, the index will
309  * start using the new backing store instead.
310  */
311 int __must_check uds_resume_index_session(struct uds_index_session *session,
312 					  struct block_device *bdev);
313 
314 /* Wait until all outstanding index operations are complete. */
315 int __must_check uds_flush_index_session(struct uds_index_session *session);
316 
317 /* Close an index. This operation fails if the index session is suspended. */
318 int __must_check uds_close_index(struct uds_index_session *session);
319 
320 /* Get index statistics since the last time the index was opened. */
321 int __must_check uds_get_index_session_stats(struct uds_index_session *session,
322 					     struct uds_index_stats *stats);
323 
324 /* This function will fail if any required field of the request is not set. */
325 int __must_check uds_launch_request(struct uds_request *request);
326 
327 struct cond_var {
328 	wait_queue_head_t wait_queue;
329 };
330 
uds_init_cond(struct cond_var * cv)331 static inline void uds_init_cond(struct cond_var *cv)
332 {
333 	init_waitqueue_head(&cv->wait_queue);
334 }
335 
uds_signal_cond(struct cond_var * cv)336 static inline void uds_signal_cond(struct cond_var *cv)
337 {
338 	wake_up(&cv->wait_queue);
339 }
340 
uds_broadcast_cond(struct cond_var * cv)341 static inline void uds_broadcast_cond(struct cond_var *cv)
342 {
343 	wake_up_all(&cv->wait_queue);
344 }
345 
/*
 * Wait on a condition variable; only declared here, the definition is elsewhere.
 * NOTE(review): presumably the mutex is released while waiting and held again on return;
 * confirm against the implementation.
 */
void uds_wait_cond(struct cond_var *cv, struct mutex *mutex);
347 
348 #endif /* INDEXER_H */
349