1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * Copyright 2023 Red Hat 4 */ 5 6 #ifndef INDEXER_H 7 #define INDEXER_H 8 9 #include <linux/mutex.h> 10 #include <linux/sched.h> 11 #include <linux/types.h> 12 #include <linux/wait.h> 13 14 #include "funnel-queue.h" 15 16 /* 17 * UDS public API 18 * 19 * The Universal Deduplication System (UDS) is an efficient name-value store. When used for 20 * deduplicating storage, the names are generally hashes of data blocks and the associated data is 21 * where that block is located on the underlying storage medium. The stored names are expected to 22 * be randomly distributed among the space of possible names. If this assumption is violated, the 23 * UDS index will store fewer names than normal but will otherwise continue to work. The data 24 * associated with each name can be any 16-byte value. 25 * 26 * A client must first create an index session to interact with an index. Once created, the session 27 * can be shared among multiple threads or users. When a session is destroyed, it will also close 28 * and save any associated index. 29 * 30 * To make a request, a client must allocate a uds_request structure and set the required fields 31 * before launching it. UDS will invoke the provided callback to complete the request. After the 32 * callback has been called, the uds_request structure can be freed or reused for a new request. 33 * There are five types of requests: 34 * 35 * A UDS_UPDATE request will associate the provided name with the provided data. Any previous data 36 * associated with that name will be discarded. 37 * 38 * A UDS_QUERY request will return the data associated with the provided name, if any. The entry 39 * for the name will also be marked as most recent, as if the data had been updated. 40 * 41 * A UDS_POST request is a combination of UDS_QUERY and UDS_UPDATE. If there is already data 42 * associated with the provided name, that data is returned. If there is no existing association, 43 * the name is associated with the newly provided data. This request is equivalent to a UDS_QUERY 44 * request followed by a UDS_UPDATE request if no data is found, but it is much more efficient. 45 * 46 * A UDS_QUERY_NO_UPDATE request will return the data associated with the provided name, but will 47 * not change the recency of the entry for the name. This request is primarily useful for testing, 48 * to determine whether an entry exists without changing the internal state of the index. 49 * 50 * A UDS_DELETE request removes any data associated with the provided name. This operation is 51 * generally not necessary, because the index will automatically discard its oldest entries once it 52 * becomes full. 53 */ 54 55 /* General UDS constants and structures */ 56 57 enum uds_request_type { 58 /* Create or update the mapping for a name, and make the name most recent. */ 59 UDS_UPDATE, 60 61 /* Return any mapped data for a name, and make the name most recent. */ 62 UDS_QUERY, 63 64 /* 65 * Return any mapped data for a name, or map the provided data to the name if there is no 66 * current data, and make the name most recent. 67 */ 68 UDS_POST, 69 70 /* Return any mapped data for a name without updating its recency. */ 71 UDS_QUERY_NO_UPDATE, 72 73 /* Remove any mapping for a name. */ 74 UDS_DELETE, 75 76 }; 77 78 enum uds_open_index_type { 79 /* Create a new index. */ 80 UDS_CREATE, 81 82 /* Load an existing index and try to recover if necessary. */ 83 UDS_LOAD, 84 85 /* Load an existing index, but only if it was saved cleanly. */ 86 UDS_NO_REBUILD, 87 }; 88 89 enum { 90 /* The record name size in bytes */ 91 UDS_RECORD_NAME_SIZE = 16, 92 /* The maximum record data size in bytes */ 93 UDS_RECORD_DATA_SIZE = 16, 94 }; 95 96 /* 97 * A type representing a UDS memory configuration which is either a positive integer number of 98 * gigabytes or one of the six special constants for configurations smaller than one gigabyte. 99 */ 100 typedef int uds_memory_config_size_t; 101 102 enum { 103 /* The maximum configurable amount of memory */ 104 UDS_MEMORY_CONFIG_MAX = 1024, 105 /* Flag indicating that the index has one less chapter than usual */ 106 UDS_MEMORY_CONFIG_REDUCED = 0x1000, 107 UDS_MEMORY_CONFIG_REDUCED_MAX = 1024 + UDS_MEMORY_CONFIG_REDUCED, 108 /* Special values indicating sizes less than 1 GB */ 109 UDS_MEMORY_CONFIG_256MB = -256, 110 UDS_MEMORY_CONFIG_512MB = -512, 111 UDS_MEMORY_CONFIG_768MB = -768, 112 UDS_MEMORY_CONFIG_REDUCED_256MB = -1280, 113 UDS_MEMORY_CONFIG_REDUCED_512MB = -1536, 114 UDS_MEMORY_CONFIG_REDUCED_768MB = -1792, 115 }; 116 117 struct uds_record_name { 118 unsigned char name[UDS_RECORD_NAME_SIZE]; 119 }; 120 121 struct uds_record_data { 122 unsigned char data[UDS_RECORD_DATA_SIZE]; 123 }; 124 125 struct uds_volume_record { 126 struct uds_record_name name; 127 struct uds_record_data data; 128 }; 129 130 struct uds_parameters { 131 /* The block_device used for storage */ 132 struct block_device *bdev; 133 /* The maximum allowable size of the index on storage */ 134 size_t size; 135 /* The offset where the index should start */ 136 off_t offset; 137 /* The maximum memory allocation, in GB */ 138 uds_memory_config_size_t memory_size; 139 /* Whether the index should include sparse chapters */ 140 bool sparse; 141 /* A 64-bit nonce to validate the index */ 142 u64 nonce; 143 /* The number of threads used to process index requests */ 144 unsigned int zone_count; 145 /* The number of threads used to read volume pages */ 146 unsigned int read_threads; 147 }; 148 149 /* 150 * These statistics capture characteristics of the current index, including resource usage and 151 * requests processed since the index was opened. 152 */ 153 struct uds_index_stats { 154 /* The total number of records stored in the index */ 155 u64 entries_indexed; 156 /* An estimate of the index's memory usage, in bytes */ 157 u64 memory_used; 158 /* The number of collisions recorded in the volume index */ 159 u64 collisions; 160 /* The number of entries discarded from the index since startup */ 161 u64 entries_discarded; 162 /* The time at which these statistics were fetched */ 163 s64 current_time; 164 /* The number of post calls that found an existing entry */ 165 u64 posts_found; 166 /* The number of post calls that added an entry */ 167 u64 posts_not_found; 168 /* 169 * The number of post calls that found an existing entry that is current enough to only 170 * exist in memory and not have been committed to disk yet 171 */ 172 u64 in_memory_posts_found; 173 /* 174 * The number of post calls that found an existing entry in the dense portion of the index 175 */ 176 u64 dense_posts_found; 177 /* 178 * The number of post calls that found an existing entry in the sparse portion of the index 179 */ 180 u64 sparse_posts_found; 181 /* The number of update calls that updated an existing entry */ 182 u64 updates_found; 183 /* The number of update calls that added a new entry */ 184 u64 updates_not_found; 185 /* The number of delete requests that deleted an existing entry */ 186 u64 deletions_found; 187 /* The number of delete requests that did nothing */ 188 u64 deletions_not_found; 189 /* The number of query calls that found existing entry */ 190 u64 queries_found; 191 /* The number of query calls that did not find an entry */ 192 u64 queries_not_found; 193 /* The total number of requests processed */ 194 u64 requests; 195 }; 196 197 enum uds_index_region { 198 /* No location information has been determined */ 199 UDS_LOCATION_UNKNOWN = 0, 200 /* The index page entry has been found */ 201 UDS_LOCATION_INDEX_PAGE_LOOKUP, 202 /* The record page entry has been found */ 203 UDS_LOCATION_RECORD_PAGE_LOOKUP, 204 /* The record is not in the index */ 205 UDS_LOCATION_UNAVAILABLE, 206 /* The record was found in the open chapter */ 207 UDS_LOCATION_IN_OPEN_CHAPTER, 208 /* The record was found in the dense part of the index */ 209 UDS_LOCATION_IN_DENSE, 210 /* The record was found in the sparse part of the index */ 211 UDS_LOCATION_IN_SPARSE, 212 } __packed; 213 214 /* Zone message requests are used to communicate between index zones. */ 215 enum uds_zone_message_type { 216 /* A standard request with no message */ 217 UDS_MESSAGE_NONE = 0, 218 /* Add a chapter to the sparse chapter index cache */ 219 UDS_MESSAGE_SPARSE_CACHE_BARRIER, 220 /* Close a chapter to keep the zone from falling behind */ 221 UDS_MESSAGE_ANNOUNCE_CHAPTER_CLOSED, 222 } __packed; 223 224 struct uds_zone_message { 225 /* The type of message, determining how it will be processed */ 226 enum uds_zone_message_type type; 227 /* The virtual chapter number to which the message applies */ 228 u64 virtual_chapter; 229 }; 230 231 struct uds_index_session; 232 struct uds_index; 233 struct uds_request; 234 235 /* Once this callback has been invoked, the uds_request structure can be reused or freed. */ 236 typedef void (*uds_request_callback_fn)(struct uds_request *request); 237 238 struct uds_request { 239 /* These input fields must be set before launching a request. */ 240 241 /* The name of the record to look up or create */ 242 struct uds_record_name record_name; 243 /* New data to associate with the record name, if applicable */ 244 struct uds_record_data new_metadata; 245 /* A callback to invoke when the request is complete */ 246 uds_request_callback_fn callback; 247 /* The index session that will manage this request */ 248 struct uds_index_session *session; 249 /* The type of operation to perform, as describe above */ 250 enum uds_request_type type; 251 252 /* These output fields are set when a request is complete. */ 253 254 /* The existing data associated with the request name, if any */ 255 struct uds_record_data old_metadata; 256 /* Either UDS_SUCCESS or an error code for the request */ 257 int status; 258 /* True if the record name had an existing entry in the index */ 259 bool found; 260 261 /* 262 * The remaining fields are used internally and should not be altered by clients. The index 263 * relies on zone_number being the first field in this section. 264 */ 265 266 /* The number of the zone which will process this request*/ 267 unsigned int zone_number; 268 /* A link for adding a request to a lock-free queue */ 269 struct funnel_queue_entry queue_link; 270 /* A link for adding a request to a standard linked list */ 271 struct uds_request *next_request; 272 /* A pointer to the index processing this request */ 273 struct uds_index *index; 274 /* Control message for coordinating between zones */ 275 struct uds_zone_message zone_message; 276 /* If true, process request immediately by waking the worker thread */ 277 bool unbatched; 278 /* If true, continue this request before processing newer requests */ 279 bool requeued; 280 /* The virtual chapter containing the record name, if known */ 281 u64 virtual_chapter; 282 /* The region of the index containing the record name */ 283 enum uds_index_region location; 284 }; 285 286 /* Compute the number of bytes needed to store an index. */ 287 int __must_check uds_compute_index_size(const struct uds_parameters *parameters, 288 u64 *index_size); 289 290 /* A session is required for most index operations. */ 291 int __must_check uds_create_index_session(struct uds_index_session **session); 292 293 /* Destroying an index session also closes and saves the associated index. */ 294 int uds_destroy_index_session(struct uds_index_session *session); 295 296 /* 297 * Create or open an index with an existing session. This operation fails if the index session is 298 * suspended, or if there is already an open index. 299 */ 300 int __must_check uds_open_index(enum uds_open_index_type open_type, 301 const struct uds_parameters *parameters, 302 struct uds_index_session *session); 303 304 /* 305 * Wait until all callbacks for index operations are complete, and prevent new index operations 306 * from starting. New index operations will fail with EBUSY until the session is resumed. Also 307 * optionally saves the index. 308 */ 309 int __must_check uds_suspend_index_session(struct uds_index_session *session, bool save); 310 311 /* 312 * Allow new index operations for an index, whether it was suspended or not. If the index is 313 * suspended and the supplied block device differs from the current backing store, the index will 314 * start using the new backing store instead. 315 */ 316 int __must_check uds_resume_index_session(struct uds_index_session *session, 317 struct block_device *bdev); 318 319 /* Wait until all outstanding index operations are complete. */ 320 int __must_check uds_flush_index_session(struct uds_index_session *session); 321 322 /* Close an index. This operation fails if the index session is suspended. */ 323 int __must_check uds_close_index(struct uds_index_session *session); 324 325 /* Get index statistics since the last time the index was opened. */ 326 int __must_check uds_get_index_session_stats(struct uds_index_session *session, 327 struct uds_index_stats *stats); 328 329 /* This function will fail if any required field of the request is not set. */ 330 int __must_check uds_launch_request(struct uds_request *request); 331 332 struct cond_var { 333 wait_queue_head_t wait_queue; 334 }; 335 336 static inline void uds_init_cond(struct cond_var *cv) 337 { 338 init_waitqueue_head(&cv->wait_queue); 339 } 340 341 static inline void uds_signal_cond(struct cond_var *cv) 342 { 343 wake_up(&cv->wait_queue); 344 } 345 346 static inline void uds_broadcast_cond(struct cond_var *cv) 347 { 348 wake_up_all(&cv->wait_queue); 349 } 350 351 void uds_wait_cond(struct cond_var *cv, struct mutex *mutex); 352 353 #endif /* INDEXER_H */ 354