1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * Copyright 2023 Red Hat 4 */ 5 6 #ifndef INDEXER_H 7 #define INDEXER_H 8 9 #include <linux/mutex.h> 10 #include <linux/sched.h> 11 #include <linux/stddef.h> 12 #include <linux/types.h> 13 #include <linux/wait.h> 14 15 #include "funnel-queue.h" 16 17 /* 18 * UDS public API 19 * 20 * The Universal Deduplication System (UDS) is an efficient name-value store. When used for 21 * deduplicating storage, the names are generally hashes of data blocks and the associated data is 22 * where that block is located on the underlying storage medium. The stored names are expected to 23 * be randomly distributed among the space of possible names. If this assumption is violated, the 24 * UDS index will store fewer names than normal but will otherwise continue to work. The data 25 * associated with each name can be any 16-byte value. 26 * 27 * A client must first create an index session to interact with an index. Once created, the session 28 * can be shared among multiple threads or users. When a session is destroyed, it will also close 29 * and save any associated index. 30 * 31 * To make a request, a client must allocate a uds_request structure and set the required fields 32 * before launching it. UDS will invoke the provided callback to complete the request. After the 33 * callback has been called, the uds_request structure can be freed or reused for a new request. 34 * There are five types of requests: 35 * 36 * A UDS_UPDATE request will associate the provided name with the provided data. Any previous data 37 * associated with that name will be discarded. 38 * 39 * A UDS_QUERY request will return the data associated with the provided name, if any. The entry 40 * for the name will also be marked as most recent, as if the data had been updated. 41 * 42 * A UDS_POST request is a combination of UDS_QUERY and UDS_UPDATE. If there is already data 43 * associated with the provided name, that data is returned. If there is no existing association, 44 * the name is associated with the newly provided data. This request is equivalent to a UDS_QUERY 45 * request followed by a UDS_UPDATE request if no data is found, but it is much more efficient. 46 * 47 * A UDS_QUERY_NO_UPDATE request will return the data associated with the provided name, but will 48 * not change the recency of the entry for the name. This request is primarily useful for testing, 49 * to determine whether an entry exists without changing the internal state of the index. 50 * 51 * A UDS_DELETE request removes any data associated with the provided name. This operation is 52 * generally not necessary, because the index will automatically discard its oldest entries once it 53 * becomes full. 54 */ 55 56 /* General UDS constants and structures */ 57 58 enum uds_request_type { 59 /* Create or update the mapping for a name, and make the name most recent. */ 60 UDS_UPDATE, 61 62 /* Return any mapped data for a name, and make the name most recent. */ 63 UDS_QUERY, 64 65 /* 66 * Return any mapped data for a name, or map the provided data to the name if there is no 67 * current data, and make the name most recent. 68 */ 69 UDS_POST, 70 71 /* Return any mapped data for a name without updating its recency. */ 72 UDS_QUERY_NO_UPDATE, 73 74 /* Remove any mapping for a name. */ 75 UDS_DELETE, 76 77 } __packed; 78 79 enum uds_open_index_type { 80 /* Create a new index. */ 81 UDS_CREATE, 82 83 /* Load an existing index and try to recover if necessary. */ 84 UDS_LOAD, 85 86 /* Load an existing index, but only if it was saved cleanly. */ 87 UDS_NO_REBUILD, 88 }; 89 90 enum { 91 /* The record name size in bytes */ 92 UDS_RECORD_NAME_SIZE = 16, 93 /* The maximum record data size in bytes */ 94 UDS_RECORD_DATA_SIZE = 16, 95 }; 96 97 /* 98 * A type representing a UDS memory configuration which is either a positive integer number of 99 * gigabytes or one of the six special constants for configurations smaller than one gigabyte. 100 */ 101 typedef int uds_memory_config_size_t; 102 103 enum { 104 /* The maximum configurable amount of memory */ 105 UDS_MEMORY_CONFIG_MAX = 1024, 106 /* Flag indicating that the index has one less chapter than usual */ 107 UDS_MEMORY_CONFIG_REDUCED = 0x1000, 108 UDS_MEMORY_CONFIG_REDUCED_MAX = 1024 + UDS_MEMORY_CONFIG_REDUCED, 109 /* Special values indicating sizes less than 1 GB */ 110 UDS_MEMORY_CONFIG_256MB = -256, 111 UDS_MEMORY_CONFIG_512MB = -512, 112 UDS_MEMORY_CONFIG_768MB = -768, 113 UDS_MEMORY_CONFIG_REDUCED_256MB = -1280, 114 UDS_MEMORY_CONFIG_REDUCED_512MB = -1536, 115 UDS_MEMORY_CONFIG_REDUCED_768MB = -1792, 116 }; 117 118 struct uds_record_name { 119 unsigned char name[UDS_RECORD_NAME_SIZE]; 120 }; 121 122 struct uds_record_data { 123 unsigned char data[UDS_RECORD_DATA_SIZE]; 124 }; 125 126 struct uds_volume_record { 127 struct uds_record_name name; 128 struct uds_record_data data; 129 }; 130 131 struct uds_parameters { 132 /* The block_device used for storage */ 133 struct block_device *bdev; 134 /* The maximum allowable size of the index on storage */ 135 size_t size; 136 /* The offset where the index should start */ 137 off_t offset; 138 /* The maximum memory allocation, in GB */ 139 uds_memory_config_size_t memory_size; 140 /* Whether the index should include sparse chapters */ 141 bool sparse; 142 /* A 64-bit nonce to validate the index */ 143 u64 nonce; 144 /* The number of threads used to process index requests */ 145 unsigned int zone_count; 146 /* The number of threads used to read volume pages */ 147 unsigned int read_threads; 148 }; 149 150 /* 151 * These statistics capture characteristics of the current index, including resource usage and 152 * requests processed since the index was opened. 153 */ 154 struct uds_index_stats { 155 /* The total number of records stored in the index */ 156 u64 entries_indexed; 157 /* An estimate of the index's memory usage, in bytes */ 158 u64 memory_used; 159 /* The number of collisions recorded in the volume index */ 160 u64 collisions; 161 /* The number of entries discarded from the index since startup */ 162 u64 entries_discarded; 163 /* The time at which these statistics were fetched */ 164 s64 current_time; 165 /* The number of post calls that found an existing entry */ 166 u64 posts_found; 167 /* The number of post calls that added an entry */ 168 u64 posts_not_found; 169 /* 170 * The number of post calls that found an existing entry that is current enough to only 171 * exist in memory and not have been committed to disk yet 172 */ 173 u64 in_memory_posts_found; 174 /* 175 * The number of post calls that found an existing entry in the dense portion of the index 176 */ 177 u64 dense_posts_found; 178 /* 179 * The number of post calls that found an existing entry in the sparse portion of the index 180 */ 181 u64 sparse_posts_found; 182 /* The number of update calls that updated an existing entry */ 183 u64 updates_found; 184 /* The number of update calls that added a new entry */ 185 u64 updates_not_found; 186 /* The number of delete requests that deleted an existing entry */ 187 u64 deletions_found; 188 /* The number of delete requests that did nothing */ 189 u64 deletions_not_found; 190 /* The number of query calls that found existing entry */ 191 u64 queries_found; 192 /* The number of query calls that did not find an entry */ 193 u64 queries_not_found; 194 /* The total number of requests processed */ 195 u64 requests; 196 }; 197 198 enum uds_index_region { 199 /* No location information has been determined */ 200 UDS_LOCATION_UNKNOWN = 0, 201 /* The index page entry has been found */ 202 UDS_LOCATION_INDEX_PAGE_LOOKUP, 203 /* The record page entry has been found */ 204 UDS_LOCATION_RECORD_PAGE_LOOKUP, 205 /* The record is not in the index */ 206 UDS_LOCATION_UNAVAILABLE, 207 /* The record was found in the open chapter */ 208 UDS_LOCATION_IN_OPEN_CHAPTER, 209 /* The record was found in the dense part of the index */ 210 UDS_LOCATION_IN_DENSE, 211 /* The record was found in the sparse part of the index */ 212 UDS_LOCATION_IN_SPARSE, 213 } __packed; 214 215 /* Zone message requests are used to communicate between index zones. */ 216 enum uds_zone_message_type { 217 /* A standard request with no message */ 218 UDS_MESSAGE_NONE = 0, 219 /* Add a chapter to the sparse chapter index cache */ 220 UDS_MESSAGE_SPARSE_CACHE_BARRIER, 221 /* Close a chapter to keep the zone from falling behind */ 222 UDS_MESSAGE_ANNOUNCE_CHAPTER_CLOSED, 223 } __packed; 224 225 struct uds_zone_message { 226 /* The type of message, determining how it will be processed */ 227 enum uds_zone_message_type type; 228 /* The virtual chapter number to which the message applies */ 229 u64 virtual_chapter; 230 } __packed; 231 232 struct uds_index_session; 233 struct uds_index; 234 struct uds_request; 235 236 /* Once this callback has been invoked, the uds_request structure can be reused or freed. */ 237 typedef void (*uds_request_callback_fn)(struct uds_request *request); 238 239 struct uds_request { 240 /* These input fields must be set before launching a request. */ 241 242 /* The name of the record to look up or create */ 243 struct uds_record_name record_name; 244 /* New data to associate with the record name, if applicable */ 245 struct uds_record_data new_metadata; 246 /* A callback to invoke when the request is complete */ 247 uds_request_callback_fn callback; 248 /* The index session that will manage this request */ 249 struct uds_index_session *session; 250 /* The type of operation to perform, as describe above */ 251 enum uds_request_type type; 252 253 /* These output fields are set when a request is complete. */ 254 255 /* The existing data associated with the request name, if any */ 256 struct uds_record_data old_metadata; 257 /* True if the record name had an existing entry in the index */ 258 bool found; 259 /* Either UDS_SUCCESS or an error code for the request */ 260 int status; 261 262 /* The remaining fields are used internally and should not be altered by clients. */ 263 struct_group(internal, 264 /* The virtual chapter containing the record name, if known */ 265 u64 virtual_chapter; 266 /* The region of the index containing the record name */ 267 enum uds_index_region location; 268 /* If true, process request immediately by waking the worker thread */ 269 bool unbatched; 270 /* If true, continue this request before processing newer requests */ 271 bool requeued; 272 /* Control message for coordinating between zones */ 273 struct uds_zone_message zone_message; 274 /* The number of the zone which will process this request*/ 275 unsigned int zone_number; 276 /* A link for adding a request to a lock-free queue */ 277 struct funnel_queue_entry queue_link; 278 /* A link for adding a request to a standard linked list */ 279 struct uds_request *next_request; 280 /* A pointer to the index processing this request */ 281 struct uds_index *index; 282 ); 283 }; 284 285 /* A session is required for most index operations. */ 286 int __must_check uds_create_index_session(struct uds_index_session **session); 287 288 /* Destroying an index session also closes and saves the associated index. */ 289 int uds_destroy_index_session(struct uds_index_session *session); 290 291 /* 292 * Create or open an index with an existing session. This operation fails if the index session is 293 * suspended, or if there is already an open index. 294 */ 295 int __must_check uds_open_index(enum uds_open_index_type open_type, 296 const struct uds_parameters *parameters, 297 struct uds_index_session *session); 298 299 /* 300 * Wait until all callbacks for index operations are complete, and prevent new index operations 301 * from starting. New index operations will fail with EBUSY until the session is resumed. Also 302 * optionally saves the index. 303 */ 304 int __must_check uds_suspend_index_session(struct uds_index_session *session, bool save); 305 306 /* 307 * Allow new index operations for an index, whether it was suspended or not. If the index is 308 * suspended and the supplied block device differs from the current backing store, the index will 309 * start using the new backing store instead. 310 */ 311 int __must_check uds_resume_index_session(struct uds_index_session *session, 312 struct block_device *bdev); 313 314 /* Wait until all outstanding index operations are complete. */ 315 int __must_check uds_flush_index_session(struct uds_index_session *session); 316 317 /* Close an index. This operation fails if the index session is suspended. */ 318 int __must_check uds_close_index(struct uds_index_session *session); 319 320 /* Get index statistics since the last time the index was opened. */ 321 int __must_check uds_get_index_session_stats(struct uds_index_session *session, 322 struct uds_index_stats *stats); 323 324 /* This function will fail if any required field of the request is not set. */ 325 int __must_check uds_launch_request(struct uds_request *request); 326 327 struct cond_var { 328 wait_queue_head_t wait_queue; 329 }; 330 331 static inline void uds_init_cond(struct cond_var *cv) 332 { 333 init_waitqueue_head(&cv->wait_queue); 334 } 335 336 static inline void uds_signal_cond(struct cond_var *cv) 337 { 338 wake_up(&cv->wait_queue); 339 } 340 341 static inline void uds_broadcast_cond(struct cond_var *cv) 342 { 343 wake_up_all(&cv->wait_queue); 344 } 345 346 void uds_wait_cond(struct cond_var *cv, struct mutex *mutex); 347 348 #endif /* INDEXER_H */ 349