1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * This file and its contents are supplied under the terms of the 6 * Common Development and Distribution License ("CDDL"), version 1.0. 7 * You may only use this file in accordance with the terms of version 8 * 1.0 of the CDDL. 9 * 10 * A full copy of the text of the CDDL should have accompanied this 11 * source. A copy of the CDDL is also available via the Internet at 12 * http://www.illumos.org/license/CDDL. 13 * 14 * CDDL HEADER END 15 */ 16 /* 17 * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 18 */ 19 20 #include <sys/zfs_context.h> 21 #include <sys/multilist.h> 22 #include <sys/trace_zfs.h> 23 24 /* 25 * This overrides the number of sublists in each multilist_t, which defaults 26 * to the number of CPUs in the system (see multilist_create()). 27 */ 28 uint_t zfs_multilist_num_sublists = 0; 29 30 /* 31 * Given the object contained on the list, return a pointer to the 32 * object's multilist_node_t structure it contains. 33 */ 34 #ifdef ZFS_DEBUG 35 static multilist_node_t * 36 multilist_d2l(multilist_t *ml, void *obj) 37 { 38 return ((multilist_node_t *)((char *)obj + ml->ml_offset)); 39 } 40 #else 41 #define multilist_d2l(ml, obj) ((void) sizeof (ml), (void) sizeof (obj), NULL) 42 #endif 43 44 /* 45 * Initialize a new mutlilist using the parameters specified. 46 * 47 * - 'size' denotes the size of the structure containing the 48 * multilist_node_t. 49 * - 'offset' denotes the byte offset of the mutlilist_node_t within 50 * the structure that contains it. 51 * - 'num' specifies the number of internal sublists to create. 52 * - 'index_func' is used to determine which sublist to insert into 53 * when the multilist_insert() function is called; as well as which 54 * sublist to remove from when multilist_remove() is called. The 55 * requirements this function must meet, are the following: 56 * 57 * - It must always return the same value when called on the same 58 * object (to ensure the object is removed from the list it was 59 * inserted into). 60 * 61 * - It must return a value in the range [0, number of sublists). 62 * The multilist_get_num_sublists() function may be used to 63 * determine the number of sublists in the multilist. 64 * 65 * Also, in order to reduce internal contention between the sublists 66 * during insertion and removal, this function should choose evenly 67 * between all available sublists when inserting. This isn't a hard 68 * requirement, but a general rule of thumb in order to garner the 69 * best multi-threaded performance out of the data structure. 70 */ 71 static void 72 multilist_create_impl(multilist_t *ml, size_t size, size_t offset, 73 uint_t num, multilist_sublist_index_func_t *index_func) 74 { 75 ASSERT3U(size, >, 0); 76 ASSERT3U(size, >=, offset + sizeof (multilist_node_t)); 77 ASSERT3U(num, >, 0); 78 ASSERT3P(index_func, !=, NULL); 79 80 ml->ml_offset = offset; 81 ml->ml_num_sublists = num; 82 ml->ml_index_func = index_func; 83 84 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * 85 ml->ml_num_sublists, KM_SLEEP); 86 87 ASSERT3P(ml->ml_sublists, !=, NULL); 88 89 for (int i = 0; i < ml->ml_num_sublists; i++) { 90 multilist_sublist_t *mls = &ml->ml_sublists[i]; 91 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL); 92 list_create(&mls->mls_list, size, offset); 93 } 94 } 95 96 /* 97 * Allocate a new multilist, using the default number of sublists (the number 98 * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note 99 * that the multilists do not expand if more CPUs are hot-added. In that case, 100 * we will have less fanout than boot_ncpus, but we don't want to always 101 * reserve the RAM necessary to create the extra slots for additional CPUs up 102 * front, and dynamically adding them is a complex task. 103 */ 104 void 105 multilist_create(multilist_t *ml, size_t size, size_t offset, 106 multilist_sublist_index_func_t *index_func) 107 { 108 uint_t num_sublists; 109 110 if (zfs_multilist_num_sublists > 0) { 111 num_sublists = zfs_multilist_num_sublists; 112 } else { 113 num_sublists = MAX(boot_ncpus, 4); 114 } 115 116 multilist_create_impl(ml, size, offset, num_sublists, index_func); 117 } 118 119 /* 120 * Destroy the given multilist object, and free up any memory it holds. 121 */ 122 void 123 multilist_destroy(multilist_t *ml) 124 { 125 ASSERT(multilist_is_empty(ml)); 126 127 for (int i = 0; i < ml->ml_num_sublists; i++) { 128 multilist_sublist_t *mls = &ml->ml_sublists[i]; 129 130 ASSERT(list_is_empty(&mls->mls_list)); 131 132 list_destroy(&mls->mls_list); 133 mutex_destroy(&mls->mls_lock); 134 } 135 136 ASSERT3P(ml->ml_sublists, !=, NULL); 137 kmem_free(ml->ml_sublists, 138 sizeof (multilist_sublist_t) * ml->ml_num_sublists); 139 140 ml->ml_num_sublists = 0; 141 ml->ml_offset = 0; 142 ml->ml_sublists = NULL; 143 } 144 145 /* 146 * Insert the given object into the multilist. 147 * 148 * This function will insert the object specified into the sublist 149 * determined using the function given at multilist creation time. 150 * 151 * The sublist locks are automatically acquired if not already held, to 152 * ensure consistency when inserting and removing from multiple threads. 153 */ 154 void 155 multilist_insert(multilist_t *ml, void *obj) 156 { 157 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 158 multilist_sublist_t *mls; 159 boolean_t need_lock; 160 161 DTRACE_PROBE3(multilist__insert, multilist_t *, ml, 162 unsigned int, sublist_idx, void *, obj); 163 164 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 165 166 mls = &ml->ml_sublists[sublist_idx]; 167 168 /* 169 * Note: Callers may already hold the sublist lock by calling 170 * multilist_sublist_lock(). Here we rely on MUTEX_HELD() 171 * returning TRUE if and only if the current thread holds the 172 * lock. While it's a little ugly to make the lock recursive in 173 * this way, it works and allows the calling code to be much 174 * simpler -- otherwise it would have to pass around a flag 175 * indicating that it already has the lock. 176 */ 177 need_lock = !MUTEX_HELD(&mls->mls_lock); 178 179 if (need_lock) 180 mutex_enter(&mls->mls_lock); 181 182 ASSERT(!multilist_link_active(multilist_d2l(ml, obj))); 183 184 multilist_sublist_insert_head(mls, obj); 185 186 if (need_lock) 187 mutex_exit(&mls->mls_lock); 188 } 189 190 /* 191 * Remove the given object from the multilist. 192 * 193 * This function will remove the object specified from the sublist 194 * determined using the function given at multilist creation time. 195 * 196 * The necessary sublist locks are automatically acquired, to ensure 197 * consistency when inserting and removing from multiple threads. 198 */ 199 void 200 multilist_remove(multilist_t *ml, void *obj) 201 { 202 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 203 multilist_sublist_t *mls; 204 boolean_t need_lock; 205 206 DTRACE_PROBE3(multilist__remove, multilist_t *, ml, 207 unsigned int, sublist_idx, void *, obj); 208 209 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 210 211 mls = &ml->ml_sublists[sublist_idx]; 212 /* See comment in multilist_insert(). */ 213 need_lock = !MUTEX_HELD(&mls->mls_lock); 214 215 if (need_lock) 216 mutex_enter(&mls->mls_lock); 217 218 ASSERT(multilist_link_active(multilist_d2l(ml, obj))); 219 220 multilist_sublist_remove(mls, obj); 221 222 if (need_lock) 223 mutex_exit(&mls->mls_lock); 224 } 225 226 /* 227 * Check to see if this multilist object is empty. 228 * 229 * This will return TRUE if it finds all of the sublists of this 230 * multilist to be empty, and FALSE otherwise. Each sublist lock will be 231 * automatically acquired as necessary. 232 * 233 * If concurrent insertions and removals are occurring, the semantics 234 * of this function become a little fuzzy. Instead of locking all 235 * sublists for the entire call time of the function, each sublist is 236 * only locked as it is individually checked for emptiness. Thus, it's 237 * possible for this function to return TRUE with non-empty sublists at 238 * the time the function returns. This would be due to another thread 239 * inserting into a given sublist, after that specific sublist was check 240 * and deemed empty, but before all sublists have been checked. 241 */ 242 int 243 multilist_is_empty(multilist_t *ml) 244 { 245 for (int i = 0; i < ml->ml_num_sublists; i++) { 246 multilist_sublist_t *mls = &ml->ml_sublists[i]; 247 /* See comment in multilist_insert(). */ 248 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock); 249 250 if (need_lock) 251 mutex_enter(&mls->mls_lock); 252 253 if (!list_is_empty(&mls->mls_list)) { 254 if (need_lock) 255 mutex_exit(&mls->mls_lock); 256 257 return (FALSE); 258 } 259 260 if (need_lock) 261 mutex_exit(&mls->mls_lock); 262 } 263 264 return (TRUE); 265 } 266 267 /* Return the number of sublists composing this multilist */ 268 unsigned int 269 multilist_get_num_sublists(multilist_t *ml) 270 { 271 return (ml->ml_num_sublists); 272 } 273 274 /* Return a randomly selected, valid sublist index for this multilist */ 275 unsigned int 276 multilist_get_random_index(multilist_t *ml) 277 { 278 return (random_in_range(ml->ml_num_sublists)); 279 } 280 281 void 282 multilist_sublist_lock(multilist_sublist_t *mls) 283 { 284 mutex_enter(&mls->mls_lock); 285 } 286 287 /* Lock and return the sublist specified at the given index */ 288 multilist_sublist_t * 289 multilist_sublist_lock_idx(multilist_t *ml, unsigned int sublist_idx) 290 { 291 multilist_sublist_t *mls; 292 293 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 294 mls = &ml->ml_sublists[sublist_idx]; 295 mutex_enter(&mls->mls_lock); 296 297 return (mls); 298 } 299 300 /* Lock and return the sublist that would be used to store the specified obj */ 301 multilist_sublist_t * 302 multilist_sublist_lock_obj(multilist_t *ml, void *obj) 303 { 304 return (multilist_sublist_lock_idx(ml, ml->ml_index_func(ml, obj))); 305 } 306 307 void 308 multilist_sublist_unlock(multilist_sublist_t *mls) 309 { 310 mutex_exit(&mls->mls_lock); 311 } 312 313 /* 314 * We're allowing any object to be inserted into this specific sublist, 315 * but this can lead to trouble if multilist_remove() is called to 316 * remove this object. Specifically, if calling ml_index_func on this 317 * object returns an index for sublist different than what is passed as 318 * a parameter here, any call to multilist_remove() with this newly 319 * inserted object is undefined! (the call to multilist_remove() will 320 * remove the object from a list that it isn't contained in) 321 */ 322 void 323 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj) 324 { 325 ASSERT(MUTEX_HELD(&mls->mls_lock)); 326 list_insert_head(&mls->mls_list, obj); 327 } 328 329 /* please see comment above multilist_sublist_insert_head */ 330 void 331 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj) 332 { 333 ASSERT(MUTEX_HELD(&mls->mls_lock)); 334 list_insert_tail(&mls->mls_list, obj); 335 } 336 337 /* please see comment above multilist_sublist_insert_head */ 338 void 339 multilist_sublist_insert_after(multilist_sublist_t *mls, void *prev, void *obj) 340 { 341 ASSERT(MUTEX_HELD(&mls->mls_lock)); 342 list_insert_after(&mls->mls_list, prev, obj); 343 } 344 345 /* please see comment above multilist_sublist_insert_head */ 346 void 347 multilist_sublist_insert_before(multilist_sublist_t *mls, void *next, void *obj) 348 { 349 ASSERT(MUTEX_HELD(&mls->mls_lock)); 350 list_insert_before(&mls->mls_list, next, obj); 351 } 352 353 /* 354 * Move the object one element forward in the list. 355 * 356 * This function will move the given object forward in the list (towards 357 * the head) by one object. So, in essence, it will swap its position in 358 * the list with its "prev" pointer. If the given object is already at the 359 * head of the list, it cannot be moved forward any more than it already 360 * is, so no action is taken. 361 * 362 * NOTE: This function **must not** remove any object from the list other 363 * than the object given as the parameter. This is relied upon in 364 * arc_evict_state_impl(). 365 */ 366 void 367 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj) 368 { 369 void *prev = list_prev(&mls->mls_list, obj); 370 371 ASSERT(MUTEX_HELD(&mls->mls_lock)); 372 ASSERT(!list_is_empty(&mls->mls_list)); 373 374 /* 'obj' must be at the head of the list, nothing to do */ 375 if (prev == NULL) 376 return; 377 378 list_remove(&mls->mls_list, obj); 379 list_insert_before(&mls->mls_list, prev, obj); 380 } 381 382 void 383 multilist_sublist_remove(multilist_sublist_t *mls, void *obj) 384 { 385 ASSERT(MUTEX_HELD(&mls->mls_lock)); 386 list_remove(&mls->mls_list, obj); 387 } 388 389 int 390 multilist_sublist_is_empty(multilist_sublist_t *mls) 391 { 392 ASSERT(MUTEX_HELD(&mls->mls_lock)); 393 return (list_is_empty(&mls->mls_list)); 394 } 395 396 int 397 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) 398 { 399 multilist_sublist_t *mls; 400 int empty; 401 402 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 403 mls = &ml->ml_sublists[sublist_idx]; 404 ASSERT(!MUTEX_HELD(&mls->mls_lock)); 405 mutex_enter(&mls->mls_lock); 406 empty = list_is_empty(&mls->mls_list); 407 mutex_exit(&mls->mls_lock); 408 return (empty); 409 } 410 411 void * 412 multilist_sublist_head(multilist_sublist_t *mls) 413 { 414 ASSERT(MUTEX_HELD(&mls->mls_lock)); 415 return (list_head(&mls->mls_list)); 416 } 417 418 void * 419 multilist_sublist_tail(multilist_sublist_t *mls) 420 { 421 ASSERT(MUTEX_HELD(&mls->mls_lock)); 422 return (list_tail(&mls->mls_list)); 423 } 424 425 void * 426 multilist_sublist_next(multilist_sublist_t *mls, void *obj) 427 { 428 ASSERT(MUTEX_HELD(&mls->mls_lock)); 429 return (list_next(&mls->mls_list, obj)); 430 } 431 432 void * 433 multilist_sublist_prev(multilist_sublist_t *mls, void *obj) 434 { 435 ASSERT(MUTEX_HELD(&mls->mls_lock)); 436 return (list_prev(&mls->mls_list, obj)); 437 } 438 439 void 440 multilist_link_init(multilist_node_t *link) 441 { 442 list_link_init(link); 443 } 444 445 int 446 multilist_link_active(multilist_node_t *link) 447 { 448 return (list_link_active(link)); 449 } 450 451 ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, UINT, ZMOD_RW, 452 "Number of sublists used in each multilist"); 453