1 /* 2 * CDDL HEADER START 3 * 4 * This file and its contents are supplied under the terms of the 5 * Common Development and Distribution License ("CDDL"), version 1.0. 6 * You may only use this file in accordance with the terms of version 7 * 1.0 of the CDDL. 8 * 9 * A full copy of the text of the CDDL should have accompanied this 10 * source. A copy of the CDDL is also available via the Internet at 11 * http://www.illumos.org/license/CDDL. 12 * 13 * CDDL HEADER END 14 */ 15 /* 16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 17 */ 18 19 #include <sys/zfs_context.h> 20 #include <sys/multilist.h> 21 #include <sys/trace_zfs.h> 22 23 /* 24 * This overrides the number of sublists in each multilist_t, which defaults 25 * to the number of CPUs in the system (see multilist_create()). 26 */ 27 uint_t zfs_multilist_num_sublists = 0; 28 29 /* 30 * Given the object contained on the list, return a pointer to the 31 * object's multilist_node_t structure it contains. 32 */ 33 #ifdef ZFS_DEBUG 34 static multilist_node_t * 35 multilist_d2l(multilist_t *ml, void *obj) 36 { 37 return ((multilist_node_t *)((char *)obj + ml->ml_offset)); 38 } 39 #else 40 #define multilist_d2l(ml, obj) ((void) sizeof (ml), (void) sizeof (obj), NULL) 41 #endif 42 43 /* 44 * Initialize a new mutlilist using the parameters specified. 45 * 46 * - 'size' denotes the size of the structure containing the 47 * multilist_node_t. 48 * - 'offset' denotes the byte offset of the mutlilist_node_t within 49 * the structure that contains it. 50 * - 'num' specifies the number of internal sublists to create. 51 * - 'index_func' is used to determine which sublist to insert into 52 * when the multilist_insert() function is called; as well as which 53 * sublist to remove from when multilist_remove() is called. The 54 * requirements this function must meet, are the following: 55 * 56 * - It must always return the same value when called on the same 57 * object (to ensure the object is removed from the list it was 58 * inserted into). 59 * 60 * - It must return a value in the range [0, number of sublists). 61 * The multilist_get_num_sublists() function may be used to 62 * determine the number of sublists in the multilist. 63 * 64 * Also, in order to reduce internal contention between the sublists 65 * during insertion and removal, this function should choose evenly 66 * between all available sublists when inserting. This isn't a hard 67 * requirement, but a general rule of thumb in order to garner the 68 * best multi-threaded performance out of the data structure. 69 */ 70 static void 71 multilist_create_impl(multilist_t *ml, size_t size, size_t offset, 72 uint_t num, multilist_sublist_index_func_t *index_func) 73 { 74 ASSERT3U(size, >, 0); 75 ASSERT3U(size, >=, offset + sizeof (multilist_node_t)); 76 ASSERT3U(num, >, 0); 77 ASSERT3P(index_func, !=, NULL); 78 79 ml->ml_offset = offset; 80 ml->ml_num_sublists = num; 81 ml->ml_index_func = index_func; 82 83 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * 84 ml->ml_num_sublists, KM_SLEEP); 85 86 ASSERT3P(ml->ml_sublists, !=, NULL); 87 88 for (int i = 0; i < ml->ml_num_sublists; i++) { 89 multilist_sublist_t *mls = &ml->ml_sublists[i]; 90 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL); 91 list_create(&mls->mls_list, size, offset); 92 } 93 } 94 95 /* 96 * Allocate a new multilist, using the default number of sublists (the number 97 * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note 98 * that the multilists do not expand if more CPUs are hot-added. In that case, 99 * we will have less fanout than boot_ncpus, but we don't want to always 100 * reserve the RAM necessary to create the extra slots for additional CPUs up 101 * front, and dynamically adding them is a complex task. 102 */ 103 void 104 multilist_create(multilist_t *ml, size_t size, size_t offset, 105 multilist_sublist_index_func_t *index_func) 106 { 107 uint_t num_sublists; 108 109 if (zfs_multilist_num_sublists > 0) { 110 num_sublists = zfs_multilist_num_sublists; 111 } else { 112 num_sublists = MAX(boot_ncpus, 4); 113 } 114 115 multilist_create_impl(ml, size, offset, num_sublists, index_func); 116 } 117 118 /* 119 * Destroy the given multilist object, and free up any memory it holds. 120 */ 121 void 122 multilist_destroy(multilist_t *ml) 123 { 124 ASSERT(multilist_is_empty(ml)); 125 126 for (int i = 0; i < ml->ml_num_sublists; i++) { 127 multilist_sublist_t *mls = &ml->ml_sublists[i]; 128 129 ASSERT(list_is_empty(&mls->mls_list)); 130 131 list_destroy(&mls->mls_list); 132 mutex_destroy(&mls->mls_lock); 133 } 134 135 ASSERT3P(ml->ml_sublists, !=, NULL); 136 kmem_free(ml->ml_sublists, 137 sizeof (multilist_sublist_t) * ml->ml_num_sublists); 138 139 ml->ml_num_sublists = 0; 140 ml->ml_offset = 0; 141 ml->ml_sublists = NULL; 142 } 143 144 /* 145 * Insert the given object into the multilist. 146 * 147 * This function will insert the object specified into the sublist 148 * determined using the function given at multilist creation time. 149 * 150 * The sublist locks are automatically acquired if not already held, to 151 * ensure consistency when inserting and removing from multiple threads. 152 */ 153 void 154 multilist_insert(multilist_t *ml, void *obj) 155 { 156 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 157 multilist_sublist_t *mls; 158 boolean_t need_lock; 159 160 DTRACE_PROBE3(multilist__insert, multilist_t *, ml, 161 unsigned int, sublist_idx, void *, obj); 162 163 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 164 165 mls = &ml->ml_sublists[sublist_idx]; 166 167 /* 168 * Note: Callers may already hold the sublist lock by calling 169 * multilist_sublist_lock(). Here we rely on MUTEX_HELD() 170 * returning TRUE if and only if the current thread holds the 171 * lock. While it's a little ugly to make the lock recursive in 172 * this way, it works and allows the calling code to be much 173 * simpler -- otherwise it would have to pass around a flag 174 * indicating that it already has the lock. 175 */ 176 need_lock = !MUTEX_HELD(&mls->mls_lock); 177 178 if (need_lock) 179 mutex_enter(&mls->mls_lock); 180 181 ASSERT(!multilist_link_active(multilist_d2l(ml, obj))); 182 183 multilist_sublist_insert_head(mls, obj); 184 185 if (need_lock) 186 mutex_exit(&mls->mls_lock); 187 } 188 189 /* 190 * Remove the given object from the multilist. 191 * 192 * This function will remove the object specified from the sublist 193 * determined using the function given at multilist creation time. 194 * 195 * The necessary sublist locks are automatically acquired, to ensure 196 * consistency when inserting and removing from multiple threads. 197 */ 198 void 199 multilist_remove(multilist_t *ml, void *obj) 200 { 201 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 202 multilist_sublist_t *mls; 203 boolean_t need_lock; 204 205 DTRACE_PROBE3(multilist__remove, multilist_t *, ml, 206 unsigned int, sublist_idx, void *, obj); 207 208 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 209 210 mls = &ml->ml_sublists[sublist_idx]; 211 /* See comment in multilist_insert(). */ 212 need_lock = !MUTEX_HELD(&mls->mls_lock); 213 214 if (need_lock) 215 mutex_enter(&mls->mls_lock); 216 217 ASSERT(multilist_link_active(multilist_d2l(ml, obj))); 218 219 multilist_sublist_remove(mls, obj); 220 221 if (need_lock) 222 mutex_exit(&mls->mls_lock); 223 } 224 225 /* 226 * Check to see if this multilist object is empty. 227 * 228 * This will return TRUE if it finds all of the sublists of this 229 * multilist to be empty, and FALSE otherwise. Each sublist lock will be 230 * automatically acquired as necessary. 231 * 232 * If concurrent insertions and removals are occurring, the semantics 233 * of this function become a little fuzzy. Instead of locking all 234 * sublists for the entire call time of the function, each sublist is 235 * only locked as it is individually checked for emptiness. Thus, it's 236 * possible for this function to return TRUE with non-empty sublists at 237 * the time the function returns. This would be due to another thread 238 * inserting into a given sublist, after that specific sublist was check 239 * and deemed empty, but before all sublists have been checked. 240 */ 241 int 242 multilist_is_empty(multilist_t *ml) 243 { 244 for (int i = 0; i < ml->ml_num_sublists; i++) { 245 multilist_sublist_t *mls = &ml->ml_sublists[i]; 246 /* See comment in multilist_insert(). */ 247 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock); 248 249 if (need_lock) 250 mutex_enter(&mls->mls_lock); 251 252 if (!list_is_empty(&mls->mls_list)) { 253 if (need_lock) 254 mutex_exit(&mls->mls_lock); 255 256 return (FALSE); 257 } 258 259 if (need_lock) 260 mutex_exit(&mls->mls_lock); 261 } 262 263 return (TRUE); 264 } 265 266 /* Return the number of sublists composing this multilist */ 267 unsigned int 268 multilist_get_num_sublists(multilist_t *ml) 269 { 270 return (ml->ml_num_sublists); 271 } 272 273 /* Return a randomly selected, valid sublist index for this multilist */ 274 unsigned int 275 multilist_get_random_index(multilist_t *ml) 276 { 277 return (random_in_range(ml->ml_num_sublists)); 278 } 279 280 void 281 multilist_sublist_lock(multilist_sublist_t *mls) 282 { 283 mutex_enter(&mls->mls_lock); 284 } 285 286 /* Lock and return the sublist specified at the given index */ 287 multilist_sublist_t * 288 multilist_sublist_lock_idx(multilist_t *ml, unsigned int sublist_idx) 289 { 290 multilist_sublist_t *mls; 291 292 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 293 mls = &ml->ml_sublists[sublist_idx]; 294 mutex_enter(&mls->mls_lock); 295 296 return (mls); 297 } 298 299 /* Lock and return the sublist that would be used to store the specified obj */ 300 multilist_sublist_t * 301 multilist_sublist_lock_obj(multilist_t *ml, void *obj) 302 { 303 return (multilist_sublist_lock_idx(ml, ml->ml_index_func(ml, obj))); 304 } 305 306 void 307 multilist_sublist_unlock(multilist_sublist_t *mls) 308 { 309 mutex_exit(&mls->mls_lock); 310 } 311 312 /* 313 * We're allowing any object to be inserted into this specific sublist, 314 * but this can lead to trouble if multilist_remove() is called to 315 * remove this object. Specifically, if calling ml_index_func on this 316 * object returns an index for sublist different than what is passed as 317 * a parameter here, any call to multilist_remove() with this newly 318 * inserted object is undefined! (the call to multilist_remove() will 319 * remove the object from a list that it isn't contained in) 320 */ 321 void 322 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj) 323 { 324 ASSERT(MUTEX_HELD(&mls->mls_lock)); 325 list_insert_head(&mls->mls_list, obj); 326 } 327 328 /* please see comment above multilist_sublist_insert_head */ 329 void 330 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj) 331 { 332 ASSERT(MUTEX_HELD(&mls->mls_lock)); 333 list_insert_tail(&mls->mls_list, obj); 334 } 335 336 /* please see comment above multilist_sublist_insert_head */ 337 void 338 multilist_sublist_insert_after(multilist_sublist_t *mls, void *prev, void *obj) 339 { 340 ASSERT(MUTEX_HELD(&mls->mls_lock)); 341 list_insert_after(&mls->mls_list, prev, obj); 342 } 343 344 /* please see comment above multilist_sublist_insert_head */ 345 void 346 multilist_sublist_insert_before(multilist_sublist_t *mls, void *next, void *obj) 347 { 348 ASSERT(MUTEX_HELD(&mls->mls_lock)); 349 list_insert_before(&mls->mls_list, next, obj); 350 } 351 352 /* 353 * Move the object one element forward in the list. 354 * 355 * This function will move the given object forward in the list (towards 356 * the head) by one object. So, in essence, it will swap its position in 357 * the list with its "prev" pointer. If the given object is already at the 358 * head of the list, it cannot be moved forward any more than it already 359 * is, so no action is taken. 360 * 361 * NOTE: This function **must not** remove any object from the list other 362 * than the object given as the parameter. This is relied upon in 363 * arc_evict_state_impl(). 364 */ 365 void 366 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj) 367 { 368 void *prev = list_prev(&mls->mls_list, obj); 369 370 ASSERT(MUTEX_HELD(&mls->mls_lock)); 371 ASSERT(!list_is_empty(&mls->mls_list)); 372 373 /* 'obj' must be at the head of the list, nothing to do */ 374 if (prev == NULL) 375 return; 376 377 list_remove(&mls->mls_list, obj); 378 list_insert_before(&mls->mls_list, prev, obj); 379 } 380 381 void 382 multilist_sublist_remove(multilist_sublist_t *mls, void *obj) 383 { 384 ASSERT(MUTEX_HELD(&mls->mls_lock)); 385 list_remove(&mls->mls_list, obj); 386 } 387 388 int 389 multilist_sublist_is_empty(multilist_sublist_t *mls) 390 { 391 ASSERT(MUTEX_HELD(&mls->mls_lock)); 392 return (list_is_empty(&mls->mls_list)); 393 } 394 395 int 396 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) 397 { 398 multilist_sublist_t *mls; 399 int empty; 400 401 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 402 mls = &ml->ml_sublists[sublist_idx]; 403 ASSERT(!MUTEX_HELD(&mls->mls_lock)); 404 mutex_enter(&mls->mls_lock); 405 empty = list_is_empty(&mls->mls_list); 406 mutex_exit(&mls->mls_lock); 407 return (empty); 408 } 409 410 void * 411 multilist_sublist_head(multilist_sublist_t *mls) 412 { 413 ASSERT(MUTEX_HELD(&mls->mls_lock)); 414 return (list_head(&mls->mls_list)); 415 } 416 417 void * 418 multilist_sublist_tail(multilist_sublist_t *mls) 419 { 420 ASSERT(MUTEX_HELD(&mls->mls_lock)); 421 return (list_tail(&mls->mls_list)); 422 } 423 424 void * 425 multilist_sublist_next(multilist_sublist_t *mls, void *obj) 426 { 427 ASSERT(MUTEX_HELD(&mls->mls_lock)); 428 return (list_next(&mls->mls_list, obj)); 429 } 430 431 void * 432 multilist_sublist_prev(multilist_sublist_t *mls, void *obj) 433 { 434 ASSERT(MUTEX_HELD(&mls->mls_lock)); 435 return (list_prev(&mls->mls_list, obj)); 436 } 437 438 void 439 multilist_link_init(multilist_node_t *link) 440 { 441 list_link_init(link); 442 } 443 444 int 445 multilist_link_active(multilist_node_t *link) 446 { 447 return (list_link_active(link)); 448 } 449 450 ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, UINT, ZMOD_RW, 451 "Number of sublists used in each multilist"); 452