1 /* 2 * CDDL HEADER START 3 * 4 * This file and its contents are supplied under the terms of the 5 * Common Development and Distribution License ("CDDL"), version 1.0. 6 * You may only use this file in accordance with the terms of version 7 * 1.0 of the CDDL. 8 * 9 * A full copy of the text of the CDDL should have accompanied this 10 * source. A copy of the CDDL is also available via the Internet at 11 * http://www.illumos.org/license/CDDL. 12 * 13 * CDDL HEADER END 14 */ 15 /* 16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 17 */ 18 19 #include <sys/zfs_context.h> 20 #include <sys/multilist.h> 21 #include <sys/trace_zfs.h> 22 23 /* needed for spa_get_random() */ 24 #include <sys/spa.h> 25 26 /* 27 * This overrides the number of sublists in each multilist_t, which defaults 28 * to the number of CPUs in the system (see multilist_create()). 29 */ 30 int zfs_multilist_num_sublists = 0; 31 32 /* 33 * Given the object contained on the list, return a pointer to the 34 * object's multilist_node_t structure it contains. 35 */ 36 #ifdef ZFS_DEBUG 37 static multilist_node_t * 38 multilist_d2l(multilist_t *ml, void *obj) 39 { 40 return ((multilist_node_t *)((char *)obj + ml->ml_offset)); 41 } 42 #endif 43 44 /* 45 * Initialize a new mutlilist using the parameters specified. 46 * 47 * - 'size' denotes the size of the structure containing the 48 * multilist_node_t. 49 * - 'offset' denotes the byte offset of the mutlilist_node_t within 50 * the structure that contains it. 51 * - 'num' specifies the number of internal sublists to create. 52 * - 'index_func' is used to determine which sublist to insert into 53 * when the multilist_insert() function is called; as well as which 54 * sublist to remove from when multilist_remove() is called. The 55 * requirements this function must meet, are the following: 56 * 57 * - It must always return the same value when called on the same 58 * object (to ensure the object is removed from the list it was 59 * inserted into). 60 * 61 * - It must return a value in the range [0, number of sublists). 62 * The multilist_get_num_sublists() function may be used to 63 * determine the number of sublists in the multilist. 64 * 65 * Also, in order to reduce internal contention between the sublists 66 * during insertion and removal, this function should choose evenly 67 * between all available sublists when inserting. This isn't a hard 68 * requirement, but a general rule of thumb in order to garner the 69 * best multi-threaded performance out of the data structure. 70 */ 71 static multilist_t * 72 multilist_create_impl(size_t size, size_t offset, 73 unsigned int num, multilist_sublist_index_func_t *index_func) 74 { 75 ASSERT3U(size, >, 0); 76 ASSERT3U(size, >=, offset + sizeof (multilist_node_t)); 77 ASSERT3U(num, >, 0); 78 ASSERT3P(index_func, !=, NULL); 79 80 multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP); 81 ml->ml_offset = offset; 82 ml->ml_num_sublists = num; 83 ml->ml_index_func = index_func; 84 85 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * 86 ml->ml_num_sublists, KM_SLEEP); 87 88 ASSERT3P(ml->ml_sublists, !=, NULL); 89 90 for (int i = 0; i < ml->ml_num_sublists; i++) { 91 multilist_sublist_t *mls = &ml->ml_sublists[i]; 92 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL); 93 list_create(&mls->mls_list, size, offset); 94 } 95 return (ml); 96 } 97 98 /* 99 * Allocate a new multilist, using the default number of sublists (the number 100 * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note 101 * that the multilists do not expand if more CPUs are hot-added. In that case, 102 * we will have less fanout than boot_ncpus, but we don't want to always 103 * reserve the RAM necessary to create the extra slots for additional CPUs up 104 * front, and dynamically adding them is a complex task. 105 */ 106 multilist_t * 107 multilist_create(size_t size, size_t offset, 108 multilist_sublist_index_func_t *index_func) 109 { 110 int num_sublists; 111 112 if (zfs_multilist_num_sublists > 0) { 113 num_sublists = zfs_multilist_num_sublists; 114 } else { 115 num_sublists = MAX(boot_ncpus, 4); 116 } 117 118 return (multilist_create_impl(size, offset, num_sublists, index_func)); 119 } 120 121 /* 122 * Destroy the given multilist object, and free up any memory it holds. 123 */ 124 void 125 multilist_destroy(multilist_t *ml) 126 { 127 ASSERT(multilist_is_empty(ml)); 128 129 for (int i = 0; i < ml->ml_num_sublists; i++) { 130 multilist_sublist_t *mls = &ml->ml_sublists[i]; 131 132 ASSERT(list_is_empty(&mls->mls_list)); 133 134 list_destroy(&mls->mls_list); 135 mutex_destroy(&mls->mls_lock); 136 } 137 138 ASSERT3P(ml->ml_sublists, !=, NULL); 139 kmem_free(ml->ml_sublists, 140 sizeof (multilist_sublist_t) * ml->ml_num_sublists); 141 142 ml->ml_num_sublists = 0; 143 ml->ml_offset = 0; 144 kmem_free(ml, sizeof (multilist_t)); 145 } 146 147 /* 148 * Insert the given object into the multilist. 149 * 150 * This function will insert the object specified into the sublist 151 * determined using the function given at multilist creation time. 152 * 153 * The sublist locks are automatically acquired if not already held, to 154 * ensure consistency when inserting and removing from multiple threads. 155 */ 156 void 157 multilist_insert(multilist_t *ml, void *obj) 158 { 159 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 160 multilist_sublist_t *mls; 161 boolean_t need_lock; 162 163 DTRACE_PROBE3(multilist__insert, multilist_t *, ml, 164 unsigned int, sublist_idx, void *, obj); 165 166 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 167 168 mls = &ml->ml_sublists[sublist_idx]; 169 170 /* 171 * Note: Callers may already hold the sublist lock by calling 172 * multilist_sublist_lock(). Here we rely on MUTEX_HELD() 173 * returning TRUE if and only if the current thread holds the 174 * lock. While it's a little ugly to make the lock recursive in 175 * this way, it works and allows the calling code to be much 176 * simpler -- otherwise it would have to pass around a flag 177 * indicating that it already has the lock. 178 */ 179 need_lock = !MUTEX_HELD(&mls->mls_lock); 180 181 if (need_lock) 182 mutex_enter(&mls->mls_lock); 183 184 ASSERT(!multilist_link_active(multilist_d2l(ml, obj))); 185 186 multilist_sublist_insert_head(mls, obj); 187 188 if (need_lock) 189 mutex_exit(&mls->mls_lock); 190 } 191 192 /* 193 * Remove the given object from the multilist. 194 * 195 * This function will remove the object specified from the sublist 196 * determined using the function given at multilist creation time. 197 * 198 * The necessary sublist locks are automatically acquired, to ensure 199 * consistency when inserting and removing from multiple threads. 200 */ 201 void 202 multilist_remove(multilist_t *ml, void *obj) 203 { 204 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 205 multilist_sublist_t *mls; 206 boolean_t need_lock; 207 208 DTRACE_PROBE3(multilist__remove, multilist_t *, ml, 209 unsigned int, sublist_idx, void *, obj); 210 211 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 212 213 mls = &ml->ml_sublists[sublist_idx]; 214 /* See comment in multilist_insert(). */ 215 need_lock = !MUTEX_HELD(&mls->mls_lock); 216 217 if (need_lock) 218 mutex_enter(&mls->mls_lock); 219 220 ASSERT(multilist_link_active(multilist_d2l(ml, obj))); 221 222 multilist_sublist_remove(mls, obj); 223 224 if (need_lock) 225 mutex_exit(&mls->mls_lock); 226 } 227 228 /* 229 * Check to see if this multilist object is empty. 230 * 231 * This will return TRUE if it finds all of the sublists of this 232 * multilist to be empty, and FALSE otherwise. Each sublist lock will be 233 * automatically acquired as necessary. 234 * 235 * If concurrent insertions and removals are occurring, the semantics 236 * of this function become a little fuzzy. Instead of locking all 237 * sublists for the entire call time of the function, each sublist is 238 * only locked as it is individually checked for emptiness. Thus, it's 239 * possible for this function to return TRUE with non-empty sublists at 240 * the time the function returns. This would be due to another thread 241 * inserting into a given sublist, after that specific sublist was check 242 * and deemed empty, but before all sublists have been checked. 243 */ 244 int 245 multilist_is_empty(multilist_t *ml) 246 { 247 for (int i = 0; i < ml->ml_num_sublists; i++) { 248 multilist_sublist_t *mls = &ml->ml_sublists[i]; 249 /* See comment in multilist_insert(). */ 250 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock); 251 252 if (need_lock) 253 mutex_enter(&mls->mls_lock); 254 255 if (!list_is_empty(&mls->mls_list)) { 256 if (need_lock) 257 mutex_exit(&mls->mls_lock); 258 259 return (FALSE); 260 } 261 262 if (need_lock) 263 mutex_exit(&mls->mls_lock); 264 } 265 266 return (TRUE); 267 } 268 269 /* Return the number of sublists composing this multilist */ 270 unsigned int 271 multilist_get_num_sublists(multilist_t *ml) 272 { 273 return (ml->ml_num_sublists); 274 } 275 276 /* Return a randomly selected, valid sublist index for this multilist */ 277 unsigned int 278 multilist_get_random_index(multilist_t *ml) 279 { 280 return (spa_get_random(ml->ml_num_sublists)); 281 } 282 283 /* Lock and return the sublist specified at the given index */ 284 multilist_sublist_t * 285 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx) 286 { 287 multilist_sublist_t *mls; 288 289 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 290 mls = &ml->ml_sublists[sublist_idx]; 291 mutex_enter(&mls->mls_lock); 292 293 return (mls); 294 } 295 296 /* Lock and return the sublist that would be used to store the specified obj */ 297 multilist_sublist_t * 298 multilist_sublist_lock_obj(multilist_t *ml, void *obj) 299 { 300 return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj))); 301 } 302 303 void 304 multilist_sublist_unlock(multilist_sublist_t *mls) 305 { 306 mutex_exit(&mls->mls_lock); 307 } 308 309 /* 310 * We're allowing any object to be inserted into this specific sublist, 311 * but this can lead to trouble if multilist_remove() is called to 312 * remove this object. Specifically, if calling ml_index_func on this 313 * object returns an index for sublist different than what is passed as 314 * a parameter here, any call to multilist_remove() with this newly 315 * inserted object is undefined! (the call to multilist_remove() will 316 * remove the object from a list that it isn't contained in) 317 */ 318 void 319 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj) 320 { 321 ASSERT(MUTEX_HELD(&mls->mls_lock)); 322 list_insert_head(&mls->mls_list, obj); 323 } 324 325 /* please see comment above multilist_sublist_insert_head */ 326 void 327 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj) 328 { 329 ASSERT(MUTEX_HELD(&mls->mls_lock)); 330 list_insert_tail(&mls->mls_list, obj); 331 } 332 333 /* 334 * Move the object one element forward in the list. 335 * 336 * This function will move the given object forward in the list (towards 337 * the head) by one object. So, in essence, it will swap its position in 338 * the list with its "prev" pointer. If the given object is already at the 339 * head of the list, it cannot be moved forward any more than it already 340 * is, so no action is taken. 341 * 342 * NOTE: This function **must not** remove any object from the list other 343 * than the object given as the parameter. This is relied upon in 344 * arc_evict_state_impl(). 345 */ 346 void 347 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj) 348 { 349 void *prev = list_prev(&mls->mls_list, obj); 350 351 ASSERT(MUTEX_HELD(&mls->mls_lock)); 352 ASSERT(!list_is_empty(&mls->mls_list)); 353 354 /* 'obj' must be at the head of the list, nothing to do */ 355 if (prev == NULL) 356 return; 357 358 list_remove(&mls->mls_list, obj); 359 list_insert_before(&mls->mls_list, prev, obj); 360 } 361 362 void 363 multilist_sublist_remove(multilist_sublist_t *mls, void *obj) 364 { 365 ASSERT(MUTEX_HELD(&mls->mls_lock)); 366 list_remove(&mls->mls_list, obj); 367 } 368 369 int 370 multilist_sublist_is_empty(multilist_sublist_t *mls) 371 { 372 ASSERT(MUTEX_HELD(&mls->mls_lock)); 373 return (list_is_empty(&mls->mls_list)); 374 } 375 376 int 377 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) 378 { 379 multilist_sublist_t *mls; 380 int empty; 381 382 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 383 mls = &ml->ml_sublists[sublist_idx]; 384 ASSERT(!MUTEX_HELD(&mls->mls_lock)); 385 mutex_enter(&mls->mls_lock); 386 empty = list_is_empty(&mls->mls_list); 387 mutex_exit(&mls->mls_lock); 388 return (empty); 389 } 390 391 void * 392 multilist_sublist_head(multilist_sublist_t *mls) 393 { 394 ASSERT(MUTEX_HELD(&mls->mls_lock)); 395 return (list_head(&mls->mls_list)); 396 } 397 398 void * 399 multilist_sublist_tail(multilist_sublist_t *mls) 400 { 401 ASSERT(MUTEX_HELD(&mls->mls_lock)); 402 return (list_tail(&mls->mls_list)); 403 } 404 405 void * 406 multilist_sublist_next(multilist_sublist_t *mls, void *obj) 407 { 408 ASSERT(MUTEX_HELD(&mls->mls_lock)); 409 return (list_next(&mls->mls_list, obj)); 410 } 411 412 void * 413 multilist_sublist_prev(multilist_sublist_t *mls, void *obj) 414 { 415 ASSERT(MUTEX_HELD(&mls->mls_lock)); 416 return (list_prev(&mls->mls_list, obj)); 417 } 418 419 void 420 multilist_link_init(multilist_node_t *link) 421 { 422 list_link_init(link); 423 } 424 425 int 426 multilist_link_active(multilist_node_t *link) 427 { 428 return (list_link_active(link)); 429 } 430 431 /* BEGIN CSTYLED */ 432 ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, INT, ZMOD_RW, 433 "Number of sublists used in each multilist"); 434 /* END CSTYLED */ 435