1 /* 2 * CDDL HEADER START 3 * 4 * This file and its contents are supplied under the terms of the 5 * Common Development and Distribution License ("CDDL"), version 1.0. 6 * You may only use this file in accordance with the terms of version 7 * 1.0 of the CDDL. 8 * 9 * A full copy of the text of the CDDL should have accompanied this 10 * source. A copy of the CDDL is also available via the Internet at 11 * http://www.illumos.org/license/CDDL. 12 * 13 * CDDL HEADER END 14 */ 15 /* 16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 17 */ 18 19 #include <sys/zfs_context.h> 20 #include <sys/multilist.h> 21 #include <sys/trace_zfs.h> 22 23 /* 24 * This overrides the number of sublists in each multilist_t, which defaults 25 * to the number of CPUs in the system (see multilist_create()). 26 */ 27 int zfs_multilist_num_sublists = 0; 28 29 /* 30 * Given the object contained on the list, return a pointer to the 31 * object's multilist_node_t structure it contains. 32 */ 33 #ifdef ZFS_DEBUG 34 static multilist_node_t * 35 multilist_d2l(multilist_t *ml, void *obj) 36 { 37 return ((multilist_node_t *)((char *)obj + ml->ml_offset)); 38 } 39 #endif 40 41 /* 42 * Initialize a new mutlilist using the parameters specified. 43 * 44 * - 'size' denotes the size of the structure containing the 45 * multilist_node_t. 46 * - 'offset' denotes the byte offset of the mutlilist_node_t within 47 * the structure that contains it. 48 * - 'num' specifies the number of internal sublists to create. 49 * - 'index_func' is used to determine which sublist to insert into 50 * when the multilist_insert() function is called; as well as which 51 * sublist to remove from when multilist_remove() is called. The 52 * requirements this function must meet, are the following: 53 * 54 * - It must always return the same value when called on the same 55 * object (to ensure the object is removed from the list it was 56 * inserted into). 57 * 58 * - It must return a value in the range [0, number of sublists). 59 * The multilist_get_num_sublists() function may be used to 60 * determine the number of sublists in the multilist. 61 * 62 * Also, in order to reduce internal contention between the sublists 63 * during insertion and removal, this function should choose evenly 64 * between all available sublists when inserting. This isn't a hard 65 * requirement, but a general rule of thumb in order to garner the 66 * best multi-threaded performance out of the data structure. 67 */ 68 static void 69 multilist_create_impl(multilist_t *ml, size_t size, size_t offset, 70 unsigned int num, multilist_sublist_index_func_t *index_func) 71 { 72 ASSERT3U(size, >, 0); 73 ASSERT3U(size, >=, offset + sizeof (multilist_node_t)); 74 ASSERT3U(num, >, 0); 75 ASSERT3P(index_func, !=, NULL); 76 77 ml->ml_offset = offset; 78 ml->ml_num_sublists = num; 79 ml->ml_index_func = index_func; 80 81 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * 82 ml->ml_num_sublists, KM_SLEEP); 83 84 ASSERT3P(ml->ml_sublists, !=, NULL); 85 86 for (int i = 0; i < ml->ml_num_sublists; i++) { 87 multilist_sublist_t *mls = &ml->ml_sublists[i]; 88 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL); 89 list_create(&mls->mls_list, size, offset); 90 } 91 } 92 93 /* 94 * Allocate a new multilist, using the default number of sublists (the number 95 * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note 96 * that the multilists do not expand if more CPUs are hot-added. In that case, 97 * we will have less fanout than boot_ncpus, but we don't want to always 98 * reserve the RAM necessary to create the extra slots for additional CPUs up 99 * front, and dynamically adding them is a complex task. 100 */ 101 void 102 multilist_create(multilist_t *ml, size_t size, size_t offset, 103 multilist_sublist_index_func_t *index_func) 104 { 105 int num_sublists; 106 107 if (zfs_multilist_num_sublists > 0) { 108 num_sublists = zfs_multilist_num_sublists; 109 } else { 110 num_sublists = MAX(boot_ncpus, 4); 111 } 112 113 multilist_create_impl(ml, size, offset, num_sublists, index_func); 114 } 115 116 /* 117 * Destroy the given multilist object, and free up any memory it holds. 118 */ 119 void 120 multilist_destroy(multilist_t *ml) 121 { 122 ASSERT(multilist_is_empty(ml)); 123 124 for (int i = 0; i < ml->ml_num_sublists; i++) { 125 multilist_sublist_t *mls = &ml->ml_sublists[i]; 126 127 ASSERT(list_is_empty(&mls->mls_list)); 128 129 list_destroy(&mls->mls_list); 130 mutex_destroy(&mls->mls_lock); 131 } 132 133 ASSERT3P(ml->ml_sublists, !=, NULL); 134 kmem_free(ml->ml_sublists, 135 sizeof (multilist_sublist_t) * ml->ml_num_sublists); 136 137 ml->ml_num_sublists = 0; 138 ml->ml_offset = 0; 139 ml->ml_sublists = NULL; 140 } 141 142 /* 143 * Insert the given object into the multilist. 144 * 145 * This function will insert the object specified into the sublist 146 * determined using the function given at multilist creation time. 147 * 148 * The sublist locks are automatically acquired if not already held, to 149 * ensure consistency when inserting and removing from multiple threads. 150 */ 151 void 152 multilist_insert(multilist_t *ml, void *obj) 153 { 154 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 155 multilist_sublist_t *mls; 156 boolean_t need_lock; 157 158 DTRACE_PROBE3(multilist__insert, multilist_t *, ml, 159 unsigned int, sublist_idx, void *, obj); 160 161 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 162 163 mls = &ml->ml_sublists[sublist_idx]; 164 165 /* 166 * Note: Callers may already hold the sublist lock by calling 167 * multilist_sublist_lock(). Here we rely on MUTEX_HELD() 168 * returning TRUE if and only if the current thread holds the 169 * lock. While it's a little ugly to make the lock recursive in 170 * this way, it works and allows the calling code to be much 171 * simpler -- otherwise it would have to pass around a flag 172 * indicating that it already has the lock. 173 */ 174 need_lock = !MUTEX_HELD(&mls->mls_lock); 175 176 if (need_lock) 177 mutex_enter(&mls->mls_lock); 178 179 ASSERT(!multilist_link_active(multilist_d2l(ml, obj))); 180 181 multilist_sublist_insert_head(mls, obj); 182 183 if (need_lock) 184 mutex_exit(&mls->mls_lock); 185 } 186 187 /* 188 * Remove the given object from the multilist. 189 * 190 * This function will remove the object specified from the sublist 191 * determined using the function given at multilist creation time. 192 * 193 * The necessary sublist locks are automatically acquired, to ensure 194 * consistency when inserting and removing from multiple threads. 195 */ 196 void 197 multilist_remove(multilist_t *ml, void *obj) 198 { 199 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 200 multilist_sublist_t *mls; 201 boolean_t need_lock; 202 203 DTRACE_PROBE3(multilist__remove, multilist_t *, ml, 204 unsigned int, sublist_idx, void *, obj); 205 206 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 207 208 mls = &ml->ml_sublists[sublist_idx]; 209 /* See comment in multilist_insert(). */ 210 need_lock = !MUTEX_HELD(&mls->mls_lock); 211 212 if (need_lock) 213 mutex_enter(&mls->mls_lock); 214 215 ASSERT(multilist_link_active(multilist_d2l(ml, obj))); 216 217 multilist_sublist_remove(mls, obj); 218 219 if (need_lock) 220 mutex_exit(&mls->mls_lock); 221 } 222 223 /* 224 * Check to see if this multilist object is empty. 225 * 226 * This will return TRUE if it finds all of the sublists of this 227 * multilist to be empty, and FALSE otherwise. Each sublist lock will be 228 * automatically acquired as necessary. 229 * 230 * If concurrent insertions and removals are occurring, the semantics 231 * of this function become a little fuzzy. Instead of locking all 232 * sublists for the entire call time of the function, each sublist is 233 * only locked as it is individually checked for emptiness. Thus, it's 234 * possible for this function to return TRUE with non-empty sublists at 235 * the time the function returns. This would be due to another thread 236 * inserting into a given sublist, after that specific sublist was check 237 * and deemed empty, but before all sublists have been checked. 238 */ 239 int 240 multilist_is_empty(multilist_t *ml) 241 { 242 for (int i = 0; i < ml->ml_num_sublists; i++) { 243 multilist_sublist_t *mls = &ml->ml_sublists[i]; 244 /* See comment in multilist_insert(). */ 245 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock); 246 247 if (need_lock) 248 mutex_enter(&mls->mls_lock); 249 250 if (!list_is_empty(&mls->mls_list)) { 251 if (need_lock) 252 mutex_exit(&mls->mls_lock); 253 254 return (FALSE); 255 } 256 257 if (need_lock) 258 mutex_exit(&mls->mls_lock); 259 } 260 261 return (TRUE); 262 } 263 264 /* Return the number of sublists composing this multilist */ 265 unsigned int 266 multilist_get_num_sublists(multilist_t *ml) 267 { 268 return (ml->ml_num_sublists); 269 } 270 271 /* Return a randomly selected, valid sublist index for this multilist */ 272 unsigned int 273 multilist_get_random_index(multilist_t *ml) 274 { 275 return (random_in_range(ml->ml_num_sublists)); 276 } 277 278 /* Lock and return the sublist specified at the given index */ 279 multilist_sublist_t * 280 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx) 281 { 282 multilist_sublist_t *mls; 283 284 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 285 mls = &ml->ml_sublists[sublist_idx]; 286 mutex_enter(&mls->mls_lock); 287 288 return (mls); 289 } 290 291 /* Lock and return the sublist that would be used to store the specified obj */ 292 multilist_sublist_t * 293 multilist_sublist_lock_obj(multilist_t *ml, void *obj) 294 { 295 return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj))); 296 } 297 298 void 299 multilist_sublist_unlock(multilist_sublist_t *mls) 300 { 301 mutex_exit(&mls->mls_lock); 302 } 303 304 /* 305 * We're allowing any object to be inserted into this specific sublist, 306 * but this can lead to trouble if multilist_remove() is called to 307 * remove this object. Specifically, if calling ml_index_func on this 308 * object returns an index for sublist different than what is passed as 309 * a parameter here, any call to multilist_remove() with this newly 310 * inserted object is undefined! (the call to multilist_remove() will 311 * remove the object from a list that it isn't contained in) 312 */ 313 void 314 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj) 315 { 316 ASSERT(MUTEX_HELD(&mls->mls_lock)); 317 list_insert_head(&mls->mls_list, obj); 318 } 319 320 /* please see comment above multilist_sublist_insert_head */ 321 void 322 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj) 323 { 324 ASSERT(MUTEX_HELD(&mls->mls_lock)); 325 list_insert_tail(&mls->mls_list, obj); 326 } 327 328 /* 329 * Move the object one element forward in the list. 330 * 331 * This function will move the given object forward in the list (towards 332 * the head) by one object. So, in essence, it will swap its position in 333 * the list with its "prev" pointer. If the given object is already at the 334 * head of the list, it cannot be moved forward any more than it already 335 * is, so no action is taken. 336 * 337 * NOTE: This function **must not** remove any object from the list other 338 * than the object given as the parameter. This is relied upon in 339 * arc_evict_state_impl(). 340 */ 341 void 342 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj) 343 { 344 void *prev = list_prev(&mls->mls_list, obj); 345 346 ASSERT(MUTEX_HELD(&mls->mls_lock)); 347 ASSERT(!list_is_empty(&mls->mls_list)); 348 349 /* 'obj' must be at the head of the list, nothing to do */ 350 if (prev == NULL) 351 return; 352 353 list_remove(&mls->mls_list, obj); 354 list_insert_before(&mls->mls_list, prev, obj); 355 } 356 357 void 358 multilist_sublist_remove(multilist_sublist_t *mls, void *obj) 359 { 360 ASSERT(MUTEX_HELD(&mls->mls_lock)); 361 list_remove(&mls->mls_list, obj); 362 } 363 364 int 365 multilist_sublist_is_empty(multilist_sublist_t *mls) 366 { 367 ASSERT(MUTEX_HELD(&mls->mls_lock)); 368 return (list_is_empty(&mls->mls_list)); 369 } 370 371 int 372 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) 373 { 374 multilist_sublist_t *mls; 375 int empty; 376 377 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 378 mls = &ml->ml_sublists[sublist_idx]; 379 ASSERT(!MUTEX_HELD(&mls->mls_lock)); 380 mutex_enter(&mls->mls_lock); 381 empty = list_is_empty(&mls->mls_list); 382 mutex_exit(&mls->mls_lock); 383 return (empty); 384 } 385 386 void * 387 multilist_sublist_head(multilist_sublist_t *mls) 388 { 389 ASSERT(MUTEX_HELD(&mls->mls_lock)); 390 return (list_head(&mls->mls_list)); 391 } 392 393 void * 394 multilist_sublist_tail(multilist_sublist_t *mls) 395 { 396 ASSERT(MUTEX_HELD(&mls->mls_lock)); 397 return (list_tail(&mls->mls_list)); 398 } 399 400 void * 401 multilist_sublist_next(multilist_sublist_t *mls, void *obj) 402 { 403 ASSERT(MUTEX_HELD(&mls->mls_lock)); 404 return (list_next(&mls->mls_list, obj)); 405 } 406 407 void * 408 multilist_sublist_prev(multilist_sublist_t *mls, void *obj) 409 { 410 ASSERT(MUTEX_HELD(&mls->mls_lock)); 411 return (list_prev(&mls->mls_list, obj)); 412 } 413 414 void 415 multilist_link_init(multilist_node_t *link) 416 { 417 list_link_init(link); 418 } 419 420 int 421 multilist_link_active(multilist_node_t *link) 422 { 423 return (list_link_active(link)); 424 } 425 426 /* BEGIN CSTYLED */ 427 ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, INT, ZMOD_RW, 428 "Number of sublists used in each multilist"); 429 /* END CSTYLED */ 430