1 /* 2 * CDDL HEADER START 3 * 4 * This file and its contents are supplied under the terms of the 5 * Common Development and Distribution License ("CDDL"), version 1.0. 6 * You may only use this file in accordance with the terms of version 7 * 1.0 of the CDDL. 8 * 9 * A full copy of the text of the CDDL should have accompanied this 10 * source. A copy of the CDDL is also available via the Internet at 11 * http://www.illumos.org/license/CDDL. 12 * 13 * CDDL HEADER END 14 */ 15 /* 16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 17 */ 18 19 #include <sys/zfs_context.h> 20 #include <sys/multilist.h> 21 22 /* needed for spa_get_random() */ 23 #include <sys/spa.h> 24 25 /* 26 * This overrides the number of sublists in each multilist_t, which defaults 27 * to the number of CPUs in the system (see multilist_create()). 28 */ 29 int zfs_multilist_num_sublists = 0; 30 31 /* 32 * Given the object contained on the list, return a pointer to the 33 * object's multilist_node_t structure it contains. 34 */ 35 static multilist_node_t * 36 multilist_d2l(multilist_t *ml, void *obj) 37 { 38 return ((multilist_node_t *)((char *)obj + ml->ml_offset)); 39 } 40 41 /* 42 * Initialize a new mutlilist using the parameters specified. 43 * 44 * - 'size' denotes the size of the structure containing the 45 * multilist_node_t. 46 * - 'offset' denotes the byte offset of the mutlilist_node_t within 47 * the structure that contains it. 48 * - 'num' specifies the number of internal sublists to create. 49 * - 'index_func' is used to determine which sublist to insert into 50 * when the multilist_insert() function is called; as well as which 51 * sublist to remove from when multilist_remove() is called. The 52 * requirements this function must meet, are the following: 53 * 54 * - It must always return the same value when called on the same 55 * object (to ensure the object is removed from the list it was 56 * inserted into). 57 * 58 * - It must return a value in the range [0, number of sublists). 59 * The multilist_get_num_sublists() function may be used to 60 * determine the number of sublists in the multilist. 61 * 62 * Also, in order to reduce internal contention between the sublists 63 * during insertion and removal, this function should choose evenly 64 * between all available sublists when inserting. This isn't a hard 65 * requirement, but a general rule of thumb in order to garner the 66 * best multi-threaded performance out of the data structure. 67 */ 68 static multilist_t * 69 multilist_create_impl(size_t size, size_t offset, 70 unsigned int num, multilist_sublist_index_func_t *index_func) 71 { 72 ASSERT3U(size, >, 0); 73 ASSERT3U(size, >=, offset + sizeof (multilist_node_t)); 74 ASSERT3U(num, >, 0); 75 ASSERT3P(index_func, !=, NULL); 76 77 multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP); 78 ml->ml_offset = offset; 79 ml->ml_num_sublists = num; 80 ml->ml_index_func = index_func; 81 82 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * 83 ml->ml_num_sublists, KM_SLEEP); 84 85 ASSERT3P(ml->ml_sublists, !=, NULL); 86 87 for (int i = 0; i < ml->ml_num_sublists; i++) { 88 multilist_sublist_t *mls = &ml->ml_sublists[i]; 89 mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL); 90 list_create(&mls->mls_list, size, offset); 91 } 92 return (ml); 93 } 94 95 /* 96 * Allocate a new multilist, using the default number of sublists 97 * (the number of CPUs, or at least 4, or the tunable 98 * zfs_multilist_num_sublists). 99 */ 100 multilist_t * 101 multilist_create(size_t size, size_t offset, 102 multilist_sublist_index_func_t *index_func) 103 { 104 int num_sublists; 105 106 if (zfs_multilist_num_sublists > 0) { 107 num_sublists = zfs_multilist_num_sublists; 108 } else { 109 num_sublists = MAX(boot_ncpus, 4); 110 } 111 112 return (multilist_create_impl(size, offset, num_sublists, index_func)); 113 } 114 115 /* 116 * Destroy the given multilist object, and free up any memory it holds. 117 */ 118 void 119 multilist_destroy(multilist_t *ml) 120 { 121 ASSERT(multilist_is_empty(ml)); 122 123 for (int i = 0; i < ml->ml_num_sublists; i++) { 124 multilist_sublist_t *mls = &ml->ml_sublists[i]; 125 126 ASSERT(list_is_empty(&mls->mls_list)); 127 128 list_destroy(&mls->mls_list); 129 mutex_destroy(&mls->mls_lock); 130 } 131 132 ASSERT3P(ml->ml_sublists, !=, NULL); 133 kmem_free(ml->ml_sublists, 134 sizeof (multilist_sublist_t) * ml->ml_num_sublists); 135 136 ml->ml_num_sublists = 0; 137 ml->ml_offset = 0; 138 kmem_free(ml, sizeof (multilist_t)); 139 } 140 141 /* 142 * Insert the given object into the multilist. 143 * 144 * This function will insert the object specified into the sublist 145 * determined using the function given at multilist creation time. 146 * 147 * The sublist locks are automatically acquired if not already held, to 148 * ensure consistency when inserting and removing from multiple threads. 149 */ 150 void 151 multilist_insert(multilist_t *ml, void *obj) 152 { 153 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 154 multilist_sublist_t *mls; 155 boolean_t need_lock; 156 157 DTRACE_PROBE3(multilist__insert, multilist_t *, ml, 158 unsigned int, sublist_idx, void *, obj); 159 160 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 161 162 mls = &ml->ml_sublists[sublist_idx]; 163 164 /* 165 * Note: Callers may already hold the sublist lock by calling 166 * multilist_sublist_lock(). Here we rely on MUTEX_HELD() 167 * returning TRUE if and only if the current thread holds the 168 * lock. While it's a little ugly to make the lock recursive in 169 * this way, it works and allows the calling code to be much 170 * simpler -- otherwise it would have to pass around a flag 171 * indicating that it already has the lock. 172 */ 173 need_lock = !MUTEX_HELD(&mls->mls_lock); 174 175 if (need_lock) 176 mutex_enter(&mls->mls_lock); 177 178 ASSERT(!multilist_link_active(multilist_d2l(ml, obj))); 179 180 multilist_sublist_insert_head(mls, obj); 181 182 if (need_lock) 183 mutex_exit(&mls->mls_lock); 184 } 185 186 /* 187 * Remove the given object from the multilist. 188 * 189 * This function will remove the object specified from the sublist 190 * determined using the function given at multilist creation time. 191 * 192 * The necessary sublist locks are automatically acquired, to ensure 193 * consistency when inserting and removing from multiple threads. 194 */ 195 void 196 multilist_remove(multilist_t *ml, void *obj) 197 { 198 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 199 multilist_sublist_t *mls; 200 boolean_t need_lock; 201 202 DTRACE_PROBE3(multilist__remove, multilist_t *, ml, 203 unsigned int, sublist_idx, void *, obj); 204 205 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 206 207 mls = &ml->ml_sublists[sublist_idx]; 208 /* See comment in multilist_insert(). */ 209 need_lock = !MUTEX_HELD(&mls->mls_lock); 210 211 if (need_lock) 212 mutex_enter(&mls->mls_lock); 213 214 ASSERT(multilist_link_active(multilist_d2l(ml, obj))); 215 216 multilist_sublist_remove(mls, obj); 217 218 if (need_lock) 219 mutex_exit(&mls->mls_lock); 220 } 221 222 /* 223 * Check to see if this multilist object is empty. 224 * 225 * This will return TRUE if it finds all of the sublists of this 226 * multilist to be empty, and FALSE otherwise. Each sublist lock will be 227 * automatically acquired as necessary. 228 * 229 * If concurrent insertions and removals are occurring, the semantics 230 * of this function become a little fuzzy. Instead of locking all 231 * sublists for the entire call time of the function, each sublist is 232 * only locked as it is individually checked for emptiness. Thus, it's 233 * possible for this function to return TRUE with non-empty sublists at 234 * the time the function returns. This would be due to another thread 235 * inserting into a given sublist, after that specific sublist was check 236 * and deemed empty, but before all sublists have been checked. 237 */ 238 int 239 multilist_is_empty(multilist_t *ml) 240 { 241 for (int i = 0; i < ml->ml_num_sublists; i++) { 242 multilist_sublist_t *mls = &ml->ml_sublists[i]; 243 /* See comment in multilist_insert(). */ 244 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock); 245 246 if (need_lock) 247 mutex_enter(&mls->mls_lock); 248 249 if (!list_is_empty(&mls->mls_list)) { 250 if (need_lock) 251 mutex_exit(&mls->mls_lock); 252 253 return (FALSE); 254 } 255 256 if (need_lock) 257 mutex_exit(&mls->mls_lock); 258 } 259 260 return (TRUE); 261 } 262 263 /* Return the number of sublists composing this multilist */ 264 unsigned int 265 multilist_get_num_sublists(multilist_t *ml) 266 { 267 return (ml->ml_num_sublists); 268 } 269 270 /* Return a randomly selected, valid sublist index for this multilist */ 271 unsigned int 272 multilist_get_random_index(multilist_t *ml) 273 { 274 return (spa_get_random(ml->ml_num_sublists)); 275 } 276 277 /* Lock and return the sublist specified at the given index */ 278 multilist_sublist_t * 279 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx) 280 { 281 multilist_sublist_t *mls; 282 283 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 284 mls = &ml->ml_sublists[sublist_idx]; 285 mutex_enter(&mls->mls_lock); 286 287 return (mls); 288 } 289 290 /* Lock and return the sublist that would be used to store the specified obj */ 291 multilist_sublist_t * 292 multilist_sublist_lock_obj(multilist_t *ml, void *obj) 293 { 294 return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj))); 295 } 296 297 void 298 multilist_sublist_unlock(multilist_sublist_t *mls) 299 { 300 mutex_exit(&mls->mls_lock); 301 } 302 303 /* 304 * We're allowing any object to be inserted into this specific sublist, 305 * but this can lead to trouble if multilist_remove() is called to 306 * remove this object. Specifically, if calling ml_index_func on this 307 * object returns an index for sublist different than what is passed as 308 * a parameter here, any call to multilist_remove() with this newly 309 * inserted object is undefined! (the call to multilist_remove() will 310 * remove the object from a list that it isn't contained in) 311 */ 312 void 313 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj) 314 { 315 ASSERT(MUTEX_HELD(&mls->mls_lock)); 316 list_insert_head(&mls->mls_list, obj); 317 } 318 319 /* please see comment above multilist_sublist_insert_head */ 320 void 321 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj) 322 { 323 ASSERT(MUTEX_HELD(&mls->mls_lock)); 324 list_insert_tail(&mls->mls_list, obj); 325 } 326 327 /* 328 * Move the object one element forward in the list. 329 * 330 * This function will move the given object forward in the list (towards 331 * the head) by one object. So, in essence, it will swap its position in 332 * the list with its "prev" pointer. If the given object is already at the 333 * head of the list, it cannot be moved forward any more than it already 334 * is, so no action is taken. 335 * 336 * NOTE: This function **must not** remove any object from the list other 337 * than the object given as the parameter. This is relied upon in 338 * arc_evict_state_impl(). 339 */ 340 void 341 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj) 342 { 343 void *prev = list_prev(&mls->mls_list, obj); 344 345 ASSERT(MUTEX_HELD(&mls->mls_lock)); 346 ASSERT(!list_is_empty(&mls->mls_list)); 347 348 /* 'obj' must be at the head of the list, nothing to do */ 349 if (prev == NULL) 350 return; 351 352 list_remove(&mls->mls_list, obj); 353 list_insert_before(&mls->mls_list, prev, obj); 354 } 355 356 void 357 multilist_sublist_remove(multilist_sublist_t *mls, void *obj) 358 { 359 ASSERT(MUTEX_HELD(&mls->mls_lock)); 360 list_remove(&mls->mls_list, obj); 361 } 362 363 void * 364 multilist_sublist_head(multilist_sublist_t *mls) 365 { 366 ASSERT(MUTEX_HELD(&mls->mls_lock)); 367 return (list_head(&mls->mls_list)); 368 } 369 370 void * 371 multilist_sublist_tail(multilist_sublist_t *mls) 372 { 373 ASSERT(MUTEX_HELD(&mls->mls_lock)); 374 return (list_tail(&mls->mls_list)); 375 } 376 377 void * 378 multilist_sublist_next(multilist_sublist_t *mls, void *obj) 379 { 380 ASSERT(MUTEX_HELD(&mls->mls_lock)); 381 return (list_next(&mls->mls_list, obj)); 382 } 383 384 void * 385 multilist_sublist_prev(multilist_sublist_t *mls, void *obj) 386 { 387 ASSERT(MUTEX_HELD(&mls->mls_lock)); 388 return (list_prev(&mls->mls_list, obj)); 389 } 390 391 void 392 multilist_link_init(multilist_node_t *link) 393 { 394 list_link_init(link); 395 } 396 397 int 398 multilist_link_active(multilist_node_t *link) 399 { 400 return (list_link_active(link)); 401 } 402