1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * This file and its contents are supplied under the terms of the
6 * Common Development and Distribution License ("CDDL"), version 1.0.
7 * You may only use this file in accordance with the terms of version
8 * 1.0 of the CDDL.
9 *
10 * A full copy of the text of the CDDL should have accompanied this
11 * source. A copy of the CDDL is also available via the Internet at
12 * http://www.illumos.org/license/CDDL.
13 *
14 * CDDL HEADER END
15 */
16 /*
17 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
18 */
19
20 #include <sys/zfs_context.h>
21 #include <sys/multilist.h>
22 #include <sys/trace_zfs.h>
23
/*
 * Tunable that overrides the number of sublists in each multilist_t.
 * While it is left at 0, multilist_create() falls back to its default of
 * MAX(boot_ncpus, 4) sublists.
 */
uint_t zfs_multilist_num_sublists = 0;
29
/*
 * Translate a pointer to an object kept on the multilist into a pointer to
 * the multilist_node_t embedded in that object, using the byte offset
 * recorded in ml->ml_offset.
 *
 * The function is only built when ZFS_DEBUG is defined. Otherwise
 * multilist_d2l() is a macro that evaluates neither argument (they only
 * appear as sizeof operands) and yields NULL.
 */
#ifdef ZFS_DEBUG
static multilist_node_t *
multilist_d2l(multilist_t *ml, void *obj)
{
	char *base = obj;

	return ((multilist_node_t *)(base + ml->ml_offset));
}
#else
#define multilist_d2l(ml, obj) ((void) sizeof (ml), (void) sizeof (obj), NULL)
#endif
43
44 /*
45 * Initialize a new mutlilist using the parameters specified.
46 *
47 * - 'size' denotes the size of the structure containing the
48 * multilist_node_t.
49 * - 'offset' denotes the byte offset of the mutlilist_node_t within
50 * the structure that contains it.
51 * - 'num' specifies the number of internal sublists to create.
52 * - 'index_func' is used to determine which sublist to insert into
53 * when the multilist_insert() function is called; as well as which
54 * sublist to remove from when multilist_remove() is called. The
55 * requirements this function must meet, are the following:
56 *
57 * - It must always return the same value when called on the same
58 * object (to ensure the object is removed from the list it was
59 * inserted into).
60 *
61 * - It must return a value in the range [0, number of sublists).
62 * The multilist_get_num_sublists() function may be used to
63 * determine the number of sublists in the multilist.
64 *
65 * Also, in order to reduce internal contention between the sublists
66 * during insertion and removal, this function should choose evenly
67 * between all available sublists when inserting. This isn't a hard
68 * requirement, but a general rule of thumb in order to garner the
69 * best multi-threaded performance out of the data structure.
70 */
71 static void
multilist_create_impl(multilist_t * ml,size_t size,size_t offset,uint_t num,multilist_sublist_index_func_t * index_func)72 multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
73 uint_t num, multilist_sublist_index_func_t *index_func)
74 {
75 ASSERT3U(size, >, 0);
76 ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
77 ASSERT3U(num, >, 0);
78 ASSERT3P(index_func, !=, NULL);
79
80 ml->ml_offset = offset;
81 ml->ml_num_sublists = num;
82 ml->ml_index_func = index_func;
83
84 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
85 ml->ml_num_sublists, KM_SLEEP);
86
87 ASSERT3P(ml->ml_sublists, !=, NULL);
88
89 for (int i = 0; i < ml->ml_num_sublists; i++) {
90 multilist_sublist_t *mls = &ml->ml_sublists[i];
91 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
92 list_create(&mls->mls_list, size, offset);
93 }
94 }
95
96 /*
97 * Allocate a new multilist, using the default number of sublists (the number
98 * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
99 * that the multilists do not expand if more CPUs are hot-added. In that case,
100 * we will have less fanout than boot_ncpus, but we don't want to always
101 * reserve the RAM necessary to create the extra slots for additional CPUs up
102 * front, and dynamically adding them is a complex task.
103 */
104 void
multilist_create(multilist_t * ml,size_t size,size_t offset,multilist_sublist_index_func_t * index_func)105 multilist_create(multilist_t *ml, size_t size, size_t offset,
106 multilist_sublist_index_func_t *index_func)
107 {
108 uint_t num_sublists;
109
110 if (zfs_multilist_num_sublists > 0) {
111 num_sublists = zfs_multilist_num_sublists;
112 } else {
113 num_sublists = MAX(boot_ncpus, 4);
114 }
115
116 multilist_create_impl(ml, size, offset, num_sublists, index_func);
117 }
118
119 /*
120 * Destroy the given multilist object, and free up any memory it holds.
121 */
122 void
multilist_destroy(multilist_t * ml)123 multilist_destroy(multilist_t *ml)
124 {
125 ASSERT(multilist_is_empty(ml));
126
127 for (int i = 0; i < ml->ml_num_sublists; i++) {
128 multilist_sublist_t *mls = &ml->ml_sublists[i];
129
130 ASSERT(list_is_empty(&mls->mls_list));
131
132 list_destroy(&mls->mls_list);
133 mutex_destroy(&mls->mls_lock);
134 }
135
136 ASSERT3P(ml->ml_sublists, !=, NULL);
137 kmem_free(ml->ml_sublists,
138 sizeof (multilist_sublist_t) * ml->ml_num_sublists);
139
140 ml->ml_num_sublists = 0;
141 ml->ml_offset = 0;
142 ml->ml_sublists = NULL;
143 }
144
145 /*
146 * Insert the given object into the multilist.
147 *
148 * This function will insert the object specified into the sublist
149 * determined using the function given at multilist creation time.
150 *
151 * The sublist locks are automatically acquired if not already held, to
152 * ensure consistency when inserting and removing from multiple threads.
153 */
154 void
multilist_insert(multilist_t * ml,void * obj)155 multilist_insert(multilist_t *ml, void *obj)
156 {
157 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
158 multilist_sublist_t *mls;
159 boolean_t need_lock;
160
161 DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
162 unsigned int, sublist_idx, void *, obj);
163
164 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
165
166 mls = &ml->ml_sublists[sublist_idx];
167
168 /*
169 * Note: Callers may already hold the sublist lock by calling
170 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
171 * returning TRUE if and only if the current thread holds the
172 * lock. While it's a little ugly to make the lock recursive in
173 * this way, it works and allows the calling code to be much
174 * simpler -- otherwise it would have to pass around a flag
175 * indicating that it already has the lock.
176 */
177 need_lock = !MUTEX_HELD(&mls->mls_lock);
178
179 if (need_lock)
180 mutex_enter(&mls->mls_lock);
181
182 ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
183
184 multilist_sublist_insert_head(mls, obj);
185
186 if (need_lock)
187 mutex_exit(&mls->mls_lock);
188 }
189
190 /*
191 * Remove the given object from the multilist.
192 *
193 * This function will remove the object specified from the sublist
194 * determined using the function given at multilist creation time.
195 *
196 * The necessary sublist locks are automatically acquired, to ensure
197 * consistency when inserting and removing from multiple threads.
198 */
199 void
multilist_remove(multilist_t * ml,void * obj)200 multilist_remove(multilist_t *ml, void *obj)
201 {
202 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
203 multilist_sublist_t *mls;
204 boolean_t need_lock;
205
206 DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
207 unsigned int, sublist_idx, void *, obj);
208
209 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
210
211 mls = &ml->ml_sublists[sublist_idx];
212 /* See comment in multilist_insert(). */
213 need_lock = !MUTEX_HELD(&mls->mls_lock);
214
215 if (need_lock)
216 mutex_enter(&mls->mls_lock);
217
218 ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
219
220 multilist_sublist_remove(mls, obj);
221
222 if (need_lock)
223 mutex_exit(&mls->mls_lock);
224 }
225
226 /*
227 * Check to see if this multilist object is empty.
228 *
229 * This will return TRUE if it finds all of the sublists of this
230 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
231 * automatically acquired as necessary.
232 *
233 * If concurrent insertions and removals are occurring, the semantics
234 * of this function become a little fuzzy. Instead of locking all
235 * sublists for the entire call time of the function, each sublist is
236 * only locked as it is individually checked for emptiness. Thus, it's
237 * possible for this function to return TRUE with non-empty sublists at
238 * the time the function returns. This would be due to another thread
239 * inserting into a given sublist, after that specific sublist was check
240 * and deemed empty, but before all sublists have been checked.
241 */
242 int
multilist_is_empty(multilist_t * ml)243 multilist_is_empty(multilist_t *ml)
244 {
245 for (int i = 0; i < ml->ml_num_sublists; i++) {
246 multilist_sublist_t *mls = &ml->ml_sublists[i];
247 /* See comment in multilist_insert(). */
248 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
249
250 if (need_lock)
251 mutex_enter(&mls->mls_lock);
252
253 if (!list_is_empty(&mls->mls_list)) {
254 if (need_lock)
255 mutex_exit(&mls->mls_lock);
256
257 return (FALSE);
258 }
259
260 if (need_lock)
261 mutex_exit(&mls->mls_lock);
262 }
263
264 return (TRUE);
265 }
266
267 /* Return the number of sublists composing this multilist */
268 unsigned int
multilist_get_num_sublists(multilist_t * ml)269 multilist_get_num_sublists(multilist_t *ml)
270 {
271 return (ml->ml_num_sublists);
272 }
273
274 /* Return a randomly selected, valid sublist index for this multilist */
275 unsigned int
multilist_get_random_index(multilist_t * ml)276 multilist_get_random_index(multilist_t *ml)
277 {
278 return (random_in_range(ml->ml_num_sublists));
279 }
280
281 void
multilist_sublist_lock(multilist_sublist_t * mls)282 multilist_sublist_lock(multilist_sublist_t *mls)
283 {
284 mutex_enter(&mls->mls_lock);
285 }
286
287 /* Lock and return the sublist specified at the given index */
288 multilist_sublist_t *
multilist_sublist_lock_idx(multilist_t * ml,unsigned int sublist_idx)289 multilist_sublist_lock_idx(multilist_t *ml, unsigned int sublist_idx)
290 {
291 multilist_sublist_t *mls;
292
293 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
294 mls = &ml->ml_sublists[sublist_idx];
295 mutex_enter(&mls->mls_lock);
296
297 return (mls);
298 }
299
300 /* Lock and return the sublist that would be used to store the specified obj */
301 multilist_sublist_t *
multilist_sublist_lock_obj(multilist_t * ml,void * obj)302 multilist_sublist_lock_obj(multilist_t *ml, void *obj)
303 {
304 return (multilist_sublist_lock_idx(ml, ml->ml_index_func(ml, obj)));
305 }
306
307 void
multilist_sublist_unlock(multilist_sublist_t * mls)308 multilist_sublist_unlock(multilist_sublist_t *mls)
309 {
310 mutex_exit(&mls->mls_lock);
311 }
312
313 /*
314 * We're allowing any object to be inserted into this specific sublist,
315 * but this can lead to trouble if multilist_remove() is called to
316 * remove this object. Specifically, if calling ml_index_func on this
317 * object returns an index for sublist different than what is passed as
318 * a parameter here, any call to multilist_remove() with this newly
319 * inserted object is undefined! (the call to multilist_remove() will
320 * remove the object from a list that it isn't contained in)
321 */
322 void
multilist_sublist_insert_head(multilist_sublist_t * mls,void * obj)323 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
324 {
325 ASSERT(MUTEX_HELD(&mls->mls_lock));
326 list_insert_head(&mls->mls_list, obj);
327 }
328
329 /* please see comment above multilist_sublist_insert_head */
330 void
multilist_sublist_insert_tail(multilist_sublist_t * mls,void * obj)331 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
332 {
333 ASSERT(MUTEX_HELD(&mls->mls_lock));
334 list_insert_tail(&mls->mls_list, obj);
335 }
336
337 /* please see comment above multilist_sublist_insert_head */
338 void
multilist_sublist_insert_after(multilist_sublist_t * mls,void * prev,void * obj)339 multilist_sublist_insert_after(multilist_sublist_t *mls, void *prev, void *obj)
340 {
341 ASSERT(MUTEX_HELD(&mls->mls_lock));
342 list_insert_after(&mls->mls_list, prev, obj);
343 }
344
345 /* please see comment above multilist_sublist_insert_head */
346 void
multilist_sublist_insert_before(multilist_sublist_t * mls,void * next,void * obj)347 multilist_sublist_insert_before(multilist_sublist_t *mls, void *next, void *obj)
348 {
349 ASSERT(MUTEX_HELD(&mls->mls_lock));
350 list_insert_before(&mls->mls_list, next, obj);
351 }
352
353 /*
354 * Move the object one element forward in the list.
355 *
356 * This function will move the given object forward in the list (towards
357 * the head) by one object. So, in essence, it will swap its position in
358 * the list with its "prev" pointer. If the given object is already at the
359 * head of the list, it cannot be moved forward any more than it already
360 * is, so no action is taken.
361 *
362 * NOTE: This function **must not** remove any object from the list other
363 * than the object given as the parameter. This is relied upon in
364 * arc_evict_state_impl().
365 */
366 void
multilist_sublist_move_forward(multilist_sublist_t * mls,void * obj)367 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
368 {
369 void *prev = list_prev(&mls->mls_list, obj);
370
371 ASSERT(MUTEX_HELD(&mls->mls_lock));
372 ASSERT(!list_is_empty(&mls->mls_list));
373
374 /* 'obj' must be at the head of the list, nothing to do */
375 if (prev == NULL)
376 return;
377
378 list_remove(&mls->mls_list, obj);
379 list_insert_before(&mls->mls_list, prev, obj);
380 }
381
382 void
multilist_sublist_remove(multilist_sublist_t * mls,void * obj)383 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
384 {
385 ASSERT(MUTEX_HELD(&mls->mls_lock));
386 list_remove(&mls->mls_list, obj);
387 }
388
389 int
multilist_sublist_is_empty(multilist_sublist_t * mls)390 multilist_sublist_is_empty(multilist_sublist_t *mls)
391 {
392 ASSERT(MUTEX_HELD(&mls->mls_lock));
393 return (list_is_empty(&mls->mls_list));
394 }
395
396 int
multilist_sublist_is_empty_idx(multilist_t * ml,unsigned int sublist_idx)397 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx)
398 {
399 multilist_sublist_t *mls;
400 int empty;
401
402 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
403 mls = &ml->ml_sublists[sublist_idx];
404 ASSERT(!MUTEX_HELD(&mls->mls_lock));
405 mutex_enter(&mls->mls_lock);
406 empty = list_is_empty(&mls->mls_list);
407 mutex_exit(&mls->mls_lock);
408 return (empty);
409 }
410
411 void *
multilist_sublist_head(multilist_sublist_t * mls)412 multilist_sublist_head(multilist_sublist_t *mls)
413 {
414 ASSERT(MUTEX_HELD(&mls->mls_lock));
415 return (list_head(&mls->mls_list));
416 }
417
418 void *
multilist_sublist_tail(multilist_sublist_t * mls)419 multilist_sublist_tail(multilist_sublist_t *mls)
420 {
421 ASSERT(MUTEX_HELD(&mls->mls_lock));
422 return (list_tail(&mls->mls_list));
423 }
424
425 void *
multilist_sublist_next(multilist_sublist_t * mls,void * obj)426 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
427 {
428 ASSERT(MUTEX_HELD(&mls->mls_lock));
429 return (list_next(&mls->mls_list, obj));
430 }
431
432 void *
multilist_sublist_prev(multilist_sublist_t * mls,void * obj)433 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
434 {
435 ASSERT(MUTEX_HELD(&mls->mls_lock));
436 return (list_prev(&mls->mls_list, obj));
437 }
438
439 void
multilist_link_init(multilist_node_t * link)440 multilist_link_init(multilist_node_t *link)
441 {
442 list_link_init(link);
443 }
444
445 int
multilist_link_active(multilist_node_t * link)446 multilist_link_active(multilist_node_t *link)
447 {
448 return (list_link_active(link));
449 }
450
/*
 * Register the zfs_multilist_num_sublists tunable as a UINT module
 * parameter with ZMOD_RW access.
 */
ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, UINT, ZMOD_RW,
	"Number of sublists used in each multilist");
453