xref: /freebsd/sys/contrib/openzfs/module/zfs/multilist.c (revision 7fdf597e96a02165cfe22ff357b857d5fa15ed8a)
1 /*
2  * CDDL HEADER START
3  *
4  * This file and its contents are supplied under the terms of the
5  * Common Development and Distribution License ("CDDL"), version 1.0.
6  * You may only use this file in accordance with the terms of version
7  * 1.0 of the CDDL.
8  *
9  * A full copy of the text of the CDDL should have accompanied this
10  * source.  A copy of the CDDL is also available via the Internet at
11  * http://www.illumos.org/license/CDDL.
12  *
13  * CDDL HEADER END
14  */
15 /*
16  * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
17  */
18 
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
21 #include <sys/trace_zfs.h>
22 
23 /*
24  * This overrides the number of sublists in each multilist_t, which defaults
25  * to the number of CPUs in the system (see multilist_create()).
26  */
27 uint_t zfs_multilist_num_sublists = 0;
28 
/*
 * Translate a pointer to an object stored on the multilist into a pointer
 * to the multilist_node_t embedded in that object, using the byte offset
 * recorded in ml->ml_offset.
 *
 * The real function is only built under ZFS_DEBUG. Otherwise a macro
 * stands in for it: it mentions both arguments only inside sizeof (so
 * they are never evaluated) and yields NULL.
 */
#ifdef ZFS_DEBUG
static multilist_node_t *
multilist_d2l(multilist_t *ml, void *obj)
{
	char *base = obj;

	return ((multilist_node_t *)(base + ml->ml_offset));
}
#else
#define	multilist_d2l(ml, obj) ((void) sizeof (ml), (void) sizeof (obj), NULL)
#endif
42 
43 /*
44  * Initialize a new mutlilist using the parameters specified.
45  *
46  *  - 'size' denotes the size of the structure containing the
47  *     multilist_node_t.
48  *  - 'offset' denotes the byte offset of the mutlilist_node_t within
49  *     the structure that contains it.
50  *  - 'num' specifies the number of internal sublists to create.
51  *  - 'index_func' is used to determine which sublist to insert into
52  *     when the multilist_insert() function is called; as well as which
53  *     sublist to remove from when multilist_remove() is called. The
54  *     requirements this function must meet, are the following:
55  *
56  *      - It must always return the same value when called on the same
57  *        object (to ensure the object is removed from the list it was
58  *        inserted into).
59  *
60  *      - It must return a value in the range [0, number of sublists).
61  *        The multilist_get_num_sublists() function may be used to
62  *        determine the number of sublists in the multilist.
63  *
64  *     Also, in order to reduce internal contention between the sublists
65  *     during insertion and removal, this function should choose evenly
66  *     between all available sublists when inserting. This isn't a hard
67  *     requirement, but a general rule of thumb in order to garner the
68  *     best multi-threaded performance out of the data structure.
69  */
70 static void
71 multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
72     uint_t num, multilist_sublist_index_func_t *index_func)
73 {
74 	ASSERT3U(size, >, 0);
75 	ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
76 	ASSERT3U(num, >, 0);
77 	ASSERT3P(index_func, !=, NULL);
78 
79 	ml->ml_offset = offset;
80 	ml->ml_num_sublists = num;
81 	ml->ml_index_func = index_func;
82 
83 	ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
84 	    ml->ml_num_sublists, KM_SLEEP);
85 
86 	ASSERT3P(ml->ml_sublists, !=, NULL);
87 
88 	for (int i = 0; i < ml->ml_num_sublists; i++) {
89 		multilist_sublist_t *mls = &ml->ml_sublists[i];
90 		mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
91 		list_create(&mls->mls_list, size, offset);
92 	}
93 }
94 
95 /*
96  * Allocate a new multilist, using the default number of sublists (the number
97  * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
98  * that the multilists do not expand if more CPUs are hot-added. In that case,
99  * we will have less fanout than boot_ncpus, but we don't want to always
100  * reserve the RAM necessary to create the extra slots for additional CPUs up
101  * front, and dynamically adding them is a complex task.
102  */
103 void
104 multilist_create(multilist_t *ml, size_t size, size_t offset,
105     multilist_sublist_index_func_t *index_func)
106 {
107 	uint_t num_sublists;
108 
109 	if (zfs_multilist_num_sublists > 0) {
110 		num_sublists = zfs_multilist_num_sublists;
111 	} else {
112 		num_sublists = MAX(boot_ncpus, 4);
113 	}
114 
115 	multilist_create_impl(ml, size, offset, num_sublists, index_func);
116 }
117 
118 /*
119  * Destroy the given multilist object, and free up any memory it holds.
120  */
121 void
122 multilist_destroy(multilist_t *ml)
123 {
124 	ASSERT(multilist_is_empty(ml));
125 
126 	for (int i = 0; i < ml->ml_num_sublists; i++) {
127 		multilist_sublist_t *mls = &ml->ml_sublists[i];
128 
129 		ASSERT(list_is_empty(&mls->mls_list));
130 
131 		list_destroy(&mls->mls_list);
132 		mutex_destroy(&mls->mls_lock);
133 	}
134 
135 	ASSERT3P(ml->ml_sublists, !=, NULL);
136 	kmem_free(ml->ml_sublists,
137 	    sizeof (multilist_sublist_t) * ml->ml_num_sublists);
138 
139 	ml->ml_num_sublists = 0;
140 	ml->ml_offset = 0;
141 	ml->ml_sublists = NULL;
142 }
143 
144 /*
145  * Insert the given object into the multilist.
146  *
147  * This function will insert the object specified into the sublist
148  * determined using the function given at multilist creation time.
149  *
150  * The sublist locks are automatically acquired if not already held, to
151  * ensure consistency when inserting and removing from multiple threads.
152  */
153 void
154 multilist_insert(multilist_t *ml, void *obj)
155 {
156 	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
157 	multilist_sublist_t *mls;
158 	boolean_t need_lock;
159 
160 	DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
161 	    unsigned int, sublist_idx, void *, obj);
162 
163 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
164 
165 	mls = &ml->ml_sublists[sublist_idx];
166 
167 	/*
168 	 * Note: Callers may already hold the sublist lock by calling
169 	 * multilist_sublist_lock().  Here we rely on MUTEX_HELD()
170 	 * returning TRUE if and only if the current thread holds the
171 	 * lock.  While it's a little ugly to make the lock recursive in
172 	 * this way, it works and allows the calling code to be much
173 	 * simpler -- otherwise it would have to pass around a flag
174 	 * indicating that it already has the lock.
175 	 */
176 	need_lock = !MUTEX_HELD(&mls->mls_lock);
177 
178 	if (need_lock)
179 		mutex_enter(&mls->mls_lock);
180 
181 	ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
182 
183 	multilist_sublist_insert_head(mls, obj);
184 
185 	if (need_lock)
186 		mutex_exit(&mls->mls_lock);
187 }
188 
189 /*
190  * Remove the given object from the multilist.
191  *
192  * This function will remove the object specified from the sublist
193  * determined using the function given at multilist creation time.
194  *
195  * The necessary sublist locks are automatically acquired, to ensure
196  * consistency when inserting and removing from multiple threads.
197  */
198 void
199 multilist_remove(multilist_t *ml, void *obj)
200 {
201 	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
202 	multilist_sublist_t *mls;
203 	boolean_t need_lock;
204 
205 	DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
206 	    unsigned int, sublist_idx, void *, obj);
207 
208 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
209 
210 	mls = &ml->ml_sublists[sublist_idx];
211 	/* See comment in multilist_insert(). */
212 	need_lock = !MUTEX_HELD(&mls->mls_lock);
213 
214 	if (need_lock)
215 		mutex_enter(&mls->mls_lock);
216 
217 	ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
218 
219 	multilist_sublist_remove(mls, obj);
220 
221 	if (need_lock)
222 		mutex_exit(&mls->mls_lock);
223 }
224 
225 /*
226  * Check to see if this multilist object is empty.
227  *
228  * This will return TRUE if it finds all of the sublists of this
229  * multilist to be empty, and FALSE otherwise. Each sublist lock will be
230  * automatically acquired as necessary.
231  *
232  * If concurrent insertions and removals are occurring, the semantics
233  * of this function become a little fuzzy. Instead of locking all
234  * sublists for the entire call time of the function, each sublist is
235  * only locked as it is individually checked for emptiness. Thus, it's
236  * possible for this function to return TRUE with non-empty sublists at
237  * the time the function returns. This would be due to another thread
238  * inserting into a given sublist, after that specific sublist was check
239  * and deemed empty, but before all sublists have been checked.
240  */
241 int
242 multilist_is_empty(multilist_t *ml)
243 {
244 	for (int i = 0; i < ml->ml_num_sublists; i++) {
245 		multilist_sublist_t *mls = &ml->ml_sublists[i];
246 		/* See comment in multilist_insert(). */
247 		boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
248 
249 		if (need_lock)
250 			mutex_enter(&mls->mls_lock);
251 
252 		if (!list_is_empty(&mls->mls_list)) {
253 			if (need_lock)
254 				mutex_exit(&mls->mls_lock);
255 
256 			return (FALSE);
257 		}
258 
259 		if (need_lock)
260 			mutex_exit(&mls->mls_lock);
261 	}
262 
263 	return (TRUE);
264 }
265 
266 /* Return the number of sublists composing this multilist */
267 unsigned int
268 multilist_get_num_sublists(multilist_t *ml)
269 {
270 	return (ml->ml_num_sublists);
271 }
272 
273 /* Return a randomly selected, valid sublist index for this multilist */
274 unsigned int
275 multilist_get_random_index(multilist_t *ml)
276 {
277 	return (random_in_range(ml->ml_num_sublists));
278 }
279 
280 void
281 multilist_sublist_lock(multilist_sublist_t *mls)
282 {
283 	mutex_enter(&mls->mls_lock);
284 }
285 
286 /* Lock and return the sublist specified at the given index */
287 multilist_sublist_t *
288 multilist_sublist_lock_idx(multilist_t *ml, unsigned int sublist_idx)
289 {
290 	multilist_sublist_t *mls;
291 
292 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
293 	mls = &ml->ml_sublists[sublist_idx];
294 	mutex_enter(&mls->mls_lock);
295 
296 	return (mls);
297 }
298 
299 /* Lock and return the sublist that would be used to store the specified obj */
300 multilist_sublist_t *
301 multilist_sublist_lock_obj(multilist_t *ml, void *obj)
302 {
303 	return (multilist_sublist_lock_idx(ml, ml->ml_index_func(ml, obj)));
304 }
305 
306 void
307 multilist_sublist_unlock(multilist_sublist_t *mls)
308 {
309 	mutex_exit(&mls->mls_lock);
310 }
311 
312 /*
313  * We're allowing any object to be inserted into this specific sublist,
314  * but this can lead to trouble if multilist_remove() is called to
315  * remove this object. Specifically, if calling ml_index_func on this
316  * object returns an index for sublist different than what is passed as
317  * a parameter here, any call to multilist_remove() with this newly
318  * inserted object is undefined! (the call to multilist_remove() will
319  * remove the object from a list that it isn't contained in)
320  */
321 void
322 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
323 {
324 	ASSERT(MUTEX_HELD(&mls->mls_lock));
325 	list_insert_head(&mls->mls_list, obj);
326 }
327 
328 /* please see comment above multilist_sublist_insert_head */
329 void
330 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
331 {
332 	ASSERT(MUTEX_HELD(&mls->mls_lock));
333 	list_insert_tail(&mls->mls_list, obj);
334 }
335 
336 /* please see comment above multilist_sublist_insert_head */
337 void
338 multilist_sublist_insert_after(multilist_sublist_t *mls, void *prev, void *obj)
339 {
340 	ASSERT(MUTEX_HELD(&mls->mls_lock));
341 	list_insert_after(&mls->mls_list, prev, obj);
342 }
343 
344 /* please see comment above multilist_sublist_insert_head */
345 void
346 multilist_sublist_insert_before(multilist_sublist_t *mls, void *next, void *obj)
347 {
348 	ASSERT(MUTEX_HELD(&mls->mls_lock));
349 	list_insert_before(&mls->mls_list, next, obj);
350 }
351 
352 /*
353  * Move the object one element forward in the list.
354  *
355  * This function will move the given object forward in the list (towards
356  * the head) by one object. So, in essence, it will swap its position in
357  * the list with its "prev" pointer. If the given object is already at the
358  * head of the list, it cannot be moved forward any more than it already
359  * is, so no action is taken.
360  *
361  * NOTE: This function **must not** remove any object from the list other
362  *       than the object given as the parameter. This is relied upon in
363  *       arc_evict_state_impl().
364  */
365 void
366 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
367 {
368 	void *prev = list_prev(&mls->mls_list, obj);
369 
370 	ASSERT(MUTEX_HELD(&mls->mls_lock));
371 	ASSERT(!list_is_empty(&mls->mls_list));
372 
373 	/* 'obj' must be at the head of the list, nothing to do */
374 	if (prev == NULL)
375 		return;
376 
377 	list_remove(&mls->mls_list, obj);
378 	list_insert_before(&mls->mls_list, prev, obj);
379 }
380 
381 void
382 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
383 {
384 	ASSERT(MUTEX_HELD(&mls->mls_lock));
385 	list_remove(&mls->mls_list, obj);
386 }
387 
388 int
389 multilist_sublist_is_empty(multilist_sublist_t *mls)
390 {
391 	ASSERT(MUTEX_HELD(&mls->mls_lock));
392 	return (list_is_empty(&mls->mls_list));
393 }
394 
395 int
396 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx)
397 {
398 	multilist_sublist_t *mls;
399 	int empty;
400 
401 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
402 	mls = &ml->ml_sublists[sublist_idx];
403 	ASSERT(!MUTEX_HELD(&mls->mls_lock));
404 	mutex_enter(&mls->mls_lock);
405 	empty = list_is_empty(&mls->mls_list);
406 	mutex_exit(&mls->mls_lock);
407 	return (empty);
408 }
409 
410 void *
411 multilist_sublist_head(multilist_sublist_t *mls)
412 {
413 	ASSERT(MUTEX_HELD(&mls->mls_lock));
414 	return (list_head(&mls->mls_list));
415 }
416 
417 void *
418 multilist_sublist_tail(multilist_sublist_t *mls)
419 {
420 	ASSERT(MUTEX_HELD(&mls->mls_lock));
421 	return (list_tail(&mls->mls_list));
422 }
423 
424 void *
425 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
426 {
427 	ASSERT(MUTEX_HELD(&mls->mls_lock));
428 	return (list_next(&mls->mls_list, obj));
429 }
430 
431 void *
432 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
433 {
434 	ASSERT(MUTEX_HELD(&mls->mls_lock));
435 	return (list_prev(&mls->mls_list, obj));
436 }
437 
438 void
439 multilist_link_init(multilist_node_t *link)
440 {
441 	list_link_init(link);
442 }
443 
444 int
445 multilist_link_active(multilist_node_t *link)
446 {
447 	return (list_link_active(link));
448 }
449 
450 ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, UINT, ZMOD_RW,
451 	"Number of sublists used in each multilist");
452