xref: /freebsd/sys/contrib/openzfs/module/zfs/multilist.c (revision 963f5dc7a30624e95d72fb7f87b8892651164e46)
1 /*
2  * CDDL HEADER START
3  *
4  * This file and its contents are supplied under the terms of the
5  * Common Development and Distribution License ("CDDL"), version 1.0.
6  * You may only use this file in accordance with the terms of version
7  * 1.0 of the CDDL.
8  *
9  * A full copy of the text of the CDDL should have accompanied this
10  * source.  A copy of the CDDL is also available via the Internet at
11  * http://www.illumos.org/license/CDDL.
12  *
13  * CDDL HEADER END
14  */
15 /*
16  * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
17  */
18 
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
21 #include <sys/trace_zfs.h>
22 
/*
 * Tunable that overrides the number of sublists in each multilist_t.
 * It is consulted by multilist_create(): a value greater than zero is used
 * as the sublist count; otherwise (zero or negative) multilist_create()
 * falls back to its default of MAX(boot_ncpus, 4).
 */
int zfs_multilist_num_sublists = 0;
28 
/*
 * Translate a pointer to an object stored on the multilist into a pointer
 * to the multilist_node_t embedded in that object, by adding the byte
 * offset recorded in the multilist (ml_offset) to the object's address.
 *
 * Only compiled when ZFS_DEBUG is defined; within this file it is used
 * solely inside ASSERT() expressions.
 */
#ifdef ZFS_DEBUG
static multilist_node_t *
multilist_d2l(multilist_t *ml, void *obj)
{
	char *base = obj;

	return ((multilist_node_t *)(base + ml->ml_offset));
}
#endif
40 
41 /*
42  * Initialize a new mutlilist using the parameters specified.
43  *
44  *  - 'size' denotes the size of the structure containing the
45  *     multilist_node_t.
46  *  - 'offset' denotes the byte offset of the mutlilist_node_t within
47  *     the structure that contains it.
48  *  - 'num' specifies the number of internal sublists to create.
49  *  - 'index_func' is used to determine which sublist to insert into
50  *     when the multilist_insert() function is called; as well as which
51  *     sublist to remove from when multilist_remove() is called. The
52  *     requirements this function must meet, are the following:
53  *
54  *      - It must always return the same value when called on the same
55  *        object (to ensure the object is removed from the list it was
56  *        inserted into).
57  *
58  *      - It must return a value in the range [0, number of sublists).
59  *        The multilist_get_num_sublists() function may be used to
60  *        determine the number of sublists in the multilist.
61  *
62  *     Also, in order to reduce internal contention between the sublists
63  *     during insertion and removal, this function should choose evenly
64  *     between all available sublists when inserting. This isn't a hard
65  *     requirement, but a general rule of thumb in order to garner the
66  *     best multi-threaded performance out of the data structure.
67  */
68 static void
69 multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
70     unsigned int num, multilist_sublist_index_func_t *index_func)
71 {
72 	ASSERT3U(size, >, 0);
73 	ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
74 	ASSERT3U(num, >, 0);
75 	ASSERT3P(index_func, !=, NULL);
76 
77 	ml->ml_offset = offset;
78 	ml->ml_num_sublists = num;
79 	ml->ml_index_func = index_func;
80 
81 	ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
82 	    ml->ml_num_sublists, KM_SLEEP);
83 
84 	ASSERT3P(ml->ml_sublists, !=, NULL);
85 
86 	for (int i = 0; i < ml->ml_num_sublists; i++) {
87 		multilist_sublist_t *mls = &ml->ml_sublists[i];
88 		mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
89 		list_create(&mls->mls_list, size, offset);
90 	}
91 }
92 
93 /*
94  * Allocate a new multilist, using the default number of sublists (the number
95  * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
96  * that the multilists do not expand if more CPUs are hot-added. In that case,
97  * we will have less fanout than boot_ncpus, but we don't want to always
98  * reserve the RAM necessary to create the extra slots for additional CPUs up
99  * front, and dynamically adding them is a complex task.
100  */
101 void
102 multilist_create(multilist_t *ml, size_t size, size_t offset,
103     multilist_sublist_index_func_t *index_func)
104 {
105 	int num_sublists;
106 
107 	if (zfs_multilist_num_sublists > 0) {
108 		num_sublists = zfs_multilist_num_sublists;
109 	} else {
110 		num_sublists = MAX(boot_ncpus, 4);
111 	}
112 
113 	multilist_create_impl(ml, size, offset, num_sublists, index_func);
114 }
115 
116 /*
117  * Destroy the given multilist object, and free up any memory it holds.
118  */
119 void
120 multilist_destroy(multilist_t *ml)
121 {
122 	ASSERT(multilist_is_empty(ml));
123 
124 	for (int i = 0; i < ml->ml_num_sublists; i++) {
125 		multilist_sublist_t *mls = &ml->ml_sublists[i];
126 
127 		ASSERT(list_is_empty(&mls->mls_list));
128 
129 		list_destroy(&mls->mls_list);
130 		mutex_destroy(&mls->mls_lock);
131 	}
132 
133 	ASSERT3P(ml->ml_sublists, !=, NULL);
134 	kmem_free(ml->ml_sublists,
135 	    sizeof (multilist_sublist_t) * ml->ml_num_sublists);
136 
137 	ml->ml_num_sublists = 0;
138 	ml->ml_offset = 0;
139 	ml->ml_sublists = NULL;
140 }
141 
142 /*
143  * Insert the given object into the multilist.
144  *
145  * This function will insert the object specified into the sublist
146  * determined using the function given at multilist creation time.
147  *
148  * The sublist locks are automatically acquired if not already held, to
149  * ensure consistency when inserting and removing from multiple threads.
150  */
151 void
152 multilist_insert(multilist_t *ml, void *obj)
153 {
154 	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
155 	multilist_sublist_t *mls;
156 	boolean_t need_lock;
157 
158 	DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
159 	    unsigned int, sublist_idx, void *, obj);
160 
161 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
162 
163 	mls = &ml->ml_sublists[sublist_idx];
164 
165 	/*
166 	 * Note: Callers may already hold the sublist lock by calling
167 	 * multilist_sublist_lock().  Here we rely on MUTEX_HELD()
168 	 * returning TRUE if and only if the current thread holds the
169 	 * lock.  While it's a little ugly to make the lock recursive in
170 	 * this way, it works and allows the calling code to be much
171 	 * simpler -- otherwise it would have to pass around a flag
172 	 * indicating that it already has the lock.
173 	 */
174 	need_lock = !MUTEX_HELD(&mls->mls_lock);
175 
176 	if (need_lock)
177 		mutex_enter(&mls->mls_lock);
178 
179 	ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
180 
181 	multilist_sublist_insert_head(mls, obj);
182 
183 	if (need_lock)
184 		mutex_exit(&mls->mls_lock);
185 }
186 
187 /*
188  * Remove the given object from the multilist.
189  *
190  * This function will remove the object specified from the sublist
191  * determined using the function given at multilist creation time.
192  *
193  * The necessary sublist locks are automatically acquired, to ensure
194  * consistency when inserting and removing from multiple threads.
195  */
196 void
197 multilist_remove(multilist_t *ml, void *obj)
198 {
199 	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
200 	multilist_sublist_t *mls;
201 	boolean_t need_lock;
202 
203 	DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
204 	    unsigned int, sublist_idx, void *, obj);
205 
206 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
207 
208 	mls = &ml->ml_sublists[sublist_idx];
209 	/* See comment in multilist_insert(). */
210 	need_lock = !MUTEX_HELD(&mls->mls_lock);
211 
212 	if (need_lock)
213 		mutex_enter(&mls->mls_lock);
214 
215 	ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
216 
217 	multilist_sublist_remove(mls, obj);
218 
219 	if (need_lock)
220 		mutex_exit(&mls->mls_lock);
221 }
222 
223 /*
224  * Check to see if this multilist object is empty.
225  *
226  * This will return TRUE if it finds all of the sublists of this
227  * multilist to be empty, and FALSE otherwise. Each sublist lock will be
228  * automatically acquired as necessary.
229  *
230  * If concurrent insertions and removals are occurring, the semantics
231  * of this function become a little fuzzy. Instead of locking all
232  * sublists for the entire call time of the function, each sublist is
233  * only locked as it is individually checked for emptiness. Thus, it's
234  * possible for this function to return TRUE with non-empty sublists at
235  * the time the function returns. This would be due to another thread
236  * inserting into a given sublist, after that specific sublist was check
237  * and deemed empty, but before all sublists have been checked.
238  */
239 int
240 multilist_is_empty(multilist_t *ml)
241 {
242 	for (int i = 0; i < ml->ml_num_sublists; i++) {
243 		multilist_sublist_t *mls = &ml->ml_sublists[i];
244 		/* See comment in multilist_insert(). */
245 		boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
246 
247 		if (need_lock)
248 			mutex_enter(&mls->mls_lock);
249 
250 		if (!list_is_empty(&mls->mls_list)) {
251 			if (need_lock)
252 				mutex_exit(&mls->mls_lock);
253 
254 			return (FALSE);
255 		}
256 
257 		if (need_lock)
258 			mutex_exit(&mls->mls_lock);
259 	}
260 
261 	return (TRUE);
262 }
263 
264 /* Return the number of sublists composing this multilist */
265 unsigned int
266 multilist_get_num_sublists(multilist_t *ml)
267 {
268 	return (ml->ml_num_sublists);
269 }
270 
271 /* Return a randomly selected, valid sublist index for this multilist */
272 unsigned int
273 multilist_get_random_index(multilist_t *ml)
274 {
275 	return (random_in_range(ml->ml_num_sublists));
276 }
277 
278 /* Lock and return the sublist specified at the given index */
279 multilist_sublist_t *
280 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
281 {
282 	multilist_sublist_t *mls;
283 
284 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
285 	mls = &ml->ml_sublists[sublist_idx];
286 	mutex_enter(&mls->mls_lock);
287 
288 	return (mls);
289 }
290 
291 /* Lock and return the sublist that would be used to store the specified obj */
292 multilist_sublist_t *
293 multilist_sublist_lock_obj(multilist_t *ml, void *obj)
294 {
295 	return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
296 }
297 
/*
 * Release the lock of the given sublist. This is the counterpart of
 * multilist_sublist_lock() and multilist_sublist_lock_obj(), which return
 * a sublist with its lock held.
 */
void
multilist_sublist_unlock(multilist_sublist_t *mls)
{
	mutex_exit(&mls->mls_lock);
}
303 
/*
 * Insert 'obj' at the head of this specific sublist. The caller must
 * hold the sublist's lock.
 *
 * We're allowing any object to be inserted into this specific sublist,
 * but this can lead to trouble if multilist_remove() is called to
 * remove this object. Specifically, if calling ml_index_func on this
 * object returns an index for a sublist different from the one passed as
 * a parameter here, any call to multilist_remove() with this newly
 * inserted object is undefined! (The call to multilist_remove() will
 * remove the object from a list that it isn't contained in.)
 */
void
multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
{
	ASSERT(MUTEX_HELD(&mls->mls_lock));
	list_insert_head(&mls->mls_list, obj);
}
319 
/*
 * Insert 'obj' at the tail of this specific sublist. The caller must
 * hold the sublist's lock.
 *
 * The same caveat applies as for multilist_sublist_insert_head(): if the
 * multilist's index function maps 'obj' to a different sublist than the
 * one given here, a later multilist_remove() of 'obj' is undefined.
 */
void
multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
{
	ASSERT(MUTEX_HELD(&mls->mls_lock));
	list_insert_tail(&mls->mls_list, obj);
}
327 
328 /*
329  * Move the object one element forward in the list.
330  *
331  * This function will move the given object forward in the list (towards
332  * the head) by one object. So, in essence, it will swap its position in
333  * the list with its "prev" pointer. If the given object is already at the
334  * head of the list, it cannot be moved forward any more than it already
335  * is, so no action is taken.
336  *
337  * NOTE: This function **must not** remove any object from the list other
338  *       than the object given as the parameter. This is relied upon in
339  *       arc_evict_state_impl().
340  */
341 void
342 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
343 {
344 	void *prev = list_prev(&mls->mls_list, obj);
345 
346 	ASSERT(MUTEX_HELD(&mls->mls_lock));
347 	ASSERT(!list_is_empty(&mls->mls_list));
348 
349 	/* 'obj' must be at the head of the list, nothing to do */
350 	if (prev == NULL)
351 		return;
352 
353 	list_remove(&mls->mls_list, obj);
354 	list_insert_before(&mls->mls_list, prev, obj);
355 }
356 
/*
 * Remove 'obj' from this specific sublist. The caller must hold the
 * sublist's lock.
 */
void
multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
{
	ASSERT(MUTEX_HELD(&mls->mls_lock));
	list_remove(&mls->mls_list, obj);
}
363 
364 int
365 multilist_sublist_is_empty(multilist_sublist_t *mls)
366 {
367 	ASSERT(MUTEX_HELD(&mls->mls_lock));
368 	return (list_is_empty(&mls->mls_list));
369 }
370 
371 int
372 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx)
373 {
374 	multilist_sublist_t *mls;
375 	int empty;
376 
377 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
378 	mls = &ml->ml_sublists[sublist_idx];
379 	ASSERT(!MUTEX_HELD(&mls->mls_lock));
380 	mutex_enter(&mls->mls_lock);
381 	empty = list_is_empty(&mls->mls_list);
382 	mutex_exit(&mls->mls_lock);
383 	return (empty);
384 }
385 
386 void *
387 multilist_sublist_head(multilist_sublist_t *mls)
388 {
389 	ASSERT(MUTEX_HELD(&mls->mls_lock));
390 	return (list_head(&mls->mls_list));
391 }
392 
393 void *
394 multilist_sublist_tail(multilist_sublist_t *mls)
395 {
396 	ASSERT(MUTEX_HELD(&mls->mls_lock));
397 	return (list_tail(&mls->mls_list));
398 }
399 
400 void *
401 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
402 {
403 	ASSERT(MUTEX_HELD(&mls->mls_lock));
404 	return (list_next(&mls->mls_list, obj));
405 }
406 
407 void *
408 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
409 {
410 	ASSERT(MUTEX_HELD(&mls->mls_lock));
411 	return (list_prev(&mls->mls_list, obj));
412 }
413 
/*
 * Initialize a multilist_node_t link by delegating to list_link_init().
 */
void
multilist_link_init(multilist_node_t *link)
{
	list_link_init(link);
}
419 
420 int
421 multilist_link_active(multilist_node_t *link)
422 {
423 	return (list_link_active(link));
424 }
425 
/*
 * Register the zfs_multilist_num_sublists tunable as a module parameter
 * of type INT with ZMOD_RW access.
 * NOTE(review): the externally visible parameter name is composed by
 * ZFS_MODULE_PARAM, which is defined outside this file.
 */
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, INT, ZMOD_RW,
	"Number of sublists used in each multilist");
/* END CSTYLED */
430