xref: /titanic_41/usr/src/uts/common/fs/zfs/multilist.c (revision b959b0b3b1c09d9e744c4cbdac35429511cbcf86)
1 /*
2  * CDDL HEADER START
3  *
4  * This file and its contents are supplied under the terms of the
5  * Common Development and Distribution License ("CDDL"), version 1.0.
6  * You may only use this file in accordance with the terms of version
7  * 1.0 of the CDDL.
8  *
9  * A full copy of the text of the CDDL should have accompanied this
10  * source.  A copy of the CDDL is also available via the Internet at
11  * http://www.illumos.org/license/CDDL.
12  *
13  * CDDL HEADER END
14  */
15 /*
16  * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
17  */
18 
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
21 
22 /* needed for spa_get_random() */
23 #include <sys/spa.h>
24 
25 /*
26  * Given the object contained on the list, return a pointer to the
27  * object's multilist_node_t structure it contains.
28  */
29 static multilist_node_t *
multilist_d2l(multilist_t * ml,void * obj)30 multilist_d2l(multilist_t *ml, void *obj)
31 {
32 	return ((multilist_node_t *)((char *)obj + ml->ml_offset));
33 }
34 
35 /*
36  * Initialize a new mutlilist using the parameters specified.
37  *
38  *  - 'size' denotes the size of the structure containing the
39  *     multilist_node_t.
40  *  - 'offset' denotes the byte offset of the mutlilist_node_t within
41  *     the structure that contains it.
42  *  - 'num' specifies the number of internal sublists to create.
43  *  - 'index_func' is used to determine which sublist to insert into
44  *     when the multilist_insert() function is called; as well as which
45  *     sublist to remove from when multilist_remove() is called. The
46  *     requirements this function must meet, are the following:
47  *
48  *      - It must always return the same value when called on the same
49  *        object (to ensure the object is removed from the list it was
50  *        inserted into).
51  *
52  *      - It must return a value in the range [0, number of sublists).
53  *        The multilist_get_num_sublists() function may be used to
54  *        determine the number of sublists in the multilist.
55  *
56  *     Also, in order to reduce internal contention between the sublists
57  *     during insertion and removal, this function should choose evenly
58  *     between all available sublists when inserting. This isn't a hard
59  *     requirement, but a general rule of thumb in order to garner the
60  *     best multi-threaded performance out of the data structure.
61  */
62 void
multilist_create(multilist_t * ml,size_t size,size_t offset,unsigned int num,multilist_sublist_index_func_t * index_func)63 multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
64     multilist_sublist_index_func_t *index_func)
65 {
66 	ASSERT3P(ml, !=, NULL);
67 	ASSERT3U(size, >, 0);
68 	ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
69 	ASSERT3U(num, >, 0);
70 	ASSERT3P(index_func, !=, NULL);
71 
72 	ml->ml_offset = offset;
73 	ml->ml_num_sublists = num;
74 	ml->ml_index_func = index_func;
75 
76 	ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
77 	    ml->ml_num_sublists, KM_SLEEP);
78 
79 	ASSERT3P(ml->ml_sublists, !=, NULL);
80 
81 	for (int i = 0; i < ml->ml_num_sublists; i++) {
82 		multilist_sublist_t *mls = &ml->ml_sublists[i];
83 		mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
84 		list_create(&mls->mls_list, size, offset);
85 	}
86 }
87 
88 /*
89  * Destroy the given multilist object, and free up any memory it holds.
90  */
91 void
multilist_destroy(multilist_t * ml)92 multilist_destroy(multilist_t *ml)
93 {
94 	ASSERT(multilist_is_empty(ml));
95 
96 	for (int i = 0; i < ml->ml_num_sublists; i++) {
97 		multilist_sublist_t *mls = &ml->ml_sublists[i];
98 
99 		ASSERT(list_is_empty(&mls->mls_list));
100 
101 		list_destroy(&mls->mls_list);
102 		mutex_destroy(&mls->mls_lock);
103 	}
104 
105 	ASSERT3P(ml->ml_sublists, !=, NULL);
106 	kmem_free(ml->ml_sublists,
107 	    sizeof (multilist_sublist_t) * ml->ml_num_sublists);
108 
109 	ml->ml_num_sublists = 0;
110 	ml->ml_offset = 0;
111 }
112 
113 /*
114  * Insert the given object into the multilist.
115  *
116  * This function will insert the object specified into the sublist
117  * determined using the function given at multilist creation time.
118  *
119  * The sublist locks are automatically acquired if not already held, to
120  * ensure consistency when inserting and removing from multiple threads.
121  */
122 void
multilist_insert(multilist_t * ml,void * obj)123 multilist_insert(multilist_t *ml, void *obj)
124 {
125 	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
126 	multilist_sublist_t *mls;
127 	boolean_t need_lock;
128 
129 	DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
130 	    unsigned int, sublist_idx, void *, obj);
131 
132 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
133 
134 	mls = &ml->ml_sublists[sublist_idx];
135 
136 	/*
137 	 * Note: Callers may already hold the sublist lock by calling
138 	 * multilist_sublist_lock().  Here we rely on MUTEX_HELD()
139 	 * returning TRUE if and only if the current thread holds the
140 	 * lock.  While it's a little ugly to make the lock recursive in
141 	 * this way, it works and allows the calling code to be much
142 	 * simpler -- otherwise it would have to pass around a flag
143 	 * indicating that it already has the lock.
144 	 */
145 	need_lock = !MUTEX_HELD(&mls->mls_lock);
146 
147 	if (need_lock)
148 		mutex_enter(&mls->mls_lock);
149 
150 	ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
151 
152 	multilist_sublist_insert_head(mls, obj);
153 
154 	if (need_lock)
155 		mutex_exit(&mls->mls_lock);
156 }
157 
158 /*
159  * Remove the given object from the multilist.
160  *
161  * This function will remove the object specified from the sublist
162  * determined using the function given at multilist creation time.
163  *
164  * The necessary sublist locks are automatically acquired, to ensure
165  * consistency when inserting and removing from multiple threads.
166  */
167 void
multilist_remove(multilist_t * ml,void * obj)168 multilist_remove(multilist_t *ml, void *obj)
169 {
170 	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
171 	multilist_sublist_t *mls;
172 	boolean_t need_lock;
173 
174 	DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
175 	    unsigned int, sublist_idx, void *, obj);
176 
177 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
178 
179 	mls = &ml->ml_sublists[sublist_idx];
180 	/* See comment in multilist_insert(). */
181 	need_lock = !MUTEX_HELD(&mls->mls_lock);
182 
183 	if (need_lock)
184 		mutex_enter(&mls->mls_lock);
185 
186 	ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
187 
188 	multilist_sublist_remove(mls, obj);
189 
190 	if (need_lock)
191 		mutex_exit(&mls->mls_lock);
192 }
193 
194 /*
195  * Check to see if this multilist object is empty.
196  *
197  * This will return TRUE if it finds all of the sublists of this
198  * multilist to be empty, and FALSE otherwise. Each sublist lock will be
199  * automatically acquired as necessary.
200  *
201  * If concurrent insertions and removals are occurring, the semantics
202  * of this function become a little fuzzy. Instead of locking all
203  * sublists for the entire call time of the function, each sublist is
204  * only locked as it is individually checked for emptiness. Thus, it's
205  * possible for this function to return TRUE with non-empty sublists at
206  * the time the function returns. This would be due to another thread
207  * inserting into a given sublist, after that specific sublist was check
208  * and deemed empty, but before all sublists have been checked.
209  */
210 int
multilist_is_empty(multilist_t * ml)211 multilist_is_empty(multilist_t *ml)
212 {
213 	for (int i = 0; i < ml->ml_num_sublists; i++) {
214 		multilist_sublist_t *mls = &ml->ml_sublists[i];
215 		/* See comment in multilist_insert(). */
216 		boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
217 
218 		if (need_lock)
219 			mutex_enter(&mls->mls_lock);
220 
221 		if (!list_is_empty(&mls->mls_list)) {
222 			if (need_lock)
223 				mutex_exit(&mls->mls_lock);
224 
225 			return (FALSE);
226 		}
227 
228 		if (need_lock)
229 			mutex_exit(&mls->mls_lock);
230 	}
231 
232 	return (TRUE);
233 }
234 
235 /* Return the number of sublists composing this multilist */
236 unsigned int
multilist_get_num_sublists(multilist_t * ml)237 multilist_get_num_sublists(multilist_t *ml)
238 {
239 	return (ml->ml_num_sublists);
240 }
241 
242 /* Return a randomly selected, valid sublist index for this multilist */
243 unsigned int
multilist_get_random_index(multilist_t * ml)244 multilist_get_random_index(multilist_t *ml)
245 {
246 	return (spa_get_random(ml->ml_num_sublists));
247 }
248 
249 /* Lock and return the sublist specified at the given index */
250 multilist_sublist_t *
multilist_sublist_lock(multilist_t * ml,unsigned int sublist_idx)251 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
252 {
253 	multilist_sublist_t *mls;
254 
255 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
256 	mls = &ml->ml_sublists[sublist_idx];
257 	mutex_enter(&mls->mls_lock);
258 
259 	return (mls);
260 }
261 
262 void
multilist_sublist_unlock(multilist_sublist_t * mls)263 multilist_sublist_unlock(multilist_sublist_t *mls)
264 {
265 	mutex_exit(&mls->mls_lock);
266 }
267 
268 /*
269  * We're allowing any object to be inserted into this specific sublist,
270  * but this can lead to trouble if multilist_remove() is called to
271  * remove this object. Specifically, if calling ml_index_func on this
272  * object returns an index for sublist different than what is passed as
273  * a parameter here, any call to multilist_remove() with this newly
274  * inserted object is undefined! (the call to multilist_remove() will
275  * remove the object from a list that it isn't contained in)
276  */
277 void
multilist_sublist_insert_head(multilist_sublist_t * mls,void * obj)278 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
279 {
280 	ASSERT(MUTEX_HELD(&mls->mls_lock));
281 	list_insert_head(&mls->mls_list, obj);
282 }
283 
284 /* please see comment above multilist_sublist_insert_head */
285 void
multilist_sublist_insert_tail(multilist_sublist_t * mls,void * obj)286 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
287 {
288 	ASSERT(MUTEX_HELD(&mls->mls_lock));
289 	list_insert_tail(&mls->mls_list, obj);
290 }
291 
292 /*
293  * Move the object one element forward in the list.
294  *
295  * This function will move the given object forward in the list (towards
296  * the head) by one object. So, in essence, it will swap its position in
297  * the list with its "prev" pointer. If the given object is already at the
298  * head of the list, it cannot be moved forward any more than it already
299  * is, so no action is taken.
300  *
301  * NOTE: This function **must not** remove any object from the list other
302  *       than the object given as the parameter. This is relied upon in
303  *       arc_evict_state_impl().
304  */
305 void
multilist_sublist_move_forward(multilist_sublist_t * mls,void * obj)306 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
307 {
308 	void *prev = list_prev(&mls->mls_list, obj);
309 
310 	ASSERT(MUTEX_HELD(&mls->mls_lock));
311 	ASSERT(!list_is_empty(&mls->mls_list));
312 
313 	/* 'obj' must be at the head of the list, nothing to do */
314 	if (prev == NULL)
315 		return;
316 
317 	list_remove(&mls->mls_list, obj);
318 	list_insert_before(&mls->mls_list, prev, obj);
319 }
320 
321 void
multilist_sublist_remove(multilist_sublist_t * mls,void * obj)322 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
323 {
324 	ASSERT(MUTEX_HELD(&mls->mls_lock));
325 	list_remove(&mls->mls_list, obj);
326 }
327 
328 void *
multilist_sublist_head(multilist_sublist_t * mls)329 multilist_sublist_head(multilist_sublist_t *mls)
330 {
331 	ASSERT(MUTEX_HELD(&mls->mls_lock));
332 	return (list_head(&mls->mls_list));
333 }
334 
335 void *
multilist_sublist_tail(multilist_sublist_t * mls)336 multilist_sublist_tail(multilist_sublist_t *mls)
337 {
338 	ASSERT(MUTEX_HELD(&mls->mls_lock));
339 	return (list_tail(&mls->mls_list));
340 }
341 
342 void *
multilist_sublist_next(multilist_sublist_t * mls,void * obj)343 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
344 {
345 	ASSERT(MUTEX_HELD(&mls->mls_lock));
346 	return (list_next(&mls->mls_list, obj));
347 }
348 
349 void *
multilist_sublist_prev(multilist_sublist_t * mls,void * obj)350 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
351 {
352 	ASSERT(MUTEX_HELD(&mls->mls_lock));
353 	return (list_prev(&mls->mls_list, obj));
354 }
355 
356 void
multilist_link_init(multilist_node_t * link)357 multilist_link_init(multilist_node_t *link)
358 {
359 	list_link_init(link);
360 }
361 
362 int
multilist_link_active(multilist_node_t * link)363 multilist_link_active(multilist_node_t *link)
364 {
365 	return (list_link_active(link));
366 }
367