xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_squeue.c (revision 2576e7a56bb1b296053722f3ebc688cef754350f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2017 Joyent, Inc.
25  * Copyright 2026 Oxide Computer Company
26  */
27 
28 /*
29  * IP interface to squeues.
30  *
31  * IP uses squeues to force serialization of packets, both incoming and
32  * outgoing. Each squeue is associated with a connection instance (conn_t)
33  * above, and a soft ring (if enabled) below. Each CPU will have a default
34  * squeue for outbound connections, and each soft ring of an interface will
35  * have an squeue to which it sends incoming packets. squeues are never
36  * destroyed, and if they become unused they are kept around against future
37  * needs.
38  *
39  * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
40  * in the system there will be one squeue set, all of whose squeues will be
41  * bound to that CPU, plus one additional set known as the unbound set. Sets
42  * associated with CPUs will have one default squeue, for outbound
43  * connections, and a linked list of squeues used by various NICs for inbound
44  * packets. The unbound set also has a linked list of squeues, but no default
45  * squeue.
46  *
47  * When a CPU goes offline its squeue set is destroyed, and all its squeues
48  * are moved to the unbound set. When a CPU comes online, a new squeue set is
49  * created and the default set is searched for a default squeue formerly bound
50  * to this CPU. If no default squeue is found, a new one is created.
51  *
52  * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
53  * and not the squeue code. squeue.c will not touch them, and we can modify
54  * them without holding the squeue lock because of the guarantee that squeues
55  * are never destroyed. ip_squeue locks must be held, however.
56  *
57  * All the squeue sets are protected by a single lock, the sqset_lock. This
58  * is also used to protect the sq_next and sq_set fields of an squeue_t.
59  *
60  * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
61  *
62  * There are two modes of associating connection with squeues. The first mode
63  * associates each connection with the CPU that creates the connection (either
64  * during open time or during accept time). The second mode associates each
65  * connection with a random CPU, effectively distributing load over all CPUs
66  * and all squeues in the system. The mode is controlled by the
67  * ip_squeue_fanout variable.
68  *
69  * NOTE: The fact that there is an association between each connection and
70  * squeue and squeue and CPU does not mean that each connection is always
71  * processed on this CPU and on this CPU only. Any thread calling squeue_enter()
72  * may process the connection on whatever CPU it is scheduled. The squeue to CPU
73  * binding is only relevant for the worker thread.
74  *
75  * INTERFACE:
76  *
77  * squeue_t *ip_squeue_get(ill_rx_ring_t)
78  *
79  * Returns the squeue associated with an ill receive ring. If the ring is
80  * not bound to a CPU, and we're currently servicing the interrupt which
81  * generated the packet, then bind the squeue to CPU.
82  *
83  *
84  * DR Notes
85  * ========
86  *
 * ip_squeue_init() registers a callback function with the CPU DR
 * subsystem using register_cpu_setup_func(). The callback function does two
 * things:
90  *
91  * o When the CPU is going off-line or unconfigured, the worker thread is
92  *	unbound from the CPU. This allows the CPU unconfig code to move it to
93  *	another CPU.
94  *
95  * o When the CPU is going online, it creates a new squeue for this CPU if
96  *	necessary and binds the squeue worker thread to this CPU.
97  *
98  * TUNABLES:
99  *
100  * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
101  * pick the default squeue from a random CPU, otherwise use our CPU's default
102  * squeue.
103  *
104  * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
105  * /dev/ip.
106  */
107 
108 #include <sys/types.h>
109 #include <sys/debug.h>
110 #include <sys/kmem.h>
111 #include <sys/cpuvar.h>
112 #include <sys/cmn_err.h>
113 
114 #include <inet/common.h>
115 #include <inet/ip.h>
116 #include <netinet/ip6.h>
117 #include <inet/ip_if.h>
118 #include <inet/ip_ire.h>
119 #include <inet/nd.h>
120 #include <inet/ipclassifier.h>
121 #include <sys/types.h>
122 #include <sys/conf.h>
123 #include <sys/sunddi.h>
124 #include <sys/dlpi.h>
125 #include <sys/squeue_impl.h>
126 #include <sys/tihdr.h>
127 #include <inet/udp_impl.h>
128 #include <sys/strsubr.h>
129 #include <sys/zone.h>
130 #include <sys/dld.h>
131 #include <sys/atomic.h>
132 
133 /*
134  * List of all created squeue sets. The list and its size are protected by
135  * sqset_lock.
136  */
137 static squeue_set_t	**sqset_global_list; /* list 0 is the unbound list */
138 static uint_t		sqset_global_size;
139 kmutex_t		sqset_lock;
140 
141 static void (*ip_squeue_create_callback)(squeue_t *) = NULL;
142 
143 static squeue_t *ip_squeue_create(pri_t);
144 static squeue_set_t *ip_squeue_set_create(processorid_t);
145 static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
146 static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
147 static void ip_squeue_set_destroy(cpu_t *);
148 static void ip_squeue_clean(void *, mblk_t *, void *);
149 
150 #define	CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))
151 
152 static squeue_t *
153 ip_squeue_create(pri_t pri)
154 {
155 	squeue_t *sqp;
156 
157 	sqp = squeue_create(pri);
158 	ASSERT(sqp != NULL);
159 	if (ip_squeue_create_callback != NULL)
160 		ip_squeue_create_callback(sqp);
161 	return (sqp);
162 }
163 
/*
 * Create a new squeue_set. If id == -1, then we're creating the unbound set,
 * which should only happen once when we are first initialized. Otherwise id
 * is the id of the CPU that needs a set, either because we are initializing
 * or because the CPU has come online.
 *
 * If id != -1, then we need at a minimum to provide a default squeue for the
 * new set. We search the unbound set for candidates, and if none are found we
 * create a new one.
 *
 * Returns the new set.  For id != -1 the caller must hold cpu_lock; the
 * set is also appended to sqset_global_list under sqset_lock.
 */
static squeue_set_t *
ip_squeue_set_create(processorid_t id)
{
	squeue_set_t	*sqs;
	squeue_set_t	*src = sqset_global_list[0];	/* the unbound set */
	squeue_t	**lastsqp, *sq;
	squeue_t	**defaultq_lastp = NULL;	/* best candidate */

	sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
	sqs->sqs_cpuid = id;

	if (id == -1) {
		/* The unbound set is always the very first one created. */
		ASSERT(sqset_global_size == 0);
		sqset_global_list[0] = sqs;
		sqset_global_size = 1;
		return (sqs);
	}

	/*
	 * When we create an squeue set id != -1, we need to give it a
	 * default squeue, in order to support fanout of conns across
	 * CPUs. Try to find a former default squeue that matches this
	 * cpu id on the unbound squeue set. If no such squeue is found,
	 * find some non-default TCP squeue that is free. If still no such
	 * candidate is found, create a new squeue.
	 */

	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&sqset_lock);
	lastsqp = &src->sqs_head;

	while (*lastsqp) {
		if ((*lastsqp)->sq_bind == id &&
		    (*lastsqp)->sq_state & SQS_DEFAULT) {
			/*
			 * Exact match. Former default squeue of cpu 'id'
			 */
			ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
			defaultq_lastp = lastsqp;
			break;
		}
		if (defaultq_lastp == NULL &&
		    !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
			/*
			 * A free non-default TCP squeue; remember it, but
			 * keep scanning in case an exact match turns up.
			 */
			defaultq_lastp = lastsqp;
		}
		lastsqp = &(*lastsqp)->sq_next;
	}

	if (defaultq_lastp != NULL) {
		/* Remove from src set and set SQS_DEFAULT */
		sq = *defaultq_lastp;
		*defaultq_lastp = sq->sq_next;
		sq->sq_next = NULL;
		if (!(sq->sq_state & SQS_DEFAULT)) {
			mutex_enter(&sq->sq_lock);
			sq->sq_state |= SQS_DEFAULT;
			mutex_exit(&sq->sq_lock);
		}
	} else {
		/* No candidate on the unbound set; make a fresh squeue. */
		sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
		sq->sq_state |= SQS_DEFAULT;
	}

	sq->sq_set = sqs;
	sqs->sqs_default = sq;
	squeue_bind(sq, id); /* this locks squeue mutex */

	ASSERT(sqset_global_size <= NCPU);
	sqset_global_list[sqset_global_size++] = sqs;
	mutex_exit(&sqset_lock);
	return (sqs);
}
249 
250 /*
251  * Obtain a free squeue and set its worker and poll thread priorities, if
252  * required. A free squeue is one that is not already bound to an ill_t
253  * and that is not marked as "default". If a free squeue does not exist,
254  * then one is created.
255  */
256 squeue_t *
257 ip_squeue_getfree(pri_t pri)
258 {
259 	squeue_set_t	*sqs = sqset_global_list[0];
260 	squeue_t	*sq;
261 
262 	mutex_enter(&sqset_lock);
263 	for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
264 		/*
265 		 * Select a non-default TCP squeue that is free i.e. not
266 		 * bound to any ill.
267 		 */
268 		if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
269 			break;
270 	}
271 
272 	if (sq == NULL) {
273 		sq = ip_squeue_create(pri);
274 		sq->sq_set = sqs;
275 		sq->sq_next = sqs->sqs_head;
276 		sqs->sqs_head = sq;
277 	}
278 
279 	ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
280 	    SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
281 	    SQS_POLL_THR_QUIESCED)));
282 
283 	mutex_enter(&sq->sq_lock);
284 	sq->sq_state |= SQS_ILL_BOUND;
285 	mutex_exit(&sq->sq_lock);
286 	mutex_exit(&sqset_lock);
287 
288 	if (sq->sq_priority != pri) {
289 		thread_lock(sq->sq_worker);
290 		(void) thread_change_pri(sq->sq_worker, pri, 0);
291 		thread_unlock(sq->sq_worker);
292 
293 		thread_lock(sq->sq_poll_thr);
294 		(void) thread_change_pri(sq->sq_poll_thr, pri, 0);
295 		thread_unlock(sq->sq_poll_thr);
296 
297 		sq->sq_priority = pri;
298 	}
299 	return (sq);
300 }
301 
302 /*
303  * Initialize IP squeues.
304  */
305 void
306 ip_squeue_init(void (*callback)(squeue_t *))
307 {
308 	int i;
309 	squeue_set_t	*sqs;
310 
311 	ASSERT(sqset_global_list == NULL);
312 
313 	ip_squeue_create_callback = callback;
314 	squeue_init();
315 	mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
316 	sqset_global_list =
317 	    kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
318 	sqset_global_size = 0;
319 	/*
320 	 * We are called at system boot time and we don't
321 	 * expect memory allocation failure.
322 	 */
323 	sqs = ip_squeue_set_create(-1);
324 	ASSERT(sqs != NULL);
325 
326 	mutex_enter(&cpu_lock);
327 	/* Create squeue for each active CPU available */
328 	for (i = 0; i < NCPU; i++) {
329 		cpu_t *cp = cpu_get(i);
330 		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
331 			/*
332 			 * We are called at system boot time and we don't
333 			 * expect memory allocation failure then
334 			 */
335 			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
336 			ASSERT(cp->cpu_squeue_set != NULL);
337 		}
338 	}
339 
340 	register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
341 	mutex_exit(&cpu_lock);
342 }
343 
344 /*
345  * Get a default squeue, either from the current CPU or a CPU derived by hash
346  * from the index argument, depending upon the setting of ip_squeue_fanout.
347  */
348 squeue_t *
349 ip_squeue_random(uint_t index)
350 {
351 	squeue_set_t *sqs = NULL;
352 	squeue_t *sq;
353 
354 	/*
355 	 * The minimum value of sqset_global_size is 2, one for the unbound
356 	 * squeue set and another for the squeue set of the zeroth CPU.
357 	 * Even though the value could be changing, it can never go below 2,
358 	 * so the assert does not need the lock protection.
359 	 */
360 	ASSERT(sqset_global_size > 1);
361 
362 	/* Protect against changes to sqset_global_list */
363 	mutex_enter(&sqset_lock);
364 
365 	if (!ip_squeue_fanout)
366 		sqs = CPU->cpu_squeue_set;
367 
368 	/*
369 	 * sqset_global_list[0] corresponds to the unbound squeue set.
370 	 * The computation below picks a set other than the unbound set.
371 	 */
372 	if (sqs == NULL)
373 		sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
374 	sq = sqs->sqs_default;
375 
376 	mutex_exit(&sqset_lock);
377 	ASSERT(sq);
378 	return (sq);
379 }
380 
381 /*
382  * Move squeue from its current set to newset. Not used for default squeues.
383  * Bind or unbind the worker thread as appropriate.
384  */
385 
386 static void
387 ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
388 {
389 	squeue_set_t	*set;
390 	squeue_t	**lastsqp;
391 	processorid_t	cpuid = newset->sqs_cpuid;
392 
393 	ASSERT(!(sq->sq_state & SQS_DEFAULT));
394 	ASSERT(!MUTEX_HELD(&sq->sq_lock));
395 	ASSERT(MUTEX_HELD(&sqset_lock));
396 
397 	set = sq->sq_set;
398 	if (set == newset)
399 		return;
400 
401 	lastsqp = &set->sqs_head;
402 	while (*lastsqp != sq)
403 		lastsqp = &(*lastsqp)->sq_next;
404 
405 	*lastsqp = sq->sq_next;
406 	sq->sq_next = newset->sqs_head;
407 	newset->sqs_head = sq;
408 	sq->sq_set = newset;
409 	if (cpuid == -1)
410 		squeue_unbind(sq);
411 	else
412 		squeue_bind(sq, cpuid);
413 }
414 
415 /*
416  * Move squeue from its current set to cpuid's set and bind to cpuid.
417  */
418 
419 int
420 ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
421 {
422 	cpu_t *cpu;
423 	squeue_set_t *set;
424 
425 	if (sq->sq_state & SQS_DEFAULT)
426 		return (-1);
427 
428 	ASSERT(MUTEX_HELD(&cpu_lock));
429 
430 	cpu = cpu_get(cpuid);
431 	if (!CPU_ISON(cpu))
432 		return (-1);
433 
434 	mutex_enter(&sqset_lock);
435 	set = cpu->cpu_squeue_set;
436 	if (set != NULL)
437 		ip_squeue_set_move(sq, set);
438 	mutex_exit(&sqset_lock);
439 	return ((set == NULL) ? -1 : 0);
440 }
441 
442 /*
443  * The mac layer is calling, asking us to move an squeue to a
444  * new CPU. This routine is called with cpu_lock held.
445  */
446 void
447 ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
448 {
449 	ASSERT(ILL_MAC_PERIM_HELD(ill));
450 	ASSERT(rx_ring->rr_ill == ill);
451 
452 	mutex_enter(&ill->ill_lock);
453 	if (rx_ring->rr_ring_state == RR_FREE ||
454 	    rx_ring->rr_ring_state == RR_FREE_INPROG) {
455 		mutex_exit(&ill->ill_lock);
456 		return;
457 	}
458 
459 	if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
460 		rx_ring->rr_ring_state = RR_SQUEUE_BOUND;
461 
462 	mutex_exit(&ill->ill_lock);
463 }
464 
/*
 * Register a new rx ring with IP: allocate an ill_rx_ring_t slot from the
 * ill's ring table, record the mac layer's receive/interrupt entry points,
 * attach a free squeue (marked SQS_POLL_CAPAB) to the ring, and finally
 * bind that squeue to the CPU mac has chosen for the ring.  Returns the
 * ring handle (opaque to mac), or NULL if the ring table is exhausted.
 */
void *
ip_squeue_add_ring(ill_t *ill, void *mrp)
{
	mac_rx_fifo_t		*mrfp = (mac_rx_fifo_t *)mrp;
	ill_rx_ring_t		*rx_ring, *ring_tbl;
	int			ip_rx_index;
	squeue_t		*sq = NULL;
	pri_t			pri;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
	ASSERT(ill->ill_dld_capab != NULL);

	ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;

	/* Find the first free slot in the ill's ring table. */
	mutex_enter(&ill->ill_lock);
	for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
		rx_ring = &ring_tbl[ip_rx_index];
		if (rx_ring->rr_ring_state == RR_FREE)
			break;
	}

	if (ip_rx_index == ILL_MAX_RINGS) {
		/*
		 * We ran out of ILL_MAX_RINGS worth of rx_ring structures.
		 * If we have devices which can overwhelm this limit,
		 * ILL_MAX_RING should be made configurable. Meanwhile it
		 * causes no panic because the driver will pass ip_input a
		 * NULL handle, which makes IP allocate the default squeue,
		 * and polling mode will not be used for this ring.
		 */
		cmn_err(CE_NOTE,
		    "Reached maximum number of receiving rings (%d) for %s\n",
		    ILL_MAX_RINGS, ill->ill_name);
		mutex_exit(&ill->ill_lock);
		return (NULL);
	}

	/* Fill in the ring's mac-layer entry points and handles. */
	bzero(rx_ring, sizeof (ill_rx_ring_t));
	rx_ring->rr_rx = mrfp->mrf_receive;
	rx_ring->rr_ip_accept = (ill->ill_isv6 != 0) ?
	    (ip_accept_t)ip_accept_tcp_v6 :
	    (ip_accept_t)ip_accept_tcp;

	rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
	rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
	rx_ring->rr_intr_disable =
	    (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
	rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
	rx_ring->rr_ill = ill;

	pri = mrfp->mrf_flow_priority;

	/*
	 * NOTE(review): ip_squeue_getfree() takes sqset_lock while we hold
	 * ill_lock here; this matches the documented lock order
	 * (ill_lock --> sqset_lock --> sq_lock).
	 */
	sq = ip_squeue_getfree(pri);

	/* Cross-link ring and squeue, and mark the squeue poll-capable. */
	mutex_enter(&sq->sq_lock);
	sq->sq_rx_ring = rx_ring;
	rx_ring->rr_sqp = sq;

	sq->sq_state |= SQS_POLL_CAPAB;

	rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
	sq->sq_ill = ill;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&ill->ill_lock);

	DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
	    ip_rx_index, void *, mrfp->mrf_rx_arg);

	/* Assign the squeue to the specified CPU as well */
	mutex_enter(&cpu_lock);
	(void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
	mutex_exit(&cpu_lock);

	return (rx_ring);
}
541 
/*
 * Sanitize one rx ring's squeue so the ring can be released.  Some of the
 * processing needs to be done from inside the perimeter.  Marks the ring
 * RR_FREE_INPROG, runs the poll cleanup handshake with the squeue worker,
 * parks the squeue on the unbound set, clears SQS_ILL_BOUND, and finally
 * marks the ring RR_FREE.
 */
void
ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	/* Just clean one squeue */
	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE) {
		mutex_exit(&ill->ill_lock);
		return;
	}
	rx_ring->rr_ring_state = RR_FREE_INPROG;
	sqp = rx_ring->rr_sqp;

	/*
	 * Ask the squeue worker to run the poll cleanup protocol and wait
	 * for it to signal completion.  ill_lock is dropped once the ring
	 * state is safely at RR_FREE_INPROG; the sq_state handshake below
	 * is protected by sq_lock.
	 */
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_CLEANUP;
	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&ill->ill_lock);
	while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;

	ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
	    SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	/* Wake the worker once more; presumably so it resumes normal work. */
	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&sqp->sq_lock);

	/*
	 * Move the squeue to sqset_global_list[0] which holds the set of
	 * squeues not bound to any cpu. Note that the squeue is still
	 * considered bound to an ill as long as SQS_ILL_BOUND is set.
	 */
	mutex_enter(&sqset_lock);
	ip_squeue_set_move(sqp, sqset_global_list[0]);
	mutex_exit(&sqset_lock);

	/*
	 * CPU going offline can also trigger a move of the squeue to the
	 * unbound set sqset_global_list[0]. However the squeue won't be
	 * recycled for the next use as long as the SQS_ILL_BOUND flag
	 * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
	 * end after the move.
	 */
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state &= ~SQS_ILL_BOUND;
	mutex_exit(&sqp->sq_lock);

	mutex_enter(&ill->ill_lock);
	rx_ring->rr_ring_state = RR_FREE;
	mutex_exit(&ill->ill_lock);
}
602 
603 /*
604  * Stop the squeue from polling. This needs to be done
605  * from inside the perimeter.
606  */
607 void
608 ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
609 {
610 	squeue_t *sqp;
611 
612 	ASSERT(ILL_MAC_PERIM_HELD(ill));
613 	ASSERT(rx_ring != NULL);
614 
615 	sqp = rx_ring->rr_sqp;
616 	mutex_enter(&sqp->sq_lock);
617 	sqp->sq_state |= SQS_POLL_QUIESCE;
618 	cv_signal(&sqp->sq_worker_cv);
619 	while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
620 		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
621 
622 	mutex_exit(&sqp->sq_lock);
623 }
624 
625 /*
626  * Restart polling etc. Needs to be inside the perimeter to
627  * prevent races.
628  */
629 void
630 ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
631 {
632 	squeue_t *sqp;
633 
634 	ASSERT(ILL_MAC_PERIM_HELD(ill));
635 	ASSERT(rx_ring != NULL);
636 
637 	sqp = rx_ring->rr_sqp;
638 	mutex_enter(&sqp->sq_lock);
639 	/*
640 	 * Handle change in number of rings between the quiesce and
641 	 * restart operations by checking for a previous quiesce before
642 	 * attempting a restart.
643 	 */
644 	if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
645 		mutex_exit(&sqp->sq_lock);
646 		return;
647 	}
648 	sqp->sq_state |= SQS_POLL_RESTART;
649 	cv_signal(&sqp->sq_worker_cv);
650 	while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
651 		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
652 	sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
653 	mutex_exit(&sqp->sq_lock);
654 }
655 
656 /*
657  * sanitize all squeues associated with the ill.
658  */
659 void
660 ip_squeue_clean_all(ill_t *ill)
661 {
662 	int idx;
663 	ill_rx_ring_t	*rx_ring;
664 
665 	for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
666 		rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
667 		ip_squeue_clean_ring(ill, rx_ring);
668 	}
669 }
670 
671 /*
672  * Used by IP to get the squeue associated with a ring. If the squeue isn't
673  * yet bound to a CPU, and we're being called directly from the NIC's
674  * interrupt, then we know what CPU we want to assign the squeue to, so
675  * dispatch that task to a taskq.
676  */
677 squeue_t *
678 ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
679 {
680 	squeue_t	*sqp;
681 
682 	if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
683 		return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));
684 
685 	return (sqp);
686 }
687 
/*
 * Called when a CPU goes offline.  Its squeue_set_t is destroyed, and all
 * its squeues are unbound and moved to the unbound set.
 */
static void
ip_squeue_set_destroy(cpu_t *cpu)
{
	int i;
	squeue_t *sqp, *lastsqp = NULL;
	squeue_set_t *sqs, *unbound = sqset_global_list[0];

	mutex_enter(&sqset_lock);
	if ((sqs = cpu->cpu_squeue_set) == NULL) {
		mutex_exit(&sqset_lock);
		return;
	}

	/* Move all squeues to unbound set */

	for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
		squeue_unbind(sqp);
		sqp->sq_set = unbound;
	}
	if (sqs->sqs_head) {
		/* Splice the whole list onto the front of the unbound set. */
		lastsqp->sq_next = unbound->sqs_head;
		unbound->sqs_head = sqs->sqs_head;
	}

	/* Also move default squeue to unbound set */

	sqp = sqs->sqs_default;
	ASSERT(sqp != NULL);
	ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);

	sqp->sq_next = unbound->sqs_head;
	unbound->sqs_head = sqp;
	squeue_unbind(sqp);
	sqp->sq_set = unbound;

	/*
	 * Remove this set from sqset_global_list by overwriting its slot
	 * with the last entry and shrinking the list.
	 */
	for (i = 1; i < sqset_global_size; i++)
		if (sqset_global_list[i] == sqs)
			break;

	ASSERT(i < sqset_global_size);
	sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
	sqset_global_list[sqset_global_size - 1] = NULL;
	sqset_global_size--;

	mutex_exit(&sqset_lock);
	kmem_free(sqs, sizeof (*sqs));
}
739 
740 /*
741  * Reconfiguration callback
742  */
743 /* ARGSUSED */
744 static int
745 ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
746 {
747 	cpu_t *cp = cpu_get(id);
748 
749 	ASSERT(MUTEX_HELD(&cpu_lock));
750 	switch (what) {
751 	case CPU_CONFIG:
752 	case CPU_ON:
753 	case CPU_INIT:
754 	case CPU_CPUPART_IN:
755 		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
756 			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
757 		break;
758 	case CPU_UNCONFIG:
759 	case CPU_OFF:
760 	case CPU_CPUPART_OUT:
761 		if (cp->cpu_squeue_set != NULL) {
762 			ip_squeue_set_destroy(cp);
763 			cp->cpu_squeue_set = NULL;
764 		}
765 		break;
766 	default:
767 		break;
768 	}
769 	return (0);
770 }
771