/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2017 Joyent, Inc.
 * Copyright 2026 Oxide Computer Company
 */

/*
 * IP interface to squeues.
 *
 * IP uses squeues to force serialization of packets, both incoming and
 * outgoing. Each squeue is associated with a connection instance (conn_t)
 * above, and a soft ring (if enabled) below. Each CPU will have a default
 * squeue for outbound connections, and each soft ring of an interface will
 * have an squeue to which it sends incoming packets. squeues are never
 * destroyed, and if they become unused they are kept around against future
 * needs.
 *
 * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
 * in the system there will be one squeue set, all of whose squeues will be
 * bound to that CPU, plus one additional set known as the unbound set. Sets
 * associated with CPUs will have one default squeue, for outbound
 * connections, and a linked list of squeues used by various NICs for inbound
 * packets. The unbound set also has a linked list of squeues, but no default
 * squeue.
 *
 * When a CPU goes offline, its squeue set is destroyed, and all its squeues
 * are moved to the unbound set. When a CPU comes online, a new squeue set is
 * created and the unbound set is searched for a default squeue formerly bound
 * to this CPU. If no default squeue is found, a new one is created.
 *
 * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
 * and not the squeue code. squeue.c will not touch them, and we can modify
 * them without holding the squeue lock because of the guarantee that squeues
 * are never destroyed. The sqset_lock must be held when doing so, however.
 *
 * All the squeue sets are protected by a single lock, the sqset_lock. This
 * is also used to protect the sq_next and sq_set fields of an squeue_t.
 *
 * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
 *
 * There are two modes of associating connections with squeues. The first mode
 * associates each connection with the CPU that creates the connection (either
 * at open time or at accept time). The second mode associates each
 * connection with a random CPU, effectively distributing load over all CPUs
 * and all squeues in the system. The mode is controlled by the
 * ip_squeue_fanout variable.
 *
 * NOTE: The fact that there is an association between each connection and
 * squeue, and between each squeue and CPU, does not mean that each connection
 * is always processed on that CPU and on that CPU only. Any thread calling
 * squeue_enter() may process the connection on whatever CPU it is scheduled
 * on. The squeue to CPU binding is only relevant for the worker thread.
 *
 * INTERFACE:
 *
 * squeue_t *ip_squeue_get(ill_rx_ring_t)
 *
 * Returns the squeue associated with an ill receive ring. If the ring is
 * not bound to a CPU, and we're currently servicing the interrupt which
 * generated the packet, then bind the squeue to that CPU.
 *
 *
 * DR Notes
 * ========
 *
 * ip_squeue_init() registers a callback function with the CPU DR
 * subsystem using register_cpu_setup_func(). The callback function does two
 * things:
 *
 * o When the CPU is going offline or being unconfigured, the worker thread is
 * unbound from the CPU. This allows the CPU unconfig code to move it to
 * another CPU.
 *
 * o When the CPU is coming online, it creates a new squeue for this CPU if
 * necessary and binds the squeue worker thread to this CPU.
 *
 * TUNABLES:
 *
 * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
 * pick the default squeue from a random CPU, otherwise use our CPU's default
 * squeue.
 *
 * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
 * /dev/ip.
 */
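
/*
 * For illustration, the resulting global organization is roughly as
 * follows (a sketch, not authoritative):
 *
 *	sqset_global_list[0]		the unbound set: no default squeue;
 *	    sqs_head -> sq -> ...	parking place for squeues whose CPU
 *					went offline or whose ring was freed
 *
 *	sqset_global_list[1 .. n]	one set per online CPU:
 *	    sqs_default -> sq		default squeue (outbound conns)
 *	    sqs_head -> sq -> ...	squeues serving NIC rx rings
 */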

#include <sys/types.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <netinet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/nd.h>
#include <inet/ipclassifier.h>
#include <sys/conf.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
#include <sys/squeue_impl.h>
#include <sys/tihdr.h>
#include <inet/udp_impl.h>
#include <sys/strsubr.h>
#include <sys/zone.h>
#include <sys/dld.h>
#include <sys/atomic.h>

/*
 * List of all created squeue sets. The list and its size are protected by
 * sqset_lock.
 */
static squeue_set_t	**sqset_global_list; /* list 0 is the unbound list */
static uint_t		sqset_global_size;
kmutex_t		sqset_lock;

static void (*ip_squeue_create_callback)(squeue_t *) = NULL;

static squeue_t *ip_squeue_create(pri_t);
static squeue_set_t *ip_squeue_set_create(processorid_t);
static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
static void ip_squeue_set_destroy(cpu_t *);
static void ip_squeue_clean(void *, mblk_t *, void *);

#define	CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))

static squeue_t *
ip_squeue_create(pri_t pri)
{
	squeue_t *sqp;

	sqp = squeue_create(pri);
	ASSERT(sqp != NULL);
	if (ip_squeue_create_callback != NULL)
		ip_squeue_create_callback(sqp);
	return (sqp);
}

/*
 * Create a new squeue_set. If id == -1, then we're creating the unbound set,
 * which should only happen once when we are first initialized. Otherwise id
 * is the id of the CPU that needs a set, either because we are initializing
 * or because the CPU has come online.
 *
 * If id != -1, then we need at a minimum to provide a default squeue for the
 * new set. We search the unbound set for candidates, and if none are found we
 * create a new one.
 */
static squeue_set_t *
ip_squeue_set_create(processorid_t id)
{
	squeue_set_t	*sqs;
	squeue_set_t	*src = sqset_global_list[0];
	squeue_t	**lastsqp, *sq;
	squeue_t	**defaultq_lastp = NULL;

	sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
	sqs->sqs_cpuid = id;

	if (id == -1) {
		ASSERT(sqset_global_size == 0);
		sqset_global_list[0] = sqs;
		sqset_global_size = 1;
		return (sqs);
	}

	/*
	 * When we create an squeue set with id != -1, we need to give it a
	 * default squeue, in order to support fanout of conns across
	 * CPUs. Try to find a former default squeue that matches this
	 * cpu id on the unbound squeue set. If no such squeue is found,
	 * find some non-default TCP squeue that is free. If still no such
	 * candidate is found, create a new squeue.
	 */

	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&sqset_lock);
	lastsqp = &src->sqs_head;

	while (*lastsqp) {
		if ((*lastsqp)->sq_bind == id &&
		    (*lastsqp)->sq_state & SQS_DEFAULT) {
			/*
			 * Exact match. Former default squeue of cpu 'id'
			 */
			ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
			defaultq_lastp = lastsqp;
			break;
		}
		if (defaultq_lastp == NULL &&
		    !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
			/*
			 * A free non-default TCP squeue
			 */
			defaultq_lastp = lastsqp;
		}
		lastsqp = &(*lastsqp)->sq_next;
	}

	if (defaultq_lastp != NULL) {
		/* Remove from src set and set SQS_DEFAULT */
		sq = *defaultq_lastp;
		*defaultq_lastp = sq->sq_next;
		sq->sq_next = NULL;
		if (!(sq->sq_state & SQS_DEFAULT)) {
			mutex_enter(&sq->sq_lock);
			sq->sq_state |= SQS_DEFAULT;
			mutex_exit(&sq->sq_lock);
		}
	} else {
		sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
		sq->sq_state |= SQS_DEFAULT;
	}

	sq->sq_set = sqs;
	sqs->sqs_default = sq;
	squeue_bind(sq, id); /* this locks squeue mutex */

	ASSERT(sqset_global_size <= NCPU);
	sqset_global_list[sqset_global_size++] = sqs;
	mutex_exit(&sqset_lock);
	return (sqs);
}
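
/*
 * Illustrative note: the candidate search above prefers, in order, (1) a
 * former default squeue of CPU 'id' parked on the unbound set, (2) any
 * free non-default squeue, and only then (3) a newly created squeue.
 * Because squeues are never destroyed, reusing a parked squeue keeps the
 * total squeue population from growing every time a CPU cycles offline
 * and back online.
 */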

/*
 * Obtain a free squeue and set its worker and poll thread priorities, if
 * required. A free squeue is one that is not already bound to an ill_t
 * and that is not marked as "default". If a free squeue does not exist,
 * then one is created.
 */
squeue_t *
ip_squeue_getfree(pri_t pri)
{
	squeue_set_t	*sqs = sqset_global_list[0];
	squeue_t	*sq;

	mutex_enter(&sqset_lock);
	for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
		/*
		 * Select a non-default TCP squeue that is free i.e. not
		 * bound to any ill.
		 */
		if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
			break;
	}

	if (sq == NULL) {
		sq = ip_squeue_create(pri);
		sq->sq_set = sqs;
		sq->sq_next = sqs->sqs_head;
		sqs->sqs_head = sq;
	}

	ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
	    SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	mutex_enter(&sq->sq_lock);
	sq->sq_state |= SQS_ILL_BOUND;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&sqset_lock);

	if (sq->sq_priority != pri) {
		thread_lock(sq->sq_worker);
		(void) thread_change_pri(sq->sq_worker, pri, 0);
		thread_unlock(sq->sq_worker);

		thread_lock(sq->sq_poll_thr);
		(void) thread_change_pri(sq->sq_poll_thr, pri, 0);
		thread_unlock(sq->sq_poll_thr);

		sq->sq_priority = pri;
	}
	return (sq);
}
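
/*
 * A minimal usage sketch (hypothetical caller; ip_squeue_add_ring() below
 * is the real consumer in this file):
 *
 *	squeue_t *sq = ip_squeue_getfree(pri);
 *
 *	mutex_enter(&sq->sq_lock);
 *	sq->sq_rx_ring = rx_ring;
 *	mutex_exit(&sq->sq_lock);
 *
 * The squeue is returned with SQS_ILL_BOUND already set, so no concurrent
 * caller can claim it between the return and the ring assignment.
 */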

/*
 * Initialize IP squeues.
 */
void
ip_squeue_init(void (*callback)(squeue_t *))
{
	int i;
	squeue_set_t	*sqs;

	ASSERT(sqset_global_list == NULL);

	ip_squeue_create_callback = callback;
	squeue_init();
	mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
	sqset_global_list =
	    kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
	sqset_global_size = 0;
	/*
	 * We are called at system boot time and we don't
	 * expect memory allocation failure.
	 */
	sqs = ip_squeue_set_create(-1);
	ASSERT(sqs != NULL);

	mutex_enter(&cpu_lock);
	/* Create a squeue set for each active CPU */
	for (i = 0; i < NCPU; i++) {
		cpu_t *cp = cpu_get(i);
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
			/*
			 * We are called at system boot time and we don't
			 * expect memory allocation failure then either.
			 */
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
			ASSERT(cp->cpu_squeue_set != NULL);
		}
	}

	register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
	mutex_exit(&cpu_lock);
}

/*
 * Get a default squeue, either from the current CPU or a CPU derived by hash
 * from the index argument, depending upon the setting of ip_squeue_fanout.
 */
squeue_t *
ip_squeue_random(uint_t index)
{
	squeue_set_t *sqs = NULL;
	squeue_t *sq;

	/*
	 * The minimum value of sqset_global_size is 2, one for the unbound
	 * squeue set and another for the squeue set of the zeroth CPU.
	 * Even though the value could be changing, it can never go below 2,
	 * so the assert does not need the lock protection.
	 */
	ASSERT(sqset_global_size > 1);

	/* Protect against changes to sqset_global_list */
	mutex_enter(&sqset_lock);

	if (!ip_squeue_fanout)
		sqs = CPU->cpu_squeue_set;

	/*
	 * sqset_global_list[0] corresponds to the unbound squeue set.
	 * The computation below picks a set other than the unbound set.
	 */
	if (sqs == NULL)
		sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
	sq = sqs->sqs_default;

	mutex_exit(&sqset_lock);
	ASSERT(sq);
	return (sq);
}
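
/*
 * Worked example (illustrative): with four online CPUs, sqset_global_size
 * is 5 -- slot 0 is the unbound set and slots 1 through 4 are the per-CPU
 * sets. With ip_squeue_fanout set, an index of 7 selects
 * sqset_global_list[(7 % 4) + 1], i.e. slot 4, and the connection is
 * assigned that set's default squeue.
 */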

/*
 * Move squeue from its current set to newset. Not used for default squeues.
 * Bind or unbind the worker thread as appropriate.
 */

static void
ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
{
	squeue_set_t	*set;
	squeue_t	**lastsqp;
	processorid_t	cpuid = newset->sqs_cpuid;

	ASSERT(!(sq->sq_state & SQS_DEFAULT));
	ASSERT(!MUTEX_HELD(&sq->sq_lock));
	ASSERT(MUTEX_HELD(&sqset_lock));

	set = sq->sq_set;
	if (set == newset)
		return;

	lastsqp = &set->sqs_head;
	while (*lastsqp != sq)
		lastsqp = &(*lastsqp)->sq_next;

	*lastsqp = sq->sq_next;
	sq->sq_next = newset->sqs_head;
	newset->sqs_head = sq;
	sq->sq_set = newset;
	if (cpuid == -1)
		squeue_unbind(sq);
	else
		squeue_bind(sq, cpuid);
}

/*
 * Move squeue from its current set to cpuid's set and bind to cpuid.
 */

int
ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
{
	cpu_t *cpu;
	squeue_set_t *set;

	if (sq->sq_state & SQS_DEFAULT)
		return (-1);

	ASSERT(MUTEX_HELD(&cpu_lock));

	cpu = cpu_get(cpuid);
	if (!CPU_ISON(cpu))
		return (-1);

	mutex_enter(&sqset_lock);
	set = cpu->cpu_squeue_set;
	if (set != NULL)
		ip_squeue_set_move(sq, set);
	mutex_exit(&sqset_lock);
	return ((set == NULL) ? -1 : 0);
}
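
/*
 * Callers must hold cpu_lock across ip_squeue_cpu_move(); a hypothetical
 * call site would look like:
 *
 *	mutex_enter(&cpu_lock);
 *	(void) ip_squeue_cpu_move(sq, cpuid);
 *	mutex_exit(&cpu_lock);
 *
 * A return value of -1 means the squeue is a default squeue, the target
 * CPU is not online, or the target CPU has no squeue set yet.
 */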

/*
 * The mac layer is calling, asking us to move an squeue to a
 * new CPU. This routine is called with cpu_lock held.
 */
void
ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
{
	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring->rr_ill == ill);

	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE ||
	    rx_ring->rr_ring_state == RR_FREE_INPROG) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
		rx_ring->rr_ring_state = RR_SQUEUE_BOUND;

	mutex_exit(&ill->ill_lock);
}

void *
ip_squeue_add_ring(ill_t *ill, void *mrp)
{
	mac_rx_fifo_t		*mrfp = (mac_rx_fifo_t *)mrp;
	ill_rx_ring_t		*rx_ring, *ring_tbl;
	int			ip_rx_index;
	squeue_t		*sq = NULL;
	pri_t			pri;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
	ASSERT(ill->ill_dld_capab != NULL);

	ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;

	mutex_enter(&ill->ill_lock);
	for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
		rx_ring = &ring_tbl[ip_rx_index];
		if (rx_ring->rr_ring_state == RR_FREE)
			break;
	}

	if (ip_rx_index == ILL_MAX_RINGS) {
		/*
		 * We ran out of ILL_MAX_RINGS worth of rx_ring structures.
		 * If we have devices which can overwhelm this limit,
		 * ILL_MAX_RINGS should be made configurable. Meanwhile it
		 * causes no panic because the driver will pass ip_input a
		 * NULL handle, which will make IP allocate the default
		 * squeue, and polling mode will not be used for this ring.
		 */
		cmn_err(CE_NOTE,
		    "Reached maximum number of receiving rings (%d) for %s\n",
		    ILL_MAX_RINGS, ill->ill_name);
		mutex_exit(&ill->ill_lock);
		return (NULL);
	}

	bzero(rx_ring, sizeof (ill_rx_ring_t));
	rx_ring->rr_rx = mrfp->mrf_receive;
	rx_ring->rr_ip_accept = (ill->ill_isv6 != 0) ?
	    (ip_accept_t)ip_accept_tcp_v6 :
	    (ip_accept_t)ip_accept_tcp;

	rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
	rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
	rx_ring->rr_intr_disable =
	    (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
	rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
	rx_ring->rr_ill = ill;

	pri = mrfp->mrf_flow_priority;

	sq = ip_squeue_getfree(pri);

	mutex_enter(&sq->sq_lock);
	sq->sq_rx_ring = rx_ring;
	rx_ring->rr_sqp = sq;

	sq->sq_state |= SQS_POLL_CAPAB;

	rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
	sq->sq_ill = ill;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&ill->ill_lock);

	DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
	    ip_rx_index, void *, mrfp->mrf_rx_arg);

	/* Assign the squeue to the specified CPU as well */
	mutex_enter(&cpu_lock);
	ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
	mutex_exit(&cpu_lock);

	return (rx_ring);
}
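
/*
 * For reference, an illustrative sketch of the rr_ring_state transitions
 * as driven by this file:
 *
 *	RR_FREE -> RR_SQUEUE_UNBOUND	(ip_squeue_add_ring)
 *	        -> RR_SQUEUE_BOUND	(ip_squeue_bind_ring succeeds)
 *	        -> RR_FREE_INPROG	(ip_squeue_clean_ring begins)
 *	        -> RR_FREE		(ip_squeue_clean_ring completes)
 *
 * Every transition is made while holding ill_lock.
 */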

/*
 * Sanitize the squeue, etc. Some of the processing
 * needs to be done from inside the perimeter.
 */
void
ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	/* Just clean one squeue */
	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE) {
		mutex_exit(&ill->ill_lock);
		return;
	}
	rx_ring->rr_ring_state = RR_FREE_INPROG;
	sqp = rx_ring->rr_sqp;

	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_CLEANUP;
	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&ill->ill_lock);
	while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;

	ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
	    SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&sqp->sq_lock);

	/*
	 * Move the squeue to sqset_global_list[0] which holds the set of
	 * squeues not bound to any cpu. Note that the squeue is still
	 * considered bound to an ill as long as SQS_ILL_BOUND is set.
	 */
	mutex_enter(&sqset_lock);
	ip_squeue_set_move(sqp, sqset_global_list[0]);
	mutex_exit(&sqset_lock);

	/*
	 * A CPU going offline can also trigger a move of the squeue to the
	 * unbound set sqset_global_list[0]. However the squeue won't be
	 * recycled for the next use as long as the SQS_ILL_BOUND flag
	 * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
	 * end, after the move.
	 */
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state &= ~SQS_ILL_BOUND;
	mutex_exit(&sqp->sq_lock);

	mutex_enter(&ill->ill_lock);
	rx_ring->rr_ring_state = RR_FREE;
	mutex_exit(&ill->ill_lock);
}
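
/*
 * The cleanup above is, in effect, a handshake with the squeue worker
 * thread: we set SQS_POLL_CLEANUP and signal sq_worker_cv, and the worker
 * acknowledges by setting SQS_POLL_CLEANUP_DONE and signalling
 * sq_ctrlop_done_cv. Only then is it safe to park the squeue on the
 * unbound set and clear SQS_ILL_BOUND so it can be recycled.
 */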

/*
 * Stop the squeue from polling. This needs to be done
 * from inside the perimeter.
 */
void
ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	sqp = rx_ring->rr_sqp;
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_QUIESCE;
	cv_signal(&sqp->sq_worker_cv);
	while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);

	mutex_exit(&sqp->sq_lock);
}

/*
 * Restart polling etc. Needs to be inside the perimeter to
 * prevent races.
 */
void
ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	sqp = rx_ring->rr_sqp;
	mutex_enter(&sqp->sq_lock);
	/*
	 * Handle change in number of rings between the quiesce and
	 * restart operations by checking for a previous quiesce before
	 * attempting a restart.
	 */
	if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
		mutex_exit(&sqp->sq_lock);
		return;
	}
	sqp->sq_state |= SQS_POLL_RESTART;
	cv_signal(&sqp->sq_worker_cv);
	while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
	mutex_exit(&sqp->sq_lock);
}

/*
 * Sanitize all squeues associated with the ill.
 */
void
ip_squeue_clean_all(ill_t *ill)
{
	int idx;
	ill_rx_ring_t	*rx_ring;

	for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
		rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
		ip_squeue_clean_ring(ill, rx_ring);
	}
}

/*
 * Used by IP to get the squeue associated with a ring. If the squeue isn't
 * yet bound to a CPU, and we're being called directly from the NIC's
 * interrupt, then we know what CPU we want to assign the squeue to, so
 * dispatch that task to a taskq.
 */
squeue_t *
ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
{
	squeue_t	*sqp;

	if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
		return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));

	return (sqp);
}
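
/*
 * Note that when the ring is NULL or has no squeue assigned yet, the
 * fallback above still lets the packet make progress: IP_SQUEUE_GET()
 * with a pseudo-random index simply hands back some CPU's default squeue.
 */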

/*
 * Called when a CPU goes offline. Its squeue_set_t is destroyed, and all
 * its squeues are unbound and moved to the unbound set.
 */
static void
ip_squeue_set_destroy(cpu_t *cpu)
{
	int i;
	squeue_t *sqp, *lastsqp = NULL;
	squeue_set_t *sqs, *unbound = sqset_global_list[0];

	mutex_enter(&sqset_lock);
	if ((sqs = cpu->cpu_squeue_set) == NULL) {
		mutex_exit(&sqset_lock);
		return;
	}

	/* Move all squeues to the unbound set */

	for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
		squeue_unbind(sqp);
		sqp->sq_set = unbound;
	}
	if (sqs->sqs_head) {
		lastsqp->sq_next = unbound->sqs_head;
		unbound->sqs_head = sqs->sqs_head;
	}

	/* Also move the default squeue to the unbound set */

	sqp = sqs->sqs_default;
	ASSERT(sqp != NULL);
	ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);

	sqp->sq_next = unbound->sqs_head;
	unbound->sqs_head = sqp;
	squeue_unbind(sqp);
	sqp->sq_set = unbound;

	for (i = 1; i < sqset_global_size; i++)
		if (sqset_global_list[i] == sqs)
			break;

	ASSERT(i < sqset_global_size);
	sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
	sqset_global_list[sqset_global_size - 1] = NULL;
	sqset_global_size--;

	mutex_exit(&sqset_lock);
	kmem_free(sqs, sizeof (*sqs));
}

/*
 * Reconfiguration callback
 */
/* ARGSUSED */
static int
ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	cpu_t *cp = cpu_get(id);

	ASSERT(MUTEX_HELD(&cpu_lock));
	switch (what) {
	case CPU_CONFIG:
	case CPU_ON:
	case CPU_INIT:
	case CPU_CPUPART_IN:
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
		break;
	case CPU_UNCONFIG:
	case CPU_OFF:
	case CPU_CPUPART_OUT:
		if (cp->cpu_squeue_set != NULL) {
			ip_squeue_set_destroy(cp);
			cp->cpu_squeue_set = NULL;
		}
		break;
	default:
		break;
	}
	return (0);
}