1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * IP interface to squeues.
28 *
29 * IP uses squeues to force serialization of packets, both incoming and
30 * outgoing. Each squeue is associated with a connection instance (conn_t)
31 * above, and a soft ring (if enabled) below. Each CPU will have a default
32 * squeue for outbound connections, and each soft ring of an interface will
33 * have an squeue to which it sends incoming packets. squeues are never
34 * destroyed, and if they become unused they are kept around against future
35 * needs.
36 *
37 * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
38 * in the system there will be one squeue set, all of whose squeues will be
39 * bound to that CPU, plus one additional set known as the unbound set. Sets
40 * associated with CPUs will have one default squeue, for outbound
41 * connections, and a linked list of squeues used by various NICs for inbound
42 * packets. The unbound set also has a linked list of squeues, but no default
43 * squeue.
44 *
45 * When a CPU goes offline, its squeue set is destroyed, and all its squeues
46 * are moved to the unbound set. When a CPU comes online, a new squeue set is
47 * created and the unbound set is searched for a default squeue formerly bound
48 * to this CPU. If none is found, a free unbound squeue is reused or a new one is created.
49 *
50 * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
51 * and not the squeue code. squeue.c will not touch them, and we can modify
52 * them without holding the squeue lock because of the guarantee that squeues
53 * are never destroyed. The sqset_lock must be held, however.
54 *
55 * All the squeue sets are protected by a single lock, the sqset_lock. This
56 * is also used to protect the sq_next and sq_set fields of an squeue_t.
57 *
58 * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
59 *
60 * There are two modes of associating a connection with an squeue. The first mode
61 * associates each connection with the CPU that creates the connection (either
62 * during open time or during accept time). The second mode associates each
63 * connection with a random CPU, effectively distributing load over all CPUs
64 * and all squeues in the system. The mode is controlled by the
65 * ip_squeue_fanout variable.
66 *
67 * NOTE: The fact that there is an association between each connection and
68 * an squeue, and between each squeue and a CPU, does not mean that each
69 * connection is always processed on that CPU and on that CPU only. Any thread
70 * calling squeue_enter() may process the connection on whichever CPU it is currently
71 * scheduled. The squeue-to-CPU binding is only relevant for the worker thread.
72 *
73 * INTERFACE:
74 *
75 * squeue_t *ip_squeue_get(ill_rx_ring_t)
76 *
77 * Returns the squeue associated with an ill receive ring. If the ring is
78 * not bound to a CPU, and we're currently servicing the interrupt which
79 * generated the packet, then bind the squeue to that CPU.
80 *
81 *
82 * DR Notes
83 * ========
84 *
85 * ip_squeue_init() registers a callback function with the CPU DR
86 * subsystem using register_cpu_setup_func(). The callback function does two
87 * things:
88 *
89 * o When the CPU is going off-line or unconfigured, the worker thread is
90 * unbound from the CPU. This allows the CPU unconfig code to move it to
91 * another CPU.
92 *
93 * o When the CPU is going online, it creates a new squeue for this CPU if
94 * necessary and binds the squeue worker thread to this CPU.
95 *
96 * TUNABLES:
97 *
98 * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
99 * pick the default squeue from a random CPU, otherwise use our CPU's default
100 * squeue.
101 *
102 * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
103 * /dev/ip.
104 *
105 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
106 * created. This is the time squeue code waits before waking up the worker
107 * thread after queuing a request.
108 */
109
110 #include <sys/types.h>
111 #include <sys/debug.h>
112 #include <sys/kmem.h>
113 #include <sys/cpuvar.h>
114 #include <sys/cmn_err.h>
115
116 #include <inet/common.h>
117 #include <inet/ip.h>
118 #include <netinet/ip6.h>
119 #include <inet/ip_if.h>
120 #include <inet/ip_ire.h>
121 #include <inet/nd.h>
122 #include <inet/ipclassifier.h>
123 #include <sys/types.h>
124 #include <sys/conf.h>
125 #include <sys/sunddi.h>
126 #include <sys/dlpi.h>
127 #include <sys/squeue_impl.h>
128 #include <sys/tihdr.h>
129 #include <inet/udp_impl.h>
130 #include <sys/strsubr.h>
131 #include <sys/zone.h>
132 #include <sys/dld.h>
133 #include <sys/atomic.h>
134
135 /*
136 * List of all created squeue sets. The list and its size are protected by
137 * sqset_lock.
138 */
139 static squeue_set_t **sqset_global_list; /* list 0 is the unbound list */
140 static uint_t sqset_global_size;
141 kmutex_t sqset_lock;
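
/*
 * Illustrative sketch, not part of the build: traversing the global list of
 * squeue sets. The function name is hypothetical; the point it demonstrates
 * is that the list, its size, and the sq_next linkage of each squeue may
 * only be walked while sqset_lock is held, per the comment above.
 */
#ifdef notdef
static uint_t
example_count_squeues(void)
{
	uint_t i, cnt = 0;
	squeue_set_t *sqs;
	squeue_t *sqp;

	mutex_enter(&sqset_lock);
	for (i = 0; i < sqset_global_size; i++) {
		sqs = sqset_global_list[i];
		if (sqs->sqs_default != NULL)	/* bound sets only */
			cnt++;
		for (sqp = sqs->sqs_head; sqp != NULL; sqp = sqp->sq_next)
			cnt++;
	}
	mutex_exit(&sqset_lock);
	return (cnt);
}
#endif /* notdef */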
142
143 static void (*ip_squeue_create_callback)(squeue_t *) = NULL;
144
145 /*
146 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
147 * created. This is the time squeue code waits before waking up the worker
148 * thread after queuing a request.
149 */
150 uint_t ip_squeue_worker_wait = 10;
151
152 static squeue_t *ip_squeue_create(pri_t);
153 static squeue_set_t *ip_squeue_set_create(processorid_t);
154 static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
155 static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
156 static void ip_squeue_set_destroy(cpu_t *);
157 static void ip_squeue_clean(void *, mblk_t *, void *);
158
159 #define CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))
160
161 static squeue_t *
162 ip_squeue_create(pri_t pri)
163 {
164 squeue_t *sqp;
165
166 sqp = squeue_create(ip_squeue_worker_wait, pri);
167 ASSERT(sqp != NULL);
168 if (ip_squeue_create_callback != NULL)
169 ip_squeue_create_callback(sqp);
170 return (sqp);
171 }
172
173 /*
174 * Create a new squeue_set. If id == -1, then we're creating the unbound set,
175 * which should only happen once when we are first initialized. Otherwise id
176 * is the id of the CPU that needs a set, either because we are initializing
177 * or because the CPU has come online.
178 *
179 * If id != -1, then we need at a minimum to provide a default squeue for the
180 * new set. We search the unbound set for candidates, and if none are found we
181 * create a new one.
182 */
183 static squeue_set_t *
184 ip_squeue_set_create(processorid_t id)
185 {
186 squeue_set_t *sqs;
187 squeue_set_t *src = sqset_global_list[0];
188 squeue_t **lastsqp, *sq;
189 squeue_t **defaultq_lastp = NULL;
190
191 sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
192 sqs->sqs_cpuid = id;
193
194 if (id == -1) {
195 ASSERT(sqset_global_size == 0);
196 sqset_global_list[0] = sqs;
197 sqset_global_size = 1;
198 return (sqs);
199 }
200
201 /*
202 * When we create an squeue set with id != -1, we need to give it a
203 * default squeue, in order to support fanout of conns across
204 * CPUs. Try to find a former default squeue that matches this
205 * cpu id on the unbound squeue set. If no such squeue is found,
206 * find some non-default TCP squeue that is free. If still no such
207 * candidate is found, create a new squeue.
208 */
209
210 ASSERT(MUTEX_HELD(&cpu_lock));
211 mutex_enter(&sqset_lock);
212 lastsqp = &src->sqs_head;
213
214 while (*lastsqp) {
215 if ((*lastsqp)->sq_bind == id &&
216 (*lastsqp)->sq_state & SQS_DEFAULT) {
217 /*
218 * Exact match. Former default squeue of cpu 'id'
219 */
220 ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
221 defaultq_lastp = lastsqp;
222 break;
223 }
224 if (defaultq_lastp == NULL &&
225 !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
226 /*
227 * A free non-default TCP squeue
228 */
229 defaultq_lastp = lastsqp;
230 }
231 lastsqp = &(*lastsqp)->sq_next;
232 }
233
234 if (defaultq_lastp != NULL) {
235 /* Remove from src set and set SQS_DEFAULT */
236 sq = *defaultq_lastp;
237 *defaultq_lastp = sq->sq_next;
238 sq->sq_next = NULL;
239 if (!(sq->sq_state & SQS_DEFAULT)) {
240 mutex_enter(&sq->sq_lock);
241 sq->sq_state |= SQS_DEFAULT;
242 mutex_exit(&sq->sq_lock);
243 }
244 } else {
245 sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
246 sq->sq_state |= SQS_DEFAULT;
247 }
248
249 sq->sq_set = sqs;
250 sqs->sqs_default = sq;
251 squeue_bind(sq, id); /* this locks squeue mutex */
252
253 ASSERT(sqset_global_size <= NCPU);
254 sqset_global_list[sqset_global_size++] = sqs;
255 mutex_exit(&sqset_lock);
256 return (sqs);
257 }
258
259 /*
260 * Called by ip_squeue_add_ring() to find an squeue to associate with a new ring.
261 */
262
263 squeue_t *
264 ip_squeue_getfree(pri_t pri)
265 {
266 squeue_set_t *sqs = sqset_global_list[0];
267 squeue_t *sq;
268
269 mutex_enter(&sqset_lock);
270 for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
271 /*
272 * Select a non-default TCP squeue that is free i.e. not
273 * bound to any ill.
274 */
275 if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
276 break;
277 }
278
279 if (sq == NULL) {
280 sq = ip_squeue_create(pri);
281 sq->sq_set = sqs;
282 sq->sq_next = sqs->sqs_head;
283 sqs->sqs_head = sq;
284 }
285
286 ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
287 SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
288 SQS_POLL_THR_QUIESCED)));
289
290 mutex_enter(&sq->sq_lock);
291 sq->sq_state |= SQS_ILL_BOUND;
292 mutex_exit(&sq->sq_lock);
293 mutex_exit(&sqset_lock);
294
295 if (sq->sq_priority != pri) {
296 thread_lock(sq->sq_worker);
297 (void) thread_change_pri(sq->sq_worker, pri, 0);
298 thread_unlock(sq->sq_worker);
299
300 thread_lock(sq->sq_poll_thr);
301 (void) thread_change_pri(sq->sq_poll_thr, pri, 0);
302 thread_unlock(sq->sq_poll_thr);
303
304 sq->sq_priority = pri;
305 }
306 return (sq);
307 }
308
309 /*
310 * Initialize IP squeues.
311 */
312 void
313 ip_squeue_init(void (*callback)(squeue_t *))
314 {
315 int i;
316 squeue_set_t *sqs;
317
318 ASSERT(sqset_global_list == NULL);
319
320 ip_squeue_create_callback = callback;
321 squeue_init();
322 mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
323 sqset_global_list =
324 kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
325 sqset_global_size = 0;
326 /*
327 * We are called at system boot time and we don't
328 * expect memory allocation failure.
329 */
330 sqs = ip_squeue_set_create(-1);
331 ASSERT(sqs != NULL);
332
333 mutex_enter(&cpu_lock);
334 /* Create an squeue set for each active CPU */
335 for (i = 0; i < NCPU; i++) {
336 cpu_t *cp = cpu_get(i);
337 if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
338 /*
339 * We are called at system boot time and we don't
340 * expect memory allocation failure then
341 */
342 cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
343 ASSERT(cp->cpu_squeue_set != NULL);
344 }
345 }
346
347 register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
348 mutex_exit(&cpu_lock);
349 }
350
351 /*
352 * Get a default squeue, either from the current CPU or a CPU derived by hash
353 * from the index argument, depending upon the setting of ip_squeue_fanout.
354 */
355 squeue_t *
356 ip_squeue_random(uint_t index)
357 {
358 squeue_set_t *sqs = NULL;
359 squeue_t *sq;
360
361 /*
362 * The minimum value of sqset_global_size is 2, one for the unbound
363 * squeue set and another for the squeue set of the zeroth CPU.
364 * Even though the value could be changing, it can never go below 2,
365 * so the assert does not need the lock protection.
366 */
367 ASSERT(sqset_global_size > 1);
368
369 /* Protect against changes to sqset_global_list */
370 mutex_enter(&sqset_lock);
371
372 if (!ip_squeue_fanout)
373 sqs = CPU->cpu_squeue_set;
374
375 /*
376 * sqset_global_list[0] corresponds to the unbound squeue set.
377 * The computation below picks a set other than the unbound set.
378 */
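	/*
	 * Worked example (illustrative values): with four bound CPU sets,
	 * sqset_global_size is 5, so an index of 7 selects entry
	 * (7 % 4) + 1 == 4 and an index of 0 selects entry 1; entry 0,
	 * the unbound set, is never selected here.
	 */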
379 if (sqs == NULL)
380 sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
381 sq = sqs->sqs_default;
382
383 mutex_exit(&sqset_lock);
384 ASSERT(sq);
385 return (sq);
386 }
387
388 /*
389 * Move squeue from its current set to newset. Not used for default squeues.
390 * Bind or unbind the worker thread as appropriate.
391 */
392
393 static void
394 ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
395 {
396 squeue_set_t *set;
397 squeue_t **lastsqp;
398 processorid_t cpuid = newset->sqs_cpuid;
399
400 ASSERT(!(sq->sq_state & SQS_DEFAULT));
401 ASSERT(!MUTEX_HELD(&sq->sq_lock));
402 ASSERT(MUTEX_HELD(&sqset_lock));
403
404 set = sq->sq_set;
405 if (set == newset)
406 return;
407
408 lastsqp = &set->sqs_head;
409 while (*lastsqp != sq)
410 lastsqp = &(*lastsqp)->sq_next;
411
412 *lastsqp = sq->sq_next;
413 sq->sq_next = newset->sqs_head;
414 newset->sqs_head = sq;
415 sq->sq_set = newset;
416 if (cpuid == -1)
417 squeue_unbind(sq);
418 else
419 squeue_bind(sq, cpuid);
420 }
421
422 /*
423 * Move squeue from its current set to cpuid's set and bind to cpuid.
424 */
425
426 int
427 ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
428 {
429 cpu_t *cpu;
430 squeue_set_t *set;
431
432 if (sq->sq_state & SQS_DEFAULT)
433 return (-1);
434
435 ASSERT(MUTEX_HELD(&cpu_lock));
436
437 cpu = cpu_get(cpuid);
438 if (!CPU_ISON(cpu))
439 return (-1);
440
441 mutex_enter(&sqset_lock);
442 set = cpu->cpu_squeue_set;
443 if (set != NULL)
444 ip_squeue_set_move(sq, set);
445 mutex_exit(&sqset_lock);
446 return ((set == NULL) ? -1 : 0);
447 }
448
449 /*
450 * The mac layer is calling, asking us to move an squeue to a
451 * new CPU. This routine is called with cpu_lock held.
452 */
453 void
454 ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
455 {
456 ASSERT(ILL_MAC_PERIM_HELD(ill));
457 ASSERT(rx_ring->rr_ill == ill);
458
459 mutex_enter(&ill->ill_lock);
460 if (rx_ring->rr_ring_state == RR_FREE ||
461 rx_ring->rr_ring_state == RR_FREE_INPROG) {
462 mutex_exit(&ill->ill_lock);
463 return;
464 }
465
466 if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
467 rx_ring->rr_ring_state = RR_SQUEUE_BOUND;
468
469 mutex_exit(&ill->ill_lock);
470 }
471
472 void *
473 ip_squeue_add_ring(ill_t *ill, void *mrp)
474 {
475 mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp;
476 ill_rx_ring_t *rx_ring, *ring_tbl;
477 int ip_rx_index;
478 squeue_t *sq = NULL;
479 pri_t pri;
480
481 ASSERT(ILL_MAC_PERIM_HELD(ill));
482 ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
483 ASSERT(ill->ill_dld_capab != NULL);
484
485 ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;
486
487 mutex_enter(&ill->ill_lock);
488 for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
489 rx_ring = &ring_tbl[ip_rx_index];
490 if (rx_ring->rr_ring_state == RR_FREE)
491 break;
492 }
493
494 if (ip_rx_index == ILL_MAX_RINGS) {
495 /*
496 * We ran out of ILL_MAX_RINGS worth of rx_ring structures. If
497 * we have devices which can overwhelm this limit,
498 * ILL_MAX_RINGS should be made configurable. Meanwhile this
499 * causes no panic because the driver will pass ip_input a NULL
500 * handle, which will make IP allocate the default squeue, and
501 * polling mode will not be used for this ring.
502 */
503 cmn_err(CE_NOTE,
504 "Reached maximum number of receiving rings (%d) for %s\n",
505 ILL_MAX_RINGS, ill->ill_name);
506 mutex_exit(&ill->ill_lock);
507 return (NULL);
508 }
509
510 bzero(rx_ring, sizeof (ill_rx_ring_t));
511 rx_ring->rr_rx = (ip_mac_rx_t)mrfp->mrf_receive;
512 /* XXX: Hard code it to tcp accept for now */
513 rx_ring->rr_ip_accept = (ip_accept_t)ip_accept_tcp;
514
515 rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
516 rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
517 rx_ring->rr_intr_disable =
518 (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
519 rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
520 rx_ring->rr_ill = ill;
521
522 pri = mrfp->mrf_flow_priority;
523
524 sq = ip_squeue_getfree(pri);
525
526 mutex_enter(&sq->sq_lock);
527 sq->sq_rx_ring = rx_ring;
528 rx_ring->rr_sqp = sq;
529
530 sq->sq_state |= SQS_POLL_CAPAB;
531
532 rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
533 sq->sq_ill = ill;
534 mutex_exit(&sq->sq_lock);
535 mutex_exit(&ill->ill_lock);
536
537 DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
538 ip_rx_index, void *, mrfp->mrf_rx_arg);
539
540 /* Assign the squeue to the specified CPU as well */
541 mutex_enter(&cpu_lock);
542 (void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
543 mutex_exit(&cpu_lock);
544
545 return (rx_ring);
546 }
547
548 /*
549 * sanitize the squeue etc. Some of the processing
550 * needs to be done from inside the perimeter.
551 */
552 void
553 ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
554 {
555 squeue_t *sqp;
556
557 ASSERT(ILL_MAC_PERIM_HELD(ill));
558 ASSERT(rx_ring != NULL);
559
560 /* Just clean one squeue */
561 mutex_enter(&ill->ill_lock);
562 if (rx_ring->rr_ring_state == RR_FREE) {
563 mutex_exit(&ill->ill_lock);
564 return;
565 }
566 rx_ring->rr_ring_state = RR_FREE_INPROG;
567 sqp = rx_ring->rr_sqp;
568
569 mutex_enter(&sqp->sq_lock);
570 sqp->sq_state |= SQS_POLL_CLEANUP;
571 cv_signal(&sqp->sq_worker_cv);
572 mutex_exit(&ill->ill_lock);
573 while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
574 cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
575 sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;
576
577 ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
578 SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
579 SQS_POLL_THR_QUIESCED)));
580
581 cv_signal(&sqp->sq_worker_cv);
582 mutex_exit(&sqp->sq_lock);
583
584 /*
585 * Move the squeue to sqset_global_list[0] which holds the set of
586 * squeues not bound to any cpu. Note that the squeue is still
587 * considered bound to an ill as long as SQS_ILL_BOUND is set.
588 */
589 mutex_enter(&sqset_lock);
590 ip_squeue_set_move(sqp, sqset_global_list[0]);
591 mutex_exit(&sqset_lock);
592
593 /*
594 * CPU going offline can also trigger a move of the squeue to the
595 * unbound set sqset_global_list[0]. However the squeue won't be
596 * recycled for the next use as long as the SQS_ILL_BOUND flag
597 * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
598 * end after the move.
599 */
600 mutex_enter(&sqp->sq_lock);
601 sqp->sq_state &= ~SQS_ILL_BOUND;
602 mutex_exit(&sqp->sq_lock);
603
604 mutex_enter(&ill->ill_lock);
605 rx_ring->rr_ring_state = RR_FREE;
606 mutex_exit(&ill->ill_lock);
607 }
608
609 /*
610 * Stop the squeue from polling. This needs to be done
611 * from inside the perimeter.
612 */
613 void
614 ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
615 {
616 squeue_t *sqp;
617
618 ASSERT(ILL_MAC_PERIM_HELD(ill));
619 ASSERT(rx_ring != NULL);
620
621 sqp = rx_ring->rr_sqp;
622 mutex_enter(&sqp->sq_lock);
623 sqp->sq_state |= SQS_POLL_QUIESCE;
624 cv_signal(&sqp->sq_worker_cv);
625 while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
626 cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
627
628 mutex_exit(&sqp->sq_lock);
629 }
630
631 /*
632 * Restart polling etc. Needs to be inside the perimeter to
633 * prevent races.
634 */
635 void
636 ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
637 {
638 squeue_t *sqp;
639
640 ASSERT(ILL_MAC_PERIM_HELD(ill));
641 ASSERT(rx_ring != NULL);
642
643 sqp = rx_ring->rr_sqp;
644 mutex_enter(&sqp->sq_lock);
645 /*
646 * Handle change in number of rings between the quiesce and
647 * restart operations by checking for a previous quiesce before
648 * attempting a restart.
649 */
650 if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
651 mutex_exit(&sqp->sq_lock);
652 return;
653 }
654 sqp->sq_state |= SQS_POLL_RESTART;
655 cv_signal(&sqp->sq_worker_cv);
656 while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
657 cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
658 sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
659 mutex_exit(&sqp->sq_lock);
660 }
661
662 /*
663 * sanitize all squeues associated with the ill.
664 */
665 void
666 ip_squeue_clean_all(ill_t *ill)
667 {
668 int idx;
669 ill_rx_ring_t *rx_ring;
670
671 for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
672 rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
673 ip_squeue_clean_ring(ill, rx_ring);
674 }
675 }
676
677 /*
678 * Used by IP to get the squeue associated with a ring. If the squeue isn't
679 * yet bound to a CPU, and we're being called directly from the NIC's
680 * interrupt, then we know what CPU we want to assign the squeue to, so
681 * dispatch that task to a taskq.
682 */
683 squeue_t *
684 ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
685 {
686 squeue_t *sqp;
687
688 if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
689 return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));
690
691 return (sqp);
692 }
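
/*
 * Illustrative sketch, not part of the build: how a caller (e.g. the TCP
 * connection setup path) might combine ip_squeue_get() above with the
 * IP_SQUEUE_GET() interface described in the block comment at the top of
 * this file. The helper name and the conn_sqp assignment are hypothetical
 * and serve only to make the inbound/outbound selection concrete.
 */
#ifdef notdef
static void
example_assign_conn_squeue(conn_t *connp, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	if (rx_ring != NULL) {
		/* Inbound: use the squeue tied to the receive ring. */
		sqp = ip_squeue_get(rx_ring);
	} else {
		/*
		 * Outbound: IP_SQUEUE_GET() returns the current CPU's
		 * default squeue, or that of a pseudo-randomly chosen
		 * CPU when ip_squeue_fanout is set (tunable via, e.g.,
		 * "ndd -set /dev/ip ip_squeue_fanout 1").
		 */
		sqp = IP_SQUEUE_GET(CPU_PSEUDO_RANDOM());
	}
	connp->conn_sqp = sqp;
}
#endif /* notdef */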
693
694 /*
695 * Called when a CPU goes offline. Its squeue_set_t is destroyed, and all
696 * its squeues are unbound and moved to the unbound set.
697 */
698 static void
699 ip_squeue_set_destroy(cpu_t *cpu)
700 {
701 int i;
702 squeue_t *sqp, *lastsqp = NULL;
703 squeue_set_t *sqs, *unbound = sqset_global_list[0];
704
705 mutex_enter(&sqset_lock);
706 if ((sqs = cpu->cpu_squeue_set) == NULL) {
707 mutex_exit(&sqset_lock);
708 return;
709 }
710
711 /* Move all squeues to unbound set */
712
713 for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
714 squeue_unbind(sqp);
715 sqp->sq_set = unbound;
716 }
717 if (sqs->sqs_head) {
718 lastsqp->sq_next = unbound->sqs_head;
719 unbound->sqs_head = sqs->sqs_head;
720 }
721
722 /* Also move default squeue to unbound set */
723
724 sqp = sqs->sqs_default;
725 ASSERT(sqp != NULL);
726 ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);
727
728 sqp->sq_next = unbound->sqs_head;
729 unbound->sqs_head = sqp;
730 squeue_unbind(sqp);
731 sqp->sq_set = unbound;
732
733 for (i = 1; i < sqset_global_size; i++)
734 if (sqset_global_list[i] == sqs)
735 break;
736
737 ASSERT(i < sqset_global_size);
738 sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
739 sqset_global_list[sqset_global_size - 1] = NULL;
740 sqset_global_size--;
741
742 mutex_exit(&sqset_lock);
743 kmem_free(sqs, sizeof (*sqs));
744 }
745
746 /*
747 * Reconfiguration callback
748 */
749 /* ARGSUSED */
750 static int
751 ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
752 {
753 cpu_t *cp = cpu_get(id);
754
755 ASSERT(MUTEX_HELD(&cpu_lock));
756 switch (what) {
757 case CPU_CONFIG:
758 case CPU_ON:
759 case CPU_INIT:
760 case CPU_CPUPART_IN:
761 if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
762 cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
763 break;
764 case CPU_UNCONFIG:
765 case CPU_OFF:
766 case CPU_CPUPART_OUT:
767 if (cp->cpu_squeue_set != NULL) {
768 ip_squeue_set_destroy(cp);
769 cp->cpu_squeue_set = NULL;
770 }
771 break;
772 default:
773 break;
774 }
775 return (0);
776 }
777