/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2017 Joyent, Inc.
 */

/*
 * IP interface to squeues.
 *
 * IP uses squeues to force serialization of packets, both incoming and
 * outgoing. Each squeue is associated with a connection instance (conn_t)
 * above, and a soft ring (if enabled) below. Each CPU will have a default
 * squeue for outbound connections, and each soft ring of an interface will
 * have an squeue to which it sends incoming packets. squeues are never
 * destroyed, and if they become unused they are kept around against future
 * needs.
 *
 * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
 * in the system there will be one squeue set, all of whose squeues will be
 * bound to that CPU, plus one additional set known as the unbound set. Sets
 * associated with CPUs will have one default squeue, for outbound
 * connections, and a linked list of squeues used by various NICs for inbound
 * packets. The unbound set also has a linked list of squeues, but no default
 * squeue.
 *
 * When a CPU goes offline, its squeue set is destroyed, and all its squeues
 * are moved to the unbound set. When a CPU comes online, a new squeue set is
 * created and the unbound set is searched for a default squeue formerly
 * bound to this CPU. If no default squeue is found, a new one is created.
 *
 * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
 * and not the squeue code. squeue.c will not touch them, and we can modify
 * them without holding the squeue lock because of the guarantee that squeues
 * are never destroyed. ip_squeue locks must be held, however.
 *
 * All the squeue sets are protected by a single lock, the sqset_lock. This
 * is also used to protect the sq_next and sq_set fields of an squeue_t.
 *
 * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
 *
 * There are two modes of associating connections with squeues. The first
 * mode associates each connection with the CPU that creates the connection
 * (either at open time or at accept time). The second mode associates each
 * connection with a random CPU, effectively distributing load over all CPUs
 * and all squeues in the system. The mode is controlled by the
 * ip_squeue_fanout variable.
 *
 * NOTE: The fact that there is an association between each connection and
 * squeue, and between each squeue and CPU, does not mean that each
 * connection is always processed on this CPU and on this CPU only. Any
 * thread calling squeue_enter() may process the connection on whatever CPU
 * it is scheduled on. The squeue-to-CPU binding is only relevant for the
 * worker thread.
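 *
 * As a minimal sketch (illustrative only; the authoritative logic lives in
 * ip_squeue_random() below), default-squeue selection reduces to:
 *
 *     sqs = ip_squeue_fanout ?
 *         sqset_global_list[(hint % (sqset_global_size - 1)) + 1] :
 *         CPU->cpu_squeue_set;
 *     sqp = sqs->sqs_default;
 *
 * where 'hint' is the index supplied by the caller (e.g. via
 * IP_SQUEUE_GET()).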
 *
 * INTERFACE:
 *
 * squeue_t *ip_squeue_get(ill_rx_ring_t)
 *
 * Returns the squeue associated with an ill receive ring. If the ring is
 * not bound to a CPU, and we're currently servicing the interrupt which
 * generated the packet, then bind the squeue to CPU.
 *
 *
 * DR Notes
 * ========
 *
 * ip_squeue_init() registers a callback function with the CPU DR
 * subsystem using register_cpu_setup_func(). The callback does two
 * things:
 *
 * o When the CPU is going off-line or unconfigured, the worker thread is
 *   unbound from the CPU. This allows the CPU unconfig code to move it to
 *   another CPU.
 *
 * o When the CPU is going online, the callback creates a new squeue for
 *   this CPU if necessary and binds the squeue worker thread to this CPU.
 *
 * TUNABLES:
 *
 * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
 * pick the default squeue from a random CPU, otherwise use our CPU's default
 * squeue.
 *
 * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
 * /dev/ip.
 */

#include <sys/types.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <netinet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/nd.h>
#include <inet/ipclassifier.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
#include <sys/squeue_impl.h>
#include <sys/tihdr.h>
#include <inet/udp_impl.h>
#include <sys/strsubr.h>
#include <sys/zone.h>
#include <sys/dld.h>
#include <sys/atomic.h>

/*
 * List of all created squeue sets. The list and its size are protected by
 * sqset_lock.
 */
static squeue_set_t	**sqset_global_list; /* list 0 is the unbound list */
static uint_t		sqset_global_size;
kmutex_t		sqset_lock;

static void (*ip_squeue_create_callback)(squeue_t *) = NULL;

static squeue_t *ip_squeue_create(pri_t);
static squeue_set_t *ip_squeue_set_create(processorid_t);
static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
static void ip_squeue_set_destroy(cpu_t *);
static void ip_squeue_clean(void *, mblk_t *, void *);

#define	CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))

static squeue_t *
ip_squeue_create(pri_t pri)
{
        squeue_t *sqp;

        sqp = squeue_create(pri);
        ASSERT(sqp != NULL);
        if (ip_squeue_create_callback != NULL)
                ip_squeue_create_callback(sqp);
        return (sqp);
}
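
/*
 * Illustrative layout of the global squeue-set list (a sketch derived from
 * the code in this file, not a literal dump of the structures):
 *
 *   sqset_global_list[0]      The unbound set: sqs_cpuid == -1, no
 *     sqs_default == NULL     sqs_default; sqs_head chains squeues that are
 *     sqs_head -> sq -> ...   not currently bound to any CPU.
 *
 *   sqset_global_list[1..n]   One set per online CPU: sqs_default is that
 *     sqs_default -> sq       CPU's default (outbound) squeue; sqs_head
 *     sqs_head -> sq -> ...   chains squeues used by NIC rx rings bound to
 *                             that CPU.
 *
 * All manipulation of these lists is done under sqset_lock.
 */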

/*
 * Create a new squeue_set. If id == -1, then we're creating the unbound set,
 * which should only happen once when we are first initialized. Otherwise id
 * is the id of the CPU that needs a set, either because we are initializing
 * or because the CPU has come online.
 *
 * If id != -1, then we need at a minimum to provide a default squeue for the
 * new set. We search the unbound set for candidates, and if none are found we
 * create a new one.
 */
static squeue_set_t *
ip_squeue_set_create(processorid_t id)
{
        squeue_set_t	*sqs;
        squeue_set_t	*src = sqset_global_list[0];
        squeue_t	**lastsqp, *sq;
        squeue_t	**defaultq_lastp = NULL;

        sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
        sqs->sqs_cpuid = id;

        if (id == -1) {
                ASSERT(sqset_global_size == 0);
                sqset_global_list[0] = sqs;
                sqset_global_size = 1;
                return (sqs);
        }

        /*
         * When we create an squeue set with id != -1, we need to give it a
         * default squeue, in order to support fanout of conns across
         * CPUs. Try to find a former default squeue that matches this
         * cpu id on the unbound squeue set. If no such squeue is found,
         * find some non-default TCP squeue that is free. If still no such
         * candidate is found, create a new squeue.
         */

        ASSERT(MUTEX_HELD(&cpu_lock));
        mutex_enter(&sqset_lock);
        lastsqp = &src->sqs_head;

        while (*lastsqp) {
                if ((*lastsqp)->sq_bind == id &&
                    (*lastsqp)->sq_state & SQS_DEFAULT) {
                        /*
                         * Exact match. Former default squeue of cpu 'id'
                         */
                        ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
                        defaultq_lastp = lastsqp;
                        break;
                }
                if (defaultq_lastp == NULL &&
                    !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
                        /*
                         * A free non-default TCP squeue
                         */
                        defaultq_lastp = lastsqp;
                }
                lastsqp = &(*lastsqp)->sq_next;
        }

        if (defaultq_lastp != NULL) {
                /* Remove from src set and set SQS_DEFAULT */
                sq = *defaultq_lastp;
                *defaultq_lastp = sq->sq_next;
                sq->sq_next = NULL;
                if (!(sq->sq_state & SQS_DEFAULT)) {
                        mutex_enter(&sq->sq_lock);
                        sq->sq_state |= SQS_DEFAULT;
                        mutex_exit(&sq->sq_lock);
                }
        } else {
                sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
                sq->sq_state |= SQS_DEFAULT;
        }

        sq->sq_set = sqs;
        sqs->sqs_default = sq;
        squeue_bind(sq, id); /* this locks squeue mutex */

        ASSERT(sqset_global_size <= NCPU);
        sqset_global_list[sqset_global_size++] = sqs;
        mutex_exit(&sqset_lock);
        return (sqs);
}

/*
 * Called by ip_squeue_add_ring() to find an squeue to associate with a new
 * ring.
 */

squeue_t *
ip_squeue_getfree(pri_t pri)
{
        squeue_set_t	*sqs = sqset_global_list[0];
        squeue_t	*sq;

        mutex_enter(&sqset_lock);
        for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
                /*
                 * Select a non-default TCP squeue that is free, i.e. not
                 * bound to any ill.
                 */
                if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
                        break;
        }

        if (sq == NULL) {
                sq = ip_squeue_create(pri);
                sq->sq_set = sqs;
                sq->sq_next = sqs->sqs_head;
                sqs->sqs_head = sq;
        }

        ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
            SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
            SQS_POLL_THR_QUIESCED)));

        mutex_enter(&sq->sq_lock);
        sq->sq_state |= SQS_ILL_BOUND;
        mutex_exit(&sq->sq_lock);
        mutex_exit(&sqset_lock);

        if (sq->sq_priority != pri) {
                thread_lock(sq->sq_worker);
                (void) thread_change_pri(sq->sq_worker, pri, 0);
                thread_unlock(sq->sq_worker);

                thread_lock(sq->sq_poll_thr);
                (void) thread_change_pri(sq->sq_poll_thr, pri, 0);
                thread_unlock(sq->sq_poll_thr);

                sq->sq_priority = pri;
        }
        return (sq);
}

/*
 * Initialize IP squeues.
 */
void
ip_squeue_init(void (*callback)(squeue_t *))
{
        int i;
        squeue_set_t	*sqs;

        ASSERT(sqset_global_list == NULL);

        ip_squeue_create_callback = callback;
        squeue_init();
        mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
        sqset_global_list =
            kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
        sqset_global_size = 0;
        /*
         * We are called at system boot time and we don't
         * expect memory allocation failure.
         */
        sqs = ip_squeue_set_create(-1);
        ASSERT(sqs != NULL);

        mutex_enter(&cpu_lock);
        /* Create squeue for each active CPU available */
        for (i = 0; i < NCPU; i++) {
                cpu_t *cp = cpu_get(i);
                if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
                        /*
                         * We are called at system boot time and we don't
                         * expect memory allocation failure then
                         */
                        cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
                        ASSERT(cp->cpu_squeue_set != NULL);
                }
        }

        register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
        mutex_exit(&cpu_lock);
}

/*
 * Get a default squeue, either from the current CPU or a CPU derived by hash
 * from the index argument, depending upon the setting of ip_squeue_fanout.
 */
squeue_t *
ip_squeue_random(uint_t index)
{
        squeue_set_t *sqs = NULL;
        squeue_t *sq;

        /*
         * The minimum value of sqset_global_size is 2, one for the unbound
         * squeue set and another for the squeue set of the zeroth CPU.
         * Even though the value could be changing, it can never go below 2,
         * so the assert does not need the lock protection.
         */
        ASSERT(sqset_global_size > 1);

        /* Protect against changes to sqset_global_list */
        mutex_enter(&sqset_lock);

        if (!ip_squeue_fanout)
                sqs = CPU->cpu_squeue_set;

        /*
         * sqset_global_list[0] corresponds to the unbound squeue set.
         * The computation below picks a set other than the unbound set.
         */
        if (sqs == NULL)
                sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
        sq = sqs->sqs_default;

        mutex_exit(&sqset_lock);
        ASSERT(sq);
        return (sq);
}

/*
 * Move squeue from its current set to newset. Not used for default squeues.
 * Bind or unbind the worker thread as appropriate.
 */

static void
ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
{
        squeue_set_t	*set;
        squeue_t	**lastsqp;
        processorid_t	cpuid = newset->sqs_cpuid;

        ASSERT(!(sq->sq_state & SQS_DEFAULT));
        ASSERT(!MUTEX_HELD(&sq->sq_lock));
        ASSERT(MUTEX_HELD(&sqset_lock));

        set = sq->sq_set;
        if (set == newset)
                return;

        lastsqp = &set->sqs_head;
        while (*lastsqp != sq)
                lastsqp = &(*lastsqp)->sq_next;

        *lastsqp = sq->sq_next;
        sq->sq_next = newset->sqs_head;
        newset->sqs_head = sq;
        sq->sq_set = newset;
        if (cpuid == -1)
                squeue_unbind(sq);
        else
                squeue_bind(sq, cpuid);
}

/*
 * Move squeue from its current set to cpuid's set and bind to cpuid.
 */

int
ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
{
        cpu_t *cpu;
        squeue_set_t *set;

        if (sq->sq_state & SQS_DEFAULT)
                return (-1);

        ASSERT(MUTEX_HELD(&cpu_lock));

        cpu = cpu_get(cpuid);
        if (!CPU_ISON(cpu))
                return (-1);

        mutex_enter(&sqset_lock);
        set = cpu->cpu_squeue_set;
        if (set != NULL)
                ip_squeue_set_move(sq, set);
        mutex_exit(&sqset_lock);
        return ((set == NULL) ? -1 : 0);
}
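
/*
 * Illustrative usage sketch (hypothetical caller): rebind a ring's squeue to
 * a given CPU, taking cpu_lock as ip_squeue_cpu_move() requires:
 *
 *     mutex_enter(&cpu_lock);
 *     (void) ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid);
 *     mutex_exit(&cpu_lock);
 *
 * ip_squeue_bind_ring() below is the in-tree path that performs this on
 * behalf of the mac layer.
 */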

/*
 * The mac layer is calling, asking us to move an squeue to a
 * new CPU. This routine is called with cpu_lock held.
 */
void
ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
{
        ASSERT(ILL_MAC_PERIM_HELD(ill));
        ASSERT(rx_ring->rr_ill == ill);

        mutex_enter(&ill->ill_lock);
        if (rx_ring->rr_ring_state == RR_FREE ||
            rx_ring->rr_ring_state == RR_FREE_INPROG) {
                mutex_exit(&ill->ill_lock);
                return;
        }

        if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
                rx_ring->rr_ring_state = RR_SQUEUE_BOUND;

        mutex_exit(&ill->ill_lock);
}

void *
ip_squeue_add_ring(ill_t *ill, void *mrp)
{
        mac_rx_fifo_t	*mrfp = (mac_rx_fifo_t *)mrp;
        ill_rx_ring_t	*rx_ring, *ring_tbl;
        int		ip_rx_index;
        squeue_t	*sq = NULL;
        pri_t		pri;

        ASSERT(ILL_MAC_PERIM_HELD(ill));
        ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
        ASSERT(ill->ill_dld_capab != NULL);

        ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;

        mutex_enter(&ill->ill_lock);
        for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
                rx_ring = &ring_tbl[ip_rx_index];
                if (rx_ring->rr_ring_state == RR_FREE)
                        break;
        }

        if (ip_rx_index == ILL_MAX_RINGS) {
                /*
                 * We ran out of ILL_MAX_RINGS worth of rx_ring structures.
                 * If we have devices which can overwhelm this limit,
                 * ILL_MAX_RINGS should be made configurable. Meanwhile it
                 * causes no panic because the driver will pass ip_input a
                 * NULL handle, which makes IP fall back to a default squeue,
                 * and polling mode will not be used for this ring.
                 */
                cmn_err(CE_NOTE,
                    "Reached maximum number of receiving rings (%d) for %s\n",
                    ILL_MAX_RINGS, ill->ill_name);
                mutex_exit(&ill->ill_lock);
                return (NULL);
        }

        bzero(rx_ring, sizeof (ill_rx_ring_t));
        rx_ring->rr_rx = mrfp->mrf_receive;
        /* XXX: Hard code it to tcp accept for now */
        rx_ring->rr_ip_accept = (ip_accept_t)ip_accept_tcp;

        rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
        rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
        rx_ring->rr_intr_disable =
            (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
        rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
        rx_ring->rr_ill = ill;

        pri = mrfp->mrf_flow_priority;

        sq = ip_squeue_getfree(pri);

        mutex_enter(&sq->sq_lock);
        sq->sq_rx_ring = rx_ring;
        rx_ring->rr_sqp = sq;

        sq->sq_state |= SQS_POLL_CAPAB;

        rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
        sq->sq_ill = ill;
        mutex_exit(&sq->sq_lock);
        mutex_exit(&ill->ill_lock);

        DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
            ip_rx_index, void *, mrfp->mrf_rx_arg);

        /* Assign the squeue to the specified CPU as well */
        mutex_enter(&cpu_lock);
        (void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
        mutex_exit(&cpu_lock);

        return (rx_ring);
}
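
/*
 * Ring/squeue lifecycle, as implemented by the routines above and below
 * (a descriptive summary of existing behavior):
 *
 *   RR_FREE -> RR_SQUEUE_UNBOUND          ip_squeue_add_ring() claims the
 *                                         ring slot and attaches an squeue.
 *   RR_SQUEUE_UNBOUND -> RR_SQUEUE_BOUND  ip_squeue_bind_ring() binds the
 *                                         ring's squeue to the CPU chosen
 *                                         by mac.
 *   RR_* -> RR_FREE_INPROG -> RR_FREE     ip_squeue_clean_ring() quiesces
 *                                         the squeue, moves it to the
 *                                         unbound set and releases the
 *                                         ring slot.
 */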

/*
 * Sanitize the squeue, etc. Some of the processing
 * needs to be done from inside the perimeter.
 */
void
ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
        squeue_t *sqp;

        ASSERT(ILL_MAC_PERIM_HELD(ill));
        ASSERT(rx_ring != NULL);

        /* Just clean one squeue */
        mutex_enter(&ill->ill_lock);
        if (rx_ring->rr_ring_state == RR_FREE) {
                mutex_exit(&ill->ill_lock);
                return;
        }
        rx_ring->rr_ring_state = RR_FREE_INPROG;
        sqp = rx_ring->rr_sqp;

        mutex_enter(&sqp->sq_lock);
        sqp->sq_state |= SQS_POLL_CLEANUP;
        cv_signal(&sqp->sq_worker_cv);
        mutex_exit(&ill->ill_lock);
        while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
                cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
        sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;

        ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
            SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
            SQS_POLL_THR_QUIESCED)));

        cv_signal(&sqp->sq_worker_cv);
        mutex_exit(&sqp->sq_lock);

        /*
         * Move the squeue to sqset_global_list[0] which holds the set of
         * squeues not bound to any cpu. Note that the squeue is still
         * considered bound to an ill as long as SQS_ILL_BOUND is set.
         */
        mutex_enter(&sqset_lock);
        ip_squeue_set_move(sqp, sqset_global_list[0]);
        mutex_exit(&sqset_lock);

        /*
         * CPU going offline can also trigger a move of the squeue to the
         * unbound set sqset_global_list[0]. However the squeue won't be
         * recycled for the next use as long as the SQS_ILL_BOUND flag
         * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
         * end after the move.
         */
        mutex_enter(&sqp->sq_lock);
        sqp->sq_state &= ~SQS_ILL_BOUND;
        mutex_exit(&sqp->sq_lock);

        mutex_enter(&ill->ill_lock);
        rx_ring->rr_ring_state = RR_FREE;
        mutex_exit(&ill->ill_lock);
}

/*
 * Stop the squeue from polling. This needs to be done
 * from inside the perimeter.
 */
void
ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
        squeue_t *sqp;

        ASSERT(ILL_MAC_PERIM_HELD(ill));
        ASSERT(rx_ring != NULL);

        sqp = rx_ring->rr_sqp;
        mutex_enter(&sqp->sq_lock);
        sqp->sq_state |= SQS_POLL_QUIESCE;
        cv_signal(&sqp->sq_worker_cv);
        while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
                cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);

        mutex_exit(&sqp->sq_lock);
}

/*
 * Restart polling etc. Needs to be inside the perimeter to
 * prevent races.
 */
void
ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
        squeue_t *sqp;

        ASSERT(ILL_MAC_PERIM_HELD(ill));
        ASSERT(rx_ring != NULL);

        sqp = rx_ring->rr_sqp;
        mutex_enter(&sqp->sq_lock);
        /*
         * Handle change in number of rings between the quiesce and
         * restart operations by checking for a previous quiesce before
         * attempting a restart.
         */
        if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
                mutex_exit(&sqp->sq_lock);
                return;
        }
        sqp->sq_state |= SQS_POLL_RESTART;
        cv_signal(&sqp->sq_worker_cv);
        while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
                cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
        sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
        mutex_exit(&sqp->sq_lock);
}
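
/*
 * Illustrative usage sketch (hypothetical caller, under the ill's mac
 * perimeter): pause polling around a ring reconfiguration and resume it
 * afterwards:
 *
 *     ip_squeue_quiesce_ring(ill, rx_ring);
 *     ... reconfigure the hardware ring ...
 *     ip_squeue_restart_ring(ill, rx_ring);
 *
 * ip_squeue_restart_ring() tolerates a missing quiesce, since the number of
 * rings can change between the two calls.
 */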

/*
 * Sanitize all squeues associated with the ill.
 */
void
ip_squeue_clean_all(ill_t *ill)
{
        int idx;
        ill_rx_ring_t	*rx_ring;

        for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
                rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
                ip_squeue_clean_ring(ill, rx_ring);
        }
}

/*
 * Used by IP to get the squeue associated with a ring. If the squeue isn't
 * yet bound to a CPU, and we're being called directly from the NIC's
 * interrupt, then we know what CPU we want to assign the squeue to, so
 * dispatch that task to a taskq.
 */
squeue_t *
ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
{
        squeue_t *sqp;

        if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
                return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));

        return (sqp);
}

/*
 * Called when a CPU goes offline. Its squeue_set_t is destroyed, and all
 * its squeues are unbound and moved to the unbound set.
 */
static void
ip_squeue_set_destroy(cpu_t *cpu)
{
        int i;
        squeue_t *sqp, *lastsqp = NULL;
        squeue_set_t *sqs, *unbound = sqset_global_list[0];

        mutex_enter(&sqset_lock);
        if ((sqs = cpu->cpu_squeue_set) == NULL) {
                mutex_exit(&sqset_lock);
                return;
        }

        /* Move all squeues to unbound set */

        for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
                squeue_unbind(sqp);
                sqp->sq_set = unbound;
        }
        if (sqs->sqs_head) {
                lastsqp->sq_next = unbound->sqs_head;
                unbound->sqs_head = sqs->sqs_head;
        }

        /* Also move default squeue to unbound set */

        sqp = sqs->sqs_default;
        ASSERT(sqp != NULL);
        ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);

        sqp->sq_next = unbound->sqs_head;
        unbound->sqs_head = sqp;
        squeue_unbind(sqp);
        sqp->sq_set = unbound;

        for (i = 1; i < sqset_global_size; i++)
                if (sqset_global_list[i] == sqs)
                        break;

        ASSERT(i < sqset_global_size);
        sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
        sqset_global_list[sqset_global_size - 1] = NULL;
        sqset_global_size--;

        mutex_exit(&sqset_lock);
        kmem_free(sqs, sizeof (*sqs));
}

/*
 * Reconfiguration callback
 */
/* ARGSUSED */
static int
ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
{
        cpu_t *cp = cpu_get(id);

        ASSERT(MUTEX_HELD(&cpu_lock));
        switch (what) {
        case CPU_CONFIG:
        case CPU_ON:
        case CPU_INIT:
        case CPU_CPUPART_IN:
                if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
                        cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
                break;
        case CPU_UNCONFIG:
        case CPU_OFF:
        case CPU_CPUPART_OUT:
                if (cp->cpu_squeue_set != NULL) {
                        ip_squeue_set_destroy(cp);
                        cp->cpu_squeue_set = NULL;
                }
                break;
        default:
                break;
        }
        return (0);
}