/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * IP interface to squeues.
 *
 * IP uses squeues to force serialization of packets, both incoming and
 * outgoing. Each squeue is associated with a connection instance (conn_t)
 * above, and a soft ring (if enabled) below. Each CPU will have a default
 * squeue for outbound connections, and each soft ring of an interface will
 * have an squeue to which it sends incoming packets. squeues are never
 * destroyed, and if they become unused they are kept around against future
 * needs.
 *
 * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
 * in the system there will be one squeue set, all of whose squeues will be
 * bound to that CPU, plus one additional set known as the unbound set. Sets
 * associated with CPUs will have one default squeue, for outbound
 * connections, and a linked list of squeues used by various NICs for inbound
 * packets. The unbound set also has a linked list of squeues, but no default
 * squeue.
 *
 * When a CPU goes offline its squeue set is destroyed, and all its squeues
 * are moved to the unbound set. When a CPU comes online, a new squeue set is
 * created and the unbound set is searched for a default squeue formerly bound
 * to this CPU. If no default squeue is found, a new one is created.
 *
 * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
 * and not the squeue code. squeue.c will not touch them, and we can modify
 * them without holding the squeue lock because of the guarantee that squeues
 * are never destroyed. ip_squeue locks must be held, however.
 *
 * All the squeue sets are protected by a single lock, the sqset_lock. This
 * is also used to protect the sq_next and sq_set fields of an squeue_t.
 *
 * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
 *
 * There are two modes of associating connections with squeues. The first mode
 * associates each connection with the CPU that creates the connection (either
 * during open time or during accept time). The second mode associates each
 * connection with a random CPU, effectively distributing load over all CPUs
 * and all squeues in the system. The mode is controlled by the
 * ip_squeue_fanout variable.
 *
 * NOTE: The fact that there is an association between each connection and
 * squeue, and between each squeue and CPU, does not mean that each connection
 * is always processed on this CPU and on this CPU only. Any thread calling
 * squeue_enter() may process the connection on whatever CPU it is scheduled
 * on. The squeue to CPU binding is only relevant for the worker thread.
 *
 * INTERFACE:
 *
 * squeue_t *ip_squeue_get(ill_rx_ring_t)
 *
 * Returns the squeue associated with an ill receive ring. If the ring is
 * not bound to a CPU, and we're currently servicing the interrupt which
 * generated the packet, then bind the squeue to the CPU.
 *
 *
 * DR Notes
 * ========
 *
 * ip_squeue_init() registers a call-back function with the CPU DR
 * subsystem using register_cpu_setup_func(). The call-back function does two
 * things:
 *
 * o When the CPU is going off-line or unconfigured, the worker thread is
 *   unbound from the CPU. This allows the CPU unconfig code to move it to
 *   another CPU.
 *
 * o When the CPU is coming online, it creates a new squeue for this CPU if
 *   necessary and binds the squeue worker thread to this CPU.
 *
 * TUNABLES:
 *
 * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
 * pick the default squeue from a random CPU, otherwise use our CPU's default
 * squeue.
 *
 * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
 * /dev/ip.
 *
 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
 * created. This is the time squeue code waits before waking up the worker
 * thread after queuing a request.
 */
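
/*
 * Illustrative sketch (an addition, not original commentary): how a transport
 * might pick a default squeue for a new connection using the interfaces
 * described above. The exact call sites and the conn_sqp wiring live in the
 * TCP/IP connection code (e.g. ipclassifier), not in this file, so treat the
 * snippet as an assumption-laden example rather than a definitive API:
 *
 *	uint_t hint = CPU_PSEUDO_RANDOM();
 *	squeue_t *sqp;
 *
 *	sqp = IP_SQUEUE_GET(hint);	(honors ip_squeue_fanout)
 *	connp->conn_sqp = sqp;		(serialize this conn on sqp)
 *
 * If, as the TUNABLES note suggests, IP_SQUEUE_GET() ends up in
 * ip_squeue_random() below, then ip_squeue_fanout == 0 yields the current
 * CPU's default squeue and ip_squeue_fanout != 0 picks the default squeue
 * of a CPU derived by hashing the hint.
 */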

#include <sys/types.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <netinet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/nd.h>
#include <inet/ipclassifier.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
#include <sys/squeue_impl.h>
#include <sys/tihdr.h>
#include <inet/udp_impl.h>
#include <sys/strsubr.h>
#include <sys/zone.h>
#include <sys/dld.h>
#include <sys/atomic.h>

/*
 * List of all created squeue sets. The list and its size are protected by
 * sqset_lock.
 */
static squeue_set_t	**sqset_global_list; /* list 0 is the unbound list */
static uint_t		sqset_global_size;
kmutex_t		sqset_lock;

static void (*ip_squeue_create_callback)(squeue_t *) = NULL;

/*
 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
 * created. This is the time squeue code waits before waking up the worker
 * thread after queuing a request.
 */
uint_t ip_squeue_worker_wait = 10;

static squeue_t *ip_squeue_create(pri_t);
static squeue_set_t *ip_squeue_set_create(processorid_t);
static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
static void ip_squeue_set_destroy(cpu_t *);
static void ip_squeue_clean(void *, mblk_t *, void *);

#define	CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))

static squeue_t *
ip_squeue_create(pri_t pri)
{
	squeue_t *sqp;

	sqp = squeue_create(ip_squeue_worker_wait, pri);
	ASSERT(sqp != NULL);
	if (ip_squeue_create_callback != NULL)
		ip_squeue_create_callback(sqp);
	return (sqp);
}
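
/*
 * Illustrative note (an addition, not original commentary): per the block
 * comment at the top of this file, the globals above are organized roughly
 * as follows:
 *
 *	sqset_global_list[0]		the unbound set: no default squeue,
 *					just a linked list (sqs_head/sq_next)
 *					of idle or formerly bound squeues.
 *	sqset_global_list[1..size-1]	one set per online CPU, each with a
 *					default squeue (sqs_default) for
 *					outbound conns plus a list of squeues
 *					used by that CPU's NIC rx rings.
 *
 * sqset_global_size counts the entries in use; the array and the
 * sq_next/sq_set linkage are protected by sqset_lock.
 */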

/*
 * Create a new squeue_set. If id == -1, then we're creating the unbound set,
 * which should only happen once when we are first initialized. Otherwise id
 * is the id of the CPU that needs a set, either because we are initializing
 * or because the CPU has come online.
 *
 * If id != -1, then we need at a minimum to provide a default squeue for the
 * new set. We search the unbound set for candidates, and if none are found we
 * create a new one.
 */
static squeue_set_t *
ip_squeue_set_create(processorid_t id)
{
	squeue_set_t	*sqs;
	squeue_set_t	*src = sqset_global_list[0];
	squeue_t	**lastsqp, *sq;
	squeue_t	**defaultq_lastp = NULL;

	sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
	sqs->sqs_cpuid = id;

	if (id == -1) {
		ASSERT(sqset_global_size == 0);
		sqset_global_list[0] = sqs;
		sqset_global_size = 1;
		return (sqs);
	}

	/*
	 * When we create an squeue set with id != -1, we need to give it a
	 * default squeue, in order to support fanout of conns across
	 * CPUs. Try to find a former default squeue that matches this
	 * cpu id on the unbound squeue set. If no such squeue is found,
	 * find some non-default TCP squeue that is free. If still no such
	 * candidate is found, create a new squeue.
	 */

	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&sqset_lock);
	lastsqp = &src->sqs_head;

	while (*lastsqp) {
		if ((*lastsqp)->sq_bind == id &&
		    (*lastsqp)->sq_state & SQS_DEFAULT) {
			/*
			 * Exact match. Former default squeue of cpu 'id'
			 */
			ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
			defaultq_lastp = lastsqp;
			break;
		}
		if (defaultq_lastp == NULL &&
		    !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
			/*
			 * A free non-default TCP squeue
			 */
			defaultq_lastp = lastsqp;
		}
		lastsqp = &(*lastsqp)->sq_next;
	}

	if (defaultq_lastp != NULL) {
		/* Remove from src set and set SQS_DEFAULT */
		sq = *defaultq_lastp;
		*defaultq_lastp = sq->sq_next;
		sq->sq_next = NULL;
		if (!(sq->sq_state & SQS_DEFAULT)) {
			mutex_enter(&sq->sq_lock);
			sq->sq_state |= SQS_DEFAULT;
			mutex_exit(&sq->sq_lock);
		}
	} else {
		sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
		sq->sq_state |= SQS_DEFAULT;
	}

	sq->sq_set = sqs;
	sqs->sqs_default = sq;
	squeue_bind(sq, id); /* this locks squeue mutex */

	ASSERT(sqset_global_size <= NCPU);
	sqset_global_list[sqset_global_size++] = sqs;
	mutex_exit(&sqset_lock);
	return (sqs);
}

/*
 * Called by ill_ring_add() to find an squeue to associate with a new ring.
 */

squeue_t *
ip_squeue_getfree(pri_t pri)
{
	squeue_set_t	*sqs = sqset_global_list[0];
	squeue_t	*sq;

	mutex_enter(&sqset_lock);
	for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
		/*
		 * Select a non-default TCP squeue that is free i.e. not
		 * bound to any ill.
		 */
		if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
			break;
	}

	if (sq == NULL) {
		sq = ip_squeue_create(pri);
		sq->sq_set = sqs;
		sq->sq_next = sqs->sqs_head;
		sqs->sqs_head = sq;
	}

	ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
	    SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	mutex_enter(&sq->sq_lock);
	sq->sq_state |= SQS_ILL_BOUND;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&sqset_lock);

	if (sq->sq_priority != pri) {
		thread_lock(sq->sq_worker);
		(void) thread_change_pri(sq->sq_worker, pri, 0);
		thread_unlock(sq->sq_worker);

		thread_lock(sq->sq_poll_thr);
		(void) thread_change_pri(sq->sq_poll_thr, pri, 0);
		thread_unlock(sq->sq_poll_thr);

		sq->sq_priority = pri;
	}
	return (sq);
}

/*
 * Initialize IP squeues.
 */
void
ip_squeue_init(void (*callback)(squeue_t *))
{
	int i;
	squeue_set_t	*sqs;

	ASSERT(sqset_global_list == NULL);

	ip_squeue_create_callback = callback;
	squeue_init();
	mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
	sqset_global_list =
	    kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
	sqset_global_size = 0;
	/*
	 * We are called at system boot time and we don't
	 * expect memory allocation failure.
	 */
	sqs = ip_squeue_set_create(-1);
	ASSERT(sqs != NULL);

	mutex_enter(&cpu_lock);
	/* Create squeue for each active CPU available */
	for (i = 0; i < NCPU; i++) {
		cpu_t *cp = cpu_get(i);
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
			/*
			 * We are called at system boot time and we don't
			 * expect memory allocation failure then
			 */
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
			ASSERT(cp->cpu_squeue_set != NULL);
		}
	}

	register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
	mutex_exit(&cpu_lock);
}

/*
 * Get a default squeue, either from the current CPU or a CPU derived by hash
 * from the index argument, depending upon the setting of ip_squeue_fanout.
 */
squeue_t *
ip_squeue_random(uint_t index)
{
	squeue_set_t *sqs = NULL;
	squeue_t *sq;

	/*
	 * The minimum value of sqset_global_size is 2, one for the unbound
	 * squeue set and another for the squeue set of the zeroth CPU.
	 * Even though the value could be changing, it can never go below 2,
	 * so the assert does not need the lock protection.
	 */
	ASSERT(sqset_global_size > 1);

	/* Protect against changes to sqset_global_list */
	mutex_enter(&sqset_lock);

	if (!ip_squeue_fanout)
		sqs = CPU->cpu_squeue_set;

	/*
	 * sqset_global_list[0] corresponds to the unbound squeue set.
	 * The computation below picks a set other than the unbound set.
	 */
	if (sqs == NULL)
		sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
	sq = sqs->sqs_default;

	mutex_exit(&sqset_lock);
	ASSERT(sq);
	return (sq);
}
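
/*
 * Worked example (an addition, not original commentary): with four online
 * CPUs, sqset_global_size is 5 -- entry 0 is the unbound set and entries
 * 1..4 are the per-CPU sets. For index == 7 and ip_squeue_fanout != 0,
 * ip_squeue_random() picks sqset_global_list[(7 % (5 - 1)) + 1], i.e. entry
 * 4, and returns that set's default squeue; the "+ 1" guarantees the hash
 * can never select the unbound set.
 */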

/*
 * Move squeue from its current set to newset. Not used for default squeues.
 * Bind or unbind the worker thread as appropriate.
 */

static void
ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
{
	squeue_set_t	*set;
	squeue_t	**lastsqp;
	processorid_t	cpuid = newset->sqs_cpuid;

	ASSERT(!(sq->sq_state & SQS_DEFAULT));
	ASSERT(!MUTEX_HELD(&sq->sq_lock));
	ASSERT(MUTEX_HELD(&sqset_lock));

	set = sq->sq_set;
	if (set == newset)
		return;

	lastsqp = &set->sqs_head;
	while (*lastsqp != sq)
		lastsqp = &(*lastsqp)->sq_next;

	*lastsqp = sq->sq_next;
	sq->sq_next = newset->sqs_head;
	newset->sqs_head = sq;
	sq->sq_set = newset;
	if (cpuid == -1)
		squeue_unbind(sq);
	else
		squeue_bind(sq, cpuid);
}

/*
 * Move squeue from its current set to cpuid's set and bind to cpuid.
 */

int
ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
{
	cpu_t *cpu;
	squeue_set_t *set;

	if (sq->sq_state & SQS_DEFAULT)
		return (-1);

	ASSERT(MUTEX_HELD(&cpu_lock));

	cpu = cpu_get(cpuid);
	if (!CPU_ISON(cpu))
		return (-1);

	mutex_enter(&sqset_lock);
	set = cpu->cpu_squeue_set;
	if (set != NULL)
		ip_squeue_set_move(sq, set);
	mutex_exit(&sqset_lock);
	return ((set == NULL) ? -1 : 0);
}

/*
 * The mac layer is calling, asking us to move an squeue to a
 * new CPU. This routine is called with cpu_lock held.
 */
void
ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
{
	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring->rr_ill == ill);

	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE ||
	    rx_ring->rr_ring_state == RR_FREE_INPROG) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
		rx_ring->rr_ring_state = RR_SQUEUE_BOUND;

	mutex_exit(&ill->ill_lock);
}
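
/*
 * Descriptive comment added here (the original has none): ip_squeue_add_ring()
 * is called through the dld/mac polling capability when the driver exposes a
 * new rx ring; mrp is really a mac_rx_fifo_t. We claim a free slot in the
 * ill's ring table, record the driver's receive and interrupt entry points,
 * attach a free squeue marked SQS_POLL_CAPAB, and finally ask
 * ip_squeue_bind_ring() to bind that squeue to the CPU suggested by the mac
 * layer. Returns the ill_rx_ring_t, or NULL if the ring table is full.
 */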

void *
ip_squeue_add_ring(ill_t *ill, void *mrp)
{
	mac_rx_fifo_t		*mrfp = (mac_rx_fifo_t *)mrp;
	ill_rx_ring_t		*rx_ring, *ring_tbl;
	int			ip_rx_index;
	squeue_t		*sq = NULL;
	pri_t			pri;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
	ASSERT(ill->ill_dld_capab != NULL);

	ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;

	mutex_enter(&ill->ill_lock);
	for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
		rx_ring = &ring_tbl[ip_rx_index];
		if (rx_ring->rr_ring_state == RR_FREE)
			break;
	}

	if (ip_rx_index == ILL_MAX_RINGS) {
		/*
		 * We ran out of ILL_MAX_RINGS worth of rx_ring structures. If
		 * we have devices which can overwhelm this limit,
		 * ILL_MAX_RINGS should be made configurable. Meanwhile it
		 * causes no panic because the driver will pass ip_input a
		 * NULL handle, which makes IP allocate the default squeue,
		 * and polling mode will not be used for this ring.
		 */
		cmn_err(CE_NOTE,
		    "Reached maximum number of receiving rings (%d) for %s\n",
		    ILL_MAX_RINGS, ill->ill_name);
		mutex_exit(&ill->ill_lock);
		return (NULL);
	}

	bzero(rx_ring, sizeof (ill_rx_ring_t));
	rx_ring->rr_rx = (ip_mac_rx_t)mrfp->mrf_receive;
	/* XXX: Hard code it to tcp accept for now */
	rx_ring->rr_ip_accept = (ip_accept_t)ip_accept_tcp;

	rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
	rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
	rx_ring->rr_intr_disable =
	    (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
	rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
	rx_ring->rr_ill = ill;

	pri = mrfp->mrf_flow_priority;

	sq = ip_squeue_getfree(pri);

	mutex_enter(&sq->sq_lock);
	sq->sq_rx_ring = rx_ring;
	rx_ring->rr_sqp = sq;

	sq->sq_state |= SQS_POLL_CAPAB;

	rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
	sq->sq_ill = ill;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&ill->ill_lock);

	DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
	    ip_rx_index, void *, mrfp->mrf_rx_arg);

	/* Assign the squeue to the specified CPU as well */
	mutex_enter(&cpu_lock);
	(void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
	mutex_exit(&cpu_lock);

	return (rx_ring);
}

/*
 * sanitize the squeue etc. Some of the processing
 * needs to be done from inside the perimeter.
 */
void
ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	/* Just clean one squeue */
	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE) {
		mutex_exit(&ill->ill_lock);
		return;
	}
	rx_ring->rr_ring_state = RR_FREE_INPROG;
	sqp = rx_ring->rr_sqp;

	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_CLEANUP;
	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&ill->ill_lock);
	while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;

	ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
	    SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&sqp->sq_lock);

	/*
	 * Move the squeue to sqset_global_list[0] which holds the set of
	 * squeues not bound to any cpu. Note that the squeue is still
	 * considered bound to an ill as long as SQS_ILL_BOUND is set.
	 */
	mutex_enter(&sqset_lock);
	ip_squeue_set_move(sqp, sqset_global_list[0]);
	mutex_exit(&sqset_lock);

	/*
	 * CPU going offline can also trigger a move of the squeue to the
	 * unbound set sqset_global_list[0]. However the squeue won't be
	 * recycled for the next use as long as the SQS_ILL_BOUND flag
	 * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
	 * end after the move.
	 */
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state &= ~SQS_ILL_BOUND;
	mutex_exit(&sqp->sq_lock);

	mutex_enter(&ill->ill_lock);
	rx_ring->rr_ring_state = RR_FREE;
	mutex_exit(&ill->ill_lock);
}
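
/*
 * Illustrative summary (an addition, not original commentary): the cleanup
 * above and the quiesce/restart routines below share the same control
 * handshake with the squeue worker thread. The caller, holding sq_lock,
 * sets a request flag and signals sq_worker_cv:
 *
 *	SQS_POLL_CLEANUP	then wait for SQS_POLL_CLEANUP_DONE
 *	SQS_POLL_QUIESCE	then wait for SQS_POLL_QUIESCE_DONE
 *	SQS_POLL_RESTART	then wait for SQS_POLL_RESTART_DONE
 *
 * and blocks on sq_ctrlop_done_cv until the worker acknowledges by setting
 * the corresponding _DONE flag. The worker-side handling of these flags is
 * assumed to live with the rest of the squeue machinery in squeue.c, not in
 * this file.
 */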

/*
 * Stop the squeue from polling. This needs to be done
 * from inside the perimeter.
 */
void
ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	sqp = rx_ring->rr_sqp;
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_QUIESCE;
	cv_signal(&sqp->sq_worker_cv);
	while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);

	mutex_exit(&sqp->sq_lock);
}

/*
 * Restart polling etc. Needs to be inside the perimeter to
 * prevent races.
 */
void
ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	sqp = rx_ring->rr_sqp;
	mutex_enter(&sqp->sq_lock);
	/*
	 * Handle change in number of rings between the quiesce and
	 * restart operations by checking for a previous quiesce before
	 * attempting a restart.
	 */
	if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
		mutex_exit(&sqp->sq_lock);
		return;
	}
	sqp->sq_state |= SQS_POLL_RESTART;
	cv_signal(&sqp->sq_worker_cv);
	while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
	mutex_exit(&sqp->sq_lock);
}

/*
 * sanitize all squeues associated with the ill.
 */
void
ip_squeue_clean_all(ill_t *ill)
{
	int idx;
	ill_rx_ring_t	*rx_ring;

	for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
		rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
		ip_squeue_clean_ring(ill, rx_ring);
	}
}

/*
 * Used by IP to get the squeue associated with a ring. If the squeue isn't
 * yet bound to a CPU, and we're being called directly from the NIC's
 * interrupt, then we know what CPU we want to assign the squeue to, so
 * dispatch that task to a taskq.
 */
squeue_t *
ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
{
	squeue_t	*sqp;

	if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
		return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));

	return (sqp);
}

/*
 * Called when a CPU goes offline. Its squeue_set_t is destroyed, and all
 * squeues are unbound and moved to the unbound set.
 */
static void
ip_squeue_set_destroy(cpu_t *cpu)
{
	int i;
	squeue_t *sqp, *lastsqp = NULL;
	squeue_set_t *sqs, *unbound = sqset_global_list[0];

	mutex_enter(&sqset_lock);
	if ((sqs = cpu->cpu_squeue_set) == NULL) {
		mutex_exit(&sqset_lock);
		return;
	}

	/* Move all squeues to unbound set */

	for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
		squeue_unbind(sqp);
		sqp->sq_set = unbound;
	}
	if (sqs->sqs_head) {
		lastsqp->sq_next = unbound->sqs_head;
		unbound->sqs_head = sqs->sqs_head;
	}

	/* Also move default squeue to unbound set */

	sqp = sqs->sqs_default;
	ASSERT(sqp != NULL);
	ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);

	sqp->sq_next = unbound->sqs_head;
	unbound->sqs_head = sqp;
	squeue_unbind(sqp);
	sqp->sq_set = unbound;

	for (i = 1; i < sqset_global_size; i++)
		if (sqset_global_list[i] == sqs)
			break;

	ASSERT(i < sqset_global_size);
	sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
	sqset_global_list[sqset_global_size - 1] = NULL;
	sqset_global_size--;

	mutex_exit(&sqset_lock);
	kmem_free(sqs, sizeof (*sqs));
}

/*
 * Reconfiguration callback
 */
/* ARGSUSED */
static int
ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	cpu_t *cp = cpu_get(id);

	ASSERT(MUTEX_HELD(&cpu_lock));
	switch (what) {
	case CPU_CONFIG:
	case CPU_ON:
	case CPU_INIT:
	case CPU_CPUPART_IN:
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
		break;
	case CPU_UNCONFIG:
	case CPU_OFF:
	case CPU_CPUPART_OUT:
		if (cp->cpu_squeue_set != NULL) {
			ip_squeue_set_destroy(cp);
			cp->cpu_squeue_set = NULL;
		}
		break;
	default:
		break;
	}
	return (0);
}