1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright 2019 Joyent, Inc. 27 */ 28 /* 29 * Copyright 2019 Peter Tribble. 
30 */ 31 32 #include <sys/sysmacros.h> 33 #include <sys/stack.h> 34 #include <sys/cpuvar.h> 35 #include <sys/ivintr.h> 36 #include <sys/intreg.h> 37 #include <sys/membar.h> 38 #include <sys/kmem.h> 39 #include <sys/intr.h> 40 #include <sys/sunddi.h> 41 #include <sys/sunndi.h> 42 #include <sys/cmn_err.h> 43 #include <sys/privregs.h> 44 #include <sys/systm.h> 45 #include <sys/archsystm.h> 46 #include <sys/machsystm.h> 47 #include <sys/x_call.h> 48 #include <vm/seg_kp.h> 49 #include <sys/debug.h> 50 #include <sys/cyclic.h> 51 #include <sys/kdi_impl.h> 52 #include <sys/ddi_periodic.h> 53 54 #include <sys/cpu_sgnblk_defs.h> 55 56 /* Global locks which protect the interrupt distribution lists */ 57 static kmutex_t intr_dist_lock; 58 static kmutex_t intr_dist_cpu_lock; 59 60 /* Head of the interrupt distribution lists */ 61 static struct intr_dist *intr_dist_head = NULL; 62 static struct intr_dist *intr_dist_whead = NULL; 63 64 static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt numbers */ 65 uint64_t *siron_cpu_inum = NULL; 66 uint64_t siron_poke_cpu_inum; 67 static int siron_cpu_setup(cpu_setup_t, int, void *); 68 extern uint_t softlevel1(); 69 70 static uint64_t siron1_inum; /* backward compatibility */ 71 uint64_t poke_cpu_inum; 72 uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2); 73 uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2); 74 75 /* 76 * Variable to enable/disable printing a message when an invalid vecintr 77 * is received. 78 */ 79 uint_t ignore_invalid_vecintr = 0; 80 81 /* 82 * Note:- 83 * siron_pending was originally created to prevent a resource over consumption 84 * bug in setsoftint(exhaustion of interrupt pool free list). 85 * It's original intention is obsolete with the use of iv_pending in 86 * setsoftint. However, siron_pending stayed around, acting as a second 87 * gatekeeper preventing soft interrupts from being queued. 
In this capacity, 88 * it can lead to hangs on MP systems, where due to global visibility issues 89 * it can end up set while iv_pending is reset, preventing soft interrupts from 90 * ever being processed. In addition to its gatekeeper role, init_intr also 91 * uses it to flag the situation where siron() was called before siron_inum has 92 * been defined. 93 * 94 * siron() does not need an extra gatekeeper; any cpu that wishes should be 95 * allowed to queue a soft interrupt. It is softint()'s job to ensure 96 * correct handling of the queues. Therefore, siron_pending has been 97 * stripped of its gatekeeper task, retaining only its intr_init job, where 98 * it indicates that there is a pending need to call siron(). 99 */ 100 static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */ 101 static int siron1_pending; /* backward compatibility */ 102 103 int intr_policy = INTR_WEIGHTED_DIST; /* interrupt distribution policy */ 104 int intr_dist_debug = 0; 105 int32_t intr_dist_weight_max = 1; 106 int32_t intr_dist_weight_maxmax = 1000; 107 int intr_dist_weight_maxfactor = 2; 108 #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args 109 110 /* 111 * intr_init() - Interrupt initialization 112 * Initialize the system's interrupt vector table. 113 */ 114 void 115 intr_init(cpu_t *cp) 116 { 117 int i; 118 extern uint_t softlevel1(); 119 120 init_ivintr(); 121 122 /* 123 * Register these software interrupts for ddi timer. 124 * Software interrupts up to the level 10 are supported. 
125 */ 126 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) { 127 siron_inum[i - 1] = add_softintr(i, 128 (softintrfunc)ddi_periodic_softintr, 129 (caddr_t)(uintptr_t)(i), SOFTINT_ST); 130 } 131 132 siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST); 133 poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT); 134 siron_poke_cpu_inum = add_softintr(PIL_13, 135 siron_poke_cpu_intr, 0, SOFTINT_MT); 136 cp->cpu_m.poke_cpu_outstanding = B_FALSE; 137 138 mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL); 139 mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL); 140 141 /* 142 * A soft interrupt may have been requested prior to the initialization 143 * of soft interrupts. Soft interrupts can't be dispatched until after 144 * init_intr(), so we have to wait until now before we can dispatch the 145 * pending soft interrupt (if any). 146 */ 147 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) { 148 if (siron_pending[i-1]) { 149 siron_pending[i-1] = 0; 150 sir_on(i); 151 } 152 } 153 if (siron1_pending) { 154 siron1_pending = 0; 155 siron(); 156 } 157 } 158 159 /* 160 * poke_cpu_intr - fall through when poke_cpu calls 161 */ 162 /* ARGSUSED */ 163 uint_t 164 poke_cpu_intr(caddr_t arg1, caddr_t arg2) 165 { 166 CPU->cpu_m.poke_cpu_outstanding = B_FALSE; 167 membar_stld_stst(); 168 return (1); 169 } 170 171 /* 172 * Trigger software interrupts dedicated to ddi timer. 173 */ 174 void 175 sir_on(int level) 176 { 177 ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10); 178 if (siron_inum[level-1]) 179 setsoftint(siron_inum[level-1]); 180 else 181 siron_pending[level-1] = 1; 182 } 183 184 /* 185 * kmdb uses siron (and thus setsoftint) while the world is stopped in order to 186 * inform its driver component that there's work to be done. We need to keep 187 * DTrace from instrumenting kmdb's siron and setsoftint. We duplicate siron, 188 * giving kmdb's version a kdi_ prefix to keep DTrace at bay. 
The 189 * implementation of setsoftint is complicated enough that we don't want to 190 * duplicate it, but at the same time we don't want to preclude tracing either. 191 * The meat of setsoftint() therefore goes into kdi_setsoftint, with 192 * setsoftint() implemented as a wrapper. This allows tracing, while still 193 * providing a way for kmdb to sneak in unmolested. 194 */ 195 void 196 kdi_siron(void) 197 { 198 if (siron1_inum != 0) 199 kdi_setsoftint(siron1_inum); 200 else 201 siron1_pending = 1; 202 } 203 204 void 205 setsoftint(uint64_t inum) 206 { 207 kdi_setsoftint(inum); 208 } 209 210 /* 211 * Generates softlevel1 interrupt on current CPU if it 212 * is not pending already. 213 */ 214 void 215 siron(void) 216 { 217 uint64_t inum; 218 219 if (siron1_inum != 0) { 220 /* 221 * Once siron_cpu_inum has been allocated, we can 222 * use per-CPU siron inum. 223 */ 224 if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0) 225 inum = siron_cpu_inum[CPU->cpu_id]; 226 else 227 inum = siron1_inum; 228 229 setsoftint(inum); 230 } else 231 siron1_pending = 1; 232 } 233 234 235 static void 236 siron_init(void) 237 { 238 /* 239 * We just allocate memory for per-cpu siron right now. Rest of 240 * the work is done when CPU is configured. 241 */ 242 siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP); 243 } 244 245 /* 246 * This routine creates per-CPU siron inum for CPUs which are 247 * configured during boot. 248 */ 249 void 250 siron_mp_init() 251 { 252 cpu_t *c; 253 254 /* 255 * Get the memory for per-CPU siron inums 256 */ 257 siron_init(); 258 259 mutex_enter(&cpu_lock); 260 c = cpu_list; 261 do { 262 (void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL); 263 } while ((c = c->cpu_next) != cpu_list); 264 265 register_cpu_setup_func(siron_cpu_setup, NULL); 266 mutex_exit(&cpu_lock); 267 } 268 269 /* 270 * siron_poke_cpu_intr - cross-call handler. 
271 */ 272 /* ARGSUSED */ 273 uint_t 274 siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2) 275 { 276 /* generate level1 softint */ 277 siron(); 278 return (1); 279 } 280 281 /* 282 * This routine generates a cross-call on target CPU(s). 283 */ 284 void 285 siron_poke_cpu(cpuset_t poke) 286 { 287 int cpuid = CPU->cpu_id; 288 289 if (CPU_IN_SET(poke, cpuid)) { 290 siron(); 291 CPUSET_DEL(poke, cpuid); 292 if (CPUSET_ISNULL(poke)) 293 return; 294 } 295 296 xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0); 297 } 298 299 /* 300 * This callback function allows us to create per-CPU siron inum. 301 */ 302 /* ARGSUSED */ 303 static int 304 siron_cpu_setup(cpu_setup_t what, int id, void *arg) 305 { 306 cpu_t *cp = cpu[id]; 307 308 ASSERT(MUTEX_HELD(&cpu_lock)); 309 ASSERT(cp != NULL); 310 311 switch (what) { 312 case CPU_CONFIG: 313 siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1, 314 (softintrfunc)softlevel1, 0, SOFTINT_ST); 315 break; 316 case CPU_UNCONFIG: 317 (void) rem_softintr(siron_cpu_inum[cp->cpu_id]); 318 siron_cpu_inum[cp->cpu_id] = 0; 319 break; 320 default: 321 break; 322 } 323 324 return (0); 325 } 326 327 /* 328 * no_ivintr() 329 * called by setvecint_tl1() through sys_trap() 330 * vector interrupt received but not valid or not 331 * registered in intr_vec_table 332 * considered as a spurious mondo interrupt 333 */ 334 /* ARGSUSED */ 335 void 336 no_ivintr(struct regs *rp, int inum, int pil) 337 { 338 if (!ignore_invalid_vecintr) 339 cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x", 340 inum, pil); 341 342 #ifdef DEBUG_VEC_INTR 343 prom_enter_mon(); 344 #endif /* DEBUG_VEC_INTR */ 345 } 346 347 void 348 intr_dequeue_req(uint_t pil, uint64_t inum) 349 { 350 intr_vec_t *iv, *next, *prev; 351 struct machcpu *mcpu; 352 uint32_t clr; 353 processorid_t cpu_id; 354 extern uint_t getpstate(void); 355 356 ASSERT((getpstate() & PSTATE_IE) == 0); 357 358 mcpu = &CPU->cpu_m; 359 cpu_id = CPU->cpu_id; 360 361 iv = (intr_vec_t *)inum; 362 prev = NULL; 363 
next = mcpu->intr_head[pil]; 364 365 /* Find a matching entry in the list */ 366 while (next != NULL) { 367 if (next == iv) 368 break; 369 prev = next; 370 next = IV_GET_PIL_NEXT(next, cpu_id); 371 } 372 373 if (next != NULL) { 374 intr_vec_t *next_iv = IV_GET_PIL_NEXT(next, cpu_id); 375 376 /* Remove entry from list */ 377 if (prev != NULL) 378 IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */ 379 else 380 mcpu->intr_head[pil] = next_iv; /* head */ 381 382 if (next_iv == NULL) 383 mcpu->intr_tail[pil] = prev; /* tail */ 384 } 385 386 /* Clear pending interrupts at this level if the list is empty */ 387 if (mcpu->intr_head[pil] == NULL) { 388 clr = 1 << pil; 389 if (pil == PIL_14) 390 clr |= (TICK_INT_MASK | STICK_INT_MASK); 391 wr_clr_softint(clr); 392 } 393 } 394 395 396 /* 397 * Send a directed interrupt of specified interrupt number id to a cpu. 398 */ 399 void 400 send_dirint( 401 int cpuix, /* cpu to be interrupted */ 402 int intr_id) /* interrupt number id */ 403 { 404 xt_one(cpuix, setsoftint_tl1, intr_id, 0); 405 } 406 407 /* 408 * Take the specified CPU out of participation in interrupts. 409 * Called by p_online(2) when a processor is being taken off-line. 410 * This allows interrupt threads being handled on the processor to 411 * complete before the processor is idled. 412 */ 413 int 414 cpu_disable_intr(struct cpu *cp) 415 { 416 ASSERT(MUTEX_HELD(&cpu_lock)); 417 418 /* 419 * Turn off the CPU_ENABLE flag before calling the redistribution 420 * function, since it checks for this in the cpu flags. 421 */ 422 cp->cpu_flags &= ~CPU_ENABLE; 423 ncpus_intr_enabled--; 424 425 intr_redist_all_cpus(); 426 427 return (0); 428 } 429 430 /* 431 * Allow the specified CPU to participate in interrupts. 432 * Called by p_online(2) if a processor could not be taken off-line 433 * because of bound threads, in order to resume processing interrupts. 434 * Also called after starting a processor. 
435 */ 436 void 437 cpu_enable_intr(struct cpu *cp) 438 { 439 ASSERT(MUTEX_HELD(&cpu_lock)); 440 441 cp->cpu_flags |= CPU_ENABLE; 442 ncpus_intr_enabled++; 443 444 intr_redist_all_cpus(); 445 } 446 447 /* 448 * Add function to callback list for intr_redist_all_cpus. We keep two lists, 449 * one for weighted callbacks and one for normal callbacks. Weighted callbacks 450 * are issued to redirect interrupts of a specified weight, from heavy to 451 * light. This allows all the interrupts of a given weight to be redistributed 452 * for all weighted nexus drivers prior to those of less weight. 453 */ 454 static void 455 intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg) 456 { 457 struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP); 458 struct intr_dist *iptr; 459 struct intr_dist **pptr; 460 461 ASSERT(func); 462 new->func = func; 463 new->arg = arg; 464 new->next = NULL; 465 466 /* Add to tail so that redistribution occurs in original order. */ 467 mutex_enter(&intr_dist_lock); 468 for (iptr = *phead, pptr = phead; iptr != NULL; 469 pptr = &iptr->next, iptr = iptr->next) { 470 /* check for problems as we locate the tail */ 471 if ((iptr->func == func) && (iptr->arg == arg)) { 472 cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate"); 473 /*NOTREACHED*/ 474 } 475 } 476 *pptr = new; 477 478 mutex_exit(&intr_dist_lock); 479 } 480 481 void 482 intr_dist_add(void (*func)(void *), void *arg) 483 { 484 intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg); 485 } 486 487 void 488 intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 489 { 490 intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg); 491 } 492 493 /* 494 * Search for the interrupt distribution structure with the specified 495 * mondo vec reg in the interrupt distribution list. If a match is found, 496 * then delete the entry from the list. The caller is responsible for 497 * modifying the mondo vector registers. 
498 */ 499 static void 500 intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg) 501 { 502 struct intr_dist *iptr; 503 struct intr_dist **vect; 504 505 mutex_enter(&intr_dist_lock); 506 for (iptr = *headp, vect = headp; 507 iptr != NULL; vect = &iptr->next, iptr = iptr->next) { 508 if ((iptr->func == func) && (iptr->arg == arg)) { 509 *vect = iptr->next; 510 kmem_free(iptr, sizeof (struct intr_dist)); 511 mutex_exit(&intr_dist_lock); 512 return; 513 } 514 } 515 516 if (!panicstr) 517 cmn_err(CE_PANIC, "intr_dist_rem_list: not found"); 518 mutex_exit(&intr_dist_lock); 519 } 520 521 void 522 intr_dist_rem(void (*func)(void *), void *arg) 523 { 524 intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg); 525 } 526 527 void 528 intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 529 { 530 intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg); 531 } 532 533 /* 534 * Initiate interrupt redistribution. Redistribution improves the isolation 535 * associated with interrupt weights by ordering operations from heavy weight 536 * to light weight. When a CPUs orientation changes relative to interrupts, 537 * there is *always* a redistribution to accommodate this change (call to 538 * intr_redist_all_cpus()). As devices (not CPUs) attach/detach it is possible 539 * that a redistribution could improve the quality of an initialization. For 540 * example, if you are not using a NIC it may not be attached with s10 (devfs). 541 * If you then configure the NIC (ifconfig), this may cause the NIC to attach 542 * and plumb interrupts. The CPU assignment for the NIC's interrupts is 543 * occurring late, so optimal "isolation" relative to weight is not occurring. 544 * The same applies to detach, although in this case doing the redistribution 545 * might improve "spread" for medium weight devices since the "isolation" of 546 * a higher weight device may no longer be present. 
547 * 548 * NB: We should provide a utility to trigger redistribution (ala "intradm -r"). 549 * 550 * NB: There is risk associated with automatically triggering execution of the 551 * redistribution code at arbitrary times. The risk comes from the fact that 552 * there is a lot of low-level hardware interaction associated with a 553 * redistribution. At some point we may want this code to perform automatic 554 * redistribution (redistribution thread; trigger timeout when add/remove 555 * weight delta is large enough, and call cv_signal from timeout - causing 556 * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too 557 * risky at this time. 558 */ 559 void 560 i_ddi_intr_redist_all_cpus() 561 { 562 mutex_enter(&cpu_lock); 563 INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n")); 564 intr_redist_all_cpus(); 565 mutex_exit(&cpu_lock); 566 } 567 568 /* 569 * Redistribute all interrupts 570 * 571 * This function redistributes all interrupting devices, running the 572 * parent callback functions for each node. 573 */ 574 void 575 intr_redist_all_cpus(void) 576 { 577 struct cpu *cp; 578 struct intr_dist *iptr; 579 int32_t weight, max_weight; 580 581 ASSERT(MUTEX_HELD(&cpu_lock)); 582 mutex_enter(&intr_dist_lock); 583 584 /* 585 * zero cpu_intr_weight on all cpus - it is safe to traverse 586 * cpu_list since we hold cpu_lock. 587 */ 588 cp = cpu_list; 589 do { 590 cp->cpu_intr_weight = 0; 591 } while ((cp = cp->cpu_next) != cpu_list); 592 593 /* 594 * Assume that this redistribution may encounter a device weight 595 * via driver.conf tuning of "ddi-intr-weight" that is at most 596 * intr_dist_weight_maxfactor times larger. 
597 */ 598 max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor; 599 if (max_weight > intr_dist_weight_maxmax) 600 max_weight = intr_dist_weight_maxmax; 601 intr_dist_weight_max = 1; 602 603 INTR_DEBUG((CE_CONT, "intr_dist: " 604 "intr_redist_all_cpus: %d-0\n", max_weight)); 605 606 /* 607 * Redistribute weighted, from heavy to light. The callback that 608 * specifies a weight equal to weight_max should redirect all 609 * interrupts of weight weight_max or greater [weight_max, inf.). 610 * Interrupts of lesser weight should be processed on the call with 611 * the matching weight. This allows all the heaver weight interrupts 612 * on all weighted busses (multiple pci busses) to be redirected prior 613 * to any lesser weight interrupts. 614 */ 615 for (weight = max_weight; weight >= 0; weight--) 616 for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next) 617 ((void (*)(void *, int32_t, int32_t))iptr->func) 618 (iptr->arg, max_weight, weight); 619 620 /* redistribute normal (non-weighted) interrupts */ 621 for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next) 622 ((void (*)(void *))iptr->func)(iptr->arg); 623 mutex_exit(&intr_dist_lock); 624 } 625 626 void 627 intr_redist_all_cpus_shutdown(void) 628 { 629 intr_policy = INTR_CURRENT_CPU; 630 intr_redist_all_cpus(); 631 } 632 633 /* 634 * Determine what CPU to target, based on interrupt policy. 635 * 636 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and 637 * advance through interrupt enabled cpus (round-robin). 638 * 639 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest 640 * cpu_intr_weight, round robin when all equal. 641 * 642 * Weighted interrupt distribution provides two things: "spread" of weight 643 * (associated with algorithm itself) and "isolation" (associated with a 644 * particular device weight). 
 *	A redistribution is what provides optimal "isolation" of heavy weight
 *	interrupts; optimal "spread" of weight (relative to what came before)
 *	is always occurring.
 *
 *	An interrupt weight is a subjective number that represents the
 *	percentage of a CPU required to service a device's interrupts: the
 *	default weight is 0% (however the algorithm still maintains
 *	round-robin), a network interface controller (NIC) may have a large
 *	weight (35%).  Interrupt weight only has meaning relative to the
 *	interrupt weight of other devices: a CPU can be weighted more than
 *	100%, and a single device might consume more than 100% of a CPU.
 *
 *	A coarse interrupt weight can be defined by the parent nexus driver
 *	based on bus specific information, like pci class codes.  A nexus
 *	driver that supports device interrupt weighting for its children
 *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
 *	and removes the weight of a device from the CPU that an interrupt
 *	is directed at.  The quality of initialization improves when the
 *	device interrupt weights more accurately reflect actual run-time
 *	weights, and as the assignments are ordered from heavy to light.
 *
 *	The implementation also supports interrupt weight being specified in
 *	driver.conf files via the property "ddi-intr-weight", which takes
 *	precedence over the nexus supplied weight.  This support is added to
 *	permit possible tweaking in the product in response to customer
 *	problems.  This is not a formal or committed interface.
 *
 *	While a weighted approach chooses the CPU providing the best spread
 *	given past weights, less than optimal isolation can result in cases
 *	where heavy weight devices show up last.  The nexus driver's interrupt
 *	redistribution logic should use intr_dist_add/rem_weighted so that
 *	interrupts can be redistributed heavy first for optimal isolation.
676 */ 677 uint32_t 678 intr_dist_cpuid(void) 679 { 680 static struct cpu *curr_cpu; 681 struct cpu *start_cpu; 682 struct cpu *new_cpu; 683 struct cpu *cp; 684 int cpuid = -1; 685 686 /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */ 687 mutex_enter(&intr_dist_cpu_lock); 688 689 switch (intr_policy) { 690 case INTR_CURRENT_CPU: 691 cpuid = CPU->cpu_id; 692 break; 693 694 case INTR_BOOT_CPU: 695 panic("INTR_BOOT_CPU no longer supported."); 696 /*NOTREACHED*/ 697 698 case INTR_FLAT_DIST: 699 case INTR_WEIGHTED_DIST: 700 default: 701 /* 702 * Ensure that curr_cpu is valid - cpu_next will be NULL if 703 * the cpu has been deleted (cpu structs are never freed). 704 */ 705 if (curr_cpu == NULL || curr_cpu->cpu_next == NULL) 706 curr_cpu = CPU; 707 708 /* 709 * Advance to online CPU after curr_cpu (round-robin). For 710 * INTR_WEIGHTED_DIST we choose the cpu with the lightest 711 * weight. For a nexus that does not support weight the 712 * default weight of zero is used. We degrade to round-robin 713 * behavior among equal weightes. The default weight is zero 714 * and round-robin behavior continues. 715 * 716 * Disable preemption while traversing cpu_next_onln to 717 * ensure the list does not change. This works because 718 * modifiers of this list and other lists in a struct cpu 719 * call pause_cpus() before making changes. 
720 */ 721 kpreempt_disable(); 722 cp = start_cpu = curr_cpu->cpu_next_onln; 723 new_cpu = NULL; 724 do { 725 /* Skip CPUs with interrupts disabled */ 726 if ((cp->cpu_flags & CPU_ENABLE) == 0) 727 continue; 728 729 if (intr_policy == INTR_FLAT_DIST) { 730 /* select CPU */ 731 new_cpu = cp; 732 break; 733 } else if ((new_cpu == NULL) || 734 (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) { 735 /* Choose if lighter weight */ 736 new_cpu = cp; 737 } 738 } while ((cp = cp->cpu_next_onln) != start_cpu); 739 ASSERT(new_cpu); 740 cpuid = new_cpu->cpu_id; 741 742 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: " 743 "targeted\n", cpuid, new_cpu->cpu_intr_weight)); 744 745 /* update static pointer for next round-robin */ 746 curr_cpu = new_cpu; 747 kpreempt_enable(); 748 break; 749 } 750 mutex_exit(&intr_dist_cpu_lock); 751 return (cpuid); 752 } 753 754 /* 755 * Add or remove the the weight of a device from a CPUs interrupt weight. 756 * 757 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for 758 * their children to improve the overall quality of interrupt initialization. 759 * 760 * If a nexues shares the CPU returned by a single intr_dist_cpuid() call 761 * among multiple devices (sharing ino) then the nexus should call 762 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices 763 * that share must specify the same cpuid. 764 * 765 * If a nexus driver is unable to determine the cpu at remove_intr time 766 * for some of its interrupts, then it should not call add_device_weight - 767 * intr_dist_cpuid will still provide round-robin. 768 * 769 * An established device weight (from dev_info node) takes precedence over 770 * the weight passed in. If a device weight is not already established 771 * then the passed in nexus weight is established. 
772 */ 773 void 774 intr_dist_cpuid_add_device_weight(uint32_t cpuid, 775 dev_info_t *dip, int32_t nweight) 776 { 777 int32_t eweight; 778 779 /* 780 * For non-weighted policy everything has weight of zero (and we get 781 * round-robin distribution from intr_dist_cpuid). 782 * NB: intr_policy is limited to this file. A weighted nexus driver is 783 * calls this rouitne even if intr_policy has been patched to 784 * INTR_FLAG_DIST. 785 */ 786 ASSERT(dip); 787 if (intr_policy != INTR_WEIGHTED_DIST) 788 return; 789 790 eweight = i_ddi_get_intr_weight(dip); 791 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for " 792 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, 793 nweight, eweight, ddi_driver_name(ddi_get_parent(dip)), 794 ddi_get_instance(ddi_get_parent(dip)), 795 ddi_driver_name(dip), ddi_get_instance(dip))); 796 797 /* if no establish weight, establish nexus weight */ 798 if (eweight < 0) { 799 if (nweight > 0) 800 (void) i_ddi_set_intr_weight(dip, nweight); 801 else 802 nweight = 0; 803 } else 804 nweight = eweight; /* use established weight */ 805 806 /* Establish exclusion for cpu_intr_weight manipulation */ 807 mutex_enter(&intr_dist_cpu_lock); 808 cpu[cpuid]->cpu_intr_weight += nweight; 809 810 /* update intr_dist_weight_max */ 811 if (nweight > intr_dist_weight_max) 812 intr_dist_weight_max = nweight; 813 mutex_exit(&intr_dist_cpu_lock); 814 } 815 816 void 817 intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip) 818 { 819 struct cpu *cp; 820 int32_t weight; 821 822 ASSERT(dip); 823 if (intr_policy != INTR_WEIGHTED_DIST) 824 return; 825 826 /* remove weight of device from cpu */ 827 weight = i_ddi_get_intr_weight(dip); 828 if (weight < 0) 829 weight = 0; 830 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for " 831 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight, 832 ddi_driver_name(ddi_get_parent(dip)), 833 ddi_get_instance(ddi_get_parent(dip)), 834 ddi_driver_name(dip), ddi_get_instance(dip))); 835 836 
/* Establish exclusion for cpu_intr_weight manipulation */ 837 mutex_enter(&intr_dist_cpu_lock); 838 cp = cpu[cpuid]; 839 cp->cpu_intr_weight -= weight; 840 if (cp->cpu_intr_weight < 0) 841 cp->cpu_intr_weight = 0; /* sanity */ 842 mutex_exit(&intr_dist_cpu_lock); 843 } 844 845 ulong_t 846 create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1) 847 { 848 uint64_t inum; 849 850 inum = add_softintr(pil, func, arg1, SOFTINT_MT); 851 return ((ulong_t)inum); 852 } 853 854 void 855 invoke_softint(processorid_t cpuid, ulong_t hdl) 856 { 857 uint64_t inum = hdl; 858 859 if (cpuid == CPU->cpu_id) 860 setsoftint(inum); 861 else 862 xt_one(cpuid, setsoftint_tl1, inum, 0); 863 } 864 865 void 866 remove_softint(ulong_t hdl) 867 { 868 uint64_t inum = hdl; 869 870 (void) rem_softintr(inum); 871 } 872 873 void 874 sync_softint(cpuset_t set) 875 { 876 xt_sync(set); 877 } 878