1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/sysmacros.h> 29 #include <sys/stack.h> 30 #include <sys/cpuvar.h> 31 #include <sys/ivintr.h> 32 #include <sys/intreg.h> 33 #include <sys/membar.h> 34 #include <sys/kmem.h> 35 #include <sys/intr.h> 36 #include <sys/sunddi.h> 37 #include <sys/sunndi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/privregs.h> 40 #include <sys/systm.h> 41 #include <sys/archsystm.h> 42 #include <sys/machsystm.h> 43 #include <sys/x_call.h> 44 #include <vm/seg_kp.h> 45 #include <sys/debug.h> 46 #include <sys/cyclic.h> 47 #include <sys/kdi_impl.h> 48 #include <sys/ddi_timer.h> 49 50 #include <sys/cpu_sgnblk_defs.h> 51 52 /* Global locks which protect the interrupt distribution lists */ 53 static kmutex_t intr_dist_lock; 54 static kmutex_t intr_dist_cpu_lock; 55 56 /* Head of the interrupt distribution lists */ 57 static struct intr_dist *intr_dist_head = NULL; 58 static struct intr_dist *intr_dist_whead = NULL; 59 60 static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt 
numbers */ 61 uint64_t *siron_cpu_inum = NULL; 62 uint64_t siron_poke_cpu_inum; 63 static int siron_cpu_setup(cpu_setup_t, int, void *); 64 extern uint_t softlevel1(); 65 66 static uint64_t siron1_inum; /* backward compatibility */ 67 uint64_t poke_cpu_inum; 68 uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2); 69 uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2); 70 71 /* 72 * Note:- 73 * siron_pending was originally created to prevent a resource over consumption 74 * bug in setsoftint(exhaustion of interrupt pool free list). 75 * It's original intention is obsolete with the use of iv_pending in 76 * setsoftint. However, siron_pending stayed around, acting as a second 77 * gatekeeper preventing soft interrupts from being queued. In this capacity, 78 * it can lead to hangs on MP systems, where due to global visibility issues 79 * it can end up set while iv_pending is reset, preventing soft interrupts from 80 * ever being processed. In addition to its gatekeeper role, init_intr also 81 * uses it to flag the situation where siron() was called before siron_inum has 82 * been defined. 83 * 84 * siron() does not need an extra gatekeeper; any cpu that wishes should be 85 * allowed to queue a soft interrupt. It is softint()'s job to ensure 86 * correct handling of the queues. Therefore, siron_pending has been 87 * stripped of its gatekeeper task, retaining only its intr_init job, where 88 * it indicates that there is a pending need to call siron(). 
89 */ 90 static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */ 91 static int siron1_pending; /* backward compatibility */ 92 93 int intr_policy = INTR_WEIGHTED_DIST; /* interrupt distribution policy */ 94 int intr_dist_debug = 0; 95 int32_t intr_dist_weight_max = 1; 96 int32_t intr_dist_weight_maxmax = 1000; 97 int intr_dist_weight_maxfactor = 2; 98 #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args 99 100 /* 101 * intr_init() - Interrupt initialization 102 * Initialize the system's interrupt vector table. 103 */ 104 void 105 intr_init(cpu_t *cp) 106 { 107 int i; 108 extern uint_t softlevel1(); 109 110 init_ivintr(); 111 REGISTER_BBUS_INTR(); 112 113 /* 114 * Register these software interrupts for ddi timer. 115 * Software interrupts up to the level 10 are supported. 116 */ 117 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) { 118 siron_inum[i-1] = add_softintr(i, (softintrfunc)timer_softintr, 119 (caddr_t)(uintptr_t)(i), SOFTINT_ST); 120 } 121 122 siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST); 123 poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT); 124 siron_poke_cpu_inum = add_softintr(PIL_13, 125 siron_poke_cpu_intr, 0, SOFTINT_MT); 126 cp->cpu_m.poke_cpu_outstanding = B_FALSE; 127 128 mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL); 129 mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL); 130 131 /* 132 * A soft interrupt may have been requested prior to the initialization 133 * of soft interrupts. Soft interrupts can't be dispatched until after 134 * init_intr(), so we have to wait until now before we can dispatch the 135 * pending soft interrupt (if any). 
136 */ 137 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) { 138 if (siron_pending[i-1]) { 139 siron_pending[i-1] = 0; 140 sir_on(i); 141 } 142 } 143 if (siron1_pending) { 144 siron1_pending = 0; 145 siron(); 146 } 147 } 148 149 /* 150 * poke_cpu_intr - fall through when poke_cpu calls 151 */ 152 /* ARGSUSED */ 153 uint_t 154 poke_cpu_intr(caddr_t arg1, caddr_t arg2) 155 { 156 CPU->cpu_m.poke_cpu_outstanding = B_FALSE; 157 membar_stld_stst(); 158 return (1); 159 } 160 161 /* 162 * Trigger software interrupts dedicated to ddi timer. 163 */ 164 void 165 sir_on(int level) 166 { 167 ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10); 168 if (siron_inum[level-1]) 169 setsoftint(siron_inum[level-1]); 170 else 171 siron_pending[level-1] = 1; 172 } 173 174 /* 175 * kmdb uses siron (and thus setsoftint) while the world is stopped in order to 176 * inform its driver component that there's work to be done. We need to keep 177 * DTrace from instrumenting kmdb's siron and setsoftint. We duplicate siron, 178 * giving kmdb's version a kdi_ prefix to keep DTrace at bay. The 179 * implementation of setsoftint is complicated enough that we don't want to 180 * duplicate it, but at the same time we don't want to preclude tracing either. 181 * The meat of setsoftint() therefore goes into kdi_setsoftint, with 182 * setsoftint() implemented as a wrapper. This allows tracing, while still 183 * providing a way for kmdb to sneak in unmolested. 184 */ 185 void 186 kdi_siron(void) 187 { 188 if (siron1_inum != 0) 189 kdi_setsoftint(siron1_inum); 190 else 191 siron1_pending = 1; 192 } 193 194 void 195 setsoftint(uint64_t inum) 196 { 197 kdi_setsoftint(inum); 198 } 199 200 /* 201 * Generates softlevel1 interrupt on current CPU if it 202 * is not pending already. 203 */ 204 void 205 siron(void) 206 { 207 uint64_t inum; 208 209 if (siron1_inum != 0) { 210 /* 211 * Once siron_cpu_inum has been allocated, we can 212 * use per-CPU siron inum. 
213 */ 214 if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0) 215 inum = siron_cpu_inum[CPU->cpu_id]; 216 else 217 inum = siron1_inum; 218 219 setsoftint(inum); 220 } else 221 siron1_pending = 1; 222 } 223 224 225 static void 226 siron_init(void) 227 { 228 /* 229 * We just allocate memory for per-cpu siron right now. Rest of 230 * the work is done when CPU is configured. 231 */ 232 siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP); 233 } 234 235 /* 236 * This routine creates per-CPU siron inum for CPUs which are 237 * configured during boot. 238 */ 239 void 240 siron_mp_init() 241 { 242 cpu_t *c; 243 244 /* 245 * Get the memory for per-CPU siron inums 246 */ 247 siron_init(); 248 249 mutex_enter(&cpu_lock); 250 c = cpu_list; 251 do { 252 (void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL); 253 } while ((c = c->cpu_next) != cpu_list); 254 255 register_cpu_setup_func(siron_cpu_setup, NULL); 256 mutex_exit(&cpu_lock); 257 } 258 259 /* 260 * siron_poke_cpu_intr - cross-call handler. 261 */ 262 /* ARGSUSED */ 263 uint_t 264 siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2) 265 { 266 /* generate level1 softint */ 267 siron(); 268 return (1); 269 } 270 271 /* 272 * This routine generates a cross-call on target CPU(s). 273 */ 274 void 275 siron_poke_cpu(cpuset_t poke) 276 { 277 int cpuid = CPU->cpu_id; 278 279 if (CPU_IN_SET(poke, cpuid)) { 280 siron(); 281 CPUSET_DEL(poke, cpuid); 282 if (CPUSET_ISNULL(poke)) 283 return; 284 } 285 286 xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0); 287 } 288 289 /* 290 * This callback function allows us to create per-CPU siron inum. 
291 */ 292 /* ARGSUSED */ 293 static int 294 siron_cpu_setup(cpu_setup_t what, int id, void *arg) 295 { 296 cpu_t *cp = cpu[id]; 297 298 ASSERT(MUTEX_HELD(&cpu_lock)); 299 ASSERT(cp != NULL); 300 301 switch (what) { 302 case CPU_CONFIG: 303 siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1, 304 (softintrfunc)softlevel1, 0, SOFTINT_ST); 305 break; 306 case CPU_UNCONFIG: 307 (void) rem_softintr(siron_cpu_inum[cp->cpu_id]); 308 siron_cpu_inum[cp->cpu_id] = 0; 309 break; 310 default: 311 break; 312 } 313 314 return (0); 315 } 316 317 /* 318 * no_ivintr() 319 * called by setvecint_tl1() through sys_trap() 320 * vector interrupt received but not valid or not 321 * registered in intr_vec_table 322 * considered as a spurious mondo interrupt 323 */ 324 /* ARGSUSED */ 325 void 326 no_ivintr(struct regs *rp, int inum, int pil) 327 { 328 cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x", 329 inum, pil); 330 331 #ifdef DEBUG_VEC_INTR 332 prom_enter_mon(); 333 #endif /* DEBUG_VEC_INTR */ 334 } 335 336 void 337 intr_dequeue_req(uint_t pil, uint64_t inum) 338 { 339 intr_vec_t *iv, *next, *prev; 340 struct machcpu *mcpu; 341 uint32_t clr; 342 processorid_t cpu_id; 343 extern uint_t getpstate(void); 344 345 ASSERT((getpstate() & PSTATE_IE) == 0); 346 347 mcpu = &CPU->cpu_m; 348 cpu_id = CPU->cpu_id; 349 350 iv = (intr_vec_t *)inum; 351 prev = NULL; 352 next = mcpu->intr_head[pil]; 353 354 /* Find a matching entry in the list */ 355 while (next != NULL) { 356 if (next == iv) 357 break; 358 prev = next; 359 next = IV_GET_PIL_NEXT(next, cpu_id); 360 } 361 362 if (next != NULL) { 363 intr_vec_t *next_iv = IV_GET_PIL_NEXT(next, cpu_id); 364 365 /* Remove entry from list */ 366 if (prev != NULL) 367 IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */ 368 else 369 mcpu->intr_head[pil] = next_iv; /* head */ 370 371 if (next_iv == NULL) 372 mcpu->intr_tail[pil] = prev; /* tail */ 373 } 374 375 /* Clear pending interrupts at this level if the list is empty */ 376 if 
(mcpu->intr_head[pil] == NULL) { 377 clr = 1 << pil; 378 if (pil == PIL_14) 379 clr |= (TICK_INT_MASK | STICK_INT_MASK); 380 wr_clr_softint(clr); 381 } 382 } 383 384 385 /* 386 * Send a directed interrupt of specified interrupt number id to a cpu. 387 */ 388 void 389 send_dirint( 390 int cpuix, /* cpu to be interrupted */ 391 int intr_id) /* interrupt number id */ 392 { 393 xt_one(cpuix, setsoftint_tl1, intr_id, 0); 394 } 395 396 /* 397 * Take the specified CPU out of participation in interrupts. 398 * Called by p_online(2) when a processor is being taken off-line. 399 * This allows interrupt threads being handled on the processor to 400 * complete before the processor is idled. 401 */ 402 int 403 cpu_disable_intr(struct cpu *cp) 404 { 405 ASSERT(MUTEX_HELD(&cpu_lock)); 406 407 /* 408 * Turn off the CPU_ENABLE flag before calling the redistribution 409 * function, since it checks for this in the cpu flags. 410 */ 411 cp->cpu_flags &= ~CPU_ENABLE; 412 413 intr_redist_all_cpus(); 414 415 return (0); 416 } 417 418 /* 419 * Allow the specified CPU to participate in interrupts. 420 * Called by p_online(2) if a processor could not be taken off-line 421 * because of bound threads, in order to resume processing interrupts. 422 * Also called after starting a processor. 423 */ 424 void 425 cpu_enable_intr(struct cpu *cp) 426 { 427 ASSERT(MUTEX_HELD(&cpu_lock)); 428 429 cp->cpu_flags |= CPU_ENABLE; 430 431 intr_redist_all_cpus(); 432 } 433 434 /* 435 * Add function to callback list for intr_redist_all_cpus. We keep two lists, 436 * one for weighted callbacks and one for normal callbacks. Weighted callbacks 437 * are issued to redirect interrupts of a specified weight, from heavy to 438 * light. This allows all the interrupts of a given weight to be redistributed 439 * for all weighted nexus drivers prior to those of less weight. 
440 */ 441 static void 442 intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg) 443 { 444 struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP); 445 struct intr_dist *iptr; 446 struct intr_dist **pptr; 447 448 ASSERT(func); 449 new->func = func; 450 new->arg = arg; 451 new->next = NULL; 452 453 /* Add to tail so that redistribution occurs in original order. */ 454 mutex_enter(&intr_dist_lock); 455 for (iptr = *phead, pptr = phead; iptr != NULL; 456 pptr = &iptr->next, iptr = iptr->next) { 457 /* check for problems as we locate the tail */ 458 if ((iptr->func == func) && (iptr->arg == arg)) { 459 cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate"); 460 /*NOTREACHED*/ 461 } 462 } 463 *pptr = new; 464 465 mutex_exit(&intr_dist_lock); 466 } 467 468 void 469 intr_dist_add(void (*func)(void *), void *arg) 470 { 471 intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg); 472 } 473 474 void 475 intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 476 { 477 intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg); 478 } 479 480 /* 481 * Search for the interrupt distribution structure with the specified 482 * mondo vec reg in the interrupt distribution list. If a match is found, 483 * then delete the entry from the list. The caller is responsible for 484 * modifying the mondo vector registers. 
485 */ 486 static void 487 intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg) 488 { 489 struct intr_dist *iptr; 490 struct intr_dist **vect; 491 492 mutex_enter(&intr_dist_lock); 493 for (iptr = *headp, vect = headp; 494 iptr != NULL; vect = &iptr->next, iptr = iptr->next) { 495 if ((iptr->func == func) && (iptr->arg == arg)) { 496 *vect = iptr->next; 497 kmem_free(iptr, sizeof (struct intr_dist)); 498 mutex_exit(&intr_dist_lock); 499 return; 500 } 501 } 502 503 if (!panicstr) 504 cmn_err(CE_PANIC, "intr_dist_rem_list: not found"); 505 mutex_exit(&intr_dist_lock); 506 } 507 508 void 509 intr_dist_rem(void (*func)(void *), void *arg) 510 { 511 intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg); 512 } 513 514 void 515 intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 516 { 517 intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg); 518 } 519 520 /* 521 * Initiate interrupt redistribution. Redistribution improves the isolation 522 * associated with interrupt weights by ordering operations from heavy weight 523 * to light weight. When a CPUs orientation changes relative to interrupts, 524 * there is *always* a redistribution to accommodate this change (call to 525 * intr_redist_all_cpus()). As devices (not CPUs) attach/detach it is possible 526 * that a redistribution could improve the quality of an initialization. For 527 * example, if you are not using a NIC it may not be attached with s10 (devfs). 528 * If you then configure the NIC (ifconfig), this may cause the NIC to attach 529 * and plumb interrupts. The CPU assignment for the NIC's interrupts is 530 * occurring late, so optimal "isolation" relative to weight is not occurring. 531 * The same applies to detach, although in this case doing the redistribution 532 * might improve "spread" for medium weight devices since the "isolation" of 533 * a higher weight device may no longer be present. 
534 * 535 * NB: We should provide a utility to trigger redistribution (ala "intradm -r"). 536 * 537 * NB: There is risk associated with automatically triggering execution of the 538 * redistribution code at arbitrary times. The risk comes from the fact that 539 * there is a lot of low-level hardware interaction associated with a 540 * redistribution. At some point we may want this code to perform automatic 541 * redistribution (redistribution thread; trigger timeout when add/remove 542 * weight delta is large enough, and call cv_signal from timeout - causing 543 * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too 544 * risky at this time. 545 */ 546 void 547 i_ddi_intr_redist_all_cpus() 548 { 549 mutex_enter(&cpu_lock); 550 INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n")); 551 intr_redist_all_cpus(); 552 mutex_exit(&cpu_lock); 553 } 554 555 /* 556 * Redistribute all interrupts 557 * 558 * This function redistributes all interrupting devices, running the 559 * parent callback functions for each node. 560 */ 561 void 562 intr_redist_all_cpus(void) 563 { 564 struct cpu *cp; 565 struct intr_dist *iptr; 566 int32_t weight, max_weight; 567 568 ASSERT(MUTEX_HELD(&cpu_lock)); 569 mutex_enter(&intr_dist_lock); 570 571 /* 572 * zero cpu_intr_weight on all cpus - it is safe to traverse 573 * cpu_list since we hold cpu_lock. 574 */ 575 cp = cpu_list; 576 do { 577 cp->cpu_intr_weight = 0; 578 } while ((cp = cp->cpu_next) != cpu_list); 579 580 /* 581 * Assume that this redistribution may encounter a device weight 582 * via driver.conf tuning of "ddi-intr-weight" that is at most 583 * intr_dist_weight_maxfactor times larger. 
584 */ 585 max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor; 586 if (max_weight > intr_dist_weight_maxmax) 587 max_weight = intr_dist_weight_maxmax; 588 intr_dist_weight_max = 1; 589 590 INTR_DEBUG((CE_CONT, "intr_dist: " 591 "intr_redist_all_cpus: %d-0\n", max_weight)); 592 593 /* 594 * Redistribute weighted, from heavy to light. The callback that 595 * specifies a weight equal to weight_max should redirect all 596 * interrupts of weight weight_max or greater [weight_max, inf.). 597 * Interrupts of lesser weight should be processed on the call with 598 * the matching weight. This allows all the heaver weight interrupts 599 * on all weighted busses (multiple pci busses) to be redirected prior 600 * to any lesser weight interrupts. 601 */ 602 for (weight = max_weight; weight >= 0; weight--) 603 for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next) 604 ((void (*)(void *, int32_t, int32_t))iptr->func) 605 (iptr->arg, max_weight, weight); 606 607 /* redistribute normal (non-weighted) interrupts */ 608 for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next) 609 ((void (*)(void *))iptr->func)(iptr->arg); 610 mutex_exit(&intr_dist_lock); 611 } 612 613 void 614 intr_redist_all_cpus_shutdown(void) 615 { 616 intr_policy = INTR_CURRENT_CPU; 617 intr_redist_all_cpus(); 618 } 619 620 /* 621 * Determine what CPU to target, based on interrupt policy. 622 * 623 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and 624 * advance through interrupt enabled cpus (round-robin). 625 * 626 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest 627 * cpu_intr_weight, round robin when all equal. 628 * 629 * Weighted interrupt distribution provides two things: "spread" of weight 630 * (associated with algorithm itself) and "isolation" (associated with a 631 * particular device weight). 
A redistribution is what provides optimal
 *	"isolation" of heavy weight interrupts; optimal "spread" of weight
 *	(relative to what came before) is always occurring.
 *
 *	An interrupt weight is a subjective number that represents the
 *	percentage of a CPU required to service a device's interrupts: the
 *	default weight is 0% (however the algorithm still maintains
 *	round-robin), a network interface controller (NIC) may have a large
 *	weight (35%).  Interrupt weight only has meaning relative to the
 *	interrupt weight of other devices: a CPU can be weighted more than
 *	100%, and a single device might consume more than 100% of a CPU.
 *
 *	A coarse interrupt weight can be defined by the parent nexus driver
 *	based on bus specific information, like pci class codes.  A nexus
 *	driver that supports device interrupt weighting for its children
 *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
 *	and removes the weight of a device from the CPU that an interrupt
 *	is directed at.  The quality of initialization improves when the
 *	device interrupt weights more accurately reflect actual run-time
 *	weights, and as the assignments are ordered from heavy to light.
 *
 *	The implementation also supports interrupt weight being specified in
 *	driver.conf files via the property "ddi-intr-weight", which takes
 *	precedence over the nexus supplied weight.  This support is added to
 *	permit possible tweaking in the product in response to customer
 *	problems.  This is not a formal or committed interface.
 *
 *	While a weighted approach chooses the CPU providing the best spread
 *	given past weights, less than optimal isolation can result in cases
 *	where heavy weight devices show up last.  The nexus driver's interrupt
 *	redistribution logic should use intr_dist_add/rem_weighted so that
 *	interrupts can be redistributed heavy first for optimal isolation.
663 */ 664 uint32_t 665 intr_dist_cpuid(void) 666 { 667 static struct cpu *curr_cpu; 668 struct cpu *start_cpu; 669 struct cpu *new_cpu; 670 struct cpu *cp; 671 int cpuid = -1; 672 673 /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */ 674 mutex_enter(&intr_dist_cpu_lock); 675 676 switch (intr_policy) { 677 case INTR_CURRENT_CPU: 678 cpuid = CPU->cpu_id; 679 break; 680 681 case INTR_BOOT_CPU: 682 panic("INTR_BOOT_CPU no longer supported."); 683 /*NOTREACHED*/ 684 685 case INTR_FLAT_DIST: 686 case INTR_WEIGHTED_DIST: 687 default: 688 /* 689 * Ensure that curr_cpu is valid - cpu_next will be NULL if 690 * the cpu has been deleted (cpu structs are never freed). 691 */ 692 if (curr_cpu == NULL || curr_cpu->cpu_next == NULL) 693 curr_cpu = CPU; 694 695 /* 696 * Advance to online CPU after curr_cpu (round-robin). For 697 * INTR_WEIGHTED_DIST we choose the cpu with the lightest 698 * weight. For a nexus that does not support weight the 699 * default weight of zero is used. We degrade to round-robin 700 * behavior among equal weightes. The default weight is zero 701 * and round-robin behavior continues. 702 * 703 * Disable preemption while traversing cpu_next_onln to 704 * ensure the list does not change. This works because 705 * modifiers of this list and other lists in a struct cpu 706 * call pause_cpus() before making changes. 
707 */ 708 kpreempt_disable(); 709 cp = start_cpu = curr_cpu->cpu_next_onln; 710 new_cpu = NULL; 711 do { 712 /* Skip CPUs with interrupts disabled */ 713 if ((cp->cpu_flags & CPU_ENABLE) == 0) 714 continue; 715 716 if (intr_policy == INTR_FLAT_DIST) { 717 /* select CPU */ 718 new_cpu = cp; 719 break; 720 } else if ((new_cpu == NULL) || 721 (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) { 722 /* Choose if lighter weight */ 723 new_cpu = cp; 724 } 725 } while ((cp = cp->cpu_next_onln) != start_cpu); 726 ASSERT(new_cpu); 727 cpuid = new_cpu->cpu_id; 728 729 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: " 730 "targeted\n", cpuid, new_cpu->cpu_intr_weight)); 731 732 /* update static pointer for next round-robin */ 733 curr_cpu = new_cpu; 734 kpreempt_enable(); 735 break; 736 } 737 mutex_exit(&intr_dist_cpu_lock); 738 return (cpuid); 739 } 740 741 /* 742 * Add or remove the the weight of a device from a CPUs interrupt weight. 743 * 744 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for 745 * their children to improve the overall quality of interrupt initialization. 746 * 747 * If a nexues shares the CPU returned by a single intr_dist_cpuid() call 748 * among multiple devices (sharing ino) then the nexus should call 749 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices 750 * that share must specify the same cpuid. 751 * 752 * If a nexus driver is unable to determine the cpu at remove_intr time 753 * for some of its interrupts, then it should not call add_device_weight - 754 * intr_dist_cpuid will still provide round-robin. 755 * 756 * An established device weight (from dev_info node) takes precedence over 757 * the weight passed in. If a device weight is not already established 758 * then the passed in nexus weight is established. 
759 */ 760 void 761 intr_dist_cpuid_add_device_weight(uint32_t cpuid, 762 dev_info_t *dip, int32_t nweight) 763 { 764 int32_t eweight; 765 766 /* 767 * For non-weighted policy everything has weight of zero (and we get 768 * round-robin distribution from intr_dist_cpuid). 769 * NB: intr_policy is limited to this file. A weighted nexus driver is 770 * calls this rouitne even if intr_policy has been patched to 771 * INTR_FLAG_DIST. 772 */ 773 ASSERT(dip); 774 if (intr_policy != INTR_WEIGHTED_DIST) 775 return; 776 777 eweight = i_ddi_get_intr_weight(dip); 778 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for " 779 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, 780 nweight, eweight, ddi_driver_name(ddi_get_parent(dip)), 781 ddi_get_instance(ddi_get_parent(dip)), 782 ddi_driver_name(dip), ddi_get_instance(dip))); 783 784 /* if no establish weight, establish nexus weight */ 785 if (eweight < 0) { 786 if (nweight > 0) 787 (void) i_ddi_set_intr_weight(dip, nweight); 788 else 789 nweight = 0; 790 } else 791 nweight = eweight; /* use established weight */ 792 793 /* Establish exclusion for cpu_intr_weight manipulation */ 794 mutex_enter(&intr_dist_cpu_lock); 795 cpu[cpuid]->cpu_intr_weight += nweight; 796 797 /* update intr_dist_weight_max */ 798 if (nweight > intr_dist_weight_max) 799 intr_dist_weight_max = nweight; 800 mutex_exit(&intr_dist_cpu_lock); 801 } 802 803 void 804 intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip) 805 { 806 struct cpu *cp; 807 int32_t weight; 808 809 ASSERT(dip); 810 if (intr_policy != INTR_WEIGHTED_DIST) 811 return; 812 813 /* remove weight of device from cpu */ 814 weight = i_ddi_get_intr_weight(dip); 815 if (weight < 0) 816 weight = 0; 817 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for " 818 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight, 819 ddi_driver_name(ddi_get_parent(dip)), 820 ddi_get_instance(ddi_get_parent(dip)), 821 ddi_driver_name(dip), ddi_get_instance(dip))); 822 823 
/* Establish exclusion for cpu_intr_weight manipulation */ 824 mutex_enter(&intr_dist_cpu_lock); 825 cp = cpu[cpuid]; 826 cp->cpu_intr_weight -= weight; 827 if (cp->cpu_intr_weight < 0) 828 cp->cpu_intr_weight = 0; /* sanity */ 829 mutex_exit(&intr_dist_cpu_lock); 830 } 831 832 ulong_t 833 create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1) 834 { 835 uint64_t inum; 836 837 inum = add_softintr(pil, func, arg1, SOFTINT_ST); 838 return ((ulong_t)inum); 839 } 840 841 void 842 invoke_softint(processorid_t cpuid, ulong_t hdl) 843 { 844 uint64_t inum = hdl; 845 846 if (cpuid == CPU->cpu_id) 847 setsoftint(inum); 848 else 849 xt_one(cpuid, setsoftint_tl1, inum, 0); 850 } 851 852 void 853 remove_softint(ulong_t hdl) 854 { 855 uint64_t inum = hdl; 856 857 (void) rem_softintr(inum); 858 } 859 860 void 861 sync_softint(cpuset_t set) 862 { 863 xt_sync(set); 864 } 865