/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/sysmacros.h>
#include <sys/stack.h>
#include <sys/cpuvar.h>
#include <sys/ivintr.h>
#include <sys/intreg.h>
#include <sys/membar.h>
#include <sys/kmem.h>
#include <sys/intr.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cmn_err.h>
#include <sys/privregs.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <vm/seg_kp.h>
#include <sys/debug.h>
#include <sys/cyclic.h>
#include <sys/kdi_impl.h>
#include <sys/ddi_timer.h>

#include <sys/cpu_sgnblk_defs.h>

/* Global locks which protect the interrupt distribution lists */
static kmutex_t intr_dist_lock;
static kmutex_t intr_dist_cpu_lock;

/* Head of the interrupt distribution lists */
static struct intr_dist *intr_dist_head = NULL;
static struct intr_dist *intr_dist_whead = NULL;

static uint64_t siron_inum[DDI_IPL_10];	/* software interrupt numbers */
uint64_t *siron_cpu_inum = NULL;
uint64_t siron_poke_cpu_inum;
static int siron_cpu_setup(cpu_setup_t, int, void *);
extern uint_t softlevel1();

static uint64_t siron1_inum;	/* backward compatibility */
uint64_t poke_cpu_inum;
uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2);

/*
 * Variable to enable/disable printing a message when an invalid vecintr
 * is received.
 */
uint_t ignore_invalid_vecintr = 0;

/*
 * Note:
 * siron_pending was originally created to prevent a resource over-consumption
 * bug in setsoftint (exhaustion of the interrupt pool free list).
 * Its original purpose is obsolete with the use of iv_pending in
 * setsoftint.  However, siron_pending stayed around, acting as a second
 * gatekeeper preventing soft interrupts from being queued.  In this capacity,
 * it can lead to hangs on MP systems, where due to global visibility issues
 * it can end up set while iv_pending is reset, preventing soft interrupts from
 * ever being processed.  In addition to its gatekeeper role, intr_init() also
 * uses it to flag the situation where siron() was called before siron_inum has
 * been defined.
 *
 * siron() does not need an extra gatekeeper; any cpu that wishes should be
 * allowed to queue a soft interrupt.  It is softint()'s job to ensure
 * correct handling of the queues.  Therefore, siron_pending has been
 * stripped of its gatekeeper task, retaining only its intr_init() job, where
 * it indicates that there is a pending need to call siron().
 */
static int siron_pending[DDI_IPL_10];	/* software interrupt pending flags */
static int siron1_pending;		/* backward compatibility */

int intr_policy = INTR_WEIGHTED_DIST;	/* interrupt distribution policy */
int intr_dist_debug = 0;
int32_t intr_dist_weight_max = 1;
int32_t intr_dist_weight_maxmax = 1000;
int intr_dist_weight_maxfactor = 2;
#define	INTR_DEBUG(args) if (intr_dist_debug) cmn_err args

/*
 * intr_init() - Interrupt initialization
 *	Initialize the system's interrupt vector table.
 */
void
intr_init(cpu_t *cp)
{
	int i;
	extern uint_t softlevel1();

	init_ivintr();
	REGISTER_BBUS_INTR();

	/*
	 * Register these software interrupts for the ddi timer.
	 * Software interrupts up to level 10 are supported.
	 */
	for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
		siron_inum[i-1] = add_softintr(i, (softintrfunc)timer_softintr,
		    (caddr_t)(uintptr_t)(i), SOFTINT_ST);
	}

	siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
	poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
	siron_poke_cpu_inum = add_softintr(PIL_13,
	    siron_poke_cpu_intr, 0, SOFTINT_MT);
	cp->cpu_m.poke_cpu_outstanding = B_FALSE;

	mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * A soft interrupt may have been requested prior to the initialization
	 * of soft interrupts.  Soft interrupts can't be dispatched until after
	 * intr_init(), so we have to wait until now before we can dispatch the
	 * pending soft interrupts (if any).
	 */
	for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
		if (siron_pending[i-1]) {
			siron_pending[i-1] = 0;
			sir_on(i);
		}
	}
	if (siron1_pending) {
		siron1_pending = 0;
		siron();
	}
}

/*
 * poke_cpu_intr - handler for the soft interrupt posted by poke_cpu();
 * it simply clears the outstanding flag so further pokes can be sent.
 */
/* ARGSUSED */
uint_t
poke_cpu_intr(caddr_t arg1, caddr_t arg2)
{
	CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
	membar_stld_stst();
	return (1);
}

/*
 * Trigger the software interrupts dedicated to the ddi timer.
 */
void
sir_on(int level)
{
	ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10);
	if (siron_inum[level-1])
		setsoftint(siron_inum[level-1]);
	else
		siron_pending[level-1] = 1;
}

/*
 * kmdb uses siron (and thus setsoftint) while the world is stopped in order to
 * inform its driver component that there's work to be done.  We need to keep
 * DTrace from instrumenting kmdb's siron and setsoftint.  We duplicate siron,
 * giving kmdb's version a kdi_ prefix to keep DTrace at bay.  The
 * implementation of setsoftint is complicated enough that we don't want to
 * duplicate it, but at the same time we don't want to preclude tracing either.
 * The meat of setsoftint() therefore goes into kdi_setsoftint, with
 * setsoftint() implemented as a wrapper.  This allows tracing, while still
 * providing a way for kmdb to sneak in unmolested.
 */
void
kdi_siron(void)
{
	if (siron1_inum != 0)
		kdi_setsoftint(siron1_inum);
	else
		siron1_pending = 1;
}

void
setsoftint(uint64_t inum)
{
	kdi_setsoftint(inum);
}

/*
 * Generate a softlevel1 interrupt on the current CPU if one is not
 * already pending.
 */
void
siron(void)
{
	uint64_t inum;

	if (siron1_inum != 0) {
		/*
		 * Once siron_cpu_inum has been allocated, we can
		 * use the per-CPU siron inum.
		 */
		if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0)
			inum = siron_cpu_inum[CPU->cpu_id];
		else
			inum = siron1_inum;

		setsoftint(inum);
	} else
		siron1_pending = 1;
}

static void
siron_init(void)
{
	/*
	 * We just allocate memory for the per-CPU siron inums here.  The
	 * rest of the work is done when a CPU is configured.
	 */
	siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
}

/*
 * This routine creates the per-CPU siron inum for CPUs which are
 * configured during boot.
 */
void
siron_mp_init()
{
	cpu_t *c;

	/*
	 * Get the memory for the per-CPU siron inums.
	 */
	siron_init();

	mutex_enter(&cpu_lock);
	c = cpu_list;
	do {
		(void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL);
	} while ((c = c->cpu_next) != cpu_list);

	register_cpu_setup_func(siron_cpu_setup, NULL);
	mutex_exit(&cpu_lock);
}

/*
 * siron_poke_cpu_intr - cross-call handler.
 */
/* ARGSUSED */
uint_t
siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2)
{
	/* generate a level1 softint */
	siron();
	return (1);
}

/*
 * This routine generates a cross-call on the target CPU(s).
 */
void
siron_poke_cpu(cpuset_t poke)
{
	int cpuid = CPU->cpu_id;

	if (CPU_IN_SET(poke, cpuid)) {
		siron();
		CPUSET_DEL(poke, cpuid);
		if (CPUSET_ISNULL(poke))
			return;
	}

	xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0);
}

/*
 * This callback function allows us to create the per-CPU siron inum.
 */
/* ARGSUSED */
static int
siron_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	cpu_t *cp = cpu[id];

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cp != NULL);

	switch (what) {
	case CPU_CONFIG:
		siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1,
		    (softintrfunc)softlevel1, 0, SOFTINT_ST);
		break;
	case CPU_UNCONFIG:
		(void) rem_softintr(siron_cpu_inum[cp->cpu_id]);
		siron_cpu_inum[cp->cpu_id] = 0;
		break;
	default:
		break;
	}

	return (0);
}

/*
 * no_ivintr()
 *	called by setvecint_tl1() through sys_trap()
 *	vector interrupt received but not valid or not
 *	registered in intr_vec_table
 *	considered as a spurious mondo interrupt
 */
/* ARGSUSED */
void
no_ivintr(struct regs *rp, int inum, int pil)
{
	if (!ignore_invalid_vecintr)
		cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
		    inum, pil);

#ifdef DEBUG_VEC_INTR
	prom_enter_mon();
#endif /* DEBUG_VEC_INTR */
}

/*
 * Remove the interrupt vector identified by inum from this CPU's pending
 * list at the given pil, and clear the soft interrupt for that level if
 * the list becomes empty.  Called with interrupts disabled.
 */
void
intr_dequeue_req(uint_t pil, uint64_t inum)
{
	intr_vec_t *iv, *next, *prev;
	struct machcpu *mcpu;
	uint32_t clr;
	processorid_t cpu_id;
	extern uint_t getpstate(void);

	ASSERT((getpstate() & PSTATE_IE) == 0);

	mcpu = &CPU->cpu_m;
	cpu_id = CPU->cpu_id;

	iv = (intr_vec_t *)inum;
	prev = NULL;
	next = mcpu->intr_head[pil];

	/* Find a matching entry in the list */
	while (next != NULL) {
		if (next == iv)
			break;
		prev = next;
		next = IV_GET_PIL_NEXT(next, cpu_id);
	}

	if (next != NULL) {
		intr_vec_t *next_iv = IV_GET_PIL_NEXT(next, cpu_id);

		/* Remove entry from list */
		if (prev != NULL)
			IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */
		else
			mcpu->intr_head[pil] = next_iv;	/* head */

		if (next_iv == NULL)
			mcpu->intr_tail[pil] = prev;	/* tail */
	}

	/* Clear pending interrupts at this level if the list is empty */
	if (mcpu->intr_head[pil] == NULL) {
		clr = 1 << pil;
		if (pil == PIL_14)
			clr |= (TICK_INT_MASK | STICK_INT_MASK);
		wr_clr_softint(clr);
	}
}

/*
 * Send a directed interrupt of the specified interrupt number id to a cpu.
 */
void
send_dirint(
	int cpuix,		/* cpu to be interrupted */
	int intr_id)		/* interrupt number id */
{
	xt_one(cpuix, setsoftint_tl1, intr_id, 0);
}

/*
 * Take the specified CPU out of participation in interrupts.
 *	Called by p_online(2) when a processor is being taken off-line.
 *	This allows interrupt threads being handled on the processor to
 *	complete before the processor is idled.
 */
int
cpu_disable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Turn off the CPU_ENABLE flag before calling the redistribution
	 * function, since it checks for this in the cpu flags.
	 */
	cp->cpu_flags &= ~CPU_ENABLE;

	intr_redist_all_cpus();

	return (0);
}

/*
 * Allow the specified CPU to participate in interrupts.
 *	Called by p_online(2) if a processor could not be taken off-line
 *	because of bound threads, in order to resume processing interrupts.
 *	Also called after starting a processor.
 */
void
cpu_enable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cp->cpu_flags |= CPU_ENABLE;

	intr_redist_all_cpus();
}

/*
 * Add a function to the callback list for intr_redist_all_cpus().  We keep
 * two lists: one for weighted callbacks and one for normal callbacks.
 * Weighted callbacks are issued to redirect interrupts of a specified weight,
 * from heavy to light.  This allows all the interrupts of a given weight to
 * be redistributed for all weighted nexus drivers prior to those of lesser
 * weight.
 */
static void
intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
{
	struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
	struct intr_dist *iptr;
	struct intr_dist **pptr;

	ASSERT(func);
	new->func = func;
	new->arg = arg;
	new->next = NULL;

	/* Add to tail so that redistribution occurs in original order. */
	mutex_enter(&intr_dist_lock);
	for (iptr = *phead, pptr = phead; iptr != NULL;
	    pptr = &iptr->next, iptr = iptr->next) {
		/* check for problems as we locate the tail */
		if ((iptr->func == func) && (iptr->arg == arg)) {
			cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
			/*NOTREACHED*/
		}
	}
	*pptr = new;

	mutex_exit(&intr_dist_lock);
}

void
intr_dist_add(void (*func)(void *), void *arg)
{
	intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
}

void
intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
	intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
}

/*
 * Search the specified interrupt distribution list for the entry with the
 * given callback function and argument.  If a match is found, delete the
 * entry from the list.  The caller remains responsible for reprogramming
 * the mondo vector registers of any retargeted interrupts.
 */
static void
intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
{
	struct intr_dist *iptr;
	struct intr_dist **vect;

	mutex_enter(&intr_dist_lock);
	for (iptr = *headp, vect = headp;
	    iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
		if ((iptr->func == func) && (iptr->arg == arg)) {
			*vect = iptr->next;
			kmem_free(iptr, sizeof (struct intr_dist));
			mutex_exit(&intr_dist_lock);
			return;
		}
	}

	if (!panicstr)
		cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
	mutex_exit(&intr_dist_lock);
}

void
intr_dist_rem(void (*func)(void *), void *arg)
{
	intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
}

void
intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
	intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
}

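/*
 * The sketch below (kept under #if 0 so it is not compiled) illustrates how
 * a weighted nexus driver is expected to hook into the lists above: register
 * a weighted redistribution callback while attaching, and remove it again on
 * detach.  The my_nexus_* names and soft state are hypothetical and serve
 * only as an example, not as part of this file.
 */
#if 0
static void my_nexus_intr_redist(void *arg, int32_t weight_max,
    int32_t weight);

static int
my_nexus_attach_intr(void *softsp)
{
	/* Register once interrupts for this nexus can be retargeted. */
	intr_dist_add_weighted(my_nexus_intr_redist, softsp);
	return (DDI_SUCCESS);
}

static void
my_nexus_detach_intr(void *softsp)
{
	/* Must be removed before the soft state is freed. */
	intr_dist_rem_weighted(my_nexus_intr_redist, softsp);
}
#endif
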
/*
 * Initiate interrupt redistribution.  Redistribution improves the isolation
 * associated with interrupt weights by ordering operations from heavy weight
 * to light weight.  When a CPU's orientation changes relative to interrupts,
 * there is *always* a redistribution to accommodate this change (call to
 * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
 * that a redistribution could improve the quality of an initialization.  For
 * example, if you are not using a NIC it may not be attached with s10 (devfs).
 * If you then configure the NIC (ifconfig), this may cause the NIC to attach
 * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
 * occurring late, so optimal "isolation" relative to weight is not occurring.
 * The same applies to detach, although in this case doing the redistribution
 * might improve "spread" for medium weight devices since the "isolation" of
 * a higher weight device may no longer be present.
 *
 * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
 *
 * NB: There is risk associated with automatically triggering execution of the
 * redistribution code at arbitrary times.  The risk comes from the fact that
 * there is a lot of low-level hardware interaction associated with a
 * redistribution.  At some point we may want this code to perform automatic
 * redistribution (redistribution thread; trigger timeout when add/remove
 * weight delta is large enough, and call cv_signal from timeout - causing
 * the thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
 * risky at this time.
 */
void
i_ddi_intr_redist_all_cpus()
{
	mutex_enter(&cpu_lock);
	INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
	intr_redist_all_cpus();
	mutex_exit(&cpu_lock);
}

/*
 * Redistribute all interrupts
 *
 * This function redistributes all interrupting devices, running the
 * parent callback functions for each node.
 */
void
intr_redist_all_cpus(void)
{
	struct cpu *cp;
	struct intr_dist *iptr;
	int32_t weight, max_weight;

	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&intr_dist_lock);

	/*
	 * zero cpu_intr_weight on all cpus - it is safe to traverse
	 * cpu_list since we hold cpu_lock.
	 */
	cp = cpu_list;
	do {
		cp->cpu_intr_weight = 0;
	} while ((cp = cp->cpu_next) != cpu_list);

	/*
	 * Assume that this redistribution may encounter a device weight
	 * via driver.conf tuning of "ddi-intr-weight" that is at most
	 * intr_dist_weight_maxfactor times larger.
	 */
	max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
	if (max_weight > intr_dist_weight_maxmax)
		max_weight = intr_dist_weight_maxmax;
	intr_dist_weight_max = 1;

	INTR_DEBUG((CE_CONT, "intr_dist: "
	    "intr_redist_all_cpus: %d-0\n", max_weight));

	/*
	 * Redistribute weighted, from heavy to light.  The callback that
	 * specifies a weight equal to weight_max should redirect all
	 * interrupts of weight weight_max or greater [weight_max, inf.).
	 * Interrupts of lesser weight should be processed on the call with
	 * the matching weight.  This allows all the heavier weight interrupts
	 * on all weighted busses (multiple pci busses) to be redirected prior
	 * to any lesser weight interrupts.
	 */
	for (weight = max_weight; weight >= 0; weight--)
		for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
			((void (*)(void *, int32_t, int32_t))iptr->func)
			    (iptr->arg, max_weight, weight);

	/* redistribute normal (non-weighted) interrupts */
	for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
		((void (*)(void *))iptr->func)(iptr->arg);
	mutex_exit(&intr_dist_lock);
}

void
intr_redist_all_cpus_shutdown(void)
{
	intr_policy = INTR_CURRENT_CPU;
	intr_redist_all_cpus();
}

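/*
 * The sketch below (under #if 0, not compiled) shows one plausible shape for
 * the weighted callback registered via intr_dist_add_weighted(), following
 * the contract described above: on the weight == weight_max pass, retarget
 * every interrupt of that weight or heavier; on other passes, retarget only
 * interrupts whose weight matches exactly.  The my_nexus_* types and helpers
 * are hypothetical; a real nexus walks its own ino/device tables and
 * reprograms its own mondo registers.
 */
#if 0
static void
my_nexus_intr_redist(void *arg, int32_t weight_max, int32_t weight)
{
	struct my_nexus_soft *softsp = arg;
	my_nexus_ino_t *ino;
	int32_t w;

	for (ino = my_nexus_first_ino(softsp); ino != NULL;
	    ino = my_nexus_next_ino(softsp, ino)) {
		w = i_ddi_get_intr_weight(ino->ino_dip);
		if (w < 0)
			w = 0;

		/* Only handle interrupts that belong to this pass. */
		if ((weight == weight_max) ? (w < weight_max) : (w != weight))
			continue;

		/* Give the weight back to the old CPU, pick a new target. */
		intr_dist_cpuid_rem_device_weight(ino->ino_cpuid,
		    ino->ino_dip);
		ino->ino_cpuid = intr_dist_cpuid();
		intr_dist_cpuid_add_device_weight(ino->ino_cpuid,
		    ino->ino_dip, w);

		my_nexus_retarget_mondo(softsp, ino);
	}
}
#endif
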
/*
 * Determine what CPU to target, based on interrupt policy.
 *
 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
 *	advance through interrupt enabled cpus (round-robin).
 *
 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
 *	cpu_intr_weight, round robin when all equal.
 *
 * Weighted interrupt distribution provides two things: "spread" of weight
 * (associated with the algorithm itself) and "isolation" (associated with a
 * particular device weight).  A redistribution is what provides optimal
 * "isolation" of heavy weight interrupts; optimal "spread" of weight
 * (relative to what came before) is always occurring.
 *
 * An interrupt weight is a subjective number that represents the
 * percentage of a CPU required to service a device's interrupts: the
 * default weight is 0% (however the algorithm still maintains
 * round-robin), while a network interface controller (NIC) may have a large
 * weight (35%).  Interrupt weight only has meaning relative to the
 * interrupt weight of other devices: a CPU can be weighted more than
 * 100%, and a single device might consume more than 100% of a CPU.
 *
 * A coarse interrupt weight can be defined by the parent nexus driver
 * based on bus specific information, like pci class codes.  A nexus
 * driver that supports device interrupt weighting for its children
 * should call intr_dist_cpuid_add/rem_device_weight(), which adds
 * and removes the weight of a device from the CPU that an interrupt
 * is directed at.  The quality of initialization improves when the
 * device interrupt weights more accurately reflect actual run-time weights,
 * and when the assignments are ordered from heavy to light.
 *
 * The implementation also supports interrupt weight being specified in
 * driver.conf files via the property "ddi-intr-weight", which takes
 * precedence over the nexus supplied weight.  This support is added to
 * permit possible tweaking in the product in response to customer
 * problems.  This is not a formal or committed interface.
 *
 * While a weighted approach chooses the CPU providing the best spread
 * given past weights, less than optimal isolation can result in cases
 * where heavy weight devices show up last.  The nexus driver's interrupt
 * redistribution logic should use intr_dist_add/rem_weighted so that
 * interrupts can be redistributed heavy first for optimal isolation.
 */
uint32_t
intr_dist_cpuid(void)
{
	static struct cpu *curr_cpu;
	struct cpu *start_cpu;
	struct cpu *new_cpu;
	struct cpu *cp;
	int cpuid = -1;

	/* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
	mutex_enter(&intr_dist_cpu_lock);

	switch (intr_policy) {
	case INTR_CURRENT_CPU:
		cpuid = CPU->cpu_id;
		break;

	case INTR_BOOT_CPU:
		panic("INTR_BOOT_CPU no longer supported.");
		/*NOTREACHED*/

	case INTR_FLAT_DIST:
	case INTR_WEIGHTED_DIST:
	default:
		/*
		 * Ensure that curr_cpu is valid - cpu_next will be NULL if
		 * the cpu has been deleted (cpu structs are never freed).
		 */
		if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
			curr_cpu = CPU;

		/*
		 * Advance to the online CPU after curr_cpu (round-robin).
		 * For INTR_WEIGHTED_DIST we choose the cpu with the lightest
		 * weight.  For a nexus that does not support weight the
		 * default weight of zero is used, so we degrade to
		 * round-robin behavior among CPUs of equal weight.
		 *
		 * Disable preemption while traversing cpu_next_onln to
		 * ensure the list does not change.  This works because
		 * modifiers of this list and other lists in a struct cpu
		 * call pause_cpus() before making changes.
		 */
		kpreempt_disable();
		cp = start_cpu = curr_cpu->cpu_next_onln;
		new_cpu = NULL;
		do {
			/* Skip CPUs with interrupts disabled */
			if ((cp->cpu_flags & CPU_ENABLE) == 0)
				continue;

			if (intr_policy == INTR_FLAT_DIST) {
				/* select CPU */
				new_cpu = cp;
				break;
			} else if ((new_cpu == NULL) ||
			    (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
				/* Choose if lighter weight */
				new_cpu = cp;
			}
		} while ((cp = cp->cpu_next_onln) != start_cpu);
		ASSERT(new_cpu);
		cpuid = new_cpu->cpu_id;

		INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
		    "targeted\n", cpuid, new_cpu->cpu_intr_weight));

		/* update static pointer for next round-robin */
		curr_cpu = new_cpu;
		kpreempt_enable();
		break;
	}
	mutex_exit(&intr_dist_cpu_lock);
	return (cpuid);
}

/*
 * Add or remove the weight of a device from a CPU's interrupt weight.
 *
 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
 * their children to improve the overall quality of interrupt initialization.
 *
 * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
 * among multiple devices (sharing an ino) then the nexus should call
 * intr_dist_cpuid_add/rem_device_weight for each device separately.  Devices
 * that share must specify the same cpuid.
 *
 * If a nexus driver is unable to determine the cpu at remove_intr time
 * for some of its interrupts, then it should not call add_device_weight -
 * intr_dist_cpuid will still provide round-robin.
 *
 * An established device weight (from the dev_info node) takes precedence over
 * the weight passed in.  If a device weight is not already established
 * then the passed in nexus weight is established.
 */
void
intr_dist_cpuid_add_device_weight(uint32_t cpuid,
    dev_info_t *dip, int32_t nweight)
{
	int32_t		eweight;

	/*
	 * For the non-weighted policy everything has a weight of zero (and we
	 * get round-robin distribution from intr_dist_cpuid).
	 * NB: intr_policy is limited to this file.  A weighted nexus driver
	 * calls this routine even if intr_policy has been patched to
	 * INTR_FLAT_DIST.
	 */
	ASSERT(dip);
	if (intr_policy != INTR_WEIGHTED_DIST)
		return;

	eweight = i_ddi_get_intr_weight(dip);
	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
	    nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
	    ddi_get_instance(ddi_get_parent(dip)),
	    ddi_driver_name(dip), ddi_get_instance(dip)));

	/* if no established weight, establish the nexus weight */
	if (eweight < 0) {
		if (nweight > 0)
			(void) i_ddi_set_intr_weight(dip, nweight);
		else
			nweight = 0;
	} else
		nweight = eweight;	/* use established weight */

	/* Establish exclusion for cpu_intr_weight manipulation */
	mutex_enter(&intr_dist_cpu_lock);
	cpu[cpuid]->cpu_intr_weight += nweight;

	/* update intr_dist_weight_max */
	if (nweight > intr_dist_weight_max)
		intr_dist_weight_max = nweight;
	mutex_exit(&intr_dist_cpu_lock);
}

void
intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
{
	struct cpu	*cp;
	int32_t		weight;

	ASSERT(dip);
	if (intr_policy != INTR_WEIGHTED_DIST)
		return;

	/* remove the weight of the device from the cpu */
	weight = i_ddi_get_intr_weight(dip);
	if (weight < 0)
		weight = 0;
	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for "
	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
	    ddi_driver_name(ddi_get_parent(dip)),
	    ddi_get_instance(ddi_get_parent(dip)),
	    ddi_driver_name(dip), ddi_get_instance(dip)));

	/* Establish exclusion for cpu_intr_weight manipulation */
	mutex_enter(&intr_dist_cpu_lock);
	cp = cpu[cpuid];
	cp->cpu_intr_weight -= weight;
	if (cp->cpu_intr_weight < 0)
		cp->cpu_intr_weight = 0;	/* sanity */
	mutex_exit(&intr_dist_cpu_lock);
}

ulong_t
create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1)
{
	uint64_t inum;

	inum = add_softintr(pil, func, arg1, SOFTINT_ST);
	return ((ulong_t)inum);
}

void
invoke_softint(processorid_t cpuid, ulong_t hdl)
{
	uint64_t inum = hdl;

	if (cpuid == CPU->cpu_id)
		setsoftint(inum);
	else
		xt_one(cpuid, setsoftint_tl1, inum, 0);
}

void
remove_softint(ulong_t hdl)
{
	uint64_t inum = hdl;

	(void) rem_softintr(inum);
}

void
sync_softint(cpuset_t set)
{
	xt_sync(set);
}
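
/*
 * The sketch below (under #if 0, not compiled) exercises the generic soft
 * interrupt handles defined above.  The handler, function names and PIL
 * choice are hypothetical; kernel consumers normally reach this layer
 * through the common DDI soft interrupt support rather than by calling
 * these routines directly.
 */
#if 0
static uint_t
example_softint_handler(caddr_t arg1, caddr_t arg2)
{
	/* arg1 is the cookie passed to create_softint(); arg2 is unused. */
	return (1);		/* claimed */
}

static void
example_softint_usage(processorid_t target)
{
	ulong_t hdl;
	cpuset_t set;

	hdl = create_softint(PIL_1, example_softint_handler, NULL);

	/* Post the soft interrupt on the chosen CPU. */
	invoke_softint(target, hdl);

	/* Wait for the cross-call to land before tearing down. */
	CPUSET_ZERO(set);
	CPUSET_ADD(set, target);
	sync_softint(set);

	remove_softint(hdl);
}
#endif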