1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/sysmacros.h> 29 #include <sys/stack.h> 30 #include <sys/cpuvar.h> 31 #include <sys/ivintr.h> 32 #include <sys/intreg.h> 33 #include <sys/membar.h> 34 #include <sys/kmem.h> 35 #include <sys/intr.h> 36 #include <sys/sunndi.h> 37 #include <sys/cmn_err.h> 38 #include <sys/privregs.h> 39 #include <sys/systm.h> 40 #include <sys/archsystm.h> 41 #include <sys/machsystm.h> 42 #include <sys/x_call.h> 43 #include <vm/seg_kp.h> 44 #include <sys/debug.h> 45 #include <sys/cyclic.h> 46 #include <sys/kdi_impl.h> 47 48 #include <sys/cpu_sgnblk_defs.h> 49 50 /* Global locks which protect the interrupt distribution lists */ 51 static kmutex_t intr_dist_lock; 52 static kmutex_t intr_dist_cpu_lock; 53 54 /* Head of the interrupt distribution lists */ 55 static struct intr_dist *intr_dist_head = NULL; 56 static struct intr_dist *intr_dist_whead = NULL; 57 58 uint64_t siron_inum; 59 uint64_t *siron_cpu_inum = NULL; 60 uint64_t siron_poke_cpu_inum; 61 static int 
siron_cpu_setup(cpu_setup_t, int, void *); 62 extern uint_t softlevel1(); 63 64 uint64_t poke_cpu_inum; 65 uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2); 66 uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2); 67 68 /* 69 * Note:- 70 * siron_pending was originally created to prevent a resource over consumption 71 * bug in setsoftint(exhaustion of interrupt pool free list). 72 * It's original intention is obsolete with the use of iv_pending in 73 * setsoftint. However, siron_pending stayed around, acting as a second 74 * gatekeeper preventing soft interrupts from being queued. In this capacity, 75 * it can lead to hangs on MP systems, where due to global visibility issues 76 * it can end up set while iv_pending is reset, preventing soft interrupts from 77 * ever being processed. In addition to its gatekeeper role, init_intr also 78 * uses it to flag the situation where siron() was called before siron_inum has 79 * been defined. 80 * 81 * siron() does not need an extra gatekeeper; any cpu that wishes should be 82 * allowed to queue a soft interrupt. It is softint()'s job to ensure 83 * correct handling of the queues. Therefore, siron_pending has been 84 * stripped of its gatekeeper task, retaining only its intr_init job, where 85 * it indicates that there is a pending need to call siron(). 86 */ 87 int siron_pending; 88 89 int intr_policy = INTR_WEIGHTED_DIST; /* interrupt distribution policy */ 90 int intr_dist_debug = 0; 91 int32_t intr_dist_weight_max = 1; 92 int32_t intr_dist_weight_maxmax = 1000; 93 int intr_dist_weight_maxfactor = 2; 94 #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args 95 96 /* 97 * intr_init() - Interrupt initialization 98 * Initialize the system's interrupt vector table. 99 */ 100 void 101 intr_init(cpu_t *cp) 102 { 103 extern uint_t softlevel1(); 104 105 init_ivintr(); 106 REGISTER_BBUS_INTR(); 107 108 /* 109 * We just allocate memory for per-cpu siron right now. Rest of 110 * the work is done when CPU is configured. 
111 */ 112 siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP); 113 siron_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST); 114 poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT); 115 siron_poke_cpu_inum = add_softintr(PIL_13, 116 siron_poke_cpu_intr, 0, SOFTINT_MT); 117 cp->cpu_m.poke_cpu_outstanding = B_FALSE; 118 119 mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL); 120 mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL); 121 122 /* 123 * A soft interrupt may have been requested prior to the initialization 124 * of soft interrupts. Soft interrupts can't be dispatched until after 125 * init_intr(), so we have to wait until now before we can dispatch the 126 * pending soft interrupt (if any). 127 */ 128 if (siron_pending) { 129 siron_pending = 0; 130 siron(); 131 } 132 } 133 134 /* 135 * poke_cpu_intr - fall through when poke_cpu calls 136 */ 137 /* ARGSUSED */ 138 uint_t 139 poke_cpu_intr(caddr_t arg1, caddr_t arg2) 140 { 141 CPU->cpu_m.poke_cpu_outstanding = B_FALSE; 142 membar_stld_stst(); 143 return (1); 144 } 145 146 /* 147 * kmdb uses siron (and thus setsoftint) while the world is stopped in order to 148 * inform its driver component that there's work to be done. We need to keep 149 * DTrace from instrumenting kmdb's siron and setsoftint. We duplicate siron, 150 * giving kmdb's version a kdi_ prefix to keep DTrace at bay. The 151 * implementation of setsoftint is complicated enough that we don't want to 152 * duplicate it, but at the same time we don't want to preclude tracing either. 153 * The meat of setsoftint() therefore goes into kdi_setsoftint, with 154 * setsoftint() implemented as a wrapper. This allows tracing, while still 155 * providing a way for kmdb to sneak in unmolested. 
156 */ 157 void 158 kdi_siron(void) 159 { 160 if (siron_inum != 0) 161 kdi_setsoftint(siron_inum); 162 else 163 siron_pending = 1; 164 } 165 166 void 167 setsoftint(uint64_t inum) 168 { 169 kdi_setsoftint(inum); 170 } 171 172 /* 173 * Generates softlevel1 interrupt on current CPU if it 174 * is not pending already. 175 */ 176 void 177 siron(void) 178 { 179 uint64_t inum; 180 181 if (siron_inum != 0) { 182 if (siron_cpu_inum[CPU->cpu_id] != 0) 183 inum = siron_cpu_inum[CPU->cpu_id]; 184 else 185 inum = siron_inum; 186 187 setsoftint(inum); 188 } else 189 siron_pending = 1; 190 } 191 192 /* 193 * This routine creates per-CPU siron inum for CPUs which are 194 * configured during boot. 195 */ 196 void 197 siron_mp_init() 198 { 199 cpu_t *c; 200 201 mutex_enter(&cpu_lock); 202 c = cpu_list; 203 do { 204 (void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL); 205 } while ((c = c->cpu_next) != cpu_list); 206 207 register_cpu_setup_func(siron_cpu_setup, NULL); 208 mutex_exit(&cpu_lock); 209 } 210 211 /* 212 * siron_poke_cpu_intr - cross-call handler. 213 */ 214 /* ARGSUSED */ 215 uint_t 216 siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2) 217 { 218 /* generate level1 softint */ 219 siron(); 220 return (1); 221 } 222 223 /* 224 * This routine generates a cross-call on target CPU(s). 225 */ 226 void 227 siron_poke_cpu(cpuset_t poke) 228 { 229 int cpuid = CPU->cpu_id; 230 231 if (CPU_IN_SET(poke, cpuid)) { 232 siron(); 233 CPUSET_DEL(poke, cpuid); 234 if (CPUSET_ISNULL(poke)) 235 return; 236 } 237 238 xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0); 239 } 240 241 /* 242 * This callback function allows us to create per-CPU siron inum. 
243 */ 244 /* ARGSUSED */ 245 static int 246 siron_cpu_setup(cpu_setup_t what, int id, void *arg) 247 { 248 cpu_t *cp = cpu[id]; 249 250 ASSERT(MUTEX_HELD(&cpu_lock)); 251 ASSERT(cp != NULL); 252 253 switch (what) { 254 case CPU_CONFIG: 255 siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1, 256 (softintrfunc)softlevel1, 0, SOFTINT_ST); 257 break; 258 case CPU_UNCONFIG: 259 (void) rem_softintr(siron_cpu_inum[cp->cpu_id]); 260 siron_cpu_inum[cp->cpu_id] = 0; 261 break; 262 default: 263 break; 264 } 265 266 return (0); 267 } 268 269 /* 270 * no_ivintr() 271 * called by setvecint_tl1() through sys_trap() 272 * vector interrupt received but not valid or not 273 * registered in intr_vec_table 274 * considered as a spurious mondo interrupt 275 */ 276 /* ARGSUSED */ 277 void 278 no_ivintr(struct regs *rp, int inum, int pil) 279 { 280 cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x", 281 inum, pil); 282 283 #ifdef DEBUG_VEC_INTR 284 prom_enter_mon(); 285 #endif /* DEBUG_VEC_INTR */ 286 } 287 288 void 289 intr_dequeue_req(uint_t pil, uint64_t inum) 290 { 291 intr_vec_t *iv, *next, *prev; 292 struct machcpu *mcpu; 293 uint32_t clr; 294 processorid_t cpu_id; 295 extern uint_t getpstate(void); 296 297 ASSERT((getpstate() & PSTATE_IE) == 0); 298 299 mcpu = &CPU->cpu_m; 300 cpu_id = CPU->cpu_id; 301 302 iv = (intr_vec_t *)inum; 303 prev = NULL; 304 next = mcpu->intr_head[pil]; 305 306 /* Find a matching entry in the list */ 307 while (next != NULL) { 308 if (next == iv) 309 break; 310 prev = next; 311 next = IV_GET_PIL_NEXT(next, cpu_id); 312 } 313 314 if (next != NULL) { 315 intr_vec_t *next_iv = IV_GET_PIL_NEXT(next, cpu_id); 316 317 /* Remove entry from list */ 318 if (prev != NULL) 319 IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */ 320 else 321 mcpu->intr_head[pil] = next_iv; /* head */ 322 323 if (next_iv == NULL) 324 mcpu->intr_tail[pil] = prev; /* tail */ 325 } 326 327 /* Clear pending interrupts at this level if the list is empty */ 328 if 
(mcpu->intr_head[pil] == NULL) { 329 clr = 1 << pil; 330 if (pil == PIL_14) 331 clr |= (TICK_INT_MASK | STICK_INT_MASK); 332 wr_clr_softint(clr); 333 } 334 } 335 336 337 /* 338 * Send a directed interrupt of specified interrupt number id to a cpu. 339 */ 340 void 341 send_dirint( 342 int cpuix, /* cpu to be interrupted */ 343 int intr_id) /* interrupt number id */ 344 { 345 xt_one(cpuix, setsoftint_tl1, intr_id, 0); 346 } 347 348 /* 349 * Take the specified CPU out of participation in interrupts. 350 * Called by p_online(2) when a processor is being taken off-line. 351 * This allows interrupt threads being handled on the processor to 352 * complete before the processor is idled. 353 */ 354 int 355 cpu_disable_intr(struct cpu *cp) 356 { 357 ASSERT(MUTEX_HELD(&cpu_lock)); 358 359 /* 360 * Turn off the CPU_ENABLE flag before calling the redistribution 361 * function, since it checks for this in the cpu flags. 362 */ 363 cp->cpu_flags &= ~CPU_ENABLE; 364 365 intr_redist_all_cpus(); 366 367 return (0); 368 } 369 370 /* 371 * Allow the specified CPU to participate in interrupts. 372 * Called by p_online(2) if a processor could not be taken off-line 373 * because of bound threads, in order to resume processing interrupts. 374 * Also called after starting a processor. 375 */ 376 void 377 cpu_enable_intr(struct cpu *cp) 378 { 379 ASSERT(MUTEX_HELD(&cpu_lock)); 380 381 cp->cpu_flags |= CPU_ENABLE; 382 383 intr_redist_all_cpus(); 384 } 385 386 /* 387 * Add function to callback list for intr_redist_all_cpus. We keep two lists, 388 * one for weighted callbacks and one for normal callbacks. Weighted callbacks 389 * are issued to redirect interrupts of a specified weight, from heavy to 390 * light. This allows all the interrupts of a given weight to be redistributed 391 * for all weighted nexus drivers prior to those of less weight. 
392 */ 393 static void 394 intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg) 395 { 396 struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP); 397 struct intr_dist *iptr; 398 struct intr_dist **pptr; 399 400 ASSERT(func); 401 new->func = func; 402 new->arg = arg; 403 new->next = NULL; 404 405 /* Add to tail so that redistribution occurs in original order. */ 406 mutex_enter(&intr_dist_lock); 407 for (iptr = *phead, pptr = phead; iptr != NULL; 408 pptr = &iptr->next, iptr = iptr->next) { 409 /* check for problems as we locate the tail */ 410 if ((iptr->func == func) && (iptr->arg == arg)) { 411 cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate"); 412 /*NOTREACHED*/ 413 } 414 } 415 *pptr = new; 416 417 mutex_exit(&intr_dist_lock); 418 } 419 420 void 421 intr_dist_add(void (*func)(void *), void *arg) 422 { 423 intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg); 424 } 425 426 void 427 intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 428 { 429 intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg); 430 } 431 432 /* 433 * Search for the interrupt distribution structure with the specified 434 * mondo vec reg in the interrupt distribution list. If a match is found, 435 * then delete the entry from the list. The caller is responsible for 436 * modifying the mondo vector registers. 
437 */ 438 static void 439 intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg) 440 { 441 struct intr_dist *iptr; 442 struct intr_dist **vect; 443 444 mutex_enter(&intr_dist_lock); 445 for (iptr = *headp, vect = headp; 446 iptr != NULL; vect = &iptr->next, iptr = iptr->next) { 447 if ((iptr->func == func) && (iptr->arg == arg)) { 448 *vect = iptr->next; 449 kmem_free(iptr, sizeof (struct intr_dist)); 450 mutex_exit(&intr_dist_lock); 451 return; 452 } 453 } 454 455 if (!panicstr) 456 cmn_err(CE_PANIC, "intr_dist_rem_list: not found"); 457 mutex_exit(&intr_dist_lock); 458 } 459 460 void 461 intr_dist_rem(void (*func)(void *), void *arg) 462 { 463 intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg); 464 } 465 466 void 467 intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 468 { 469 intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg); 470 } 471 472 /* 473 * Initiate interrupt redistribution. Redistribution improves the isolation 474 * associated with interrupt weights by ordering operations from heavy weight 475 * to light weight. When a CPUs orientation changes relative to interrupts, 476 * there is *always* a redistribution to accommodate this change (call to 477 * intr_redist_all_cpus()). As devices (not CPUs) attach/detach it is possible 478 * that a redistribution could improve the quality of an initialization. For 479 * example, if you are not using a NIC it may not be attached with s10 (devfs). 480 * If you then configure the NIC (ifconfig), this may cause the NIC to attach 481 * and plumb interrupts. The CPU assignment for the NIC's interrupts is 482 * occurring late, so optimal "isolation" relative to weight is not occurring. 483 * The same applies to detach, although in this case doing the redistribution 484 * might improve "spread" for medium weight devices since the "isolation" of 485 * a higher weight device may no longer be present. 
486 * 487 * NB: We should provide a utility to trigger redistribution (ala "intradm -r"). 488 * 489 * NB: There is risk associated with automatically triggering execution of the 490 * redistribution code at arbitrary times. The risk comes from the fact that 491 * there is a lot of low-level hardware interaction associated with a 492 * redistribution. At some point we may want this code to perform automatic 493 * redistribution (redistribution thread; trigger timeout when add/remove 494 * weight delta is large enough, and call cv_signal from timeout - causing 495 * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too 496 * risky at this time. 497 */ 498 void 499 i_ddi_intr_redist_all_cpus() 500 { 501 mutex_enter(&cpu_lock); 502 INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n")); 503 intr_redist_all_cpus(); 504 mutex_exit(&cpu_lock); 505 } 506 507 /* 508 * Redistribute all interrupts 509 * 510 * This function redistributes all interrupting devices, running the 511 * parent callback functions for each node. 512 */ 513 void 514 intr_redist_all_cpus(void) 515 { 516 struct cpu *cp; 517 struct intr_dist *iptr; 518 int32_t weight, max_weight; 519 520 ASSERT(MUTEX_HELD(&cpu_lock)); 521 mutex_enter(&intr_dist_lock); 522 523 /* 524 * zero cpu_intr_weight on all cpus - it is safe to traverse 525 * cpu_list since we hold cpu_lock. 526 */ 527 cp = cpu_list; 528 do { 529 cp->cpu_intr_weight = 0; 530 } while ((cp = cp->cpu_next) != cpu_list); 531 532 /* 533 * Assume that this redistribution may encounter a device weight 534 * via driver.conf tuning of "ddi-intr-weight" that is at most 535 * intr_dist_weight_maxfactor times larger. 
536 */ 537 max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor; 538 if (max_weight > intr_dist_weight_maxmax) 539 max_weight = intr_dist_weight_maxmax; 540 intr_dist_weight_max = 1; 541 542 INTR_DEBUG((CE_CONT, "intr_dist: " 543 "intr_redist_all_cpus: %d-0\n", max_weight)); 544 545 /* 546 * Redistribute weighted, from heavy to light. The callback that 547 * specifies a weight equal to weight_max should redirect all 548 * interrupts of weight weight_max or greater [weight_max, inf.). 549 * Interrupts of lesser weight should be processed on the call with 550 * the matching weight. This allows all the heaver weight interrupts 551 * on all weighted busses (multiple pci busses) to be redirected prior 552 * to any lesser weight interrupts. 553 */ 554 for (weight = max_weight; weight >= 0; weight--) 555 for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next) 556 ((void (*)(void *, int32_t, int32_t))iptr->func) 557 (iptr->arg, max_weight, weight); 558 559 /* redistribute normal (non-weighted) interrupts */ 560 for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next) 561 ((void (*)(void *))iptr->func)(iptr->arg); 562 mutex_exit(&intr_dist_lock); 563 } 564 565 void 566 intr_redist_all_cpus_shutdown(void) 567 { 568 intr_policy = INTR_CURRENT_CPU; 569 intr_redist_all_cpus(); 570 } 571 572 /* 573 * Determine what CPU to target, based on interrupt policy. 574 * 575 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and 576 * advance through interrupt enabled cpus (round-robin). 577 * 578 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest 579 * cpu_intr_weight, round robin when all equal. 580 * 581 * Weighted interrupt distribution provides two things: "spread" of weight 582 * (associated with algorithm itself) and "isolation" (associated with a 583 * particular device weight). 
 *	A redistribution is what provides optimal
 *	"isolation" of heavy weight interrupts; optimal "spread" of weight
 *	(relative to what came before) is always occurring.
 *
 *	An interrupt weight is a subjective number that represents the
 *	percentage of a CPU required to service a device's interrupts: the
 *	default weight is 0% (however the algorithm still maintains
 *	round-robin), a network interface controller (NIC) may have a large
 *	weight (35%). Interrupt weight only has meaning relative to the
 *	interrupt weight of other devices: a CPU can be weighted more than
 *	100%, and a single device might consume more than 100% of a CPU.
 *
 *	A coarse interrupt weight can be defined by the parent nexus driver
 *	based on bus specific information, like pci class codes. A nexus
 *	driver that supports device interrupt weighting for its children
 *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
 *	and removes the weight of a device from the CPU that an interrupt
 *	is directed at. The quality of initialization improves when the
 *	device interrupt weights more accurately reflect actual run-time
 *	weights, and as the assignments are ordered from heavy to light.
 *
 *	The implementation also supports interrupt weight being specified in
 *	driver.conf files via the property "ddi-intr-weight", which takes
 *	precedence over the nexus supplied weight. This support is added to
 *	permit possible tweaking in the product in response to customer
 *	problems. This is not a formal or committed interface.
 *
 *	While a weighted approach chooses the CPU providing the best spread
 *	given past weights, less than optimal isolation can result in cases
 *	where heavy weight devices show up last. The nexus driver's interrupt
 *	redistribution logic should use intr_dist_add/rem_weighted so that
 *	interrupts can be redistributed heavy first for optimal isolation.
615 */ 616 uint32_t 617 intr_dist_cpuid(void) 618 { 619 static struct cpu *curr_cpu; 620 struct cpu *start_cpu; 621 struct cpu *new_cpu; 622 struct cpu *cp; 623 int cpuid = -1; 624 625 /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */ 626 mutex_enter(&intr_dist_cpu_lock); 627 628 switch (intr_policy) { 629 case INTR_CURRENT_CPU: 630 cpuid = CPU->cpu_id; 631 break; 632 633 case INTR_BOOT_CPU: 634 panic("INTR_BOOT_CPU no longer supported."); 635 /*NOTREACHED*/ 636 637 case INTR_FLAT_DIST: 638 case INTR_WEIGHTED_DIST: 639 default: 640 /* 641 * Ensure that curr_cpu is valid - cpu_next will be NULL if 642 * the cpu has been deleted (cpu structs are never freed). 643 */ 644 if (curr_cpu == NULL || curr_cpu->cpu_next == NULL) 645 curr_cpu = CPU; 646 647 /* 648 * Advance to online CPU after curr_cpu (round-robin). For 649 * INTR_WEIGHTED_DIST we choose the cpu with the lightest 650 * weight. For a nexus that does not support weight the 651 * default weight of zero is used. We degrade to round-robin 652 * behavior among equal weightes. The default weight is zero 653 * and round-robin behavior continues. 654 * 655 * Disable preemption while traversing cpu_next_onln to 656 * ensure the list does not change. This works because 657 * modifiers of this list and other lists in a struct cpu 658 * call pause_cpus() before making changes. 
659 */ 660 kpreempt_disable(); 661 cp = start_cpu = curr_cpu->cpu_next_onln; 662 new_cpu = NULL; 663 do { 664 /* Skip CPUs with interrupts disabled */ 665 if ((cp->cpu_flags & CPU_ENABLE) == 0) 666 continue; 667 668 if (intr_policy == INTR_FLAT_DIST) { 669 /* select CPU */ 670 new_cpu = cp; 671 break; 672 } else if ((new_cpu == NULL) || 673 (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) { 674 /* Choose if lighter weight */ 675 new_cpu = cp; 676 } 677 } while ((cp = cp->cpu_next_onln) != start_cpu); 678 ASSERT(new_cpu); 679 cpuid = new_cpu->cpu_id; 680 681 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: " 682 "targeted\n", cpuid, new_cpu->cpu_intr_weight)); 683 684 /* update static pointer for next round-robin */ 685 curr_cpu = new_cpu; 686 kpreempt_enable(); 687 break; 688 } 689 mutex_exit(&intr_dist_cpu_lock); 690 return (cpuid); 691 } 692 693 /* 694 * Add or remove the the weight of a device from a CPUs interrupt weight. 695 * 696 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for 697 * their children to improve the overall quality of interrupt initialization. 698 * 699 * If a nexues shares the CPU returned by a single intr_dist_cpuid() call 700 * among multiple devices (sharing ino) then the nexus should call 701 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices 702 * that share must specify the same cpuid. 703 * 704 * If a nexus driver is unable to determine the cpu at remove_intr time 705 * for some of its interrupts, then it should not call add_device_weight - 706 * intr_dist_cpuid will still provide round-robin. 707 * 708 * An established device weight (from dev_info node) takes precedence over 709 * the weight passed in. If a device weight is not already established 710 * then the passed in nexus weight is established. 
711 */ 712 void 713 intr_dist_cpuid_add_device_weight(uint32_t cpuid, 714 dev_info_t *dip, int32_t nweight) 715 { 716 int32_t eweight; 717 718 /* 719 * For non-weighted policy everything has weight of zero (and we get 720 * round-robin distribution from intr_dist_cpuid). 721 * NB: intr_policy is limited to this file. A weighted nexus driver is 722 * calls this rouitne even if intr_policy has been patched to 723 * INTR_FLAG_DIST. 724 */ 725 ASSERT(dip); 726 if (intr_policy != INTR_WEIGHTED_DIST) 727 return; 728 729 eweight = i_ddi_get_intr_weight(dip); 730 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for " 731 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, 732 nweight, eweight, ddi_driver_name(ddi_get_parent(dip)), 733 ddi_get_instance(ddi_get_parent(dip)), 734 ddi_driver_name(dip), ddi_get_instance(dip))); 735 736 /* if no establish weight, establish nexus weight */ 737 if (eweight < 0) { 738 if (nweight > 0) 739 (void) i_ddi_set_intr_weight(dip, nweight); 740 else 741 nweight = 0; 742 } else 743 nweight = eweight; /* use established weight */ 744 745 /* Establish exclusion for cpu_intr_weight manipulation */ 746 mutex_enter(&intr_dist_cpu_lock); 747 cpu[cpuid]->cpu_intr_weight += nweight; 748 749 /* update intr_dist_weight_max */ 750 if (nweight > intr_dist_weight_max) 751 intr_dist_weight_max = nweight; 752 mutex_exit(&intr_dist_cpu_lock); 753 } 754 755 void 756 intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip) 757 { 758 struct cpu *cp; 759 int32_t weight; 760 761 ASSERT(dip); 762 if (intr_policy != INTR_WEIGHTED_DIST) 763 return; 764 765 /* remove weight of device from cpu */ 766 weight = i_ddi_get_intr_weight(dip); 767 if (weight < 0) 768 weight = 0; 769 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for " 770 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight, 771 ddi_driver_name(ddi_get_parent(dip)), 772 ddi_get_instance(ddi_get_parent(dip)), 773 ddi_driver_name(dip), ddi_get_instance(dip))); 774 775 
/* Establish exclusion for cpu_intr_weight manipulation */ 776 mutex_enter(&intr_dist_cpu_lock); 777 cp = cpu[cpuid]; 778 cp->cpu_intr_weight -= weight; 779 if (cp->cpu_intr_weight < 0) 780 cp->cpu_intr_weight = 0; /* sanity */ 781 mutex_exit(&intr_dist_cpu_lock); 782 } 783