/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/sysmacros.h>
#include <sys/stack.h>
#include <sys/cpuvar.h>
#include <sys/ivintr.h>
#include <sys/intreg.h>
#include <sys/membar.h>
#include <sys/kmem.h>
#include <sys/intr.h>
#include <sys/sunndi.h>
#include <sys/cmn_err.h>
#include <sys/privregs.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <vm/seg_kp.h>
#include <sys/debug.h>
#include <sys/cyclic.h>

#include <sys/cpu_sgnblk_defs.h>

kmutex_t soft_iv_lock;  /* protect software interrupt vector table */
/* Global locks which protect the interrupt distribution lists */
static kmutex_t intr_dist_lock;
static kmutex_t intr_dist_cpu_lock;

/* Head of the interrupt distribution lists */
static struct intr_dist *intr_dist_head = NULL;
static struct intr_dist *intr_dist_whead = NULL;

uint_t swinum_base;
uint_t maxswinum;
uint_t siron_inum;
uint_t poke_cpu_inum;
/*
 * Note:
 * siron_pending was originally created to prevent a resource over-consumption
 * bug in setsoftint (exhaustion of the interrupt pool free list). Its
 * original purpose is obsolete now that setsoftint uses iv_pending.
 * However, siron_pending stayed around, acting as a second gatekeeper
 * preventing soft interrupts from being queued. In this capacity, it can
 * lead to hangs on MP systems, where due to global visibility issues it can
 * end up set while iv_pending is reset, preventing soft interrupts from
 * ever being processed. In addition to its gatekeeper role, intr_init also
 * uses it to flag the situation where siron() was called before siron_inum
 * had been defined.
 *
 * siron() does not need an extra gatekeeper; any cpu that wishes should be
 * allowed to queue a soft interrupt. It is softint()'s job to ensure
 * correct handling of the queues. Therefore, siron_pending has been
 * stripped of its gatekeeper task, retaining only its intr_init job, where
 * it indicates that there is a pending need to call siron().
 */
int siron_pending;

int intr_policy = INTR_WEIGHTED_DIST;   /* interrupt distribution policy */
int intr_dist_debug = 0;
int32_t intr_dist_weight_max = 1;
int32_t intr_dist_weight_maxmax = 1000;
int intr_dist_weight_maxfactor = 2;
#define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args

static void sw_ivintr_init(cpu_t *);

/*
 * intr_init() - interrupt initialization
 *      Initialize the system's software interrupt vector table and
 *      the CPU's interrupt free list.
 */
void
intr_init(cpu_t *cp)
{
        init_ivintr();
        sw_ivintr_init(cp);
        init_intr_pool(cp);

        mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);

        /*
         * A soft interrupt may have been requested prior to the initialization
         * of soft interrupts. Soft interrupts can't be dispatched until after
         * init_intr_pool, so we have to wait until now before we can dispatch
         * the pending soft interrupt (if any).
         */
        if (siron_pending) {
                siron_pending = 0;
                siron();
        }
}

/*
 * poke_cpu_intr - fall through when poke_cpu calls
 */

/* ARGSUSED */
uint_t
poke_cpu_intr(caddr_t arg1, caddr_t arg2)
{
        CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
        membar_stld_stst();
        return (1);
}

/*
 * sw_ivintr_init() - software interrupt vector initialization
 *      called after CPU is active
 *      the software interrupt vector table is part of the intr_vector[]
 */
static void
sw_ivintr_init(cpu_t *cp)
{
        extern uint_t softlevel1();

        mutex_init(&soft_iv_lock, NULL, MUTEX_DEFAULT, NULL);

        swinum_base = SOFTIVNUM;

        /*
         * the maximum software interrupt == MAX_SOFT_INO
         */
        maxswinum = swinum_base + MAX_SOFT_INO;

        REGISTER_BBUS_INTR();

        siron_inum = add_softintr(PIL_1, softlevel1, 0);
        poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0);
        cp->cpu_m.poke_cpu_outstanding = B_FALSE;
}
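
/*
 * Example (sketch, hypothetical names): a kernel client that wants its own
 * software interrupt would follow the same pattern used above for
 * siron_inum and poke_cpu_inum - register a handler once and trigger it
 * later with setsoftint():
 *
 *      static uint_t my_inum;
 *
 *      static uint_t
 *      my_softint(caddr_t arg1, caddr_t arg2)
 *      {
 *              // handler runs at the PIL passed to add_softintr
 *              return (1);     // claim the interrupt
 *      }
 *
 *      // registration (once, at init time):
 *      my_inum = add_softintr(PIL_1, my_softint, 0);
 *      // posting the soft interrupt later:
 *      setsoftint(my_inum);
 */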

cpuset_t intr_add_pools_inuse;

/*
 * cleanup_intr_pool()
 *      Free up the extra intr request pool for this cpu.
 */
void
cleanup_intr_pool(cpu_t *cp)
{
        extern struct intr_req *intr_add_head;
        int poolno;
        struct intr_req *pool;

        poolno = cp->cpu_m.intr_pool_added;
        if (poolno >= 0) {
                cp->cpu_m.intr_pool_added = -1;
                pool = (poolno * INTR_PENDING_MAX * intr_add_pools) +
                    intr_add_head;      /* not byte arithmetic */
                bzero(pool, INTR_PENDING_MAX * intr_add_pools *
                    sizeof (struct intr_req));

                CPUSET_DEL(intr_add_pools_inuse, poolno);
        }
}

/*
 * init_intr_pool()
 *      initialize the intr request pool for the cpu
 *      should be called for each cpu
 */
void
init_intr_pool(cpu_t *cp)
{
        extern struct intr_req *intr_add_head;
#ifdef DEBUG
        extern struct intr_req *intr_add_tail;
#endif /* DEBUG */
        int i, pool;

        cp->cpu_m.intr_pool_added = -1;

        for (i = 0; i < INTR_PENDING_MAX-1; i++) {
                cp->cpu_m.intr_pool[i].intr_next =
                    &cp->cpu_m.intr_pool[i+1];
        }
        cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = NULL;

        cp->cpu_m.intr_head[0] = &cp->cpu_m.intr_pool[0];
        cp->cpu_m.intr_tail[0] = &cp->cpu_m.intr_pool[INTR_PENDING_MAX-1];

        if (intr_add_pools != 0) {

                /*
                 * If additional interrupt pools have been allocated,
                 * initialize those too and add them to the free list.
                 */

                struct intr_req *trace;

                for (pool = 0; pool < max_ncpus; pool++) {
                        if (!(CPU_IN_SET(intr_add_pools_inuse, pool)))
                                break;
                }
                if (pool >= max_ncpus) {
                        /*
                         * XXX - intr pools are alloc'd, just not as
                         * much as we would like.
                         */
                        cmn_err(CE_WARN, "Failed to alloc all requested intr "
                            "pools for cpu%d", cp->cpu_id);
                        return;
                }
                CPUSET_ADD(intr_add_pools_inuse, pool);
                cp->cpu_m.intr_pool_added = pool;

                trace = (pool * INTR_PENDING_MAX * intr_add_pools) +
                    intr_add_head;      /* not byte arithmetic */

                cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = trace;

                for (i = 1; i < intr_add_pools * INTR_PENDING_MAX; i++, trace++)
                        trace->intr_next = trace + 1;
                trace->intr_next = NULL;

                ASSERT(trace >= intr_add_head && trace <= intr_add_tail);

                cp->cpu_m.intr_tail[0] = trace;
        }
}


/*
 * siron - primitive for sun/os/softint.c
 */
void
siron(void)
{
        if (siron_inum != 0)
                setsoftint(siron_inum);
        else
                siron_pending = 1;
}

/*
 * no_ivintr()
 *      called by vec_interrupt() through sys_trap()
 *      vector interrupt received but not valid or not
 *      registered in intr_vector[]
 *      considered as a spurious mondo interrupt
 */
/* ARGSUSED */
void
no_ivintr(struct regs *rp, int inum, int pil)
{
        cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
            inum, pil);

#ifdef DEBUG_VEC_INTR
        prom_enter_mon();
#endif /* DEBUG_VEC_INTR */
}

/*
 * no_intr_pool()
 *      called by vec_interrupt() through sys_trap()
 *      vector interrupt received but no intr_req entries
 */
/* ARGSUSED */
void
no_intr_pool(struct regs *rp, int inum, int pil)
{
#ifdef DEBUG_VEC_INTR
        cmn_err(CE_WARN, "intr_req pool empty: num 0x%x, pil 0x%x",
            inum, pil);
        prom_enter_mon();
#else
        cmn_err(CE_PANIC, "intr_req pool empty: num 0x%x, pil 0x%x",
            inum, pil);
#endif /* DEBUG_VEC_INTR */
}

void
intr_dequeue_req(uint_t pil, uint32_t inum)
{
        struct intr_req *ir, *prev;
        struct machcpu *mcpu;
        uint32_t clr;
        extern uint_t getpstate(void);

        ASSERT((getpstate() & PSTATE_IE) == 0);

        mcpu = &CPU->cpu_m;

        /* Find a matching entry in the list */
        prev = NULL;
        ir = mcpu->intr_head[pil];
        while (ir != NULL) {
                if (ir->intr_number == inum)
                        break;
                prev = ir;
                ir = ir->intr_next;
        }
        if (ir != NULL) {
                /*
                 * Remove entry from list
                 */
                if (prev != NULL)
                        prev->intr_next = ir->intr_next;        /* non-head */
                else
                        mcpu->intr_head[pil] = ir->intr_next;   /* head */

                if (ir->intr_next == NULL)
                        mcpu->intr_tail[pil] = prev;            /* tail */

                /*
                 * Place on free list
                 */
                ir->intr_next = mcpu->intr_head[0];
                mcpu->intr_head[0] = ir;
        }

        /*
         * clear pending interrupts at this level if the list is empty
         */
        if (mcpu->intr_head[pil] == NULL) {
                clr = 1 << pil;
                if (pil == PIL_14)
                        clr |= (TICK_INT_MASK | STICK_INT_MASK);
                wr_clr_softint(clr);
        }
}


/*
 * Send a directed interrupt of specified interrupt number id to a cpu.
 */
void
send_dirint(
        int cpuix,              /* cpu to be interrupted */
        int intr_id)            /* interrupt number id */
{
        xt_one(cpuix, setsoftint_tl1, intr_id, 0);
}
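
/*
 * Example (sketch, hypothetical names): a client that has registered a soft
 * interrupt (see sw_ivintr_init() above) can direct it at a specific CPU
 * rather than the current one:
 *
 *      uint_t my_inum = add_softintr(PIL_1, my_softint, 0);
 *      send_dirint(target_cpu_id, my_inum);    // queue my_inum on that CPU
 */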

/*
 * Take the specified CPU out of participation in interrupts.
 * Called by p_online(2) when a processor is being taken off-line.
 * This allows interrupt threads being handled on the processor to
 * complete before the processor is idled.
 */
int
cpu_disable_intr(struct cpu *cp)
{
        ASSERT(MUTEX_HELD(&cpu_lock));

        /*
         * Turn off the CPU_ENABLE flag before calling the redistribution
         * function, since it checks for this in the cpu flags.
         */
        cp->cpu_flags &= ~CPU_ENABLE;

        intr_redist_all_cpus();

        return (0);
}

/*
 * Allow the specified CPU to participate in interrupts.
 * Called by p_online(2) if a processor could not be taken off-line
 * because of bound threads, in order to resume processing interrupts.
 * Also called after starting a processor.
 */
void
cpu_enable_intr(struct cpu *cp)
{
        ASSERT(MUTEX_HELD(&cpu_lock));

        cp->cpu_flags |= CPU_ENABLE;

        intr_redist_all_cpus();
}

/*
 * Add function to callback list for intr_redist_all_cpus. We keep two lists,
 * one for weighted callbacks and one for normal callbacks. Weighted callbacks
 * are issued to redirect interrupts of a specified weight, from heavy to
 * light. This allows all the interrupts of a given weight to be redistributed
 * for all weighted nexus drivers prior to those of less weight.
 */
static void
intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
{
        struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
        struct intr_dist *iptr;
        struct intr_dist **pptr;

        ASSERT(func);
        new->func = func;
        new->arg = arg;
        new->next = NULL;

        /* Add to tail so that redistribution occurs in original order. */
        mutex_enter(&intr_dist_lock);
        for (iptr = *phead, pptr = phead; iptr != NULL;
            pptr = &iptr->next, iptr = iptr->next) {
                /* check for problems as we locate the tail */
                if ((iptr->func == func) && (iptr->arg == arg)) {
                        cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
                        /*NOTREACHED*/
                }
        }
        *pptr = new;

        mutex_exit(&intr_dist_lock);
}

void
intr_dist_add(void (*func)(void *), void *arg)
{
        intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
}

void
intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
        intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
}
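
/*
 * Example (sketch, hypothetical nexus): a weighted nexus driver registers a
 * redistribution callback at attach time and removes it at detach time. The
 * weighted callback takes (arg, weight_max, weight); see
 * intr_redist_all_cpus() below for how it is invoked.
 *
 *      static void
 *      mynex_intr_dist(void *arg, int32_t weight_max, int32_t weight)
 *      {
 *              // retarget this nexus's interrupts whose device weight
 *              // falls in the bucket selected by (weight_max, weight)
 *      }
 *
 *      intr_dist_add_weighted(mynex_intr_dist, softsp);        // attach
 *      intr_dist_rem_weighted(mynex_intr_dist, softsp);        // detach
 */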

/*
 * Search for the interrupt distribution structure with the specified
 * mondo vec reg in the interrupt distribution list. If a match is found,
 * then delete the entry from the list. The caller is responsible for
 * modifying the mondo vector registers.
 */
static void
intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
{
        struct intr_dist *iptr;
        struct intr_dist **vect;

        mutex_enter(&intr_dist_lock);
        for (iptr = *headp, vect = headp;
            iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
                if ((iptr->func == func) && (iptr->arg == arg)) {
                        *vect = iptr->next;
                        kmem_free(iptr, sizeof (struct intr_dist));
                        mutex_exit(&intr_dist_lock);
                        return;
                }
        }

        if (!panicstr)
                cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
        mutex_exit(&intr_dist_lock);
}

void
intr_dist_rem(void (*func)(void *), void *arg)
{
        intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
}

void
intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
        intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
}

/*
 * Initiate interrupt redistribution. Redistribution improves the isolation
 * associated with interrupt weights by ordering operations from heavy weight
 * to light weight. When a CPU's orientation changes relative to interrupts,
 * there is *always* a redistribution to accommodate this change (call to
 * intr_redist_all_cpus()). As devices (not CPUs) attach/detach it is possible
 * that a redistribution could improve the quality of an initialization. For
 * example, if you are not using a NIC it may not be attached with s10 (devfs).
 * If you then configure the NIC (ifconfig), this may cause the NIC to attach
 * and plumb interrupts. The CPU assignment for the NIC's interrupts is
 * occurring late, so optimal "isolation" relative to weight is not occurring.
 * The same applies to detach, although in this case doing the redistribution
 * might improve "spread" for medium weight devices since the "isolation" of
 * a higher weight device may no longer be present.
 *
 * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
 *
 * NB: There is risk associated with automatically triggering execution of the
 * redistribution code at arbitrary times. The risk comes from the fact that
 * there is a lot of low-level hardware interaction associated with a
 * redistribution. At some point we may want this code to perform automatic
 * redistribution (redistribution thread; trigger timeout when add/remove
 * weight delta is large enough, and call cv_signal from timeout - causing
 * thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
 * risky at this time.
 */
void
i_ddi_intr_redist_all_cpus()
{
        mutex_enter(&cpu_lock);
        INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
        intr_redist_all_cpus();
        mutex_exit(&cpu_lock);
}

/*
 * Redistribute all interrupts
 *
 * This function redistributes all interrupting devices, running the
 * parent callback functions for each node.
 */
void
intr_redist_all_cpus(void)
{
        struct cpu *cp;
        struct intr_dist *iptr;
        int32_t weight, max_weight;

        ASSERT(MUTEX_HELD(&cpu_lock));
        mutex_enter(&intr_dist_lock);

        /*
         * zero cpu_intr_weight on all cpus - it is safe to traverse
         * cpu_list since we hold cpu_lock.
         */
        cp = cpu_list;
        do {
                cp->cpu_intr_weight = 0;
        } while ((cp = cp->cpu_next) != cpu_list);

        /*
         * Assume that this redistribution may encounter a device weight
         * via driver.conf tuning of "ddi-intr-weight" that is at most
         * intr_dist_weight_maxfactor times larger.
         */
        max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
        if (max_weight > intr_dist_weight_maxmax)
                max_weight = intr_dist_weight_maxmax;
        intr_dist_weight_max = 1;

        INTR_DEBUG((CE_CONT, "intr_dist: "
            "intr_redist_all_cpus: %d-0\n", max_weight));

        /*
         * Redistribute weighted, from heavy to light. The callback that
         * specifies a weight equal to weight_max should redirect all
         * interrupts of weight weight_max or greater [weight_max, inf.).
         * Interrupts of lesser weight should be processed on the call with
         * the matching weight. This allows all the heavier weight interrupts
         * on all weighted busses (multiple pci busses) to be redirected prior
         * to any lesser weight interrupts.
         */
        for (weight = max_weight; weight >= 0; weight--)
                for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
                        ((void (*)(void *, int32_t, int32_t))iptr->func)
                            (iptr->arg, max_weight, weight);

        /* redistribute normal (non-weighted) interrupts */
        for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
                ((void (*)(void *))iptr->func)(iptr->arg);
        mutex_exit(&intr_dist_lock);
}

void
intr_redist_all_cpus_shutdown(void)
{
        intr_policy = INTR_CURRENT_CPU;
        intr_redist_all_cpus();
}
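
/*
 * Example (sketch, hypothetical helper name): per the contract above, a
 * weighted callback picks which of its interrupts to retarget on each call
 * by weight bucket - the first call (weight == weight_max) covers
 * [weight_max, inf.), later calls cover exactly one weight:
 *
 *      static boolean_t
 *      mynex_in_bucket(int32_t w, int32_t weight_max, int32_t weight)
 *      {
 *              if (weight == weight_max)
 *                      return (w >= weight_max);
 *              return (w == weight);
 *      }
 *
 * Interrupts whose weight falls in the bucket are then retargeted to the
 * CPU returned by intr_dist_cpuid().
 */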

/*
 * Determine what CPU to target, based on interrupt policy.
 *
 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
 * advance through interrupt enabled cpus (round-robin).
 *
 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
 * cpu_intr_weight, round robin when all equal.
 *
 * Weighted interrupt distribution provides two things: "spread" of weight
 * (associated with the algorithm itself) and "isolation" (associated with a
 * particular device weight). A redistribution is what provides optimal
 * "isolation" of heavy weight interrupts; optimal "spread" of weight
 * (relative to what came before) is always occurring.
 *
 * An interrupt weight is a subjective number that represents the
 * percentage of a CPU required to service a device's interrupts: the
 * default weight is 0% (however the algorithm still maintains
 * round-robin), a network interface controller (NIC) may have a large
 * weight (35%). Interrupt weight only has meaning relative to the
 * interrupt weight of other devices: a CPU can be weighted more than
 * 100%, and a single device might consume more than 100% of a CPU.
 *
 * A coarse interrupt weight can be defined by the parent nexus driver
 * based on bus specific information, like pci class codes. A nexus
 * driver that supports device interrupt weighting for its children
 * should call intr_dist_cpuid_add/rem_device_weight(), which adds
 * and removes the weight of a device from the CPU that an interrupt
 * is directed at. The quality of initialization improves when the
 * device interrupt weights more accurately reflect actual run-time
 * weights, and as the assignments are ordered from heavy to light.
 *
 * The implementation also supports interrupt weight being specified in
 * driver.conf files via the property "ddi-intr-weight", which takes
 * precedence over the nexus supplied weight. This support is added to
 * permit possible tweaking in the product in response to customer
 * problems. This is not a formal or committed interface.
 *
 * While a weighted approach chooses the CPU providing the best spread
 * given past weights, less than optimal isolation can result in cases
 * where heavy weight devices show up last. The nexus driver's interrupt
 * redistribution logic should use intr_dist_add/rem_weighted so that
 * interrupts can be redistributed heavy first for optimal isolation.
 */
uint32_t
intr_dist_cpuid(void)
{
        static struct cpu *curr_cpu;
        struct cpu *start_cpu;
        struct cpu *new_cpu;
        struct cpu *cp;
        int cpuid = -1;

        /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
        mutex_enter(&intr_dist_cpu_lock);

        switch (intr_policy) {
        case INTR_CURRENT_CPU:
                cpuid = CPU->cpu_id;
                break;

        case INTR_BOOT_CPU:
                panic("INTR_BOOT_CPU no longer supported.");
                /*NOTREACHED*/

        case INTR_FLAT_DIST:
        case INTR_WEIGHTED_DIST:
        default:
                /*
                 * Ensure that curr_cpu is valid - cpu_next will be NULL if
                 * the cpu has been deleted (cpu structs are never freed).
                 */
                if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
                        curr_cpu = CPU;

                /*
                 * Advance to online CPU after curr_cpu (round-robin). For
                 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
                 * weight. For a nexus that does not support weight the
                 * default weight of zero is used, so we degrade to
                 * round-robin behavior among equal weights.
                 *
                 * Disable preemption while traversing cpu_next_onln to
                 * ensure the list does not change. This works because
                 * modifiers of this list and other lists in a struct cpu
                 * call pause_cpus() before making changes.
                 */
                kpreempt_disable();
                cp = start_cpu = curr_cpu->cpu_next_onln;
                new_cpu = NULL;
                do {
                        /* Skip CPUs with interrupts disabled */
                        if ((cp->cpu_flags & CPU_ENABLE) == 0)
                                continue;

                        if (intr_policy == INTR_FLAT_DIST) {
                                /* select CPU */
                                new_cpu = cp;
                                break;
                        } else if ((new_cpu == NULL) ||
                            (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
                                /* Choose if lighter weight */
                                new_cpu = cp;
                        }
                } while ((cp = cp->cpu_next_onln) != start_cpu);
                ASSERT(new_cpu);
                cpuid = new_cpu->cpu_id;

                INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
                    "targeted\n", cpuid, new_cpu->cpu_intr_weight));

                /* update static pointer for next round-robin */
                curr_cpu = new_cpu;
                kpreempt_enable();
                break;
        }
        mutex_exit(&intr_dist_cpu_lock);
        return (cpuid);
}
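
/*
 * Example (sketch): a driver.conf tuning entry that overrides a driver's
 * interrupt weight via the property described above (the value 35 is only
 * illustrative, echoing the NIC example):
 *
 *      ddi-intr-weight=35;
 */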

/*
 * Add or remove the weight of a device from a CPU's interrupt weight.
 *
 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
 * their children to improve the overall quality of interrupt initialization.
 *
 * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
 * among multiple devices (sharing ino) then the nexus should call
 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
 * that share must specify the same cpuid.
 *
 * If a nexus driver is unable to determine the cpu at remove_intr time
 * for some of its interrupts, then it should not call add_device_weight -
 * intr_dist_cpuid will still provide round-robin.
 *
 * An established device weight (from the dev_info node) takes precedence over
 * the weight passed in. If a device weight is not already established
 * then the passed-in nexus weight is established.
 */
void
intr_dist_cpuid_add_device_weight(uint32_t cpuid,
    dev_info_t *dip, int32_t nweight)
{
        int32_t eweight;

        /*
         * For non-weighted policy everything has weight of zero (and we get
         * round-robin distribution from intr_dist_cpuid).
         * NB: intr_policy is limited to this file. A weighted nexus driver
         * calls this routine even if intr_policy has been patched to
         * INTR_FLAT_DIST.
         */
        ASSERT(dip);
        if (intr_policy != INTR_WEIGHTED_DIST)
                return;

        eweight = i_ddi_get_intr_weight(dip);
        INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
            "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
            nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
            ddi_get_instance(ddi_get_parent(dip)),
            ddi_driver_name(dip), ddi_get_instance(dip)));

        /* if no established weight, establish nexus weight */
        if (eweight < 0) {
                if (nweight > 0)
                        (void) i_ddi_set_intr_weight(dip, nweight);
                else
                        nweight = 0;
        } else
                nweight = eweight;      /* use established weight */

        /* Establish exclusion for cpu_intr_weight manipulation */
        mutex_enter(&intr_dist_cpu_lock);
        cpu[cpuid]->cpu_intr_weight += nweight;

        /* update intr_dist_weight_max */
        if (nweight > intr_dist_weight_max)
                intr_dist_weight_max = nweight;
        mutex_exit(&intr_dist_cpu_lock);
}

void
intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
{
        struct cpu *cp;
        int32_t weight;

        ASSERT(dip);
        if (intr_policy != INTR_WEIGHTED_DIST)
                return;

        /* remove weight of device from cpu */
        weight = i_ddi_get_intr_weight(dip);
        if (weight < 0)
                weight = 0;
        INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for "
            "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
            ddi_driver_name(ddi_get_parent(dip)),
            ddi_get_instance(ddi_get_parent(dip)),
            ddi_driver_name(dip), ddi_get_instance(dip)));

        /* Establish exclusion for cpu_intr_weight manipulation */
        mutex_enter(&intr_dist_cpu_lock);
        cp = cpu[cpuid];
        cp->cpu_intr_weight -= weight;
        if (cp->cpu_intr_weight < 0)
                cp->cpu_intr_weight = 0;        /* sanity */
        mutex_exit(&intr_dist_cpu_lock);
}
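
/*
 * Example (sketch, hypothetical nexus code): typical use of the two routines
 * above from a nexus driver's add/remove-interrupt paths, paired with
 * intr_dist_cpuid():
 *
 *      // add path: pick a target CPU, program the mondo toward it,
 *      // then account for the device's weight on that CPU
 *      cpuid = intr_dist_cpuid();
 *      // (program the interrupt mapping register toward cpuid here)
 *      intr_dist_cpuid_add_device_weight(cpuid, rdip, mynex_weight(rdip));
 *
 *      // remove path: undo the accounting for the CPU the interrupt
 *      // was last directed at
 *      intr_dist_cpuid_rem_device_weight(cpuid, rdip);
 *
 * mynex_weight() stands for whatever coarse, bus-specific weight the nexus
 * assigns to the child rdip (e.g. from pci class codes, as noted above).
 */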