/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/sysmacros.h>
#include <sys/stack.h>
#include <sys/cpuvar.h>
#include <sys/ivintr.h>
#include <sys/intreg.h>
#include <sys/membar.h>
#include <sys/kmem.h>
#include <sys/intr.h>
#include <sys/sunndi.h>
#include <sys/cmn_err.h>
#include <sys/privregs.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <vm/seg_kp.h>
#include <sys/debug.h>
#include <sys/cyclic.h>

#include <sys/cpu_sgnblk_defs.h>

kmutex_t soft_iv_lock;	/* protect software interrupt vector table */
/* Global locks which protect the interrupt distribution lists */
static kmutex_t intr_dist_lock;
static kmutex_t intr_dist_cpu_lock;

/* Head of the interrupt distribution lists */
static struct intr_dist *intr_dist_head = NULL;
static struct intr_dist *intr_dist_whead = NULL;

uint_t swinum_base;
uint_t maxswinum;
uint_t siron_inum;
uint_t poke_cpu_inum;
/*
 * Note:
 * siron_pending was originally created to prevent a resource over-consumption
 * bug in setsoftint (exhaustion of the interrupt pool free list).
 * Its original intention is obsolete with the use of iv_pending in
 * setsoftint.  However, siron_pending stayed around, acting as a second
 * gatekeeper preventing soft interrupts from being queued.  In this capacity,
 * it can lead to hangs on MP systems, where due to global visibility issues
 * it can end up set while iv_pending is reset, preventing soft interrupts from
 * ever being processed.  In addition to its gatekeeper role, intr_init() also
 * uses it to flag the situation where siron() was called before siron_inum has
 * been defined.
 *
 * siron() does not need an extra gatekeeper; any cpu that wishes should be
 * allowed to queue a soft interrupt.  It is softint()'s job to ensure
 * correct handling of the queues.  Therefore, siron_pending has been
 * stripped of its gatekeeper task, retaining only its intr_init job, where
 * it indicates that there is a pending need to call siron().
 */
int siron_pending;

int intr_policy = INTR_WEIGHTED_DIST;	/* interrupt distribution policy */
int intr_dist_debug = 0;
int32_t intr_dist_weight_max = 1;
int32_t intr_dist_weight_maxmax = 1000;
int intr_dist_weight_maxfactor = 2;
#define	INTR_DEBUG(args) if (intr_dist_debug) cmn_err args

static void sw_ivintr_init(cpu_t *);

/*
 * intr_init() - interrupt initialization
 *	Initialize the system's software interrupt vector table and
 *	CPU's interrupt free list
 */
void
intr_init(cpu_t *cp)
{
        init_ivintr();
        sw_ivintr_init(cp);
        init_intr_pool(cp);

        mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);

        /*
         * A soft interrupt may have been requested prior to the initialization
         * of soft interrupts.  Soft interrupts can't be dispatched until after
         * init_intr_pool, so we have to wait until now before we can dispatch
         * the pending soft interrupt (if any).
         */
        if (siron_pending) {
                siron_pending = 0;
                siron();
        }
}

/*
 * poke_cpu_intr - fall through when poke_cpu calls
 */

/* ARGSUSED */
uint_t
poke_cpu_intr(caddr_t arg1, caddr_t arg2)
{
        CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
        membar_stld_stst();
        return (1);
}

/*
 * sw_ivintr_init() - software interrupt vector initialization
 *	called after CPU is active
 *	the software interrupt vector table is part of the intr_vector[]
 */
static void
sw_ivintr_init(cpu_t *cp)
{
        extern uint_t softlevel1();

        mutex_init(&soft_iv_lock, NULL, MUTEX_DEFAULT, NULL);

        swinum_base = SOFTIVNUM;

        /*
         * the maximum software interrupt == MAX_SOFT_INO
         */
        maxswinum = swinum_base + MAX_SOFT_INO;

        REGISTER_BBUS_INTR();

        siron_inum = add_softintr(PIL_1, softlevel1, 0);
        poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0);
        cp->cpu_m.poke_cpu_outstanding = B_FALSE;
}
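
/*
 * Illustrative sketch (not compiled, not part of this file's interfaces):
 * the registration pattern used above for siron_inum and poke_cpu_inum.
 * A client registers a handler once with add_softintr(), saves the
 * returned inum, and later posts the soft interrupt with setsoftint().
 * The handler, inum, and PIL choices below are examples only.
 */
#if 0
static uint_t example_softint_inum;		/* hypothetical */

/* ARGSUSED */
static uint_t
example_softint_handler(caddr_t arg1, caddr_t arg2)
{
        /* runs at the PIL passed to add_softintr() */
        return (1);				/* claim the interrupt */
}

static void
example_softint_setup(void)
{
        /* register once; PIL_1 chosen only for illustration */
        example_softint_inum = add_softintr(PIL_1, example_softint_handler, 0);
}

static void
example_softint_post(void)
{
        /* queue the soft interrupt for processing */
        setsoftint(example_softint_inum);
}
#endif	/* illustrative sketch */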

cpuset_t intr_add_pools_inuse;

/*
 * cleanup_intr_pool()
 *	Free up the extra intr request pool for this cpu.
 */
void
cleanup_intr_pool(cpu_t *cp)
{
        extern struct intr_req *intr_add_head;
        int poolno;
        struct intr_req *pool;

        poolno = cp->cpu_m.intr_pool_added;
        if (poolno >= 0) {
                cp->cpu_m.intr_pool_added = -1;
                pool = (poolno * INTR_PENDING_MAX * intr_add_pools) +
                    intr_add_head;	/* not byte arithmetic */
                bzero(pool, INTR_PENDING_MAX * intr_add_pools *
                    sizeof (struct intr_req));

                CPUSET_DEL(intr_add_pools_inuse, poolno);
        }
}

/*
 * init_intr_pool()
 *	initialize the intr request pool for the cpu
 *	should be called for each cpu
 */
void
init_intr_pool(cpu_t *cp)
{
        extern struct intr_req *intr_add_head;
#ifdef	DEBUG
        extern struct intr_req *intr_add_tail;
#endif	/* DEBUG */
        int i, pool;

        cp->cpu_m.intr_pool_added = -1;

        for (i = 0; i < INTR_PENDING_MAX-1; i++) {
                cp->cpu_m.intr_pool[i].intr_next =
                    &cp->cpu_m.intr_pool[i+1];
        }
        cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = NULL;

        cp->cpu_m.intr_head[0] = &cp->cpu_m.intr_pool[0];
        cp->cpu_m.intr_tail[0] = &cp->cpu_m.intr_pool[INTR_PENDING_MAX-1];

        if (intr_add_pools != 0) {

                /*
                 * If additional interrupt pools have been allocated,
                 * initialize those too and add them to the free list.
                 */

                struct intr_req *trace;

                for (pool = 0; pool < max_ncpus; pool++) {
                        if (!(CPU_IN_SET(intr_add_pools_inuse, pool)))
                                break;
                }
                if (pool >= max_ncpus) {
                        /*
                         * XXX - intr pools are alloc'd, just not as
                         * much as we would like.
                         */
                        cmn_err(CE_WARN, "Failed to alloc all requested intr "
                            "pools for cpu%d", cp->cpu_id);
                        return;
                }
                CPUSET_ADD(intr_add_pools_inuse, pool);
                cp->cpu_m.intr_pool_added = pool;

                trace = (pool * INTR_PENDING_MAX * intr_add_pools) +
                    intr_add_head;	/* not byte arithmetic */

                cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = trace;

                for (i = 1; i < intr_add_pools * INTR_PENDING_MAX; i++, trace++)
                        trace->intr_next = trace + 1;
                trace->intr_next = NULL;

                ASSERT(trace >= intr_add_head && trace <= intr_add_tail);

                cp->cpu_m.intr_tail[0] = trace;
        }
}


/*
 * siron - primitive for sun/os/softint.c
 */
void
siron(void)
{
        if (siron_inum != 0)
                setsoftint(siron_inum);
        else
                siron_pending = 1;
}

/*
 * no_ivintr()
 *	called by vec_interrupt() through sys_trap()
 *	vector interrupt received but not valid or not
 *	registered in intr_vector[]
 *	considered as a spurious mondo interrupt
 */
/* ARGSUSED */
void
no_ivintr(struct regs *rp, int inum, int pil)
{
        cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
            inum, pil);

#ifdef DEBUG_VEC_INTR
        prom_enter_mon();
#endif /* DEBUG_VEC_INTR */
}

/*
 * no_intr_pool()
 *	called by vec_interrupt() through sys_trap()
 *	vector interrupt received but no intr_req entries
 */
/* ARGSUSED */
void
no_intr_pool(struct regs *rp, int inum, int pil)
{
#ifdef DEBUG_VEC_INTR
        cmn_err(CE_WARN, "intr_req pool empty: num 0x%x, pil 0x%x",
            inum, pil);
        prom_enter_mon();
#else
        cmn_err(CE_PANIC, "intr_req pool empty: num 0x%x, pil 0x%x",
            inum, pil);
#endif /* DEBUG_VEC_INTR */
}

void
intr_dequeue_req(uint_t pil, uint32_t inum)
{
        struct intr_req *ir, *prev;
        struct machcpu *mcpu;
        uint32_t clr;
        extern uint_t getpstate(void);

        ASSERT((getpstate() & PSTATE_IE) == 0);

        mcpu = &CPU->cpu_m;

        /* Find a matching entry in the list */
        prev = NULL;
        ir = mcpu->intr_head[pil];
        while (ir != NULL) {
                if (ir->intr_number == inum)
                        break;
                prev = ir;
                ir = ir->intr_next;
        }
        if (ir != NULL) {
                /*
                 * Remove entry from list
                 */
                if (prev != NULL)
                        prev->intr_next = ir->intr_next;	/* non-head */
                else
                        mcpu->intr_head[pil] = ir->intr_next;	/* head */

                if (ir->intr_next == NULL)
                        mcpu->intr_tail[pil] = prev;	/* tail */

                /*
                 * Place on free list
                 */
                ir->intr_next = mcpu->intr_head[0];
                mcpu->intr_head[0] = ir;
        }

        /*
         * clear pending interrupts at this level if the list is empty
         */
        if (mcpu->intr_head[pil] == NULL) {
                clr = 1 << pil;
                if (pil == PIL_14)
                        clr |= (TICK_INT_MASK | STICK_INT_MASK);
                wr_clr_softint(clr);
        }
}

/*
 * Send a directed interrupt of specified interrupt number id to a cpu.
 */
void
send_dirint(
        int cpuix,		/* cpu to be interrupted */
        int intr_id)		/* interrupt number id */
{
        xt_one(cpuix, setsoftint_tl1, intr_id, 0);
}

void
init_intr_threads(struct cpu *cp)
{
        int i;

        for (i = 0; i < NINTR_THREADS; i++)
                thread_create_intr(cp);

        cp->cpu_intr_stack = (caddr_t)segkp_get(segkp, INTR_STACK_SIZE,
            KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED) +
            INTR_STACK_SIZE - SA(MINFRAME);
}

/*
 * Take the specified CPU out of participation in interrupts.
 *	Called by p_online(2) when a processor is being taken off-line.
 *	This allows interrupt threads being handled on the processor to
 *	complete before the processor is idled.
 */
int
cpu_disable_intr(struct cpu *cp)
{
        ASSERT(MUTEX_HELD(&cpu_lock));

        /*
         * Turn off the CPU_ENABLE flag before calling the redistribution
         * function, since it checks for this in the cpu flags.
         */
        cp->cpu_flags &= ~CPU_ENABLE;

        intr_redist_all_cpus();

        return (0);
}

/*
 * Allow the specified CPU to participate in interrupts.
 *	Called by p_online(2) if a processor could not be taken off-line
 *	because of bound threads, in order to resume processing interrupts.
 *	Also called after starting a processor.
 */
void
cpu_enable_intr(struct cpu *cp)
{
        ASSERT(MUTEX_HELD(&cpu_lock));

        cp->cpu_flags |= CPU_ENABLE;

        intr_redist_all_cpus();
}

/*
 * Add function to callback list for intr_redist_all_cpus.  We keep two lists,
 * one for weighted callbacks and one for normal callbacks.  Weighted callbacks
 * are issued to redirect interrupts of a specified weight, from heavy to
 * light.  This allows all the interrupts of a given weight to be redistributed
 * for all weighted nexus drivers prior to those of less weight.
 */
static void
intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
{
        struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
        struct intr_dist *iptr;
        struct intr_dist **pptr;

        ASSERT(func);
        new->func = func;
        new->arg = arg;
        new->next = NULL;

        /* Add to tail so that redistribution occurs in original order. */
        mutex_enter(&intr_dist_lock);
        for (iptr = *phead, pptr = phead; iptr != NULL;
            pptr = &iptr->next, iptr = iptr->next) {
                /* check for problems as we locate the tail */
                if ((iptr->func == func) && (iptr->arg == arg)) {
                        cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
                        /*NOTREACHED*/
                }
        }
        *pptr = new;

        mutex_exit(&intr_dist_lock);
}

void
intr_dist_add(void (*func)(void *), void *arg)
{
        intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
}

void
intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
        intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
}
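
/*
 * Illustrative sketch (not compiled): how a weighted nexus driver might
 * register for redistribution callbacks.  intr_dist_add_weighted() would
 * typically be called from the nexus attach path and
 * intr_dist_rem_weighted() from detach.  The names example_px_redist and
 * example_px_state are invented for the example, not an existing interface.
 */
#if 0
static void
example_px_redist(void *arg, int32_t weight_max, int32_t weight)
{
        /* retarget this nexus' interrupts of the given weight */
}

static void
example_px_attach(void *example_px_state)
{
        intr_dist_add_weighted(example_px_redist, example_px_state);
}

static void
example_px_detach(void *example_px_state)
{
        intr_dist_rem_weighted(example_px_redist, example_px_state);
}
#endif	/* illustrative sketch */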

/*
 * Search for the interrupt distribution structure with the specified
 * mondo vec reg in the interrupt distribution list.  If a match is found,
 * then delete the entry from the list.  The caller is responsible for
 * modifying the mondo vector registers.
 */
static void
intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
{
        struct intr_dist *iptr;
        struct intr_dist **vect;

        mutex_enter(&intr_dist_lock);
        for (iptr = *headp, vect = headp;
            iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
                if ((iptr->func == func) && (iptr->arg == arg)) {
                        *vect = iptr->next;
                        kmem_free(iptr, sizeof (struct intr_dist));
                        mutex_exit(&intr_dist_lock);
                        return;
                }
        }

        if (!panicstr)
                cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
        mutex_exit(&intr_dist_lock);
}

void
intr_dist_rem(void (*func)(void *), void *arg)
{
        intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
}

void
intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
        intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
}

/*
 * Initiate interrupt redistribution.  Redistribution improves the isolation
 * associated with interrupt weights by ordering operations from heavy weight
 * to light weight.  When a CPU's orientation changes relative to interrupts,
 * there is *always* a redistribution to accommodate this change (call to
 * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
 * that a redistribution could improve the quality of an initialization.  For
 * example, if you are not using a NIC it may not be attached with s10 (devfs).
 * If you then configure the NIC (ifconfig), this may cause the NIC to attach
 * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
 * occurring late, so optimal "isolation" relative to weight is not occurring.
 * The same applies to detach, although in this case doing the redistribution
 * might improve "spread" for medium weight devices since the "isolation" of
 * a higher weight device may no longer be present.
 *
 * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
 *
 * NB: There is risk associated with automatically triggering execution of the
 * redistribution code at arbitrary times.  The risk comes from the fact that
 * there is a lot of low-level hardware interaction associated with a
 * redistribution.  At some point we may want this code to perform automatic
 * redistribution (redistribution thread; trigger timeout when add/remove
 * weight delta is large enough, and call cv_signal from timeout - causing
 * thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
 * risky at this time.
 */
void
i_ddi_intr_redist_all_cpus()
{
        mutex_enter(&cpu_lock);
        INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
        intr_redist_all_cpus();
        mutex_exit(&cpu_lock);
}

/*
 * Redistribute all interrupts
 *
 * This function redistributes all interrupting devices, running the
 * parent callback functions for each node.
 */
void
intr_redist_all_cpus(void)
{
        struct cpu *cp;
        struct intr_dist *iptr;
        int32_t weight, max_weight;

        ASSERT(MUTEX_HELD(&cpu_lock));
        mutex_enter(&intr_dist_lock);

        /*
         * zero cpu_intr_weight on all cpus - it is safe to traverse
         * cpu_list since we hold cpu_lock.
         */
        cp = cpu_list;
        do {
                cp->cpu_intr_weight = 0;
        } while ((cp = cp->cpu_next) != cpu_list);

        /*
         * Assume that this redistribution may encounter a device weight
         * via driver.conf tuning of "ddi-intr-weight" that is at most
         * intr_dist_weight_maxfactor times larger.
         */
        max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
        if (max_weight > intr_dist_weight_maxmax)
                max_weight = intr_dist_weight_maxmax;
        intr_dist_weight_max = 1;

        INTR_DEBUG((CE_CONT, "intr_dist: "
            "intr_redist_all_cpus: %d-0\n", max_weight));

        /*
         * Redistribute weighted, from heavy to light.  The callback that
         * specifies a weight equal to weight_max should redirect all
         * interrupts of weight weight_max or greater [weight_max, inf.).
         * Interrupts of lesser weight should be processed on the call with
         * the matching weight.  This allows all the heavier weight interrupts
         * on all weighted busses (multiple pci busses) to be redirected prior
         * to any lesser weight interrupts.
         */
        for (weight = max_weight; weight >= 0; weight--)
                for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
                        ((void (*)(void *, int32_t, int32_t))iptr->func)
                            (iptr->arg, max_weight, weight);

        /* redistribute normal (non-weighted) interrupts */
        for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
                ((void (*)(void *))iptr->func)(iptr->arg);
        mutex_exit(&intr_dist_lock);
}

void
intr_redist_all_cpus_shutdown(void)
{
        intr_policy = INTR_CURRENT_CPU;
        intr_redist_all_cpus();
}
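
/*
 * Illustrative sketch (not compiled) of a weighted callback honoring the
 * contract described in intr_redist_all_cpus(): the weight_max call handles
 * all interrupts of weight [weight_max, inf.), later calls handle only an
 * exact weight match.  The per-device list (example_dev_list, struct
 * example_dev and its fields) and the final retarget step are invented
 * placeholders; a real nexus would reprogram its mondo/ino registers there.
 */
#if 0
static void
example_weighted_redist(void *arg, int32_t weight_max, int32_t weight)
{
        struct example_dev *dp;		/* hypothetical per-device state */

        for (dp = example_dev_list(arg); dp != NULL; dp = dp->next) {
                int32_t w = dp->intr_weight;

                /* heaviest pass takes everything at or above weight_max */
                if ((weight == weight_max) ? (w < weight_max) : (w != weight))
                        continue;

                /* pick a new target and record the device weight on it */
                dp->target_cpuid = intr_dist_cpuid();
                intr_dist_cpuid_add_device_weight(dp->target_cpuid,
                    dp->dip, w);

                /* retarget the hardware interrupt here (nexus specific) */
        }
}
#endif	/* illustrative sketch */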

/*
 * Determine what CPU to target, based on interrupt policy.
 *
 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
 *	advance through interrupt enabled cpus (round-robin).
 *
 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
 *	cpu_intr_weight, round robin when all equal.
 *
 *	Weighted interrupt distribution provides two things: "spread" of weight
 *	(associated with the algorithm itself) and "isolation" (associated with
 *	a particular device weight).  A redistribution is what provides optimal
 *	"isolation" of heavy weight interrupts; optimal "spread" of weight
 *	(relative to what came before) is always occurring.
 *
 *	An interrupt weight is a subjective number that represents the
 *	percentage of a CPU required to service a device's interrupts: the
 *	default weight is 0% (however the algorithm still maintains
 *	round-robin), a network interface controller (NIC) may have a large
 *	weight (35%).  Interrupt weight only has meaning relative to the
 *	interrupt weight of other devices: a CPU can be weighted more than
 *	100%, and a single device might consume more than 100% of a CPU.
 *
 *	A coarse interrupt weight can be defined by the parent nexus driver
 *	based on bus specific information, like pci class codes.  A nexus
 *	driver that supports device interrupt weighting for its children
 *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
 *	and removes the weight of a device from the CPU that an interrupt
 *	is directed at.  The quality of initialization improves when the
 *	device interrupt weights more accurately reflect actual run-time
 *	weights, and as the assignments are ordered from heavy to light.
 *
 *	The implementation also supports interrupt weight being specified in
 *	driver.conf files via the property "ddi-intr-weight", which takes
 *	precedence over the nexus supplied weight.  This support is added to
 *	permit possible tweaking in the product in response to customer
 *	problems.  This is not a formal or committed interface.
 *
 *	While a weighted approach chooses the CPU providing the best spread
 *	given past weights, less than optimal isolation can result in cases
 *	where heavy weight devices show up last.  The nexus driver's interrupt
 *	redistribution logic should use intr_dist_add/rem_weighted so that
 *	interrupts can be redistributed heavy first for optimal isolation.
 */
uint32_t
intr_dist_cpuid(void)
{
        static struct cpu *curr_cpu;
        struct cpu *start_cpu;
        struct cpu *new_cpu;
        struct cpu *cp;
        int cpuid = -1;

        /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
        mutex_enter(&intr_dist_cpu_lock);

        switch (intr_policy) {
        case INTR_CURRENT_CPU:
                cpuid = CPU->cpu_id;
                break;

        case INTR_BOOT_CPU:
                panic("INTR_BOOT_CPU no longer supported.");
                /*NOTREACHED*/

        case INTR_FLAT_DIST:
        case INTR_WEIGHTED_DIST:
        default:
                /*
                 * Ensure that curr_cpu is valid - cpu_next will be NULL if
                 * the cpu has been deleted (cpu structs are never freed).
                 */
                if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
                        curr_cpu = CPU;

                /*
                 * Advance to online CPU after curr_cpu (round-robin).  For
                 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
                 * weight.  For a nexus that does not support weight the
                 * default weight of zero is used, and we degrade to
                 * round-robin behavior among equal weights.
                 *
                 * Disable preemption while traversing cpu_next_onln to
                 * ensure the list does not change.  This works because
                 * modifiers of this list and other lists in a struct cpu
                 * call pause_cpus() before making changes.
                 */
                kpreempt_disable();
                cp = start_cpu = curr_cpu->cpu_next_onln;
                new_cpu = NULL;
                do {
                        /* Skip CPUs with interrupts disabled */
                        if ((cp->cpu_flags & CPU_ENABLE) == 0)
                                continue;

                        if (intr_policy == INTR_FLAT_DIST) {
                                /* select CPU */
                                new_cpu = cp;
                                break;
                        } else if ((new_cpu == NULL) ||
                            (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
                                /* Choose if lighter weight */
                                new_cpu = cp;
                        }
                } while ((cp = cp->cpu_next_onln) != start_cpu);
                ASSERT(new_cpu);
                cpuid = new_cpu->cpu_id;

                INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
                    "targeted\n", cpuid, new_cpu->cpu_intr_weight));

                /* update static pointer for next round-robin */
                curr_cpu = new_cpu;
                kpreempt_enable();
                break;
        }
        mutex_exit(&intr_dist_cpu_lock);
        return (cpuid);
}
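
/*
 * Illustrative sketch (not compiled): a nexus typically calls
 * intr_dist_cpuid() once per interrupt being enabled and then programs
 * its interrupt mapping (mondo) register with the returned cpuid.  The
 * struct example_dev, its fields, and the programming step below are
 * placeholders invented for the example.
 */
#if 0
static void
example_enable_intr(struct example_dev *dp)
{
        uint32_t cpuid = intr_dist_cpuid();	/* policy-based target */

        dp->target_cpuid = cpuid;
        /* program the interrupt mapping register to target cpuid here */
}
#endif	/* illustrative sketch */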

/*
 * Add or remove the weight of a device from a CPU's interrupt weight.
 *
 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
 * their children to improve the overall quality of interrupt initialization.
 *
 * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
 * among multiple devices (sharing ino) then the nexus should call
 * intr_dist_cpuid_add/rem_device_weight for each device separately.  Devices
 * that share must specify the same cpuid.
 *
 * If a nexus driver is unable to determine the cpu at remove_intr time
 * for some of its interrupts, then it should not call add_device_weight -
 * intr_dist_cpuid will still provide round-robin.
 *
 * An established device weight (from dev_info node) takes precedence over
 * the weight passed in.  If a device weight is not already established
 * then the passed in nexus weight is established.
 */
void
intr_dist_cpuid_add_device_weight(uint32_t cpuid,
    dev_info_t *dip, int32_t nweight)
{
        int32_t eweight;

        /*
         * For non-weighted policy everything has weight of zero (and we get
         * round-robin distribution from intr_dist_cpuid).
         * NB: intr_policy is limited to this file.  A weighted nexus driver
         * calls this routine even if intr_policy has been patched to
         * INTR_FLAT_DIST.
         */
        ASSERT(dip);
        if (intr_policy != INTR_WEIGHTED_DIST)
                return;

        eweight = i_ddi_get_intr_weight(dip);
        INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
            "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
            nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
            ddi_get_instance(ddi_get_parent(dip)),
            ddi_driver_name(dip), ddi_get_instance(dip)));

        /* if no established weight, establish nexus weight */
        if (eweight < 0) {
                if (nweight > 0)
                        (void) i_ddi_set_intr_weight(dip, nweight);
                else
                        nweight = 0;
        } else
                nweight = eweight;	/* use established weight */

        /* Establish exclusion for cpu_intr_weight manipulation */
        mutex_enter(&intr_dist_cpu_lock);
        cpu[cpuid]->cpu_intr_weight += nweight;

        /* update intr_dist_weight_max */
        if (nweight > intr_dist_weight_max)
                intr_dist_weight_max = nweight;
        mutex_exit(&intr_dist_cpu_lock);
}

void
intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
{
        struct cpu *cp;
        int32_t weight;

        ASSERT(dip);
        if (intr_policy != INTR_WEIGHTED_DIST)
                return;

        /* remove weight of device from cpu */
        weight = i_ddi_get_intr_weight(dip);
        if (weight < 0)
                weight = 0;
        INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for "
            "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
            ddi_driver_name(ddi_get_parent(dip)),
            ddi_get_instance(ddi_get_parent(dip)),
            ddi_driver_name(dip), ddi_get_instance(dip)));

        /* Establish exclusion for cpu_intr_weight manipulation */
        mutex_enter(&intr_dist_cpu_lock);
        cp = cpu[cpuid];
        cp->cpu_intr_weight -= weight;
        if (cp->cpu_intr_weight < 0)
                cp->cpu_intr_weight = 0;	/* sanity */
        mutex_exit(&intr_dist_cpu_lock);
}
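
/*
 * Illustrative sketch (not compiled): a weighted nexus brackets its
 * add_intr/remove_intr paths with the two routines above so that
 * cpu_intr_weight tracks where device weight currently lives.  The struct
 * example_dev, its fields, and the coarse 35% NIC weight are invented for
 * the example.
 */
#if 0
static void
example_add_intr(struct example_dev *dp)
{
        int32_t weight = 35;		/* e.g. coarse class-based NIC weight */

        dp->target_cpuid = intr_dist_cpuid();
        intr_dist_cpuid_add_device_weight(dp->target_cpuid, dp->dip, weight);
        /* program the mondo target to dp->target_cpuid here */
}

static void
example_remove_intr(struct example_dev *dp)
{
        intr_dist_cpuid_rem_device_weight(dp->target_cpuid, dp->dip);
        /* disable / untarget the hardware interrupt here */
}
#endif	/* illustrative sketch */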