1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/sysmacros.h> 30 #include <sys/stack.h> 31 #include <sys/cpuvar.h> 32 #include <sys/ivintr.h> 33 #include <sys/intreg.h> 34 #include <sys/membar.h> 35 #include <sys/kmem.h> 36 #include <sys/intr.h> 37 #include <sys/sunndi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/privregs.h> 40 #include <sys/systm.h> 41 #include <sys/archsystm.h> 42 #include <sys/machsystm.h> 43 #include <sys/x_call.h> 44 #include <vm/seg_kp.h> 45 #include <sys/debug.h> 46 #include <sys/cyclic.h> 47 48 #include <sys/cpu_sgnblk_defs.h> 49 50 kmutex_t soft_iv_lock; /* protect software interrupt vector table */ 51 /* Global locks which protect the interrupt distribution lists */ 52 static kmutex_t intr_dist_lock; 53 static kmutex_t intr_dist_cpu_lock; 54 55 /* Head of the interrupt distribution lists */ 56 static struct intr_dist *intr_dist_head = NULL; 57 static struct intr_dist *intr_dist_whead = NULL; 58 59 uint_t swinum_base; 60 uint_t maxswinum; 61 uint_t 
siron_inum; 62 uint_t poke_cpu_inum; 63 int siron_pending; 64 65 int intr_policy = INTR_WEIGHTED_DIST; /* interrupt distribution policy */ 66 int intr_dist_debug = 0; 67 int32_t intr_dist_weight_max = 1; 68 int32_t intr_dist_weight_maxmax = 1000; 69 int intr_dist_weight_maxfactor = 2; 70 #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args 71 72 static void sw_ivintr_init(cpu_t *); 73 74 /* 75 * intr_init() - interrupt initialization 76 * Initialize the system's software interrupt vector table and 77 * CPU's interrupt free list 78 */ 79 void 80 intr_init(cpu_t *cp) 81 { 82 init_ivintr(); 83 sw_ivintr_init(cp); 84 init_intr_pool(cp); 85 86 mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL); 87 mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL); 88 89 /* 90 * A soft interrupt may have been requested prior to the initialization 91 * of soft interrupts. Soft interrupts can't be dispatched until after 92 * init_intr_pool, so we have to wait until now before we can dispatch 93 * the pending soft interrupt (if any). 
94 */ 95 if (siron_pending) 96 setsoftint(siron_inum); 97 } 98 99 /* 100 * poke_cpu_intr - fall through when poke_cpu calls 101 */ 102 103 /* ARGSUSED */ 104 uint_t 105 poke_cpu_intr(caddr_t arg1, caddr_t arg2) 106 { 107 CPU->cpu_m.poke_cpu_outstanding = B_FALSE; 108 membar_stld_stst(); 109 return (1); 110 } 111 112 /* 113 * sw_ivintr_init() - software interrupt vector initialization 114 * called after CPU is active 115 * the software interrupt vector table is part of the intr_vector[] 116 */ 117 static void 118 sw_ivintr_init(cpu_t *cp) 119 { 120 extern uint_t softlevel1(); 121 122 mutex_init(&soft_iv_lock, NULL, MUTEX_DEFAULT, NULL); 123 124 swinum_base = SOFTIVNUM; 125 126 /* 127 * the maximum software interrupt == MAX_SOFT_INO 128 */ 129 maxswinum = swinum_base + MAX_SOFT_INO; 130 131 REGISTER_BBUS_INTR(); 132 133 siron_inum = add_softintr(PIL_1, softlevel1, 0); 134 poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0); 135 cp->cpu_m.poke_cpu_outstanding = B_FALSE; 136 } 137 138 cpuset_t intr_add_pools_inuse; 139 140 /* 141 * cleanup_intr_pool() 142 * Free up the extra intr request pool for this cpu. 
143 */ 144 void 145 cleanup_intr_pool(cpu_t *cp) 146 { 147 extern struct intr_req *intr_add_head; 148 int poolno; 149 struct intr_req *pool; 150 151 poolno = cp->cpu_m.intr_pool_added; 152 if (poolno >= 0) { 153 cp->cpu_m.intr_pool_added = -1; 154 pool = (poolno * INTR_PENDING_MAX * intr_add_pools) + 155 156 intr_add_head; /* not byte arithmetic */ 157 bzero(pool, INTR_PENDING_MAX * intr_add_pools * 158 sizeof (struct intr_req)); 159 160 CPUSET_DEL(intr_add_pools_inuse, poolno); 161 } 162 } 163 164 /* 165 * init_intr_pool() 166 * initialize the intr request pool for the cpu 167 * should be called for each cpu 168 */ 169 void 170 init_intr_pool(cpu_t *cp) 171 { 172 extern struct intr_req *intr_add_head; 173 #ifdef DEBUG 174 extern struct intr_req *intr_add_tail; 175 #endif /* DEBUG */ 176 int i, pool; 177 178 cp->cpu_m.intr_pool_added = -1; 179 180 for (i = 0; i < INTR_PENDING_MAX-1; i++) { 181 cp->cpu_m.intr_pool[i].intr_next = 182 &cp->cpu_m.intr_pool[i+1]; 183 } 184 cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = NULL; 185 186 cp->cpu_m.intr_head[0] = &cp->cpu_m.intr_pool[0]; 187 cp->cpu_m.intr_tail[0] = &cp->cpu_m.intr_pool[INTR_PENDING_MAX-1]; 188 189 if (intr_add_pools != 0) { 190 191 /* 192 * If additional interrupt pools have been allocated, 193 * initialize those too and add them to the free list. 194 */ 195 196 struct intr_req *trace; 197 198 for (pool = 0; pool < max_ncpus; pool++) { 199 if (!(CPU_IN_SET(intr_add_pools_inuse, pool))) 200 break; 201 } 202 if (pool >= max_ncpus) { 203 /* 204 * XXX - intr pools are alloc'd, just not as 205 * much as we would like. 
206 */ 207 cmn_err(CE_WARN, "Failed to alloc all requested intr " 208 "pools for cpu%d", cp->cpu_id); 209 return; 210 } 211 CPUSET_ADD(intr_add_pools_inuse, pool); 212 cp->cpu_m.intr_pool_added = pool; 213 214 trace = (pool * INTR_PENDING_MAX * intr_add_pools) + 215 intr_add_head; /* not byte arithmetic */ 216 217 cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = trace; 218 219 for (i = 1; i < intr_add_pools * INTR_PENDING_MAX; i++, trace++) 220 trace->intr_next = trace + 1; 221 trace->intr_next = NULL; 222 223 ASSERT(trace >= intr_add_head && trace <= intr_add_tail); 224 225 cp->cpu_m.intr_tail[0] = trace; 226 } 227 } 228 229 230 /* 231 * siron - primitive for sun/os/softint.c 232 */ 233 void 234 siron(void) 235 { 236 if (!siron_pending) { 237 siron_pending = 1; 238 if (siron_inum != 0) 239 setsoftint(siron_inum); 240 } 241 } 242 243 /* 244 * no_ivintr() 245 * called by vec_interrupt() through sys_trap() 246 * vector interrupt received but not valid or not 247 * registered in intr_vector[] 248 * considered as a spurious mondo interrupt 249 */ 250 /* ARGSUSED */ 251 void 252 no_ivintr(struct regs *rp, int inum, int pil) 253 { 254 cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x", 255 inum, pil); 256 257 258 #ifdef DEBUG_VEC_INTR 259 prom_enter_mon(); 260 #endif /* DEBUG_VEC_INTR */ 261 } 262 263 /* 264 * no_intr_pool() 265 * called by vec_interrupt() through sys_trap() 266 * vector interrupt received but no intr_req entries 267 */ 268 /* ARGSUSED */ 269 void 270 no_intr_pool(struct regs *rp, int inum, int pil) 271 { 272 #ifdef DEBUG_VEC_INTR 273 cmn_err(CE_WARN, "intr_req pool empty: num 0x%x, pil 0x%x", 274 inum, pil); 275 prom_enter_mon(); 276 #else 277 cmn_err(CE_PANIC, "intr_req pool empty: num 0x%x, pil 0x%x", 278 inum, pil); 279 #endif /* DEBUG_VEC_INTR */ 280 } 281 282 void 283 intr_dequeue_req(uint_t pil, uint32_t inum) 284 { 285 struct intr_req *ir, *prev; 286 struct machcpu *mcpu; 287 uint32_t clr; 288 extern uint_t getpstate(void); 289 290 
ASSERT((getpstate() & PSTATE_IE) == 0); 291 292 mcpu = &CPU->cpu_m; 293 294 /* Find a matching entry in the list */ 295 prev = NULL; 296 ir = mcpu->intr_head[pil]; 297 while (ir != NULL) { 298 if (ir->intr_number == inum) 299 break; 300 prev = ir; 301 ir = ir->intr_next; 302 } 303 if (ir != NULL) { 304 /* 305 * Remove entry from list 306 */ 307 if (prev != NULL) 308 prev->intr_next = ir->intr_next; /* non-head */ 309 else 310 mcpu->intr_head[pil] = ir->intr_next; /* head */ 311 312 if (ir->intr_next == NULL) 313 mcpu->intr_tail[pil] = prev; /* tail */ 314 315 /* 316 * Place on free list 317 */ 318 ir->intr_next = mcpu->intr_head[0]; 319 mcpu->intr_head[0] = ir; 320 } 321 322 /* 323 * clear pending interrupts at this level if the list is empty 324 */ 325 if (mcpu->intr_head[pil] == NULL) { 326 clr = 1 << pil; 327 if (pil == PIL_14) 328 clr |= (TICK_INT_MASK | STICK_INT_MASK); 329 wr_clr_softint(clr); 330 } 331 } 332 333 334 /* 335 * Send a directed interrupt of specified interrupt number id to a cpu. 336 */ 337 void 338 send_dirint( 339 int cpuix, /* cpu to be interrupted */ 340 int intr_id) /* interrupt number id */ 341 { 342 xt_one(cpuix, setsoftint_tl1, intr_id, 0); 343 } 344 345 void 346 init_intr_threads(struct cpu *cp) 347 { 348 int i; 349 350 for (i = 0; i < NINTR_THREADS; i++) 351 thread_create_intr(cp); 352 353 cp->cpu_intr_stack = (caddr_t)segkp_get(segkp, INTR_STACK_SIZE, 354 KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED) + 355 INTR_STACK_SIZE - SA(MINFRAME); 356 } 357 358 /* 359 * Take the specified CPU out of participation in interrupts. 360 * Called by p_online(2) when a processor is being taken off-line. 361 * This allows interrupt threads being handled on the processor to 362 * complete before the processor is idled. 363 */ 364 int 365 cpu_disable_intr(struct cpu *cp) 366 { 367 ASSERT(MUTEX_HELD(&cpu_lock)); 368 369 /* 370 * Turn off the CPU_ENABLE flag before calling the redistribution 371 * function, since it checks for this in the cpu flags. 
372 */ 373 cp->cpu_flags &= ~CPU_ENABLE; 374 375 intr_redist_all_cpus(); 376 377 return (0); 378 } 379 380 /* 381 * Allow the specified CPU to participate in interrupts. 382 * Called by p_online(2) if a processor could not be taken off-line 383 * because of bound threads, in order to resume processing interrupts. 384 * Also called after starting a processor. 385 */ 386 void 387 cpu_enable_intr(struct cpu *cp) 388 { 389 ASSERT(MUTEX_HELD(&cpu_lock)); 390 391 cp->cpu_flags |= CPU_ENABLE; 392 393 intr_redist_all_cpus(); 394 } 395 396 /* 397 * Add function to callback list for intr_redist_all_cpus. We keep two lists, 398 * one for weighted callbacks and one for normal callbacks. Weighted callbacks 399 * are issued to redirect interrupts of a specified weight, from heavy to 400 * light. This allows all the interrupts of a given weight to be redistributed 401 * for all weighted nexus drivers prior to those of less weight. 402 */ 403 static void 404 intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg) 405 { 406 struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP); 407 struct intr_dist *iptr; 408 struct intr_dist **pptr; 409 410 ASSERT(func); 411 new->func = func; 412 new->arg = arg; 413 new->next = NULL; 414 415 /* Add to tail so that redistribution occurs in original order. 
*/ 416 mutex_enter(&intr_dist_lock); 417 for (iptr = *phead, pptr = phead; iptr != NULL; 418 pptr = &iptr->next, iptr = iptr->next) { 419 /* check for problems as we locate the tail */ 420 if ((iptr->func == func) && (iptr->arg == arg)) { 421 cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate"); 422 /*NOTREACHED*/ 423 } 424 } 425 *pptr = new; 426 427 mutex_exit(&intr_dist_lock); 428 } 429 430 void 431 intr_dist_add(void (*func)(void *), void *arg) 432 { 433 intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg); 434 } 435 436 void 437 intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 438 { 439 intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg); 440 } 441 442 /* 443 * Search for the interrupt distribution structure with the specified 444 * mondo vec reg in the interrupt distribution list. If a match is found, 445 * then delete the entry from the list. The caller is responsible for 446 * modifying the mondo vector registers. 447 */ 448 static void 449 intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg) 450 { 451 struct intr_dist *iptr; 452 struct intr_dist **vect; 453 454 mutex_enter(&intr_dist_lock); 455 for (iptr = *headp, vect = headp; 456 iptr != NULL; vect = &iptr->next, iptr = iptr->next) { 457 if ((iptr->func == func) && (iptr->arg == arg)) { 458 *vect = iptr->next; 459 kmem_free(iptr, sizeof (struct intr_dist)); 460 mutex_exit(&intr_dist_lock); 461 return; 462 } 463 } 464 465 if (!panicstr) 466 cmn_err(CE_PANIC, "intr_dist_rem_list: not found"); 467 mutex_exit(&intr_dist_lock); 468 } 469 470 void 471 intr_dist_rem(void (*func)(void *), void *arg) 472 { 473 intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg); 474 } 475 476 void 477 intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 478 { 479 intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg); 480 } 481 482 /* 483 * Initiate interrupt redistribution. 
 * Redistribution improves the isolation
 * associated with interrupt weights by ordering operations from heavy weight
 * to light weight.  When a CPU's orientation changes relative to interrupts,
 * there is *always* a redistribution to accommodate this change (call to
 * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
 * that a redistribution could improve the quality of an initialization.  For
 * example, if you are not using a NIC it may not be attached with s10 (devfs).
 * If you then configure the NIC (ifconfig), this may cause the NIC to attach
 * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
 * occurring late, so optimal "isolation" relative to weight is not occurring.
 * The same applies to detach, although in this case doing the redistribution
 * might improve "spread" for medium weight devices since the "isolation" of
 * a higher weight device may no longer be present.
 *
 * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
 *
 * NB: There is risk associated with automatically triggering execution of the
 * redistribution code at arbitrary times.  The risk comes from the fact that
 * there is a lot of low-level hardware interaction associated with a
 * redistribution.  At some point we may want this code to perform automatic
 * redistribution (redistribution thread; trigger timeout when add/remove
 * weight delta is large enough, and call cv_signal from timeout - causing
 * thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
 * risky at this time.
507 */ 508 void 509 i_ddi_intr_redist_all_cpus() 510 { 511 mutex_enter(&cpu_lock); 512 INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n")); 513 intr_redist_all_cpus(); 514 mutex_exit(&cpu_lock); 515 } 516 517 /* 518 * Redistribute all interrupts 519 * 520 * This function redistributes all interrupting devices, running the 521 * parent callback functions for each node. 522 */ 523 void 524 intr_redist_all_cpus(void) 525 { 526 struct cpu *cp; 527 struct intr_dist *iptr; 528 int32_t weight, max_weight; 529 530 ASSERT(MUTEX_HELD(&cpu_lock)); 531 mutex_enter(&intr_dist_lock); 532 533 /* 534 * zero cpu_intr_weight on all cpus - it is safe to traverse 535 * cpu_list since we hold cpu_lock. 536 */ 537 cp = cpu_list; 538 do { 539 cp->cpu_intr_weight = 0; 540 } while ((cp = cp->cpu_next) != cpu_list); 541 542 /* 543 * Assume that this redistribution may encounter a device weight 544 * via driver.conf tuning of "ddi-intr-weight" that is at most 545 * intr_dist_weight_maxfactor times larger. 546 */ 547 max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor; 548 if (max_weight > intr_dist_weight_maxmax) 549 max_weight = intr_dist_weight_maxmax; 550 intr_dist_weight_max = 1; 551 552 INTR_DEBUG((CE_CONT, "intr_dist: " 553 "intr_redist_all_cpus: %d-0\n", max_weight)); 554 555 /* 556 * Redistribute weighted, from heavy to light. The callback that 557 * specifies a weight equal to weight_max should redirect all 558 * interrupts of weight weight_max or greater [weight_max, inf.). 559 * Interrupts of lesser weight should be processed on the call with 560 * the matching weight. This allows all the heaver weight interrupts 561 * on all weighted busses (multiple pci busses) to be redirected prior 562 * to any lesser weight interrupts. 
563 */ 564 for (weight = max_weight; weight >= 0; weight--) 565 for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next) 566 ((void (*)(void *, int32_t, int32_t))iptr->func) 567 (iptr->arg, max_weight, weight); 568 569 /* redistribute normal (non-weighted) interrupts */ 570 for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next) 571 ((void (*)(void *))iptr->func)(iptr->arg); 572 mutex_exit(&intr_dist_lock); 573 } 574 575 void 576 intr_redist_all_cpus_shutdown(void) 577 { 578 intr_policy = INTR_CURRENT_CPU; 579 intr_redist_all_cpus(); 580 } 581 582 /* 583 * Determine what CPU to target, based on interrupt policy. 584 * 585 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and 586 * advance through interrupt enabled cpus (round-robin). 587 * 588 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest 589 * cpu_intr_weight, round robin when all equal. 590 * 591 * Weighted interrupt distribution provides two things: "spread" of weight 592 * (associated with algorithm itself) and "isolation" (associated with a 593 * particular device weight). A redistribution is what provides optimal 594 * "isolation" of heavy weight interrupts, optimal "spread" of weight 595 * (relative to what came before) is always occurring. 596 * 597 * An interrupt weight is a subjective number that represents the 598 * percentage of a CPU required to service a device's interrupts: the 599 * default weight is 0% (however the algorithm still maintains 600 * round-robin), a network interface controller (NIC) may have a large 601 * weight (35%). Interrupt weight only has meaning relative to the 602 * interrupt weight of other devices: a CPU can be weighted more than 603 * 100%, and a single device might consume more than 100% of a CPU. 604 * 605 * A coarse interrupt weight can be defined by the parent nexus driver 606 * based on bus specific information, like pci class codes. 
A nexus 607 * driver that supports device interrupt weighting for its children 608 * should call intr_dist_cpuid_add/rem_device_weight(), which adds 609 * and removes the weight of a device from the CPU that an interrupt 610 * is directed at. The quality of initialization improves when the 611 * device interrupt weights more accuracy reflect actual run-time weights, 612 * and as the assignments are ordered from is heavy to light. 613 * 614 * The implementation also supports interrupt weight being specified in 615 * driver.conf files via the property "ddi-intr-weight", which takes 616 * precedence over the nexus supplied weight. This support is added to 617 * permit possible tweaking in the product in response to customer 618 * problems. This is not a formal or committed interface. 619 * 620 * While a weighted approach chooses the CPU providing the best spread 621 * given past weights, less than optimal isolation can result in cases 622 * where heavy weight devices show up last. The nexus driver's interrupt 623 * redistribution logic should use intr_dist_add/rem_weighted so that 624 * interrupts can be redistributed heavy first for optimal isolation. 625 */ 626 uint32_t 627 intr_dist_cpuid(void) 628 { 629 static struct cpu *curr_cpu; 630 struct cpu *start_cpu; 631 struct cpu *new_cpu; 632 struct cpu *cp; 633 int cpuid = -1; 634 635 /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */ 636 mutex_enter(&intr_dist_cpu_lock); 637 638 switch (intr_policy) { 639 case INTR_CURRENT_CPU: 640 cpuid = CPU->cpu_id; 641 break; 642 643 case INTR_BOOT_CPU: 644 panic("INTR_BOOT_CPU no longer supported."); 645 /*NOTREACHED*/ 646 647 case INTR_FLAT_DIST: 648 case INTR_WEIGHTED_DIST: 649 default: 650 /* 651 * Ensure that curr_cpu is valid - cpu_next will be NULL if 652 * the cpu has been deleted (cpu structs are never freed). 
653 */ 654 if (curr_cpu == NULL || curr_cpu->cpu_next == NULL) 655 curr_cpu = CPU; 656 657 /* 658 * Advance to online CPU after curr_cpu (round-robin). For 659 * INTR_WEIGHTED_DIST we choose the cpu with the lightest 660 * weight. For a nexus that does not support weight the 661 * default weight of zero is used. We degrade to round-robin 662 * behavior among equal weightes. The default weight is zero 663 * and round-robin behavior continues. 664 * 665 * Disable preemption while traversing cpu_next_onln to 666 * ensure the list does not change. This works because 667 * modifiers of this list and other lists in a struct cpu 668 * call pause_cpus() before making changes. 669 */ 670 kpreempt_disable(); 671 cp = start_cpu = curr_cpu->cpu_next_onln; 672 new_cpu = NULL; 673 do { 674 /* Skip CPUs with interrupts disabled */ 675 if ((cp->cpu_flags & CPU_ENABLE) == 0) 676 continue; 677 678 if (intr_policy == INTR_FLAT_DIST) { 679 /* select CPU */ 680 new_cpu = cp; 681 break; 682 } else if ((new_cpu == NULL) || 683 (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) { 684 /* Choose if lighter weight */ 685 new_cpu = cp; 686 } 687 } while ((cp = cp->cpu_next_onln) != start_cpu); 688 ASSERT(new_cpu); 689 cpuid = new_cpu->cpu_id; 690 691 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: " 692 "targeted\n", cpuid, new_cpu->cpu_intr_weight)); 693 694 /* update static pointer for next round-robin */ 695 curr_cpu = new_cpu; 696 kpreempt_enable(); 697 break; 698 } 699 mutex_exit(&intr_dist_cpu_lock); 700 return (cpuid); 701 } 702 703 /* 704 * Add or remove the the weight of a device from a CPUs interrupt weight. 705 * 706 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for 707 * their children to improve the overall quality of interrupt initialization. 
708 * 709 * If a nexues shares the CPU returned by a single intr_dist_cpuid() call 710 * among multiple devices (sharing ino) then the nexus should call 711 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices 712 * that share must specify the same cpuid. 713 * 714 * If a nexus driver is unable to determine the cpu at remove_intr time 715 * for some of its interrupts, then it should not call add_device_weight - 716 * intr_dist_cpuid will still provide round-robin. 717 * 718 * An established device weight (from dev_info node) takes precedence over 719 * the weight passed in. If a device weight is not already established 720 * then the passed in nexus weight is established. 721 */ 722 void 723 intr_dist_cpuid_add_device_weight(uint32_t cpuid, 724 dev_info_t *dip, int32_t nweight) 725 { 726 int32_t eweight; 727 728 /* 729 * For non-weighted policy everything has weight of zero (and we get 730 * round-robin distribution from intr_dist_cpuid). 731 * NB: intr_policy is limited to this file. A weighted nexus driver is 732 * calls this rouitne even if intr_policy has been patched to 733 * INTR_FLAG_DIST. 
734 */ 735 ASSERT(dip); 736 if (intr_policy != INTR_WEIGHTED_DIST) 737 return; 738 739 eweight = i_ddi_get_intr_weight(dip); 740 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for " 741 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, 742 nweight, eweight, ddi_driver_name(ddi_get_parent(dip)), 743 ddi_get_instance(ddi_get_parent(dip)), 744 ddi_driver_name(dip), ddi_get_instance(dip))); 745 746 /* if no establish weight, establish nexus weight */ 747 if (eweight < 0) { 748 if (nweight > 0) 749 (void) i_ddi_set_intr_weight(dip, nweight); 750 else 751 nweight = 0; 752 } else 753 nweight = eweight; /* use established weight */ 754 755 /* Establish exclusion for cpu_intr_weight manipulation */ 756 mutex_enter(&intr_dist_cpu_lock); 757 cpu[cpuid]->cpu_intr_weight += nweight; 758 759 /* update intr_dist_weight_max */ 760 if (nweight > intr_dist_weight_max) 761 intr_dist_weight_max = nweight; 762 mutex_exit(&intr_dist_cpu_lock); 763 } 764 765 void 766 intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip) 767 { 768 struct cpu *cp; 769 int32_t weight; 770 771 ASSERT(dip); 772 if (intr_policy != INTR_WEIGHTED_DIST) 773 return; 774 775 /* remove weight of device from cpu */ 776 weight = i_ddi_get_intr_weight(dip); 777 if (weight < 0) 778 weight = 0; 779 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for " 780 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight, 781 ddi_driver_name(ddi_get_parent(dip)), 782 ddi_get_instance(ddi_get_parent(dip)), 783 ddi_driver_name(dip), ddi_get_instance(dip))); 784 785 /* Establish exclusion for cpu_intr_weight manipulation */ 786 mutex_enter(&intr_dist_cpu_lock); 787 cp = cpu[cpuid]; 788 cp->cpu_intr_weight -= weight; 789 if (cp->cpu_intr_weight < 0) 790 cp->cpu_intr_weight = 0; /* sanity */ 791 mutex_exit(&intr_dist_cpu_lock); 792 } 793