1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2019 Joyent, Inc.
27 */
28 /*
29 * Copyright 2019 Peter Tribble.
30 */
31
32 #include <sys/sysmacros.h>
33 #include <sys/stack.h>
34 #include <sys/cpuvar.h>
35 #include <sys/ivintr.h>
36 #include <sys/intreg.h>
37 #include <sys/membar.h>
38 #include <sys/kmem.h>
39 #include <sys/intr.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunndi.h>
42 #include <sys/cmn_err.h>
43 #include <sys/privregs.h>
44 #include <sys/systm.h>
45 #include <sys/archsystm.h>
46 #include <sys/machsystm.h>
47 #include <sys/x_call.h>
48 #include <vm/seg_kp.h>
49 #include <sys/debug.h>
50 #include <sys/cyclic.h>
51 #include <sys/kdi_impl.h>
52 #include <sys/ddi_periodic.h>
53
54 #include <sys/cpu_sgnblk_defs.h>
55
56 /* Global locks which protect the interrupt distribution lists */
57 static kmutex_t intr_dist_lock;
58 static kmutex_t intr_dist_cpu_lock;
59
60 /* Head of the interrupt distribution lists */
61 static struct intr_dist *intr_dist_head = NULL;
62 static struct intr_dist *intr_dist_whead = NULL;
63
64 static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt numbers */
65 uint64_t *siron_cpu_inum = NULL;
66 uint64_t siron_poke_cpu_inum;
67 static int siron_cpu_setup(cpu_setup_t, int, void *);
68 extern uint_t softlevel1();
69
70 static uint64_t siron1_inum; /* backward compatibility */
71 uint64_t poke_cpu_inum;
72 uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
73 uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2);
74
75 /*
76 * Variable to enable/disable printing a message when an invalid vecintr
77 * is received.
78 */
79 uint_t ignore_invalid_vecintr = 0;
80
81 /*
82 * Note:-
83 * siron_pending was originally created to prevent a resource over consumption
84 * bug in setsoftint(exhaustion of interrupt pool free list).
85 * It's original intention is obsolete with the use of iv_pending in
86 * setsoftint. However, siron_pending stayed around, acting as a second
87 * gatekeeper preventing soft interrupts from being queued. In this capacity,
88 * it can lead to hangs on MP systems, where due to global visibility issues
89 * it can end up set while iv_pending is reset, preventing soft interrupts from
90 * ever being processed. In addition to its gatekeeper role, init_intr also
91 * uses it to flag the situation where siron() was called before siron_inum has
92 * been defined.
93 *
94 * siron() does not need an extra gatekeeper; any cpu that wishes should be
95 * allowed to queue a soft interrupt. It is softint()'s job to ensure
96 * correct handling of the queues. Therefore, siron_pending has been
97 * stripped of its gatekeeper task, retaining only its intr_init job, where
98 * it indicates that there is a pending need to call siron().
99 */
100 static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */
101 static int siron1_pending; /* backward compatibility */
102
103 int intr_policy = INTR_WEIGHTED_DIST; /* interrupt distribution policy */
104 int intr_dist_debug = 0;
105 int32_t intr_dist_weight_max = 1;
106 int32_t intr_dist_weight_maxmax = 1000;
107 int intr_dist_weight_maxfactor = 2;
108 #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args
109
110 /*
111 * intr_init() - Interrupt initialization
112 * Initialize the system's interrupt vector table.
113 */
114 void
intr_init(cpu_t * cp)115 intr_init(cpu_t *cp)
116 {
117 int i;
118 extern uint_t softlevel1();
119
120 init_ivintr();
121
122 /*
123 * Register these software interrupts for ddi timer.
124 * Software interrupts up to the level 10 are supported.
125 */
126 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
127 siron_inum[i - 1] = add_softintr(i,
128 (softintrfunc)ddi_periodic_softintr,
129 (caddr_t)(uintptr_t)(i), SOFTINT_ST);
130 }
131
132 siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
133 poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
134 siron_poke_cpu_inum = add_softintr(PIL_13,
135 siron_poke_cpu_intr, 0, SOFTINT_MT);
136 cp->cpu_m.poke_cpu_outstanding = B_FALSE;
137
138 mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
139 mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);
140
141 /*
142 * A soft interrupt may have been requested prior to the initialization
143 * of soft interrupts. Soft interrupts can't be dispatched until after
144 * init_intr(), so we have to wait until now before we can dispatch the
145 * pending soft interrupt (if any).
146 */
147 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
148 if (siron_pending[i-1]) {
149 siron_pending[i-1] = 0;
150 sir_on(i);
151 }
152 }
153 if (siron1_pending) {
154 siron1_pending = 0;
155 siron();
156 }
157 }
158
159 /*
160 * poke_cpu_intr - fall through when poke_cpu calls
161 */
162 /* ARGSUSED */
163 uint_t
poke_cpu_intr(caddr_t arg1,caddr_t arg2)164 poke_cpu_intr(caddr_t arg1, caddr_t arg2)
165 {
166 CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
167 membar_stld_stst();
168 return (1);
169 }
170
171 /*
172 * Trigger software interrupts dedicated to ddi timer.
173 */
174 void
sir_on(int level)175 sir_on(int level)
176 {
177 ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10);
178 if (siron_inum[level-1])
179 setsoftint(siron_inum[level-1]);
180 else
181 siron_pending[level-1] = 1;
182 }
183
184 /*
185 * kmdb uses siron (and thus setsoftint) while the world is stopped in order to
186 * inform its driver component that there's work to be done. We need to keep
187 * DTrace from instrumenting kmdb's siron and setsoftint. We duplicate siron,
188 * giving kmdb's version a kdi_ prefix to keep DTrace at bay. The
189 * implementation of setsoftint is complicated enough that we don't want to
190 * duplicate it, but at the same time we don't want to preclude tracing either.
191 * The meat of setsoftint() therefore goes into kdi_setsoftint, with
192 * setsoftint() implemented as a wrapper. This allows tracing, while still
193 * providing a way for kmdb to sneak in unmolested.
194 */
195 void
kdi_siron(void)196 kdi_siron(void)
197 {
198 if (siron1_inum != 0)
199 kdi_setsoftint(siron1_inum);
200 else
201 siron1_pending = 1;
202 }
203
204 void
setsoftint(uint64_t inum)205 setsoftint(uint64_t inum)
206 {
207 kdi_setsoftint(inum);
208 }
209
210 /*
211 * Generates softlevel1 interrupt on current CPU if it
212 * is not pending already.
213 */
214 void
siron(void)215 siron(void)
216 {
217 uint64_t inum;
218
219 if (siron1_inum != 0) {
220 /*
221 * Once siron_cpu_inum has been allocated, we can
222 * use per-CPU siron inum.
223 */
224 if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0)
225 inum = siron_cpu_inum[CPU->cpu_id];
226 else
227 inum = siron1_inum;
228
229 setsoftint(inum);
230 } else
231 siron1_pending = 1;
232 }
233
234
235 static void
siron_init(void)236 siron_init(void)
237 {
238 /*
239 * We just allocate memory for per-cpu siron right now. Rest of
240 * the work is done when CPU is configured.
241 */
242 siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
243 }
244
245 /*
246 * This routine creates per-CPU siron inum for CPUs which are
247 * configured during boot.
248 */
249 void
siron_mp_init()250 siron_mp_init()
251 {
252 cpu_t *c;
253
254 /*
255 * Get the memory for per-CPU siron inums
256 */
257 siron_init();
258
259 mutex_enter(&cpu_lock);
260 c = cpu_list;
261 do {
262 (void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL);
263 } while ((c = c->cpu_next) != cpu_list);
264
265 register_cpu_setup_func(siron_cpu_setup, NULL);
266 mutex_exit(&cpu_lock);
267 }
268
269 /*
270 * siron_poke_cpu_intr - cross-call handler.
271 */
272 /* ARGSUSED */
273 uint_t
siron_poke_cpu_intr(caddr_t arg1,caddr_t arg2)274 siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2)
275 {
276 /* generate level1 softint */
277 siron();
278 return (1);
279 }
280
281 /*
282 * This routine generates a cross-call on target CPU(s).
283 */
284 void
siron_poke_cpu(cpuset_t poke)285 siron_poke_cpu(cpuset_t poke)
286 {
287 int cpuid = CPU->cpu_id;
288
289 if (CPU_IN_SET(poke, cpuid)) {
290 siron();
291 CPUSET_DEL(poke, cpuid);
292 if (CPUSET_ISNULL(poke))
293 return;
294 }
295
296 xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0);
297 }
298
299 /*
300 * This callback function allows us to create per-CPU siron inum.
301 */
302 /* ARGSUSED */
303 static int
siron_cpu_setup(cpu_setup_t what,int id,void * arg)304 siron_cpu_setup(cpu_setup_t what, int id, void *arg)
305 {
306 cpu_t *cp = cpu[id];
307
308 ASSERT(MUTEX_HELD(&cpu_lock));
309 ASSERT(cp != NULL);
310
311 switch (what) {
312 case CPU_CONFIG:
313 siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1,
314 (softintrfunc)softlevel1, 0, SOFTINT_ST);
315 break;
316 case CPU_UNCONFIG:
317 (void) rem_softintr(siron_cpu_inum[cp->cpu_id]);
318 siron_cpu_inum[cp->cpu_id] = 0;
319 break;
320 default:
321 break;
322 }
323
324 return (0);
325 }
326
327 /*
328 * no_ivintr()
329 * called by setvecint_tl1() through sys_trap()
330 * vector interrupt received but not valid or not
331 * registered in intr_vec_table
332 * considered as a spurious mondo interrupt
333 */
334 /* ARGSUSED */
335 void
no_ivintr(struct regs * rp,int inum,int pil)336 no_ivintr(struct regs *rp, int inum, int pil)
337 {
338 if (!ignore_invalid_vecintr)
339 cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
340 inum, pil);
341
342 #ifdef DEBUG_VEC_INTR
343 prom_enter_mon();
344 #endif /* DEBUG_VEC_INTR */
345 }
346
347 void
intr_dequeue_req(uint_t pil,uint64_t inum)348 intr_dequeue_req(uint_t pil, uint64_t inum)
349 {
350 intr_vec_t *iv, *next, *prev;
351 struct machcpu *mcpu;
352 uint32_t clr;
353 processorid_t cpu_id;
354 extern uint_t getpstate(void);
355
356 ASSERT((getpstate() & PSTATE_IE) == 0);
357
358 mcpu = &CPU->cpu_m;
359 cpu_id = CPU->cpu_id;
360
361 iv = (intr_vec_t *)inum;
362 prev = NULL;
363 next = mcpu->intr_head[pil];
364
365 /* Find a matching entry in the list */
366 while (next != NULL) {
367 if (next == iv)
368 break;
369 prev = next;
370 next = IV_GET_PIL_NEXT(next, cpu_id);
371 }
372
373 if (next != NULL) {
374 intr_vec_t *next_iv = IV_GET_PIL_NEXT(next, cpu_id);
375
376 /* Remove entry from list */
377 if (prev != NULL)
378 IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */
379 else
380 mcpu->intr_head[pil] = next_iv; /* head */
381
382 if (next_iv == NULL)
383 mcpu->intr_tail[pil] = prev; /* tail */
384 }
385
386 /* Clear pending interrupts at this level if the list is empty */
387 if (mcpu->intr_head[pil] == NULL) {
388 clr = 1 << pil;
389 if (pil == PIL_14)
390 clr |= (TICK_INT_MASK | STICK_INT_MASK);
391 wr_clr_softint(clr);
392 }
393 }
394
395
396 /*
397 * Send a directed interrupt of specified interrupt number id to a cpu.
398 */
399 void
send_dirint(int cpuix,int intr_id)400 send_dirint(
401 int cpuix, /* cpu to be interrupted */
402 int intr_id) /* interrupt number id */
403 {
404 xt_one(cpuix, setsoftint_tl1, intr_id, 0);
405 }
406
407 /*
408 * Take the specified CPU out of participation in interrupts.
409 * Called by p_online(2) when a processor is being taken off-line.
410 * This allows interrupt threads being handled on the processor to
411 * complete before the processor is idled.
412 */
413 int
cpu_disable_intr(struct cpu * cp)414 cpu_disable_intr(struct cpu *cp)
415 {
416 ASSERT(MUTEX_HELD(&cpu_lock));
417
418 /*
419 * Turn off the CPU_ENABLE flag before calling the redistribution
420 * function, since it checks for this in the cpu flags.
421 */
422 cp->cpu_flags &= ~CPU_ENABLE;
423 ncpus_intr_enabled--;
424
425 intr_redist_all_cpus();
426
427 return (0);
428 }
429
430 /*
431 * Allow the specified CPU to participate in interrupts.
432 * Called by p_online(2) if a processor could not be taken off-line
433 * because of bound threads, in order to resume processing interrupts.
434 * Also called after starting a processor.
435 */
436 void
cpu_enable_intr(struct cpu * cp)437 cpu_enable_intr(struct cpu *cp)
438 {
439 ASSERT(MUTEX_HELD(&cpu_lock));
440
441 cp->cpu_flags |= CPU_ENABLE;
442 ncpus_intr_enabled++;
443
444 intr_redist_all_cpus();
445 }
446
447 /*
448 * Add function to callback list for intr_redist_all_cpus. We keep two lists,
449 * one for weighted callbacks and one for normal callbacks. Weighted callbacks
450 * are issued to redirect interrupts of a specified weight, from heavy to
451 * light. This allows all the interrupts of a given weight to be redistributed
452 * for all weighted nexus drivers prior to those of less weight.
453 */
454 static void
intr_dist_add_list(struct intr_dist ** phead,void (* func)(void *),void * arg)455 intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
456 {
457 struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
458 struct intr_dist *iptr;
459 struct intr_dist **pptr;
460
461 ASSERT(func);
462 new->func = func;
463 new->arg = arg;
464 new->next = NULL;
465
466 /* Add to tail so that redistribution occurs in original order. */
467 mutex_enter(&intr_dist_lock);
468 for (iptr = *phead, pptr = phead; iptr != NULL;
469 pptr = &iptr->next, iptr = iptr->next) {
470 /* check for problems as we locate the tail */
471 if ((iptr->func == func) && (iptr->arg == arg)) {
472 cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
473 /*NOTREACHED*/
474 }
475 }
476 *pptr = new;
477
478 mutex_exit(&intr_dist_lock);
479 }
480
481 void
intr_dist_add(void (* func)(void *),void * arg)482 intr_dist_add(void (*func)(void *), void *arg)
483 {
484 intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
485 }
486
487 void
intr_dist_add_weighted(void (* func)(void *,int32_t,int32_t),void * arg)488 intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
489 {
490 intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
491 }
492
493 /*
494 * Search for the interrupt distribution structure with the specified
495 * mondo vec reg in the interrupt distribution list. If a match is found,
496 * then delete the entry from the list. The caller is responsible for
497 * modifying the mondo vector registers.
498 */
499 static void
intr_dist_rem_list(struct intr_dist ** headp,void (* func)(void *),void * arg)500 intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
501 {
502 struct intr_dist *iptr;
503 struct intr_dist **vect;
504
505 mutex_enter(&intr_dist_lock);
506 for (iptr = *headp, vect = headp;
507 iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
508 if ((iptr->func == func) && (iptr->arg == arg)) {
509 *vect = iptr->next;
510 kmem_free(iptr, sizeof (struct intr_dist));
511 mutex_exit(&intr_dist_lock);
512 return;
513 }
514 }
515
516 if (!panicstr)
517 cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
518 mutex_exit(&intr_dist_lock);
519 }
520
521 void
intr_dist_rem(void (* func)(void *),void * arg)522 intr_dist_rem(void (*func)(void *), void *arg)
523 {
524 intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
525 }
526
527 void
intr_dist_rem_weighted(void (* func)(void *,int32_t,int32_t),void * arg)528 intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
529 {
530 intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
531 }
532
533 /*
534 * Initiate interrupt redistribution. Redistribution improves the isolation
535 * associated with interrupt weights by ordering operations from heavy weight
536 * to light weight. When a CPUs orientation changes relative to interrupts,
537 * there is *always* a redistribution to accommodate this change (call to
538 * intr_redist_all_cpus()). As devices (not CPUs) attach/detach it is possible
539 * that a redistribution could improve the quality of an initialization. For
540 * example, if you are not using a NIC it may not be attached with s10 (devfs).
541 * If you then configure the NIC (ifconfig), this may cause the NIC to attach
542 * and plumb interrupts. The CPU assignment for the NIC's interrupts is
543 * occurring late, so optimal "isolation" relative to weight is not occurring.
544 * The same applies to detach, although in this case doing the redistribution
545 * might improve "spread" for medium weight devices since the "isolation" of
546 * a higher weight device may no longer be present.
547 *
548 * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
549 *
550 * NB: There is risk associated with automatically triggering execution of the
551 * redistribution code at arbitrary times. The risk comes from the fact that
552 * there is a lot of low-level hardware interaction associated with a
553 * redistribution. At some point we may want this code to perform automatic
554 * redistribution (redistribution thread; trigger timeout when add/remove
555 * weight delta is large enough, and call cv_signal from timeout - causing
556 * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too
557 * risky at this time.
558 */
559 void
i_ddi_intr_redist_all_cpus()560 i_ddi_intr_redist_all_cpus()
561 {
562 mutex_enter(&cpu_lock);
563 INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
564 intr_redist_all_cpus();
565 mutex_exit(&cpu_lock);
566 }
567
568 /*
569 * Redistribute all interrupts
570 *
571 * This function redistributes all interrupting devices, running the
572 * parent callback functions for each node.
573 */
574 void
intr_redist_all_cpus(void)575 intr_redist_all_cpus(void)
576 {
577 struct cpu *cp;
578 struct intr_dist *iptr;
579 int32_t weight, max_weight;
580
581 ASSERT(MUTEX_HELD(&cpu_lock));
582 mutex_enter(&intr_dist_lock);
583
584 /*
585 * zero cpu_intr_weight on all cpus - it is safe to traverse
586 * cpu_list since we hold cpu_lock.
587 */
588 cp = cpu_list;
589 do {
590 cp->cpu_intr_weight = 0;
591 } while ((cp = cp->cpu_next) != cpu_list);
592
593 /*
594 * Assume that this redistribution may encounter a device weight
595 * via driver.conf tuning of "ddi-intr-weight" that is at most
596 * intr_dist_weight_maxfactor times larger.
597 */
598 max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
599 if (max_weight > intr_dist_weight_maxmax)
600 max_weight = intr_dist_weight_maxmax;
601 intr_dist_weight_max = 1;
602
603 INTR_DEBUG((CE_CONT, "intr_dist: "
604 "intr_redist_all_cpus: %d-0\n", max_weight));
605
606 /*
607 * Redistribute weighted, from heavy to light. The callback that
608 * specifies a weight equal to weight_max should redirect all
609 * interrupts of weight weight_max or greater [weight_max, inf.).
610 * Interrupts of lesser weight should be processed on the call with
611 * the matching weight. This allows all the heaver weight interrupts
612 * on all weighted busses (multiple pci busses) to be redirected prior
613 * to any lesser weight interrupts.
614 */
615 for (weight = max_weight; weight >= 0; weight--)
616 for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
617 ((void (*)(void *, int32_t, int32_t))iptr->func)
618 (iptr->arg, max_weight, weight);
619
620 /* redistribute normal (non-weighted) interrupts */
621 for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
622 ((void (*)(void *))iptr->func)(iptr->arg);
623 mutex_exit(&intr_dist_lock);
624 }
625
626 void
intr_redist_all_cpus_shutdown(void)627 intr_redist_all_cpus_shutdown(void)
628 {
629 intr_policy = INTR_CURRENT_CPU;
630 intr_redist_all_cpus();
631 }
632
633 /*
634 * Determine what CPU to target, based on interrupt policy.
635 *
636 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
637 * advance through interrupt enabled cpus (round-robin).
638 *
639 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
640 * cpu_intr_weight, round robin when all equal.
641 *
642 * Weighted interrupt distribution provides two things: "spread" of weight
643 * (associated with algorithm itself) and "isolation" (associated with a
644 * particular device weight). A redistribution is what provides optimal
645 * "isolation" of heavy weight interrupts, optimal "spread" of weight
646 * (relative to what came before) is always occurring.
647 *
648 * An interrupt weight is a subjective number that represents the
649 * percentage of a CPU required to service a device's interrupts: the
650 * default weight is 0% (however the algorithm still maintains
651 * round-robin), a network interface controller (NIC) may have a large
652 * weight (35%). Interrupt weight only has meaning relative to the
653 * interrupt weight of other devices: a CPU can be weighted more than
654 * 100%, and a single device might consume more than 100% of a CPU.
655 *
656 * A coarse interrupt weight can be defined by the parent nexus driver
657 * based on bus specific information, like pci class codes. A nexus
658 * driver that supports device interrupt weighting for its children
659 * should call intr_dist_cpuid_add/rem_device_weight(), which adds
660 * and removes the weight of a device from the CPU that an interrupt
661 * is directed at. The quality of initialization improves when the
662 * device interrupt weights more accuracy reflect actual run-time weights,
663 * and as the assignments are ordered from is heavy to light.
664 *
665 * The implementation also supports interrupt weight being specified in
666 * driver.conf files via the property "ddi-intr-weight", which takes
667 * precedence over the nexus supplied weight. This support is added to
668 * permit possible tweaking in the product in response to customer
669 * problems. This is not a formal or committed interface.
670 *
671 * While a weighted approach chooses the CPU providing the best spread
672 * given past weights, less than optimal isolation can result in cases
673 * where heavy weight devices show up last. The nexus driver's interrupt
674 * redistribution logic should use intr_dist_add/rem_weighted so that
675 * interrupts can be redistributed heavy first for optimal isolation.
676 */
677 uint32_t
intr_dist_cpuid(void)678 intr_dist_cpuid(void)
679 {
680 static struct cpu *curr_cpu;
681 struct cpu *start_cpu;
682 struct cpu *new_cpu;
683 struct cpu *cp;
684 int cpuid = -1;
685
686 /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
687 mutex_enter(&intr_dist_cpu_lock);
688
689 switch (intr_policy) {
690 case INTR_CURRENT_CPU:
691 cpuid = CPU->cpu_id;
692 break;
693
694 case INTR_BOOT_CPU:
695 panic("INTR_BOOT_CPU no longer supported.");
696 /*NOTREACHED*/
697
698 case INTR_FLAT_DIST:
699 case INTR_WEIGHTED_DIST:
700 default:
701 /*
702 * Ensure that curr_cpu is valid - cpu_next will be NULL if
703 * the cpu has been deleted (cpu structs are never freed).
704 */
705 if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
706 curr_cpu = CPU;
707
708 /*
709 * Advance to online CPU after curr_cpu (round-robin). For
710 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
711 * weight. For a nexus that does not support weight the
712 * default weight of zero is used. We degrade to round-robin
713 * behavior among equal weightes. The default weight is zero
714 * and round-robin behavior continues.
715 *
716 * Disable preemption while traversing cpu_next_onln to
717 * ensure the list does not change. This works because
718 * modifiers of this list and other lists in a struct cpu
719 * call pause_cpus() before making changes.
720 */
721 kpreempt_disable();
722 cp = start_cpu = curr_cpu->cpu_next_onln;
723 new_cpu = NULL;
724 do {
725 /* Skip CPUs with interrupts disabled */
726 if ((cp->cpu_flags & CPU_ENABLE) == 0)
727 continue;
728
729 if (intr_policy == INTR_FLAT_DIST) {
730 /* select CPU */
731 new_cpu = cp;
732 break;
733 } else if ((new_cpu == NULL) ||
734 (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
735 /* Choose if lighter weight */
736 new_cpu = cp;
737 }
738 } while ((cp = cp->cpu_next_onln) != start_cpu);
739 ASSERT(new_cpu);
740 cpuid = new_cpu->cpu_id;
741
742 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
743 "targeted\n", cpuid, new_cpu->cpu_intr_weight));
744
745 /* update static pointer for next round-robin */
746 curr_cpu = new_cpu;
747 kpreempt_enable();
748 break;
749 }
750 mutex_exit(&intr_dist_cpu_lock);
751 return (cpuid);
752 }
753
754 /*
755 * Add or remove the the weight of a device from a CPUs interrupt weight.
756 *
757 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
758 * their children to improve the overall quality of interrupt initialization.
759 *
760 * If a nexues shares the CPU returned by a single intr_dist_cpuid() call
761 * among multiple devices (sharing ino) then the nexus should call
762 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
763 * that share must specify the same cpuid.
764 *
765 * If a nexus driver is unable to determine the cpu at remove_intr time
766 * for some of its interrupts, then it should not call add_device_weight -
767 * intr_dist_cpuid will still provide round-robin.
768 *
769 * An established device weight (from dev_info node) takes precedence over
770 * the weight passed in. If a device weight is not already established
771 * then the passed in nexus weight is established.
772 */
773 void
intr_dist_cpuid_add_device_weight(uint32_t cpuid,dev_info_t * dip,int32_t nweight)774 intr_dist_cpuid_add_device_weight(uint32_t cpuid,
775 dev_info_t *dip, int32_t nweight)
776 {
777 int32_t eweight;
778
779 /*
780 * For non-weighted policy everything has weight of zero (and we get
781 * round-robin distribution from intr_dist_cpuid).
782 * NB: intr_policy is limited to this file. A weighted nexus driver is
783 * calls this rouitne even if intr_policy has been patched to
784 * INTR_FLAG_DIST.
785 */
786 ASSERT(dip);
787 if (intr_policy != INTR_WEIGHTED_DIST)
788 return;
789
790 eweight = i_ddi_get_intr_weight(dip);
791 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
792 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
793 nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
794 ddi_get_instance(ddi_get_parent(dip)),
795 ddi_driver_name(dip), ddi_get_instance(dip)));
796
797 /* if no establish weight, establish nexus weight */
798 if (eweight < 0) {
799 if (nweight > 0)
800 (void) i_ddi_set_intr_weight(dip, nweight);
801 else
802 nweight = 0;
803 } else
804 nweight = eweight; /* use established weight */
805
806 /* Establish exclusion for cpu_intr_weight manipulation */
807 mutex_enter(&intr_dist_cpu_lock);
808 cpu[cpuid]->cpu_intr_weight += nweight;
809
810 /* update intr_dist_weight_max */
811 if (nweight > intr_dist_weight_max)
812 intr_dist_weight_max = nweight;
813 mutex_exit(&intr_dist_cpu_lock);
814 }
815
816 void
intr_dist_cpuid_rem_device_weight(uint32_t cpuid,dev_info_t * dip)817 intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
818 {
819 struct cpu *cp;
820 int32_t weight;
821
822 ASSERT(dip);
823 if (intr_policy != INTR_WEIGHTED_DIST)
824 return;
825
826 /* remove weight of device from cpu */
827 weight = i_ddi_get_intr_weight(dip);
828 if (weight < 0)
829 weight = 0;
830 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for "
831 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
832 ddi_driver_name(ddi_get_parent(dip)),
833 ddi_get_instance(ddi_get_parent(dip)),
834 ddi_driver_name(dip), ddi_get_instance(dip)));
835
836 /* Establish exclusion for cpu_intr_weight manipulation */
837 mutex_enter(&intr_dist_cpu_lock);
838 cp = cpu[cpuid];
839 cp->cpu_intr_weight -= weight;
840 if (cp->cpu_intr_weight < 0)
841 cp->cpu_intr_weight = 0; /* sanity */
842 mutex_exit(&intr_dist_cpu_lock);
843 }
844
845 ulong_t
create_softint(uint_t pil,uint_t (* func)(caddr_t,caddr_t),caddr_t arg1)846 create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1)
847 {
848 uint64_t inum;
849
850 inum = add_softintr(pil, func, arg1, SOFTINT_MT);
851 return ((ulong_t)inum);
852 }
853
854 void
invoke_softint(processorid_t cpuid,ulong_t hdl)855 invoke_softint(processorid_t cpuid, ulong_t hdl)
856 {
857 uint64_t inum = hdl;
858
859 if (cpuid == CPU->cpu_id)
860 setsoftint(inum);
861 else
862 xt_one(cpuid, setsoftint_tl1, inum, 0);
863 }
864
865 void
remove_softint(ulong_t hdl)866 remove_softint(ulong_t hdl)
867 {
868 uint64_t inum = hdl;
869
870 (void) rem_softintr(inum);
871 }
872
873 void
sync_softint(cpuset_t set)874 sync_softint(cpuset_t set)
875 {
876 xt_sync(set);
877 }
878