xref: /titanic_52/usr/src/uts/sun4/os/intr.c (revision fbd1c0dae6f4a2ccc2ce0527c7f19d3dd5ea90b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/sysmacros.h>
29 #include <sys/stack.h>
30 #include <sys/cpuvar.h>
31 #include <sys/ivintr.h>
32 #include <sys/intreg.h>
33 #include <sys/membar.h>
34 #include <sys/kmem.h>
35 #include <sys/intr.h>
36 #include <sys/sunndi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/privregs.h>
39 #include <sys/systm.h>
40 #include <sys/archsystm.h>
41 #include <sys/machsystm.h>
42 #include <sys/x_call.h>
43 #include <vm/seg_kp.h>
44 #include <sys/debug.h>
45 #include <sys/cyclic.h>
46 #include <sys/kdi_impl.h>
47 
48 #include <sys/cpu_sgnblk_defs.h>
49 
/*
 * Global locks which protect the interrupt distribution lists.
 * intr_dist_lock is held while the callback lists below are walked or
 * modified.  intr_dist_cpu_lock is held by intr_dist_cpuid() and by the
 * add/rem device weight routines while they pick a CPU or adjust a CPU's
 * interrupt weight.
 */
static kmutex_t intr_dist_lock;
static kmutex_t intr_dist_cpu_lock;

/*
 * Heads of the interrupt distribution callback lists: intr_dist_head for
 * normal callbacks, intr_dist_whead for weighted callbacks.
 */
static struct intr_dist *intr_dist_head = NULL;
static struct intr_dist *intr_dist_whead = NULL;

/* Soft interrupt numbers assigned by add_softintr() in intr_init(). */
uint64_t siron_inum;
uint64_t poke_cpu_inum;
uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
61 
/*
 * Note:-
 * siron_pending was originally created to prevent a resource over-consumption
 * bug in setsoftint (exhaustion of the interrupt pool free list).
 * Its original intention is obsolete with the use of iv_pending in
 * setsoftint. However, siron_pending stayed around, acting as a second
 * gatekeeper preventing soft interrupts from being queued. In this capacity,
 * it can lead to hangs on MP systems, where due to global visibility issues
 * it can end up set while iv_pending is reset, preventing soft interrupts from
 * ever being processed. In addition to its gatekeeper role, intr_init() also
 * uses it to flag the situation where siron() was called before siron_inum has
 * been defined.
 *
 * siron() does not need an extra gatekeeper; any cpu that wishes should be
 * allowed to queue a soft interrupt. It is softint()'s job to ensure
 * correct handling of the queues. Therefore, siron_pending has been
 * stripped of its gatekeeper task, retaining only its intr_init() job, where
 * it indicates that there is a pending need to call siron().
 */
int siron_pending;
82 
int intr_policy = INTR_WEIGHTED_DIST;	/* interrupt distribution policy */
int intr_dist_debug = 0;		/* non-zero enables INTR_DEBUG output */
/*
 * Largest device weight recorded since the last redistribution; raised by
 * intr_dist_cpuid_add_device_weight() and reset to 1 by
 * intr_redist_all_cpus().
 */
int32_t intr_dist_weight_max = 1;
/* Upper bound on the starting weight used by intr_redist_all_cpus(). */
int32_t intr_dist_weight_maxmax = 1000;
/* Multiplier applied to intr_dist_weight_max by intr_redist_all_cpus(). */
int intr_dist_weight_maxfactor = 2;
/*
 * Emit a cmn_err() message only when intr_dist_debug is non-zero.  'args' is
 * the complete, parenthesized cmn_err() argument list, e.g.
 *	INTR_DEBUG((CE_CONT, "fmt %d\n", val));
 * The do/while (0) wrapper makes the expansion a single statement, so it
 * cannot capture the else of an enclosing if.
 */
#define	INTR_DEBUG(args)	do {					\
	if (intr_dist_debug)						\
		cmn_err args;						\
} while (0)
89 
90 /*
91  * intr_init() - Interrupt initialization
92  *	Initialize the system's interrupt vector table.
93  */
94 void
95 intr_init(cpu_t *cp)
96 {
97 	extern uint_t softlevel1();
98 
99 	init_ivintr();
100 	REGISTER_BBUS_INTR();
101 
102 	siron_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
103 	poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
104 	cp->cpu_m.poke_cpu_outstanding = B_FALSE;
105 
106 	mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
107 	mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);
108 
109 	/*
110 	 * A soft interrupt may have been requested prior to the initialization
111 	 * of soft interrupts.  Soft interrupts can't be dispatched until after
112 	 * init_intr(), so we have to wait until now before we can dispatch the
113 	 * pending soft interrupt (if any).
114 	 */
115 	if (siron_pending) {
116 		siron_pending = 0;
117 		siron();
118 	}
119 }
120 
121 /*
122  * poke_cpu_intr - fall through when poke_cpu calls
123  */
124 /* ARGSUSED */
125 uint_t
126 poke_cpu_intr(caddr_t arg1, caddr_t arg2)
127 {
128 	CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
129 	membar_stld_stst();
130 	return (1);
131 }
132 
133 /*
134  * kmdb uses siron (and thus setsoftint) while the world is stopped in order to
135  * inform its driver component that there's work to be done.  We need to keep
136  * DTrace from instrumenting kmdb's siron and setsoftint.  We duplicate siron,
137  * giving kmdb's version a kdi_ prefix to keep DTrace at bay.  The
138  * implementation of setsoftint is complicated enough that we don't want to
139  * duplicate it, but at the same time we don't want to preclude tracing either.
140  * The meat of setsoftint() therefore goes into kdi_setsoftint, with
141  * setsoftint() implemented as a wrapper.  This allows tracing, while still
142  * providing a way for kmdb to sneak in unmolested.
143  */
144 void
145 kdi_siron(void)
146 {
147 	if (siron_inum != 0)
148 		kdi_setsoftint(siron_inum);
149 	else
150 		siron_pending = 1;
151 }
152 
/*
 * Post the soft interrupt identified by inum.  This is only a wrapper: all
 * of the work is done by kdi_setsoftint().
 */
void
setsoftint(uint64_t inum)
{
	kdi_setsoftint(inum);
}
158 
159 void
160 siron(void)
161 {
162 	if (siron_inum != 0)
163 		setsoftint(siron_inum);
164 	else
165 		siron_pending = 1;
166 }
167 
/*
 * no_ivintr()
 *	called by setvecint_tl1() through sys_trap()
 *	vector interrupt received but not valid or not
 *	registered in intr_vec_table
 *	considered as a spurious mondo interrupt
 *
 *	Logs a warning with the interrupt number and pil; rp is unused.
 *	When built with DEBUG_VEC_INTR it also calls prom_enter_mon().
 */
/* ARGSUSED */
void
no_ivintr(struct regs *rp, int inum, int pil)
{
	cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
	    inum, pil);

#ifdef DEBUG_VEC_INTR
	prom_enter_mon();
#endif /* DEBUG_VEC_INTR */
}
186 
187 void
188 intr_dequeue_req(uint_t pil, uint64_t inum)
189 {
190 	intr_vec_t	*iv, *next, *prev;
191 	struct machcpu	*mcpu;
192 	uint32_t	clr;
193 	processorid_t	cpu_id;
194 	extern uint_t	getpstate(void);
195 
196 	ASSERT((getpstate() & PSTATE_IE) == 0);
197 
198 	mcpu = &CPU->cpu_m;
199 	cpu_id = CPU->cpu_id;
200 
201 	iv = (intr_vec_t *)inum;
202 	prev = NULL;
203 	next = mcpu->intr_head[pil];
204 
205 	/* Find a matching entry in the list */
206 	while (next != NULL) {
207 		if (next == iv)
208 			break;
209 		prev = next;
210 		next = IV_GET_PIL_NEXT(next, cpu_id);
211 	}
212 
213 	if (next != NULL) {
214 		intr_vec_t	*next_iv = IV_GET_PIL_NEXT(next, cpu_id);
215 
216 		/* Remove entry from list */
217 		if (prev != NULL)
218 			IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */
219 		else
220 			mcpu->intr_head[pil] = next_iv; /* head */
221 
222 		if (next_iv == NULL)
223 			mcpu->intr_tail[pil] = prev; /* tail */
224 	}
225 
226 	/* Clear pending interrupts at this level if the list is empty */
227 	if (mcpu->intr_head[pil] == NULL) {
228 		clr = 1 << pil;
229 		if (pil == PIL_14)
230 			clr |= (TICK_INT_MASK | STICK_INT_MASK);
231 		wr_clr_softint(clr);
232 	}
233 }
234 
235 
236 /*
237  * Send a directed interrupt of specified interrupt number id to a cpu.
238  */
239 void
240 send_dirint(
241 	int cpuix,		/* cpu to be interrupted */
242 	int intr_id)		/* interrupt number id */
243 {
244 	xt_one(cpuix, setsoftint_tl1, intr_id, 0);
245 }
246 
/*
 * Take the specified CPU out of participation in interrupts.
 *	Called by p_online(2) when a processor is being taken off-line.
 *	This allows interrupt threads being handled on the processor to
 *	complete before the processor is idled.
 *
 *	The caller must hold cpu_lock.  Always returns 0.
 */
int
cpu_disable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Turn off the CPU_ENABLE flag before calling the redistribution
	 * function, since it checks for this in the cpu flags.
	 */
	cp->cpu_flags &= ~CPU_ENABLE;

	intr_redist_all_cpus();

	return (0);
}
268 
/*
 * Allow the specified CPU to participate in interrupts.
 *	Called by p_online(2) if a processor could not be taken off-line
 *	because of bound threads, in order to resume processing interrupts.
 *	Also called after starting a processor.
 *
 *	The caller must hold cpu_lock.
 */
void
cpu_enable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	/* Set CPU_ENABLE first so the redistribution below can target cp */
	cp->cpu_flags |= CPU_ENABLE;

	intr_redist_all_cpus();
}
284 
285 /*
286  * Add function to callback list for intr_redist_all_cpus.  We keep two lists,
287  * one for weighted callbacks and one for normal callbacks. Weighted callbacks
288  * are issued to redirect interrupts of a specified weight, from heavy to
289  * light.  This allows all the interrupts of a given weight to be redistributed
290  * for all weighted nexus drivers prior to those of less weight.
291  */
292 static void
293 intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
294 {
295 	struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
296 	struct intr_dist *iptr;
297 	struct intr_dist **pptr;
298 
299 	ASSERT(func);
300 	new->func = func;
301 	new->arg = arg;
302 	new->next = NULL;
303 
304 	/* Add to tail so that redistribution occurs in original order. */
305 	mutex_enter(&intr_dist_lock);
306 	for (iptr = *phead, pptr = phead; iptr != NULL;
307 	    pptr = &iptr->next, iptr = iptr->next) {
308 		/* check for problems as we locate the tail */
309 		if ((iptr->func == func) && (iptr->arg == arg)) {
310 			cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
311 			/*NOTREACHED*/
312 		}
313 	}
314 	*pptr = new;
315 
316 	mutex_exit(&intr_dist_lock);
317 }
318 
319 void
320 intr_dist_add(void (*func)(void *), void *arg)
321 {
322 	intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
323 }
324 
/*
 * Register a weighted redistribution callback.  The callback is stored
 * through the generic void (*)(void *) slot of struct intr_dist, hence the
 * cast; intr_redist_all_cpus() casts it back before calling it.
 */
void
intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
	intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
}
330 
331 /*
332  * Search for the interrupt distribution structure with the specified
333  * mondo vec reg in the interrupt distribution list. If a match is found,
334  * then delete the entry from the list. The caller is responsible for
335  * modifying the mondo vector registers.
336  */
337 static void
338 intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
339 {
340 	struct intr_dist *iptr;
341 	struct intr_dist **vect;
342 
343 	mutex_enter(&intr_dist_lock);
344 	for (iptr = *headp, vect = headp;
345 	    iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
346 		if ((iptr->func == func) && (iptr->arg == arg)) {
347 			*vect = iptr->next;
348 			kmem_free(iptr, sizeof (struct intr_dist));
349 			mutex_exit(&intr_dist_lock);
350 			return;
351 		}
352 	}
353 
354 	if (!panicstr)
355 		cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
356 	mutex_exit(&intr_dist_lock);
357 }
358 
359 void
360 intr_dist_rem(void (*func)(void *), void *arg)
361 {
362 	intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
363 }
364 
/*
 * Unregister a weighted redistribution callback.  The cast matches the
 * generic function pointer type under which the callback is stored in
 * struct intr_dist.
 */
void
intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
	intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
}
370 
371 /*
372  * Initiate interrupt redistribution.  Redistribution improves the isolation
373  * associated with interrupt weights by ordering operations from heavy weight
374  * to light weight.  When a CPUs orientation changes relative to interrupts,
375  * there is *always* a redistribution to accommodate this change (call to
376  * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
377  * that a redistribution could improve the quality of an initialization. For
378  * example, if you are not using a NIC it may not be attached with s10 (devfs).
379  * If you then configure the NIC (ifconfig), this may cause the NIC to attach
380  * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
381  * occurring late, so optimal "isolation" relative to weight is not occurring.
382  * The same applies to detach, although in this case doing the redistribution
383  * might improve "spread" for medium weight devices since the "isolation" of
384  * a higher weight device may no longer be present.
385  *
386  * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
387  *
388  * NB: There is risk associated with automatically triggering execution of the
389  * redistribution code at arbitrary times. The risk comes from the fact that
390  * there is a lot of low-level hardware interaction associated with a
391  * redistribution.  At some point we may want this code to perform automatic
392  * redistribution (redistribution thread; trigger timeout when add/remove
393  * weight delta is large enough, and call cv_signal from timeout - causing
394  * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too
395  * risky at this time.
396  */
397 void
398 i_ddi_intr_redist_all_cpus()
399 {
400 	mutex_enter(&cpu_lock);
401 	INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
402 	intr_redist_all_cpus();
403 	mutex_exit(&cpu_lock);
404 }
405 
406 /*
407  * Redistribute all interrupts
408  *
409  * This function redistributes all interrupting devices, running the
410  * parent callback functions for each node.
411  */
412 void
413 intr_redist_all_cpus(void)
414 {
415 	struct cpu *cp;
416 	struct intr_dist *iptr;
417 	int32_t weight, max_weight;
418 
419 	ASSERT(MUTEX_HELD(&cpu_lock));
420 	mutex_enter(&intr_dist_lock);
421 
422 	/*
423 	 * zero cpu_intr_weight on all cpus - it is safe to traverse
424 	 * cpu_list since we hold cpu_lock.
425 	 */
426 	cp = cpu_list;
427 	do {
428 		cp->cpu_intr_weight = 0;
429 	} while ((cp = cp->cpu_next) != cpu_list);
430 
431 	/*
432 	 * Assume that this redistribution may encounter a device weight
433 	 * via driver.conf tuning of "ddi-intr-weight" that is at most
434 	 * intr_dist_weight_maxfactor times larger.
435 	 */
436 	max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
437 	if (max_weight > intr_dist_weight_maxmax)
438 		max_weight = intr_dist_weight_maxmax;
439 	intr_dist_weight_max = 1;
440 
441 	INTR_DEBUG((CE_CONT, "intr_dist: "
442 	    "intr_redist_all_cpus: %d-0\n", max_weight));
443 
444 	/*
445 	 * Redistribute weighted, from heavy to light.  The callback that
446 	 * specifies a weight equal to weight_max should redirect all
447 	 * interrupts of weight weight_max or greater [weight_max, inf.).
448 	 * Interrupts of lesser weight should be processed on the call with
449 	 * the matching weight. This allows all the heaver weight interrupts
450 	 * on all weighted busses (multiple pci busses) to be redirected prior
451 	 * to any lesser weight interrupts.
452 	 */
453 	for (weight = max_weight; weight >= 0; weight--)
454 		for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
455 			((void (*)(void *, int32_t, int32_t))iptr->func)
456 			    (iptr->arg, max_weight, weight);
457 
458 	/* redistribute normal (non-weighted) interrupts */
459 	for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
460 		((void (*)(void *))iptr->func)(iptr->arg);
461 	mutex_exit(&intr_dist_lock);
462 }
463 
/*
 * Switch the distribution policy to INTR_CURRENT_CPU and redistribute, so
 * that intr_dist_cpuid() hands out the calling CPU from now on.
 * NOTE(review): intr_redist_all_cpus() asserts that cpu_lock is held and
 * this function does not take it - presumably the shutdown path calls this
 * in a context where that is acceptable; confirm against callers.
 */
void
intr_redist_all_cpus_shutdown(void)
{
	intr_policy = INTR_CURRENT_CPU;
	intr_redist_all_cpus();
}
470 
471 /*
472  * Determine what CPU to target, based on interrupt policy.
473  *
474  * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
475  *	advance through interrupt enabled cpus (round-robin).
476  *
477  * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
478  *	cpu_intr_weight, round robin when all equal.
479  *
480  *	Weighted interrupt distribution provides two things: "spread" of weight
481  *	(associated with algorithm itself) and "isolation" (associated with a
482  *	particular device weight). A redistribution is what provides optimal
483  *	"isolation" of heavy weight interrupts, optimal "spread" of weight
484  *	(relative to what came before) is always occurring.
485  *
486  *	An interrupt weight is a subjective number that represents the
487  *	percentage of a CPU required to service a device's interrupts: the
488  *	default weight is 0% (however the algorithm still maintains
489  *	round-robin), a network interface controller (NIC) may have a large
490  *	weight (35%). Interrupt weight only has meaning relative to the
491  *	interrupt weight of other devices: a CPU can be weighted more than
492  *	100%, and a single device might consume more than 100% of a CPU.
493  *
494  *	A coarse interrupt weight can be defined by the parent nexus driver
495  *	based on bus specific information, like pci class codes. A nexus
496  *	driver that supports device interrupt weighting for its children
497  *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
498  *	and removes the weight of a device from the CPU that an interrupt
499  *	is directed at.  The quality of initialization improves when the
 *	device interrupt weights more accurately reflect actual run-time
 *	weights, and as the assignments are ordered from heavy to light.
502  *
503  *	The implementation also supports interrupt weight being specified in
504  *	driver.conf files via the property "ddi-intr-weight", which takes
505  *	precedence over the nexus supplied weight.  This support is added to
506  *	permit possible tweaking in the product in response to customer
507  *	problems. This is not a formal or committed interface.
508  *
509  *	While a weighted approach chooses the CPU providing the best spread
510  *	given past weights, less than optimal isolation can result in cases
511  *	where heavy weight devices show up last. The nexus driver's interrupt
512  *	redistribution logic should use intr_dist_add/rem_weighted so that
513  *	interrupts can be redistributed heavy first for optimal isolation.
514  */
uint32_t
intr_dist_cpuid(void)
{
	static struct cpu	*curr_cpu;	/* last CPU handed out */
	struct cpu		*start_cpu;
	struct cpu		*new_cpu;
	struct cpu		*cp;
	int			cpuid = -1;

	/* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
	mutex_enter(&intr_dist_cpu_lock);

	switch (intr_policy) {
	case INTR_CURRENT_CPU:
		/* No search: target the CPU we are running on */
		cpuid = CPU->cpu_id;
		break;

	case INTR_BOOT_CPU:
		panic("INTR_BOOT_CPU no longer supported.");
		/*NOTREACHED*/

	case INTR_FLAT_DIST:
	case INTR_WEIGHTED_DIST:
	default:
		/*
		 * Ensure that curr_cpu is valid - cpu_next will be NULL if
		 * the cpu has been deleted (cpu structs are never freed).
		 */
		if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
			curr_cpu = CPU;

		/*
		 * Advance to online CPU after curr_cpu (round-robin). For
		 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
		 * weight.  For a nexus that does not support weight the
		 * default weight of zero is used. We degrade to round-robin
		 * behavior among equal weights.  The default weight is zero
		 * and round-robin behavior continues.
		 *
		 * Disable preemption while traversing cpu_next_onln to
		 * ensure the list does not change.  This works because
		 * modifiers of this list and other lists in a struct cpu
		 * call pause_cpus() before making changes.
		 */
		kpreempt_disable();
		cp = start_cpu = curr_cpu->cpu_next_onln;
		new_cpu = NULL;
		do {
			/*
			 * Skip CPUs with interrupts disabled.  A continue
			 * inside do/while jumps to the loop condition, so
			 * cp still advances to the next online CPU.
			 */
			if ((cp->cpu_flags & CPU_ENABLE) == 0)
				continue;

			if (intr_policy == INTR_FLAT_DIST) {
				/* select CPU */
				new_cpu = cp;
				break;
			} else if ((new_cpu == NULL) ||
			    (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
				/*
				 * Choose if lighter weight.  The strict '<'
				 * keeps the first CPU seen among equals,
				 * which gives the round-robin behavior.
				 */
				new_cpu = cp;
			}
		} while ((cp = cp->cpu_next_onln) != start_cpu);
		/*
		 * NOTE(review): new_cpu is dereferenced right after this
		 * ASSERT, so the code relies on at least one online CPU
		 * having CPU_ENABLE set.
		 */
		ASSERT(new_cpu);
		cpuid = new_cpu->cpu_id;

		INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
		    "targeted\n", cpuid, new_cpu->cpu_intr_weight));

		/* update static pointer for next round-robin */
		curr_cpu = new_cpu;
		kpreempt_enable();
		break;
	}
	mutex_exit(&intr_dist_cpu_lock);
	return (cpuid);
}
591 
592 /*
 * Add or remove the weight of a device from a CPU's interrupt weight.
594  *
595  * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
596  * their children to improve the overall quality of interrupt initialization.
597  *
 * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
599  * among multiple devices (sharing ino) then the nexus should call
600  * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
601  * that share must specify the same cpuid.
602  *
603  * If a nexus driver is unable to determine the cpu at remove_intr time
604  * for some of its interrupts, then it should not call add_device_weight -
605  * intr_dist_cpuid will still provide round-robin.
606  *
607  * An established device weight (from dev_info node) takes precedence over
608  * the weight passed in.  If a device weight is not already established
609  * then the passed in nexus weight is established.
610  */
611 void
612 intr_dist_cpuid_add_device_weight(uint32_t cpuid,
613     dev_info_t *dip, int32_t nweight)
614 {
615 	int32_t		eweight;
616 
617 	/*
618 	 * For non-weighted policy everything has weight of zero (and we get
619 	 * round-robin distribution from intr_dist_cpuid).
620 	 * NB: intr_policy is limited to this file. A weighted nexus driver is
621 	 * calls this rouitne even if intr_policy has been patched to
622 	 * INTR_FLAG_DIST.
623 	 */
624 	ASSERT(dip);
625 	if (intr_policy != INTR_WEIGHTED_DIST)
626 		return;
627 
628 	eweight = i_ddi_get_intr_weight(dip);
629 	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
630 	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
631 	    nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
632 	    ddi_get_instance(ddi_get_parent(dip)),
633 	    ddi_driver_name(dip), ddi_get_instance(dip)));
634 
635 	/* if no establish weight, establish nexus weight */
636 	if (eweight < 0) {
637 		if (nweight > 0)
638 			(void) i_ddi_set_intr_weight(dip, nweight);
639 		else
640 			nweight = 0;
641 	} else
642 		nweight = eweight;	/* use established weight */
643 
644 	/* Establish exclusion for cpu_intr_weight manipulation */
645 	mutex_enter(&intr_dist_cpu_lock);
646 	cpu[cpuid]->cpu_intr_weight += nweight;
647 
648 	/* update intr_dist_weight_max */
649 	if (nweight > intr_dist_weight_max)
650 		intr_dist_weight_max = nweight;
651 	mutex_exit(&intr_dist_cpu_lock);
652 }
653 
654 void
655 intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
656 {
657 	struct cpu	*cp;
658 	int32_t		weight;
659 
660 	ASSERT(dip);
661 	if (intr_policy != INTR_WEIGHTED_DIST)
662 		return;
663 
664 	/* remove weight of device from cpu */
665 	weight = i_ddi_get_intr_weight(dip);
666 	if (weight < 0)
667 		weight = 0;
668 	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d    for "
669 	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
670 	    ddi_driver_name(ddi_get_parent(dip)),
671 	    ddi_get_instance(ddi_get_parent(dip)),
672 	    ddi_driver_name(dip), ddi_get_instance(dip)));
673 
674 	/* Establish exclusion for cpu_intr_weight manipulation */
675 	mutex_enter(&intr_dist_cpu_lock);
676 	cp = cpu[cpuid];
677 	cp->cpu_intr_weight -= weight;
678 	if (cp->cpu_intr_weight < 0)
679 		cp->cpu_intr_weight = 0;	/* sanity */
680 	mutex_exit(&intr_dist_cpu_lock);
681 }
682