xref: /titanic_50/usr/src/uts/i86pc/os/intr.c (revision fea9cb91bd8e12d84069b4dab1268363668b4bff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/cpuvar.h>
30 #include <sys/regset.h>
31 #include <sys/psw.h>
32 #include <sys/types.h>
33 #include <sys/thread.h>
34 #include <sys/systm.h>
35 #include <sys/segments.h>
36 #include <sys/pcb.h>
37 #include <sys/trap.h>
38 #include <sys/ftrace.h>
39 #include <sys/traptrace.h>
40 #include <sys/clock.h>
41 #include <sys/panic.h>
42 #include <sys/disp.h>
43 #include <vm/seg_kp.h>
44 #include <sys/stack.h>
45 #include <sys/sysmacros.h>
46 #include <sys/cmn_err.h>
47 #include <sys/kstat.h>
48 #include <sys/smp_impldefs.h>
49 #include <sys/pool_pset.h>
50 #include <sys/zone.h>
51 #include <sys/bitmap.h>
52 
53 #if defined(__amd64)
54 
55 #if defined(__lint)
/*
 * atomic_btr32() is a gcc __inline__ function, defined in <asm/bitmap.h>.
 * For lint purposes, define a plain C stand-in here.
 *
 * Clears bit 'pil' in *pending and returns non-zero if that bit was set
 * beforehand, zero if it was already clear.  dosoftint_prolog() compares
 * the result against zero to decide whether the bit it tried to claim was
 * really pending, so the stand-in reports the previous state of the bit
 * rather than the updated word.  This version is not atomic.
 */
uint_t
atomic_btr32(uint32_t *pending, uint_t pil)
{
	/* unsigned shift so that bit 31 does not overflow a signed int */
	uint32_t bit = (uint32_t)1 << pil;
	uint_t wasset = (*pending & bit) != 0;

	*pending &= ~bit;
	return (wasset);
}
65 #else
66 
67 extern uint_t atomic_btr32(uint32_t *pending, uint_t pil);
68 
69 #endif
70 
71 /*
72  * This code is amd64-only for now, but as time permits, we should
73  * use this on i386 too.
74  */
75 
76 /*
77  * Some questions to ponder:
78  * -	in several of these routines, we make multiple calls to tsc_read()
79  *	without invoking functions .. couldn't we just reuse the same
80  *	timestamp sometimes?
81  * -	if we have the inline, we can probably make set_base_spl be a
82  *	C routine too.
83  */
84 
/*
 * C stand-in for a 16-bit "bit scan reverse": returns the index of the
 * most significant bit set in 'mask' (0 for bit 0, 15 for bit 15).
 *
 * Callers are expected to pass a non-zero mask, and that is ASSERTed.
 * Should a zero mask get here without the ASSERT firing, the scan stops
 * at bit 0 and returns 0 rather than decrementing the unsigned index past
 * zero and shifting by an out-of-range amount.
 */
static uint_t
bsrw_insn(uint16_t mask)
{
	uint_t index = sizeof (mask) * NBBY - 1;

	ASSERT(mask != 0);

	while (index > 0 && (mask & (1U << index)) == 0)
		index--;
	return (index);
}
96 
97 /*
98  * Do all the work necessary to set up the cpu and thread structures
99  * to dispatch a high-level interrupt.
100  *
101  * Returns 0 if we're -not- already on the high-level interrupt stack,
102  * (and *must* switch to it), non-zero if we are already on that stack.
103  *
104  * Called with interrupts masked.
105  * The 'pil' is already set to the appropriate level for rp->r_trapno.
106  */
107 int
108 hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil, struct regs *rp)
109 {
110 	struct machcpu *mcpu = &cpu->cpu_m;
111 	uint_t mask;
112 	hrtime_t intrtime;
113 
114 	ASSERT(pil > LOCK_LEVEL);
115 
116 	if (pil == CBE_HIGH_PIL) {
117 		cpu->cpu_profile_pil = oldpil;
118 		if (USERMODE(rp->r_cs)) {
119 			cpu->cpu_profile_pc = 0;
120 			cpu->cpu_profile_upc = rp->r_pc;
121 		} else {
122 			cpu->cpu_profile_pc = rp->r_pc;
123 			cpu->cpu_profile_upc = 0;
124 		}
125 	}
126 
127 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
128 	if (mask != 0) {
129 		int nestpil;
130 
131 		/*
132 		 * We have interrupted another high-level interrupt.
133 		 * Load starting timestamp, compute interval, update
134 		 * cumulative counter.
135 		 */
136 		nestpil = bsrw_insn((uint16_t)mask);
137 		ASSERT(nestpil < pil);
138 		intrtime = tsc_read() -
139 		    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
140 		mcpu->intrstat[nestpil][0] += intrtime;
141 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
142 		/*
143 		 * Another high-level interrupt is active below this one, so
144 		 * there is no need to check for an interrupt thread.  That
145 		 * will be done by the lowest priority high-level interrupt
146 		 * active.
147 		 */
148 	} else {
149 		kthread_t *t = cpu->cpu_thread;
150 
151 		/*
152 		 * See if we are interrupting a low-level interrupt thread.
153 		 * If so, account for its time slice only if its time stamp
154 		 * is non-zero.
155 		 */
156 		if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
157 			intrtime = tsc_read() - t->t_intr_start;
158 			mcpu->intrstat[t->t_pil][0] += intrtime;
159 			cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
160 			t->t_intr_start = 0;
161 		}
162 	}
163 
164 	/*
165 	 * Store starting timestamp in CPU structure for this PIL.
166 	 */
167 	mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = tsc_read();
168 
169 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
170 
171 	if (pil == 15) {
172 		/*
173 		 * To support reentrant level 15 interrupts, we maintain a
174 		 * recursion count in the top half of cpu_intr_actv.  Only
175 		 * when this count hits zero do we clear the PIL 15 bit from
176 		 * the lower half of cpu_intr_actv.
177 		 */
178 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
179 		(*refcntp)++;
180 	}
181 
182 	mask = cpu->cpu_intr_actv;
183 
184 	cpu->cpu_intr_actv |= (1 << pil);
185 
186 	return (mask & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
187 }
188 
189 /*
190  * Does most of the work of returning from a high level interrupt.
191  *
192  * Returns 0 if there are no more high level interrupts (in which
193  * case we must switch back to the interrupted thread stack) or
194  * non-zero if there are more (in which case we should stay on it).
195  *
196  * Called with interrupts masked
197  */
198 int
199 hilevel_intr_epilog(struct cpu *cpu, uint_t pil, uint_t oldpil, uint_t vecnum)
200 {
201 	struct machcpu *mcpu = &cpu->cpu_m;
202 	uint_t mask;
203 	hrtime_t intrtime;
204 
205 	ASSERT(mcpu->mcpu_pri == pil);
206 
207 	cpu->cpu_stats.sys.intr[pil - 1]++;
208 
209 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
210 
211 	if (pil == 15) {
212 		/*
213 		 * To support reentrant level 15 interrupts, we maintain a
214 		 * recursion count in the top half of cpu_intr_actv.  Only
215 		 * when this count hits zero do we clear the PIL 15 bit from
216 		 * the lower half of cpu_intr_actv.
217 		 */
218 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
219 
220 		ASSERT(*refcntp > 0);
221 
222 		if (--(*refcntp) == 0)
223 			cpu->cpu_intr_actv &= ~(1 << pil);
224 	} else {
225 		cpu->cpu_intr_actv &= ~(1 << pil);
226 	}
227 
228 	ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);
229 
230 	intrtime = tsc_read() - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
231 	mcpu->intrstat[pil][0] += intrtime;
232 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
233 
234 	/*
235 	 * Check for lower-pil nested high-level interrupt beneath
236 	 * current one.  If so, place a starting timestamp in its
237 	 * pil_high_start entry.
238 	 */
239 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
240 	if (mask != 0) {
241 		int nestpil;
242 
243 		/*
244 		 * find PIL of nested interrupt
245 		 */
246 		nestpil = bsrw_insn((uint16_t)mask);
247 		ASSERT(nestpil < pil);
248 		mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = tsc_read();
249 		/*
250 		 * (Another high-level interrupt is active below this one,
251 		 * so there is no need to check for an interrupt
252 		 * thread.  That will be done by the lowest priority
253 		 * high-level interrupt active.)
254 		 */
255 	} else {
256 		/*
257 		 * Check to see if there is a low-level interrupt active.
258 		 * If so, place a starting timestamp in the thread
259 		 * structure.
260 		 */
261 		kthread_t *t = cpu->cpu_thread;
262 
263 		if (t->t_flag & T_INTR_THREAD)
264 			t->t_intr_start = tsc_read();
265 	}
266 
267 	mcpu->mcpu_pri = oldpil;
268 	(void) (*setlvlx)(oldpil, vecnum);
269 
270 	return (cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
271 }
272 
273 /*
274  * Set up the cpu, thread and interrupt thread structures for
275  * executing an interrupt thread.  The new stack pointer of the
276  * interrupt thread (which *must* be switched to) is returned.
277  */
278 caddr_t
279 intr_thread_prolog(struct cpu *cpu, caddr_t stackptr, uint_t pil)
280 {
281 	struct machcpu *mcpu = &cpu->cpu_m;
282 	kthread_t *t, *volatile it;
283 
284 	ASSERT(pil > 0);
285 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
286 	cpu->cpu_intr_actv |= (1 << pil);
287 
288 	/*
289 	 * Get set to run an interrupt thread.
290 	 * There should always be an interrupt thread, since we
291 	 * allocate one for each level on each CPU.
292 	 *
293 	 * t_intr_start could be zero due to cpu_intr_swtch_enter.
294 	 */
295 	t = cpu->cpu_thread;
296 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
297 		hrtime_t intrtime = tsc_read() - t->t_intr_start;
298 		mcpu->intrstat[t->t_pil][0] += intrtime;
299 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
300 		t->t_intr_start = 0;
301 	}
302 
303 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
304 
305 	t->t_sp = (uintptr_t)stackptr;	/* mark stack in curthread for resume */
306 
307 	/*
308 	 * unlink the interrupt thread off the cpu
309 	 *
310 	 * Note that the code in kcpc_overflow_intr -relies- on the
311 	 * ordering of events here - in particular that t->t_lwp of
312 	 * the interrupt thread is set to the pinned thread *before*
313 	 * curthread is changed.
314 	 */
315 	it = cpu->cpu_intr_thread;
316 	cpu->cpu_intr_thread = it->t_link;
317 	it->t_intr = t;
318 	it->t_lwp = t->t_lwp;
319 
320 	/*
321 	 * (threads on the interrupt thread free list could have state
322 	 * preset to TS_ONPROC, but it helps in debugging if
323 	 * they're TS_FREE.)
324 	 */
325 	it->t_state = TS_ONPROC;
326 
327 	cpu->cpu_thread = it;		/* new curthread on this cpu */
328 	it->t_pil = (uchar_t)pil;
329 	it->t_pri = intr_pri + (pri_t)pil;
330 	it->t_intr_start = tsc_read();
331 
332 	return (it->t_stk);
333 }
334 
335 
336 #ifdef DEBUG
337 int intr_thread_cnt;
338 #endif
339 
340 /*
341  * Called with interrupts disabled
342  */
343 void
344 intr_thread_epilog(struct cpu *cpu, uint_t vec, uint_t oldpil)
345 {
346 	struct machcpu *mcpu = &cpu->cpu_m;
347 	kthread_t *t;
348 	kthread_t *it = cpu->cpu_thread;	/* curthread */
349 	uint_t pil, basespl;
350 	hrtime_t intrtime;
351 
352 	pil = it->t_pil;
353 	cpu->cpu_stats.sys.intr[pil - 1]++;
354 
355 	ASSERT(it->t_intr_start != 0);
356 	intrtime = tsc_read() - it->t_intr_start;
357 	mcpu->intrstat[pil][0] += intrtime;
358 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
359 
360 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
361 	cpu->cpu_intr_actv &= ~(1 << pil);
362 
363 	/*
364 	 * If there is still an interrupted thread underneath this one
365 	 * then the interrupt was never blocked and the return is
366 	 * fairly simple.  Otherwise it isn't.
367 	 */
368 	if ((t = it->t_intr) == NULL) {
369 		/*
370 		 * The interrupted thread is no longer pinned underneath
371 		 * the interrupt thread.  This means the interrupt must
372 		 * have blocked, and the interrupted thread has been
373 		 * unpinned, and has probably been running around the
374 		 * system for a while.
375 		 *
376 		 * Since there is no longer a thread under this one, put
377 		 * this interrupt thread back on the CPU's free list and
378 		 * resume the idle thread which will dispatch the next
379 		 * thread to run.
380 		 */
381 #ifdef DEBUG
382 		intr_thread_cnt++;
383 #endif
384 		cpu->cpu_stats.sys.intrblk++;
385 		/*
386 		 * Set CPU's base SPL based on active interrupts bitmask
387 		 */
388 		set_base_spl();
389 		basespl = cpu->cpu_base_spl;
390 		mcpu->mcpu_pri = basespl;
391 		(*setlvlx)(basespl, vec);
392 		(void) splhigh();
393 		it->t_state = TS_FREE;
394 		/*
395 		 * Return interrupt thread to pool
396 		 */
397 		it->t_link = cpu->cpu_intr_thread;
398 		cpu->cpu_intr_thread = it;
399 		swtch();
400 		/*NOTREACHED*/
401 	}
402 
403 	/*
404 	 * Return interrupt thread to the pool
405 	 */
406 	it->t_link = cpu->cpu_intr_thread;
407 	cpu->cpu_intr_thread = it;
408 	it->t_state = TS_FREE;
409 
410 	basespl = cpu->cpu_base_spl;
411 	pil = MAX(oldpil, basespl);
412 	mcpu->mcpu_pri = pil;
413 	(*setlvlx)(pil, vec);
414 	t->t_intr_start = tsc_read();
415 	cpu->cpu_thread = t;
416 }
417 
418 /*
419  * Called with interrupts disabled by an interrupt thread to determine
420  * how much time has elapsed. See interrupt.s:intr_get_time() for detailed
421  * theory of operation.
422  */
423 uint64_t
424 intr_thread_get_time(struct cpu *cpu)
425 {
426 	struct machcpu *mcpu = &cpu->cpu_m;
427 	kthread_t *t = cpu->cpu_thread;
428 	uint64_t time, delta, ret;
429 	uint_t pil = t->t_pil;
430 
431 	ASSERT((cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK) == 0);
432 	ASSERT(t->t_flag & T_INTR_THREAD);
433 	ASSERT(pil != 0);
434 	ASSERT(t->t_intr_start != 0);
435 
436 	time = tsc_read();
437 	delta = time - t->t_intr_start;
438 	t->t_intr_start = time;
439 
440 	time = mcpu->intrstat[pil][0] + delta;
441 	ret = time - mcpu->intrstat[pil][1];
442 	mcpu->intrstat[pil][0] = time;
443 	mcpu->intrstat[pil][1] = time;
444 
445 	return (ret);
446 }
447 
448 caddr_t
449 dosoftint_prolog(
450 	struct cpu *cpu,
451 	caddr_t stackptr,
452 	uint32_t st_pending,
453 	uint_t oldpil)
454 {
455 	kthread_t *t, *volatile it;
456 	struct machcpu *mcpu = &cpu->cpu_m;
457 	uint_t pil;
458 
459 top:
460 	ASSERT(st_pending == mcpu->mcpu_softinfo.st_pending);
461 
462 	pil = bsrw_insn((uint16_t)st_pending);
463 	if (pil <= oldpil || pil <= cpu->cpu_base_spl)
464 		return (0);
465 
466 	/*
467 	 * XX64	Sigh.
468 	 *
469 	 * This is a transliteration of the i386 assembler code for
470 	 * soft interrupts.  One question is "why does this need
471 	 * to be atomic?"  One possible race is -other- processors
472 	 * posting soft interrupts to us in set_pending() i.e. the
473 	 * CPU might get preempted just after the address computation,
474 	 * but just before the atomic transaction, so another CPU would
475 	 * actually set the original CPU's st_pending bit.  However,
476 	 * it looks like it would be simpler to disable preemption there.
477 	 * Are there other races for which preemption control doesn't work?
478 	 *
479 	 * The i386 assembler version -also- checks to see if the bit
480 	 * being cleared was actually set; if it wasn't, it rechecks
481 	 * for more.  This seems a bit strange, as the only code that
482 	 * ever clears the bit is -this- code running with interrupts
483 	 * disabled on -this- CPU.  This code would probably be cheaper:
484 	 *
485 	 * atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending,
486 	 *   ~(1 << pil));
487 	 *
488 	 * and t->t_preempt--/++ around set_pending() even cheaper,
489 	 * but at this point, correctness is critical, so we slavishly
490 	 * emulate the i386 port.
491 	 */
492 	if (atomic_btr32((uint32_t *)&mcpu->mcpu_softinfo.st_pending, pil)
493 	    == 0) {
494 		st_pending = mcpu->mcpu_softinfo.st_pending;
495 		goto top;
496 	}
497 
498 	mcpu->mcpu_pri = pil;
499 	(*setspl)(pil);
500 
501 	/*
502 	 * Get set to run interrupt thread.
503 	 * There should always be an interrupt thread since we
504 	 * allocate one for each level on the CPU.
505 	 */
506 	it = cpu->cpu_intr_thread;
507 	cpu->cpu_intr_thread = it->t_link;
508 
509 	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
510 	t = cpu->cpu_thread;
511 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
512 		hrtime_t intrtime = tsc_read() - t->t_intr_start;
513 		mcpu->intrstat[pil][0] += intrtime;
514 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
515 		t->t_intr_start = 0;
516 	}
517 
518 	/*
519 	 * Note that the code in kcpc_overflow_intr -relies- on the
520 	 * ordering of events here - in particular that t->t_lwp of
521 	 * the interrupt thread is set to the pinned thread *before*
522 	 * curthread is changed.
523 	 */
524 	it->t_lwp = t->t_lwp;
525 	it->t_state = TS_ONPROC;
526 
527 	/*
528 	 * Push interrupted thread onto list from new thread.
529 	 * Set the new thread as the current one.
530 	 * Set interrupted thread's T_SP because if it is the idle thread,
531 	 * resume() may use that stack between threads.
532 	 */
533 
534 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
535 	t->t_sp = (uintptr_t)stackptr;
536 
537 	it->t_intr = t;
538 	cpu->cpu_thread = it;
539 
540 	/*
541 	 * Set bit for this pil in CPU's interrupt active bitmask.
542 	 */
543 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
544 	cpu->cpu_intr_actv |= (1 << pil);
545 
546 	/*
547 	 * Initialize thread priority level from intr_pri
548 	 */
549 	it->t_pil = (uchar_t)pil;
550 	it->t_pri = (pri_t)pil + intr_pri;
551 	it->t_intr_start = tsc_read();
552 
553 	return (it->t_stk);
554 }
555 
556 void
557 dosoftint_epilog(struct cpu *cpu, uint_t oldpil)
558 {
559 	struct machcpu *mcpu = &cpu->cpu_m;
560 	kthread_t *t, *it;
561 	uint_t pil, basespl;
562 	hrtime_t intrtime;
563 
564 	it = cpu->cpu_thread;
565 	pil = it->t_pil;
566 
567 	cpu->cpu_stats.sys.intr[pil - 1]++;
568 
569 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
570 	cpu->cpu_intr_actv &= ~(1 << pil);
571 	intrtime = tsc_read() - it->t_intr_start;
572 	mcpu->intrstat[pil][0] += intrtime;
573 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
574 
575 	/*
576 	 * If there is still an interrupted thread underneath this one
577 	 * then the interrupt was never blocked and the return is
578 	 * fairly simple.  Otherwise it isn't.
579 	 */
580 	if ((t = it->t_intr) == NULL) {
581 		/*
582 		 * Put thread back on the interrupt thread list.
583 		 * This was an interrupt thread, so set CPU's base SPL.
584 		 */
585 		set_base_spl();
586 		it->t_state = TS_FREE;
587 		it->t_link = cpu->cpu_intr_thread;
588 		cpu->cpu_intr_thread = it;
589 		(void) splhigh();
590 		swtch();
591 		/*NOTREACHED*/
592 	}
593 	it->t_link = cpu->cpu_intr_thread;
594 	cpu->cpu_intr_thread = it;
595 	it->t_state = TS_FREE;
596 	cpu->cpu_thread = t;
597 	if (t->t_flag & T_INTR_THREAD)
598 		t->t_intr_start = tsc_read();
599 	basespl = cpu->cpu_base_spl;
600 	pil = MAX(oldpil, basespl);
601 	mcpu->mcpu_pri = pil;
602 	(*setspl)(pil);
603 }
604 
605 /*
606  * Make the interrupted thread 'to' be runnable.
607  *
608  * Since t->t_sp has already been saved, t->t_pc is all
609  * that needs to be set in this function.
610  *
611  * Returns the interrupt level of the interrupt thread.
612  */
613 int
614 intr_passivate(
615 	kthread_t *it,		/* interrupt thread */
616 	kthread_t *t)		/* interrupted thread */
617 {
618 	extern void _sys_rtt();
619 
620 	ASSERT(it->t_flag & T_INTR_THREAD);
621 	ASSERT(SA(t->t_sp) == t->t_sp);
622 
623 	t->t_pc = (uintptr_t)_sys_rtt;
624 	return (it->t_pil);
625 }
626 
627 #endif	/* __amd64 */
628 
629 /*
630  * Allocate threads and stacks for interrupt handling.
631  */
632 #define	NINTR_THREADS	(LOCK_LEVEL-1)	/* number of interrupt threads */
633 
634 void
635 init_intr_threads(struct cpu *cp)
636 {
637 	int i;
638 
639 	for (i = 0; i < NINTR_THREADS; i++)
640 		thread_create_intr(cp);
641 
642 	cp->cpu_intr_stack = (caddr_t)segkp_get(segkp, INTR_STACK_SIZE,
643 		KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED) +
644 		INTR_STACK_SIZE - SA(MINFRAME);
645 }
646 
647 /*
648  * Create interrupt kstats for this CPU.
649  */
650 void
651 cpu_create_intrstat(cpu_t *cp)
652 {
653 	int		i;
654 	kstat_t		*intr_ksp;
655 	kstat_named_t	*knp;
656 	char		name[KSTAT_STRLEN];
657 	zoneid_t	zoneid;
658 
659 	ASSERT(MUTEX_HELD(&cpu_lock));
660 
661 	if (pool_pset_enabled())
662 		zoneid = GLOBAL_ZONEID;
663 	else
664 		zoneid = ALL_ZONES;
665 
666 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
667 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
668 
669 	/*
670 	 * Initialize each PIL's named kstat
671 	 */
672 	if (intr_ksp != NULL) {
673 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
674 		knp = (kstat_named_t *)intr_ksp->ks_data;
675 		intr_ksp->ks_private = cp;
676 		for (i = 0; i < PIL_MAX; i++) {
677 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
678 			    i + 1);
679 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
680 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
681 			    i + 1);
682 			kstat_named_init(&knp[(i * 2) + 1], name,
683 			    KSTAT_DATA_UINT64);
684 		}
685 		kstat_install(intr_ksp);
686 	}
687 }
688 
689 /*
690  * Delete interrupt kstats for this CPU.
691  */
692 void
693 cpu_delete_intrstat(cpu_t *cp)
694 {
695 	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
696 }
697 
698 /*
699  * Convert interrupt statistics from CPU ticks to nanoseconds and
700  * update kstat.
701  */
702 int
703 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
704 {
705 	kstat_named_t	*knp = ksp->ks_data;
706 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
707 	int		i;
708 	hrtime_t	hrt;
709 
710 	if (rw == KSTAT_WRITE)
711 		return (EACCES);
712 
713 	for (i = 0; i < PIL_MAX; i++) {
714 		hrt = (hrtime_t)cpup->cpu_m.intrstat[i + 1][0];
715 		tsc_scalehrtime(&hrt);
716 		knp[i * 2].value.ui64 = (uint64_t)hrt;
717 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
718 	}
719 
720 	return (0);
721 }
722 
723 /*
724  * An interrupt thread is ending a time slice, so compute the interval it
725  * ran for and update the statistic for its PIL.
726  */
727 void
728 cpu_intr_swtch_enter(kthread_id_t t)
729 {
730 	uint64_t	interval;
731 	uint64_t	start;
732 	cpu_t		*cpu;
733 
734 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
735 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
736 
737 	/*
738 	 * We could be here with a zero timestamp. This could happen if:
739 	 * an interrupt thread which no longer has a pinned thread underneath
740 	 * it (i.e. it blocked at some point in its past) has finished running
741 	 * its handler. intr_thread() updated the interrupt statistic for its
742 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
743 	 * return to, swtch() gets called and we end up here.
744 	 *
745 	 * Note that we use atomic ops below (cas64 and atomic_add_64), which
746 	 * we don't use in the functions above, because we're not called
747 	 * with interrupts blocked, but the epilog/prolog functions are.
748 	 */
749 	if (t->t_intr_start) {
750 		do {
751 			start = t->t_intr_start;
752 			interval = tsc_read() - start;
753 		} while (cas64(&t->t_intr_start, start, 0) != start);
754 		cpu = CPU;
755 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
756 
757 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
758 		    interval);
759 	} else
760 		ASSERT(t->t_intr == NULL);
761 }
762 
763 /*
764  * An interrupt thread is returning from swtch(). Place a starting timestamp
765  * in its thread structure.
766  */
767 void
768 cpu_intr_swtch_exit(kthread_id_t t)
769 {
770 	uint64_t ts;
771 
772 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
773 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
774 
775 	do {
776 		ts = t->t_intr_start;
777 	} while (cas64(&t->t_intr_start, ts, tsc_read()) != ts);
778 }
779