/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/cpuvar.h>
#include <sys/regset.h>
#include <sys/psw.h>
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>
#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <sys/disp.h>
#include <vm/seg_kp.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/smp_impldefs.h>
#include <sys/pool_pset.h>
#include <sys/zone.h>
#include <sys/bitmap.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/ontrap.h>
#include <sys/x86_archext.h>
#include <sys/promif.h>


/*
 * Set cpu's base SPL level to the highest active interrupt level
 */
void
set_base_spl(void)
{
	struct cpu *cpu = CPU;
	uint16_t active = (uint16_t)cpu->cpu_intr_actv;

	cpu->cpu_base_spl = active == 0 ? 0 : bsrw_insn(active);
}

/*
 * Do all the work necessary to set up the cpu and thread structures
 * to dispatch a high-level interrupt.
 *
 * Returns 0 if we're -not- already on the high-level interrupt stack,
 * (and *must* switch to it), non-zero if we are already on that stack.
 *
 * Called with interrupts masked.
 * The 'pil' is already set to the appropriate level for rp->r_trapno.
 */
static int
hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil, struct regs *rp)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t mask;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	ASSERT(pil > LOCK_LEVEL);

	if (pil == CBE_HIGH_PIL) {
		cpu->cpu_profile_pil = oldpil;
		if (USERMODE(rp->r_cs)) {
			cpu->cpu_profile_pc = 0;
			cpu->cpu_profile_upc = rp->r_pc;
		} else {
			cpu->cpu_profile_pc = rp->r_pc;
			cpu->cpu_profile_upc = 0;
		}
	}

	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
	if (mask != 0) {
		int nestpil;

		/*
		 * We have interrupted another high-level interrupt.
		 * Load starting timestamp, compute interval, update
		 * cumulative counter.
		 */
		nestpil = bsrw_insn((uint16_t)mask);
		ASSERT(nestpil < pil);
		intrtime = now -
		    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
		mcpu->intrstat[nestpil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		/*
		 * Another high-level interrupt is active below this one, so
		 * there is no need to check for an interrupt thread.  That
		 * will be done by the lowest priority high-level interrupt
		 * active.
		 */
	} else {
		kthread_t *t = cpu->cpu_thread;

		/*
		 * See if we are interrupting a low-level interrupt thread.
		 * If so, account for its time slice only if its time stamp
		 * is non-zero.
		 */
		if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
			intrtime = now - t->t_intr_start;
			mcpu->intrstat[t->t_pil][0] += intrtime;
			cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
			t->t_intr_start = 0;
		}
	}

	/*
	 * Store starting timestamp in CPU structure for this PIL.
	 */
	mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;

	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);

	if (pil == 15) {
		/*
		 * To support reentrant level 15 interrupts, we maintain a
		 * recursion count in the top half of cpu_intr_actv.  Only
		 * when this count hits zero do we clear the PIL 15 bit from
		 * the lower half of cpu_intr_actv.
		 */
		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
		(*refcntp)++;
	}

	mask = cpu->cpu_intr_actv;

	cpu->cpu_intr_actv |= (1 << pil);

	return (mask & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
}

/*
 * Does most of the work of returning from a high level interrupt.
 *
 * Returns 0 if there are no more high level interrupts (in which
 * case we must switch back to the interrupted thread stack) or
 * non-zero if there are more (in which case we should stay on it).
 *
 * Called with interrupts masked
 */
static int
hilevel_intr_epilog(struct cpu *cpu, uint_t pil, uint_t oldpil, uint_t vecnum)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t mask;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	ASSERT(mcpu->mcpu_pri == pil);

	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));

	if (pil == 15) {
		/*
		 * To support reentrant level 15 interrupts, we maintain a
		 * recursion count in the top half of cpu_intr_actv.  Only
		 * when this count hits zero do we clear the PIL 15 bit from
		 * the lower half of cpu_intr_actv.
		 */
		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;

		ASSERT(*refcntp > 0);

		if (--(*refcntp) == 0)
			cpu->cpu_intr_actv &= ~(1 << pil);
	} else {
		cpu->cpu_intr_actv &= ~(1 << pil);
	}

	ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);

	intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	/*
	 * Check for lower-pil nested high-level interrupt beneath
	 * current one.  If so, place a starting timestamp in its
	 * pil_high_start entry.
	 */
	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
	if (mask != 0) {
		int nestpil;

		/*
		 * find PIL of nested interrupt
		 */
		nestpil = bsrw_insn((uint16_t)mask);
		ASSERT(nestpil < pil);
		mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
		/*
		 * (Another high-level interrupt is active below this one,
		 * so there is no need to check for an interrupt
		 * thread.  That will be done by the lowest priority
		 * high-level interrupt active.)
		 */
	} else {
		/*
		 * Check to see if there is a low-level interrupt active.
		 * If so, place a starting timestamp in the thread
		 * structure.
		 */
		kthread_t *t = cpu->cpu_thread;

		if (t->t_flag & T_INTR_THREAD)
			t->t_intr_start = now;
	}

	mcpu->mcpu_pri = oldpil;
	(void) (*setlvlx)(oldpil, vecnum);

	return (cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
}

/*
 * Set up the cpu, thread and interrupt thread structures for
 * executing an interrupt thread.  The new stack pointer of the
 * interrupt thread (which *must* be switched to) is returned.
 */
static caddr_t
intr_thread_prolog(struct cpu *cpu, caddr_t stackptr, uint_t pil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t, *volatile it;
	hrtime_t now = tsc_read();

	ASSERT(pil > 0);
	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
	cpu->cpu_intr_actv |= (1 << pil);

	/*
	 * Get set to run an interrupt thread.
	 * There should always be an interrupt thread, since we
	 * allocate one for each level on each CPU.
	 *
	 * t_intr_start could be zero due to cpu_intr_swtch_enter.
	 */
	t = cpu->cpu_thread;
	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
		hrtime_t intrtime = now - t->t_intr_start;
		mcpu->intrstat[t->t_pil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		t->t_intr_start = 0;
	}

	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);

	t->t_sp = (uintptr_t)stackptr;	/* mark stack in curthread for resume */

	/*
	 * unlink the interrupt thread off the cpu
	 *
	 * Note that the code in kcpc_overflow_intr -relies- on the
	 * ordering of events here - in particular that t->t_lwp of
	 * the interrupt thread is set to the pinned thread *before*
	 * curthread is changed.
	 */
	it = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it->t_link;
	it->t_intr = t;
	it->t_lwp = t->t_lwp;

	/*
	 * (threads on the interrupt thread free list could have state
	 * preset to TS_ONPROC, but it helps in debugging if
	 * they're TS_FREE.)
	 */
	it->t_state = TS_ONPROC;

	cpu->cpu_thread = it;		/* new curthread on this cpu */
	it->t_pil = (uchar_t)pil;
	it->t_pri = intr_pri + (pri_t)pil;
	it->t_intr_start = now;

	return (it->t_stk);
}


#ifdef DEBUG
int intr_thread_cnt;
#endif

/*
 * Called with interrupts disabled
 */
static void
intr_thread_epilog(struct cpu *cpu, uint_t vec, uint_t oldpil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t;
	kthread_t *it = cpu->cpu_thread;	/* curthread */
	uint_t pil, basespl;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	pil = it->t_pil;
	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(it->t_intr_start != 0);
	intrtime = now - it->t_intr_start;
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));
	cpu->cpu_intr_actv &= ~(1 << pil);

	/*
	 * If there is still an interrupted thread underneath this one
	 * then the interrupt was never blocked and the return is
	 * fairly simple.  Otherwise it isn't.
	 */
	if ((t = it->t_intr) == NULL) {
		/*
		 * The interrupted thread is no longer pinned underneath
		 * the interrupt thread.  This means the interrupt must
		 * have blocked, and the interrupted thread has been
		 * unpinned, and has probably been running around the
		 * system for a while.
		 *
		 * Since there is no longer a thread under this one, put
		 * this interrupt thread back on the CPU's free list and
		 * resume the idle thread which will dispatch the next
		 * thread to run.
		 */
#ifdef DEBUG
		intr_thread_cnt++;
#endif
		cpu->cpu_stats.sys.intrblk++;
		/*
		 * Set CPU's base SPL based on active interrupts bitmask
		 */
		set_base_spl();
		basespl = cpu->cpu_base_spl;
		mcpu->mcpu_pri = basespl;
		(*setlvlx)(basespl, vec);
		(void) splhigh();
		sti();
		it->t_state = TS_FREE;
		/*
		 * Return interrupt thread to pool
		 */
		it->t_link = cpu->cpu_intr_thread;
		cpu->cpu_intr_thread = it;
		swtch();
		panic("intr_thread_epilog: swtch returned");
		/*NOTREACHED*/
	}

	/*
	 * Return interrupt thread to the pool
	 */
	it->t_link = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it;
	it->t_state = TS_FREE;

	basespl = cpu->cpu_base_spl;
	pil = MAX(oldpil, basespl);
	mcpu->mcpu_pri = pil;
	(*setlvlx)(pil, vec);
	t->t_intr_start = now;
	cpu->cpu_thread = t;
}

/*
 * intr_get_time() is a resource for interrupt handlers to determine how
 * much time has been spent handling the current interrupt.  Such a function
 * is needed because higher level interrupts can arrive during the
 * processing of an interrupt.  intr_get_time() only returns time spent in the
 * current interrupt handler.
 *
 * The caller must be calling from an interrupt handler running at a pil
 * below or at lock level.  Timings are not provided for high-level
 * interrupts.
 *
 * The first time intr_get_time() is called while handling an interrupt,
 * it returns the time since the interrupt handler was invoked.  Subsequent
 * calls will return the time since the prior call to intr_get_time().  Time
 * is returned as ticks.  Use tsc_scalehrtime() to convert ticks to nsec.
 *
 * Theory Of Intrstat[][]:
 *
 * uint64_t intrstat[pil][0..1] is an array indexed by pil level, with two
 * uint64_ts per pil.
 *
 * intrstat[pil][0] is a cumulative count of the number of ticks spent
 * handling all interrupts at the specified pil on this CPU.  It is
 * exported via kstats to the user.
 *
 * intrstat[pil][1] is always a count of ticks less than or equal to the
 * value in [0].  The difference between [1] and [0] is the value returned
 * by a call to intr_get_time().  At the start of interrupt processing,
 * [0] and [1] will be equal (or nearly so).  As the interrupt consumes
 * time, [0] will increase, but [1] will remain the same.  A call to
 * intr_get_time() will return the difference, then update [1] to be the
 * same as [0].  Future calls will return the time since the last call.
 * Finally, when the interrupt completes, [1] is updated to the same as [0].
 *
 * Implementation:
 *
 * intr_get_time() works much like a higher level interrupt arriving.  It
 * "checkpoints" the timing information by incrementing intrstat[pil][0]
 * to include elapsed running time, and by setting t_intr_start to rdtsc.
 * It then sets the return value to intrstat[pil][0] - intrstat[pil][1],
 * and updates intrstat[pil][1] to be the same as the new value of
 * intrstat[pil][0].
 *
 * In the normal handling of interrupts, after an interrupt handler returns
 * and the code in intr_thread() updates intrstat[pil][0], it then sets
 * intrstat[pil][1] to the new value of intrstat[pil][0].  When [0] == [1],
 * the timings are reset, i.e. intr_get_time() will return [0] - [1] which
 * is 0.
 *
 * Whenever interrupts arrive on a CPU which is handling a lower pil
 * interrupt, they update the lower pil's [0] to show time spent in the
 * handler that they've interrupted.  This results in a growing discrepancy
 * between [0] and [1], which is returned the next time intr_get_time() is
 * called.  Time spent in the higher-pil interrupt will not be returned in
 * the next intr_get_time() call from the original interrupt, because
 * the higher-pil interrupt's time is accumulated in intrstat[higherpil][].
 */
uint64_t
intr_get_time(void)
{
	struct cpu *cpu;
	struct machcpu *mcpu;
	kthread_t *t;
	uint64_t time, delta, ret;
	uint_t pil;

	cli();
	cpu = CPU;
	mcpu = &cpu->cpu_m;
	t = cpu->cpu_thread;
	pil = t->t_pil;
	ASSERT((cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK) == 0);
	ASSERT(t->t_flag & T_INTR_THREAD);
	ASSERT(pil != 0);
	ASSERT(t->t_intr_start != 0);

	time = tsc_read();
	delta = time - t->t_intr_start;
	t->t_intr_start = time;

	time = mcpu->intrstat[pil][0] + delta;
	ret = time - mcpu->intrstat[pil][1];
	mcpu->intrstat[pil][0] = time;
	mcpu->intrstat[pil][1] = time;
	cpu->cpu_intracct[cpu->cpu_mstate] += delta;

	sti();
	return (ret);
}
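
/*
 * Illustrative sketch (not compiled): one way a low-level interrupt
 * handler might use intr_get_time() is to cap the work it does per
 * invocation.  Each call returns the TSC ticks consumed since the handler
 * was invoked (first call) or since the previous call, so the deltas are
 * accumulated below.  xx_intr(), xx_more_work(), xx_do_work() and
 * XX_BUDGET_NS are hypothetical names, not part of this file.
 */
#if 0
static uint_t
xx_intr(caddr_t arg1, caddr_t arg2)
{
	hrtime_t delta, spent = 0;

	while (xx_more_work(arg1)) {
		xx_do_work(arg1);

		delta = (hrtime_t)intr_get_time();
		tsc_scalehrtime(&delta);	/* convert ticks to nsec */
		spent += delta;
		if (spent > XX_BUDGET_NS)
			break;		/* over budget; defer the rest */
	}
	return (DDI_INTR_CLAIMED);
}
#endif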

static caddr_t
dosoftint_prolog(
	struct cpu *cpu,
	caddr_t stackptr,
	uint32_t st_pending,
	uint_t oldpil)
{
	kthread_t *t, *volatile it;
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t pil;
	hrtime_t now;

top:
	ASSERT(st_pending == mcpu->mcpu_softinfo.st_pending);

	pil = bsrw_insn((uint16_t)st_pending);
	if (pil <= oldpil || pil <= cpu->cpu_base_spl)
		return (0);

	/*
	 * XX64	Sigh.
	 *
	 * This is a transliteration of the i386 assembler code for
	 * soft interrupts.  One question is "why does this need
	 * to be atomic?"  One possible race is -other- processors
	 * posting soft interrupts to us in set_pending() i.e. the
	 * CPU might get preempted just after the address computation,
	 * but just before the atomic transaction, so another CPU would
	 * actually set the original CPU's st_pending bit.  However,
	 * it looks like it would be simpler to disable preemption there.
	 * Are there other races for which preemption control doesn't work?
	 *
	 * The i386 assembler version -also- checks to see if the bit
	 * being cleared was actually set; if it wasn't, it rechecks
	 * for more.  This seems a bit strange, as the only code that
	 * ever clears the bit is -this- code running with interrupts
	 * disabled on -this- CPU.  This code would probably be cheaper:
	 *
	 *	atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending,
	 *	    ~(1 << pil));
	 *
	 * and t->t_preempt--/++ around set_pending() even cheaper,
	 * but at this point, correctness is critical, so we slavishly
	 * emulate the i386 port.
	 */
	if (atomic_btr32((uint32_t *)
	    &mcpu->mcpu_softinfo.st_pending, pil) == 0) {
		st_pending = mcpu->mcpu_softinfo.st_pending;
		goto top;
	}

	mcpu->mcpu_pri = pil;
	(*setspl)(pil);

	now = tsc_read();

	/*
	 * Get set to run interrupt thread.
	 * There should always be an interrupt thread since we
	 * allocate one for each level on the CPU.
	 */
	it = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it->t_link;

	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
	t = cpu->cpu_thread;
	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
		hrtime_t intrtime = now - t->t_intr_start;
		mcpu->intrstat[pil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		t->t_intr_start = 0;
	}

	/*
	 * Note that the code in kcpc_overflow_intr -relies- on the
	 * ordering of events here - in particular that t->t_lwp of
	 * the interrupt thread is set to the pinned thread *before*
	 * curthread is changed.
	 */
	it->t_lwp = t->t_lwp;
	it->t_state = TS_ONPROC;

	/*
	 * Push interrupted thread onto list from new thread.
	 * Set the new thread as the current one.
	 * Set interrupted thread's T_SP because if it is the idle thread,
	 * resume() may use that stack between threads.
	 */

	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
	t->t_sp = (uintptr_t)stackptr;

	it->t_intr = t;
	cpu->cpu_thread = it;

	/*
	 * Set bit for this pil in CPU's interrupt active bitmask.
	 */
	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
	cpu->cpu_intr_actv |= (1 << pil);

	/*
	 * Initialize thread priority level from intr_pri
	 */
	it->t_pil = (uchar_t)pil;
	it->t_pri = (pri_t)pil + intr_pri;
	it->t_intr_start = now;

	return (it->t_stk);
}

static void
dosoftint_epilog(struct cpu *cpu, uint_t oldpil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t, *it;
	uint_t pil, basespl;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	it = cpu->cpu_thread;
	pil = it->t_pil;

	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));
	cpu->cpu_intr_actv &= ~(1 << pil);
	intrtime = now - it->t_intr_start;
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	/*
	 * If there is still an interrupted thread underneath this one
	 * then the interrupt was never blocked and the return is
	 * fairly simple.  Otherwise it isn't.
	 */
	if ((t = it->t_intr) == NULL) {
		/*
		 * Put thread back on the interrupt thread list.
		 * This was an interrupt thread, so set CPU's base SPL.
		 */
		set_base_spl();
		it->t_state = TS_FREE;
		it->t_link = cpu->cpu_intr_thread;
		cpu->cpu_intr_thread = it;
		(void) splhigh();
		sti();
		swtch();
		/*NOTREACHED*/
		panic("dosoftint_epilog: swtch returned");
	}
	it->t_link = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it;
	it->t_state = TS_FREE;
	cpu->cpu_thread = t;
	if (t->t_flag & T_INTR_THREAD)
		t->t_intr_start = now;
	basespl = cpu->cpu_base_spl;
	pil = MAX(oldpil, basespl);
	mcpu->mcpu_pri = pil;
	(*setspl)(pil);
}


/*
 * Make the interrupted thread 'to' be runnable.
 *
 * Since t->t_sp has already been saved, t->t_pc is all
 * that needs to be set in this function.
 *
 * Returns the interrupt level of the interrupt thread.
 */
int
intr_passivate(
	kthread_t *it,		/* interrupt thread */
	kthread_t *t)		/* interrupted thread */
{
	extern void _sys_rtt();

	ASSERT(it->t_flag & T_INTR_THREAD);
	ASSERT(SA(t->t_sp) == t->t_sp);

	t->t_pc = (uintptr_t)_sys_rtt;
	return (it->t_pil);
}

/*
 * Create interrupt kstats for this CPU.
 */
void
cpu_create_intrstat(cpu_t *cp)
{
	int i;
	kstat_t *intr_ksp;
	kstat_named_t *knp;
	char name[KSTAT_STRLEN];
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;

	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);

	/*
	 * Initialize each PIL's named kstat
	 */
	if (intr_ksp != NULL) {
		intr_ksp->ks_update = cpu_kstat_intrstat_update;
		knp = (kstat_named_t *)intr_ksp->ks_data;
		intr_ksp->ks_private = cp;
		for (i = 0; i < PIL_MAX; i++) {
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
			    i + 1);
			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
			    i + 1);
			kstat_named_init(&knp[(i * 2) + 1], name,
			    KSTAT_DATA_UINT64);
		}
		kstat_install(intr_ksp);
	}
}

/*
 * Delete interrupt kstats for this CPU.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}

/*
 * Convert interrupt statistics from CPU ticks to nanoseconds and
 * update kstat.
 */
int
cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
{
	kstat_named_t *knp = ksp->ks_data;
	cpu_t *cpup = (cpu_t *)ksp->ks_private;
	int i;
	hrtime_t hrt;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	for (i = 0; i < PIL_MAX; i++) {
		hrt = (hrtime_t)cpup->cpu_m.intrstat[i + 1][0];
		tsc_scalehrtime(&hrt);
		knp[i * 2].value.ui64 = (uint64_t)hrt;
		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
	}

	return (0);
}
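
/*
 * Illustrative note (an assumption about the userland view, not code from
 * this file): the kstats created above surface as "level-N-time"
 * (nanoseconds, scaled by cpu_kstat_intrstat_update()) and "level-N-count"
 * pairs, and can be inspected with kstat(1M), e.g.
 *
 *	$ kstat -p cpu:0:intrstat
 *	cpu:0:intrstat:level-1-count	...
 *	cpu:0:intrstat:level-1-time	...
 *	...
 *
 * (values elided; shown only to connect the kstat names to this code).
 */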

/*
 * An interrupt thread is ending a time slice, so compute the interval it
 * ran for and update the statistic for its PIL.
 */
void
cpu_intr_swtch_enter(kthread_id_t t)
{
	uint64_t interval;
	uint64_t start;
	cpu_t *cpu;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

	/*
	 * We could be here with a zero timestamp. This could happen if:
	 * an interrupt thread which no longer has a pinned thread underneath
	 * it (i.e. it blocked at some point in its past) has finished running
	 * its handler. intr_thread() updated the interrupt statistic for its
	 * PIL and zeroed its timestamp. Since there was no pinned thread to
	 * return to, swtch() gets called and we end up here.
	 *
	 * Note that we use atomic ops below (cas64 and atomic_add_64), which
	 * we don't use in the functions above, because we're not called
	 * with interrupts blocked, but the epilog/prolog functions are.
	 */
	if (t->t_intr_start) {
		do {
			start = t->t_intr_start;
			interval = tsc_read() - start;
		} while (cas64(&t->t_intr_start, start, 0) != start);
		cpu = CPU;
		cpu->cpu_m.intrstat[t->t_pil][0] += interval;

		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
		    interval);
	} else
		ASSERT(t->t_intr == NULL);
}

/*
 * An interrupt thread is returning from swtch(). Place a starting timestamp
 * in its thread structure.
 */
void
cpu_intr_swtch_exit(kthread_id_t t)
{
	uint64_t ts;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

	do {
		ts = t->t_intr_start;
	} while (cas64(&t->t_intr_start, ts, tsc_read()) != ts);
}

/*
 * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
 */
/*ARGSUSED*/
static void
dispatch_hilevel(uint_t vector, uint_t arg2)
{
	sti();
	av_dispatch_autovect(vector);
	cli();
}

/*
 * Dispatch a soft interrupt
 */
/*ARGSUSED*/
static void
dispatch_softint(uint_t oldpil, uint_t arg2)
{
	struct cpu *cpu = CPU;

	sti();
	av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
	cli();

	/*
	 * Must run softint_epilog() on the interrupt thread stack, since
	 * there may not be a return from it if the interrupt thread blocked.
	 */
	dosoftint_epilog(cpu, oldpil);
}

/*
 * Dispatch a normal interrupt
 */
static void
dispatch_hardint(uint_t vector, uint_t oldipl)
{
	struct cpu *cpu = CPU;

	sti();
	av_dispatch_autovect(vector);
	cli();

	/*
	 * Must run intr_thread_epilog() on the interrupt thread stack, since
	 * there may not be a return from it if the interrupt thread blocked.
	 */
	intr_thread_epilog(cpu, vector, oldipl);
}

/*
 * Deliver any softints the current interrupt priority allows.
 * Called with interrupts disabled.
 */
void
dosoftint(struct regs *regs)
{
	struct cpu *cpu = CPU;
	int oldipl;
	caddr_t newsp;

	while (cpu->cpu_softinfo.st_pending) {
		oldipl = cpu->cpu_pri;
		newsp = dosoftint_prolog(cpu, (caddr_t)regs,
		    cpu->cpu_softinfo.st_pending, oldipl);
		/*
		 * If returned stack pointer is NULL, priority is too high
		 * to run any of the pending softints now.
		 * Break out and they will be run later.
		 */
		if (newsp == NULL)
			break;
		switch_sp_and_call(newsp, dispatch_softint, oldipl, 0);
	}
}

/*
 * Interrupt service routine, called with interrupts disabled.
 */
/*ARGSUSED*/
void
do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
{
	struct cpu *cpu = CPU;
	int newipl, oldipl = cpu->cpu_pri;
	uint_t vector;
	caddr_t newsp;

#ifdef TRAPTRACE
	ttp->ttr_marker = TT_INTERRUPT;
	ttp->ttr_ipl = 0xff;
	ttp->ttr_pri = oldipl;
	ttp->ttr_spl = cpu->cpu_base_spl;
	ttp->ttr_vector = 0xff;
#endif	/* TRAPTRACE */

	/*
	 * If it's a softint go do it now.
	 */
	if (rp->r_trapno == T_SOFTINT) {
		dosoftint(rp);
		ASSERT(!interrupts_enabled());
		return;
	}

	/*
	 * Raise the interrupt priority.
	 */
	newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
#ifdef TRAPTRACE
	ttp->ttr_ipl = newipl;
#endif	/* TRAPTRACE */

	/*
	 * Bail if it is a spurious interrupt
	 */
	if (newipl == -1)
		return;
	cpu->cpu_pri = newipl;
	vector = rp->r_trapno;
#ifdef TRAPTRACE
	ttp->ttr_vector = vector;
#endif	/* TRAPTRACE */
	if (newipl > LOCK_LEVEL) {
		/*
		 * High priority interrupts run on this cpu's interrupt stack.
		 */
		if (hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
			newsp = cpu->cpu_intr_stack;
			switch_sp_and_call(newsp, dispatch_hilevel, vector, 0);
		} else { /* already on the interrupt stack */
			dispatch_hilevel(vector, 0);
		}
		(void) hilevel_intr_epilog(cpu, newipl, oldipl, vector);
	} else {
		/*
		 * Run this interrupt in a separate thread.
		 */
		newsp = intr_thread_prolog(cpu, (caddr_t)rp, newipl);
		switch_sp_and_call(newsp, dispatch_hardint, vector, oldipl);
	}

	/*
	 * Deliver any pending soft interrupts.
	 */
	if (cpu->cpu_softinfo.st_pending)
		dosoftint(rp);
}

/*
 * Common tasks always done by _sys_rtt, called with interrupts disabled.
 * Returns 1 if returning to userland, 0 if returning to system mode.
 */
int
sys_rtt_common(struct regs *rp)
{
	kthread_t *tp;
	extern void mutex_exit_critical_start();
	extern long mutex_exit_critical_size;

loop:

	/*
	 * Check if returning to user
	 */
	tp = CPU->cpu_thread;
	if (USERMODE(rp->r_cs)) {
		/*
		 * Check if AST pending.
		 */
		if (tp->t_astflag) {
			/*
			 * Let trap() handle the AST
			 */
			sti();
			rp->r_trapno = T_AST;
			trap(rp, (caddr_t)0, CPU->cpu_id);
			cli();
			goto loop;
		}

#if defined(__amd64)
		/*
		 * We are done if segment registers do not need updating.
		 */
		if ((tp->t_lwp->lwp_pcb.pcb_flags & RUPDATE_PENDING) == 0)
			return (1);

		if (update_sregs(rp, tp->t_lwp)) {
			/*
			 * 1 or more of the selectors is bad.
			 * Deliver a SIGSEGV.
			 */
			proc_t *p = ttoproc(tp);

			sti();
			mutex_enter(&p->p_lock);
			tp->t_lwp->lwp_cursig = SIGSEGV;
			mutex_exit(&p->p_lock);
			psig();
			tp->t_sig_check = 1;
			cli();
		}
		tp->t_lwp->lwp_pcb.pcb_flags &= ~RUPDATE_PENDING;

#endif	/* __amd64 */
		return (1);
	}

	/*
	 * Here if we are returning to supervisor mode.
	 * Check for a kernel preemption request.
	 */
	if (CPU->cpu_kprunrun && (rp->r_ps & PS_IE)) {

		/*
		 * Do nothing if already in kpreempt
		 */
		if (!tp->t_preempt_lk) {
			tp->t_preempt_lk = 1;
			sti();
			kpreempt(1); /* asynchronous kpreempt call */
			cli();
			tp->t_preempt_lk = 0;
		}
	}

	/*
	 * If we interrupted the mutex_exit() critical region we must
	 * reset the PC back to the beginning to prevent missed wakeups
	 * See the comments in mutex_exit() for details.
	 */
	if ((uintptr_t)rp->r_pc - (uintptr_t)mutex_exit_critical_start <
	    mutex_exit_critical_size) {
		rp->r_pc = (greg_t)mutex_exit_critical_start;
	}
	return (0);
}

void
send_dirint(int cpuid, int int_level)
{
	(*send_dirintf)(cpuid, int_level);
}

/*
 * do_splx routine, takes new ipl to set
 * returns the old ipl.
 * We are careful not to set priority lower than CPU->cpu_base_pri,
 * even though it seems we're raising the priority, it could be set
 * higher at any time by an interrupt routine, so we must block interrupts
 * and look at CPU->cpu_base_pri
 */
int
do_splx(int newpri)
{
	ulong_t flag;
	cpu_t *cpu;
	int curpri, basepri;

	flag = intr_clear();
	cpu = CPU; /* ints are disabled, now safe to cache cpu ptr */
	curpri = cpu->cpu_m.mcpu_pri;
	basepri = cpu->cpu_base_spl;
	if (newpri < basepri)
		newpri = basepri;
	cpu->cpu_m.mcpu_pri = newpri;
	(*setspl)(newpri);
	/*
	 * If we are going to reenable interrupts see if new priority level
	 * allows pending softint delivery.
	 */
	if ((flag & PS_IE) &&
	    bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > newpri)
		fakesoftint();
	ASSERT(!interrupts_enabled());
	intr_restore(flag);
	return (curpri);
}
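
/*
 * Illustrative sketch (not compiled): callers of the splx()/do_splx()
 * family typically bracket a short critical section as below, relying on
 * the restore path to clamp the new level to cpu_base_spl and to deliver
 * any newly-pending softints.  xx_shared_state is a hypothetical piece of
 * state shared with an interrupt handler.
 */
#if 0
	int s;

	s = splhigh();		/* raise the PIL, remember the old level */
	xx_shared_state++;	/* touch interrupt-shared state */
	splx(s);		/* restore; may trigger fakesoftint() */
#endif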

/*
 * Common spl raise routine, takes new ipl to set
 * returns the old ipl, will not lower ipl.
 */
int
splr(int newpri)
{
	ulong_t flag;
	cpu_t *cpu;
	int curpri, basepri;

	flag = intr_clear();
	cpu = CPU; /* ints are disabled, now safe to cache cpu ptr */
	curpri = cpu->cpu_m.mcpu_pri;
	/*
	 * Only do something if new priority is larger
	 */
	if (newpri > curpri) {
		basepri = cpu->cpu_base_spl;
		if (newpri < basepri)
			newpri = basepri;
		cpu->cpu_m.mcpu_pri = newpri;
		(*setspl)(newpri);
		/*
		 * See if new priority level allows pending softint delivery
		 */
		if ((flag & PS_IE) &&
		    bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > newpri)
			fakesoftint();
	}
	intr_restore(flag);
	return (curpri);
}

int
getpil(void)
{
	return (CPU->cpu_m.mcpu_pri);
}

int
interrupts_enabled(void)
{
	ulong_t flag;

	flag = getflags();
	return ((flag & PS_IE) == PS_IE);
}

#ifdef DEBUG
void
assert_ints_enabled(void)
{
	ASSERT(!interrupts_unleashed || interrupts_enabled());
}
#endif	/* DEBUG */