/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cpuvar.h>
#include <sys/cpu_event.h>
#include <sys/regset.h>
#include <sys/psw.h>
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>
#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <sys/disp.h>
#include <vm/seg_kp.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/smp_impldefs.h>
#include <sys/pool_pset.h>
#include <sys/zone.h>
#include <sys/bitmap.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/ontrap.h>
#include <sys/x86_archext.h>
#include <sys/promif.h>
#include <vm/hat_i86.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#endif


#if defined(__xpv) && defined(DEBUG)

/*
 * This panic message is intended as an aid to interrupt debugging.
 *
 * The associated assertion tests the condition of enabling
 * events when events are already enabled.  The implication
 * is that whatever code the programmer thought was protected
 * by having events disabled until the second enable happened
 * really wasn't protected at all.
 */

int stistipanic = 1;	/* controls the debug panic check */
const char *stistimsg = "stisti";
ulong_t laststi[NCPU];

/*
 * This variable tracks the last place events were disabled on each cpu;
 * it assists in debugging when asserts that interrupts are enabled trip.
 */
ulong_t lastcli[NCPU];

#endif

/*
 * Set cpu's base SPL level to the highest active interrupt level
 */
void
set_base_spl(void)
{
	struct cpu *cpu = CPU;
	uint16_t active = (uint16_t)cpu->cpu_intr_actv;

	cpu->cpu_base_spl = active == 0 ? 0 : bsrw_insn(active);
}

/*
 * Do all the work necessary to set up the cpu and thread structures
 * to dispatch a high-level interrupt.
 *
 * Returns 0 if we're -not- already on the high-level interrupt stack,
 * (and *must* switch to it), non-zero if we are already on that stack.
 *
 * Called with interrupts masked.
 * The 'pil' is already set to the appropriate level for rp->r_trapno.
 */
static int
hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil, struct regs *rp)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t mask;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	ASSERT(pil > LOCK_LEVEL);

	if (pil == CBE_HIGH_PIL) {
		cpu->cpu_profile_pil = oldpil;
		if (USERMODE(rp->r_cs)) {
			cpu->cpu_profile_pc = 0;
			cpu->cpu_profile_upc = rp->r_pc;
			cpu->cpu_cpcprofile_pc = 0;
			cpu->cpu_cpcprofile_upc = rp->r_pc;
		} else {
			cpu->cpu_profile_pc = rp->r_pc;
			cpu->cpu_profile_upc = 0;
			cpu->cpu_cpcprofile_pc = rp->r_pc;
			cpu->cpu_cpcprofile_upc = 0;
		}
	}

	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
	if (mask != 0) {
		int nestpil;

		/*
		 * We have interrupted another high-level interrupt.
		 * Load starting timestamp, compute interval, update
		 * cumulative counter.
		 */
		nestpil = bsrw_insn((uint16_t)mask);
		ASSERT(nestpil < pil);
		intrtime = now -
		    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
		mcpu->intrstat[nestpil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		/*
		 * Another high-level interrupt is active below this one, so
		 * there is no need to check for an interrupt thread. That
		 * will be done by the lowest priority high-level interrupt
		 * active.
		 */
	} else {
		kthread_t *t = cpu->cpu_thread;

		/*
		 * See if we are interrupting a low-level interrupt thread.
		 * If so, account for its time slice only if its time stamp
		 * is non-zero.
		 */
		if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
			intrtime = now - t->t_intr_start;
			mcpu->intrstat[t->t_pil][0] += intrtime;
			cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
			t->t_intr_start = 0;
		}
	}

	/*
	 * Store starting timestamp in CPU structure for this PIL.
	 */
	mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;

	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);

	if (pil == 15) {
		/*
		 * To support reentrant level 15 interrupts, we maintain a
		 * recursion count in the top half of cpu_intr_actv.  Only
		 * when this count hits zero do we clear the PIL 15 bit from
		 * the lower half of cpu_intr_actv.
		 */
		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
		(*refcntp)++;
	}

	mask = cpu->cpu_intr_actv;

	cpu->cpu_intr_actv |= (1 << pil);

	return (mask & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
}

/*
 * Does most of the work of returning from a high level interrupt.
 *
 * Returns 0 if there are no more high level interrupts (in which
 * case we must switch back to the interrupted thread stack) or
 * non-zero if there are more (in which case we should stay on it).
 *
 * Called with interrupts masked
 */
static int
hilevel_intr_epilog(struct cpu *cpu, uint_t pil, uint_t oldpil, uint_t vecnum)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t mask;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	ASSERT(mcpu->mcpu_pri == pil);

	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));

	if (pil == 15) {
		/*
		 * To support reentrant level 15 interrupts, we maintain a
		 * recursion count in the top half of cpu_intr_actv.  Only
		 * when this count hits zero do we clear the PIL 15 bit from
		 * the lower half of cpu_intr_actv.
		 */
		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;

		ASSERT(*refcntp > 0);

		if (--(*refcntp) == 0)
			cpu->cpu_intr_actv &= ~(1 << pil);
	} else {
		cpu->cpu_intr_actv &= ~(1 << pil);
	}

	ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);

	intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	/*
	 * Check for lower-pil nested high-level interrupt beneath
	 * current one.  If so, place a starting timestamp in its
	 * pil_high_start entry.
	 */
	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
	if (mask != 0) {
		int nestpil;

		/*
		 * find PIL of nested interrupt
		 */
		nestpil = bsrw_insn((uint16_t)mask);
		ASSERT(nestpil < pil);
		mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
		/*
		 * (Another high-level interrupt is active below this one,
		 * so there is no need to check for an interrupt
		 * thread.  That will be done by the lowest priority
		 * high-level interrupt active.)
		 */
	} else {
		/*
		 * Check to see if there is a low-level interrupt active.
		 * If so, place a starting timestamp in the thread
		 * structure.
		 */
		kthread_t *t = cpu->cpu_thread;

		if (t->t_flag & T_INTR_THREAD)
			t->t_intr_start = now;
	}

	mcpu->mcpu_pri = oldpil;
	(void) (*setlvlx)(oldpil, vecnum);

	return (cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
}

/*
 * Set up the cpu, thread and interrupt thread structures for
 * executing an interrupt thread.  The new stack pointer of the
 * interrupt thread (which *must* be switched to) is returned.
 */
static caddr_t
intr_thread_prolog(struct cpu *cpu, caddr_t stackptr, uint_t pil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t, *volatile it;
	hrtime_t now = tsc_read();

	ASSERT(pil > 0);
	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
	cpu->cpu_intr_actv |= (1 << pil);

	/*
	 * Get set to run an interrupt thread.
	 * There should always be an interrupt thread, since we
	 * allocate one for each level on each CPU.
	 *
	 * t_intr_start could be zero due to cpu_intr_swtch_enter.
	 */
	t = cpu->cpu_thread;
	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
		hrtime_t intrtime = now - t->t_intr_start;
		mcpu->intrstat[t->t_pil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		t->t_intr_start = 0;
	}

	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);

	t->t_sp = (uintptr_t)stackptr;	/* mark stack in curthread for resume */

	/*
	 * Unlink an interrupt thread from the CPU's free list.
	 *
	 * Note that the code in kcpc_overflow_intr -relies- on the
	 * ordering of events here - in particular that t->t_lwp of
	 * the interrupt thread is set to the pinned thread *before*
	 * curthread is changed.
	 */
	it = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it->t_link;
	it->t_intr = t;
	it->t_lwp = t->t_lwp;

	/*
	 * (threads on the interrupt thread free list could have state
	 * preset to TS_ONPROC, but it helps in debugging if
	 * they're TS_FREE.)
	 */
	it->t_state = TS_ONPROC;

	cpu->cpu_thread = it;		/* new curthread on this cpu */
	it->t_pil = (uchar_t)pil;
	it->t_pri = intr_pri + (pri_t)pil;
	it->t_intr_start = now;

	return (it->t_stk);
}


#ifdef DEBUG
int intr_thread_cnt;
#endif

/*
 * Called with interrupts disabled
 */
static void
intr_thread_epilog(struct cpu *cpu, uint_t vec, uint_t oldpil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t;
	kthread_t *it = cpu->cpu_thread;	/* curthread */
	uint_t pil, basespl;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	pil = it->t_pil;
	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(it->t_intr_start != 0);
	intrtime = now - it->t_intr_start;
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));
	cpu->cpu_intr_actv &= ~(1 << pil);

	/*
	 * If there is still an interrupted thread underneath this one
	 * then the interrupt was never blocked and the return is
	 * fairly simple.  Otherwise it isn't.
	 */
	if ((t = it->t_intr) == NULL) {
		/*
		 * The interrupted thread is no longer pinned underneath
		 * the interrupt thread.  This means the interrupt must
		 * have blocked, and the interrupted thread has been
		 * unpinned, and has probably been running around the
		 * system for a while.
		 *
		 * Since there is no longer a thread under this one, put
		 * this interrupt thread back on the CPU's free list and
		 * resume the idle thread which will dispatch the next
		 * thread to run.
		 */
#ifdef DEBUG
		intr_thread_cnt++;
#endif
		cpu->cpu_stats.sys.intrblk++;
		/*
		 * Set CPU's base SPL based on active interrupts bitmask
		 */
		set_base_spl();
		basespl = cpu->cpu_base_spl;
		mcpu->mcpu_pri = basespl;
		(*setlvlx)(basespl, vec);
		(void) splhigh();
		sti();
		it->t_state = TS_FREE;
		/*
		 * Return interrupt thread to pool
		 */
		it->t_link = cpu->cpu_intr_thread;
		cpu->cpu_intr_thread = it;
		swtch();
		panic("intr_thread_epilog: swtch returned");
		/*NOTREACHED*/
	}

	/*
	 * Return interrupt thread to the pool
	 */
	it->t_link = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it;
	it->t_state = TS_FREE;

	basespl = cpu->cpu_base_spl;
	pil = MAX(oldpil, basespl);
	mcpu->mcpu_pri = pil;
	(*setlvlx)(pil, vec);
	t->t_intr_start = now;
	cpu->cpu_thread = t;
}

/*
 * intr_get_time() is a resource for interrupt handlers to determine how
 * much time has been spent handling the current interrupt. Such a function
 * is needed because higher level interrupts can arrive during the
 * processing of an interrupt.  intr_get_time() only returns time spent in the
 * current interrupt handler.
 *
 * The caller must be calling from an interrupt handler running at a pil
 * below or at lock level. Timings are not provided for high-level
 * interrupts.
 *
 * The first time intr_get_time() is called while handling an interrupt,
 * it returns the time since the interrupt handler was invoked.  Subsequent
 * calls will return the time since the prior call to intr_get_time(). Time
 * is returned as ticks. Use scalehrtimef() to convert ticks to nsec.
 *
 * Theory Of Intrstat[][]:
 *
 * uint64_t intrstat[pil][0..1] is an array indexed by pil level, with two
 * uint64_ts per pil.
 *
 * intrstat[pil][0] is a cumulative count of the number of ticks spent
 * handling all interrupts at the specified pil on this CPU. It is
 * exported via kstats to the user.
 *
 * intrstat[pil][1] is always a count of ticks less than or equal to the
 * value in [0]. The difference between [1] and [0] is the value returned
 * by a call to intr_get_time(). At the start of interrupt processing,
 * [0] and [1] will be equal (or nearly so). As the interrupt consumes
 * time, [0] will increase, but [1] will remain the same. A call to
 * intr_get_time() will return the difference, then update [1] to be the
 * same as [0]. Future calls will return the time since the last call.
 * Finally, when the interrupt completes, [1] is updated to the same as [0].
 *
 * Implementation:
 *
 * intr_get_time() works much like a higher level interrupt arriving. It
 * "checkpoints" the timing information by incrementing intrstat[pil][0]
 * to include elapsed running time, and by setting t_intr_start to rdtsc.
 * It then sets the return value to intrstat[pil][0] - intrstat[pil][1],
 * and updates intrstat[pil][1] to be the same as the new value of
 * intrstat[pil][0].
 *
 * In the normal handling of interrupts, after an interrupt handler returns
 * and the code in intr_thread() updates intrstat[pil][0], it then sets
 * intrstat[pil][1] to the new value of intrstat[pil][0]. When [0] == [1],
 * the timings are reset, i.e. intr_get_time() will return [0] - [1] which
 * is 0.
 *
 * Whenever interrupts arrive on a CPU which is handling a lower pil
 * interrupt, they update the lower pil's [0] to show time spent in the
 * handler that they've interrupted. This results in a growing discrepancy
 * between [0] and [1], which is returned the next time intr_get_time() is
 * called. Time spent in the higher-pil interrupt will not be returned in
 * the next intr_get_time() call from the original interrupt, because
 * the higher-pil interrupt's time is accumulated in intrstat[higherpil][].
 */
uint64_t
intr_get_time(void)
{
	struct cpu *cpu;
	struct machcpu *mcpu;
	kthread_t *t;
	uint64_t time, delta, ret;
	uint_t pil;

	cli();
	cpu = CPU;
	mcpu = &cpu->cpu_m;
	t = cpu->cpu_thread;
	pil = t->t_pil;
	ASSERT((cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK) == 0);
	ASSERT(t->t_flag & T_INTR_THREAD);
	ASSERT(pil != 0);
	ASSERT(t->t_intr_start != 0);

	time = tsc_read();
	delta = time - t->t_intr_start;
	t->t_intr_start = time;

	time = mcpu->intrstat[pil][0] + delta;
	ret = time - mcpu->intrstat[pil][1];
	mcpu->intrstat[pil][0] = time;
	mcpu->intrstat[pil][1] = time;
	cpu->cpu_intracct[cpu->cpu_mstate] += delta;

	sti();
	return (ret);
}

static caddr_t
dosoftint_prolog(
	struct cpu *cpu,
	caddr_t stackptr,
	uint32_t st_pending,
	uint_t oldpil)
{
	kthread_t *t, *volatile it;
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t pil;
	hrtime_t now;

top:
	ASSERT(st_pending == mcpu->mcpu_softinfo.st_pending);

	pil = bsrw_insn((uint16_t)st_pending);
	if (pil <= oldpil || pil <= cpu->cpu_base_spl)
		return (0);

	/*
	 * XX64 Sigh.
	 *
	 * This is a transliteration of the i386 assembler code for
	 * soft interrupts.  One question is "why does this need
	 * to be atomic?"
	 * One possible race is -other- processors
	 * posting soft interrupts to us in set_pending() i.e. the
	 * CPU might get preempted just after the address computation,
	 * but just before the atomic transaction, so another CPU would
	 * actually set the original CPU's st_pending bit.  However,
	 * it looks like it would be simpler to disable preemption there.
	 * Are there other races for which preemption control doesn't work?
	 *
	 * The i386 assembler version -also- checks to see if the bit
	 * being cleared was actually set; if it wasn't, it rechecks
	 * for more.  This seems a bit strange, as the only code that
	 * ever clears the bit is -this- code running with interrupts
	 * disabled on -this- CPU.  This code would probably be cheaper:
	 *
	 *	atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending,
	 *	    ~(1 << pil));
	 *
	 * and t->t_preempt--/++ around set_pending() even cheaper,
	 * but at this point, correctness is critical, so we slavishly
	 * emulate the i386 port.
	 */
	if (atomic_btr32((uint32_t *)
	    &mcpu->mcpu_softinfo.st_pending, pil) == 0) {
		st_pending = mcpu->mcpu_softinfo.st_pending;
		goto top;
	}

	mcpu->mcpu_pri = pil;
	(*setspl)(pil);

	now = tsc_read();

	/*
	 * Get set to run interrupt thread.
	 * There should always be an interrupt thread since we
	 * allocate one for each level on the CPU.
	 */
	it = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it->t_link;

	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
	t = cpu->cpu_thread;
	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
		hrtime_t intrtime = now - t->t_intr_start;
		mcpu->intrstat[pil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		t->t_intr_start = 0;
	}

	/*
	 * Note that the code in kcpc_overflow_intr -relies- on the
	 * ordering of events here - in particular that t->t_lwp of
	 * the interrupt thread is set to the pinned thread *before*
	 * curthread is changed.
	 */
	it->t_lwp = t->t_lwp;
	it->t_state = TS_ONPROC;

	/*
	 * Push interrupted thread onto list from new thread.
	 * Set the new thread as the current one.
	 * Set interrupted thread's T_SP because if it is the idle thread,
	 * resume() may use that stack between threads.
	 */

	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
	t->t_sp = (uintptr_t)stackptr;

	it->t_intr = t;
	cpu->cpu_thread = it;

	/*
	 * Set bit for this pil in CPU's interrupt active bitmask.
	 */
	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
	cpu->cpu_intr_actv |= (1 << pil);

	/*
	 * Initialize thread priority level from intr_pri
	 */
	it->t_pil = (uchar_t)pil;
	it->t_pri = (pri_t)pil + intr_pri;
	it->t_intr_start = now;

	return (it->t_stk);
}

static void
dosoftint_epilog(struct cpu *cpu, uint_t oldpil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t, *it;
	uint_t pil, basespl;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	it = cpu->cpu_thread;
	pil = it->t_pil;

	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));
	cpu->cpu_intr_actv &= ~(1 << pil);
	intrtime = now - it->t_intr_start;
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	/*
	 * If there is still an interrupted thread underneath this one
	 * then the interrupt was never blocked and the return is
	 * fairly simple.  Otherwise it isn't.
	 */
	if ((t = it->t_intr) == NULL) {
		/*
		 * Put thread back on the interrupt thread list.
		 * This was an interrupt thread, so set CPU's base SPL.
		 */
		set_base_spl();
		it->t_state = TS_FREE;
		it->t_link = cpu->cpu_intr_thread;
		cpu->cpu_intr_thread = it;
		(void) splhigh();
		sti();
		swtch();
		/*NOTREACHED*/
		panic("dosoftint_epilog: swtch returned");
	}
	it->t_link = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it;
	it->t_state = TS_FREE;
	cpu->cpu_thread = t;
	if (t->t_flag & T_INTR_THREAD)
		t->t_intr_start = now;
	basespl = cpu->cpu_base_spl;
	pil = MAX(oldpil, basespl);
	mcpu->mcpu_pri = pil;
	(*setspl)(pil);
}


/*
 * Make the interrupted thread 't' runnable.
 *
 * Since t->t_sp has already been saved, t->t_pc is all
 * that needs to be set in this function.
 *
 * Returns the interrupt level of the interrupt thread.
 */
int
intr_passivate(
	kthread_t *it,		/* interrupt thread */
	kthread_t *t)		/* interrupted thread */
{
	extern void _sys_rtt();

	ASSERT(it->t_flag & T_INTR_THREAD);
	ASSERT(SA(t->t_sp) == t->t_sp);

	t->t_pc = (uintptr_t)_sys_rtt;
	return (it->t_pil);
}

/*
 * Create interrupt kstats for this CPU.
 */
void
cpu_create_intrstat(cpu_t *cp)
{
	int		i;
	kstat_t		*intr_ksp;
	kstat_named_t	*knp;
	char		name[KSTAT_STRLEN];
	zoneid_t	zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;

	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);

	/*
	 * Initialize each PIL's named kstat
	 */
	if (intr_ksp != NULL) {
		intr_ksp->ks_update = cpu_kstat_intrstat_update;
		knp = (kstat_named_t *)intr_ksp->ks_data;
		intr_ksp->ks_private = cp;
		for (i = 0; i < PIL_MAX; i++) {
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
			    i + 1);
			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
			    i + 1);
			kstat_named_init(&knp[(i * 2) + 1], name,
			    KSTAT_DATA_UINT64);
		}
		kstat_install(intr_ksp);
	}
}

/*
 * Delete interrupt kstats for this CPU.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}

/*
 * Convert interrupt statistics from CPU ticks to nanoseconds and
 * update kstat.
 */
int
cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
{
	kstat_named_t	*knp = ksp->ks_data;
	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
	int		i;
	hrtime_t	hrt;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	for (i = 0; i < PIL_MAX; i++) {
		hrt = (hrtime_t)cpup->cpu_m.intrstat[i + 1][0];
		scalehrtimef(&hrt);
		knp[i * 2].value.ui64 = (uint64_t)hrt;
		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
	}

	return (0);
}

/*
 * An interrupt thread is ending a time slice, so compute the interval it
 * ran for and update the statistic for its PIL.
 */
void
cpu_intr_swtch_enter(kthread_id_t t)
{
	uint64_t	interval;
	uint64_t	start;
	cpu_t		*cpu;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

	/*
	 * We could be here with a zero timestamp. This could happen if:
	 * an interrupt thread which no longer has a pinned thread underneath
	 * it (i.e. it blocked at some point in its past) has finished running
	 * its handler. intr_thread() updated the interrupt statistic for its
	 * PIL and zeroed its timestamp. Since there was no pinned thread to
	 * return to, swtch() gets called and we end up here.
	 *
	 * Note that we use atomic ops below (cas64 and atomic_add_64), which
	 * we don't use in the functions above, because we're not called
	 * with interrupts blocked, but the epilog/prolog functions are.
	 */
	if (t->t_intr_start) {
		do {
			start = t->t_intr_start;
			interval = tsc_read() - start;
		} while (cas64(&t->t_intr_start, start, 0) != start);
		cpu = CPU;
		cpu->cpu_m.intrstat[t->t_pil][0] += interval;

		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
		    interval);
	} else
		ASSERT(t->t_intr == NULL);
}

/*
 * An interrupt thread is returning from swtch(). Place a starting timestamp
 * in its thread structure.
 */
void
cpu_intr_swtch_exit(kthread_id_t t)
{
	uint64_t ts;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

	do {
		ts = t->t_intr_start;
	} while (cas64(&t->t_intr_start, ts, tsc_read()) != ts);
}

/*
 * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
 */
/*ARGSUSED*/
static void
dispatch_hilevel(uint_t vector, uint_t arg2)
{
	sti();
	av_dispatch_autovect(vector);
	cli();
}

/*
 * Dispatch a soft interrupt
 */
/*ARGSUSED*/
static void
dispatch_softint(uint_t oldpil, uint_t arg2)
{
	struct cpu *cpu = CPU;

	sti();
	av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
	cli();

	/*
	 * Must run dosoftint_epilog() on the interrupt thread stack, since
	 * there may not be a return from it if the interrupt thread blocked.
	 */
	dosoftint_epilog(cpu, oldpil);
}

/*
 * Dispatch a normal interrupt
 */
static void
dispatch_hardint(uint_t vector, uint_t oldipl)
{
	struct cpu *cpu = CPU;

	sti();
	av_dispatch_autovect(vector);
	cli();

	/*
	 * Must run intr_thread_epilog() on the interrupt thread stack, since
	 * there may not be a return from it if the interrupt thread blocked.
	 */
	intr_thread_epilog(cpu, vector, oldipl);
}

/*
 * Deliver any softints the current interrupt priority allows.
 * Called with interrupts disabled.
 */
void
dosoftint(struct regs *regs)
{
	struct cpu *cpu = CPU;
	int oldipl;
	caddr_t newsp;

	while (cpu->cpu_softinfo.st_pending) {
		oldipl = cpu->cpu_pri;
		newsp = dosoftint_prolog(cpu, (caddr_t)regs,
		    cpu->cpu_softinfo.st_pending, oldipl);
		/*
		 * If returned stack pointer is NULL, priority is too high
		 * to run any of the pending softints now.
		 * Break out and they will be run later.
		 */
		if (newsp == NULL)
			break;
		switch_sp_and_call(newsp, dispatch_softint, oldipl, 0);
	}
}

/*
 * Interrupt service routine, called with interrupts disabled.
 */
/*ARGSUSED*/
void
do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
{
	struct cpu *cpu = CPU;
	int newipl, oldipl = cpu->cpu_pri;
	uint_t vector;
	caddr_t newsp;

#ifdef TRAPTRACE
	ttp->ttr_marker = TT_INTERRUPT;
	ttp->ttr_ipl = 0xff;
	ttp->ttr_pri = oldipl;
	ttp->ttr_spl = cpu->cpu_base_spl;
	ttp->ttr_vector = 0xff;
#endif	/* TRAPTRACE */

	cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);

	/*
	 * If it's a softint go do it now.
	 */
	if (rp->r_trapno == T_SOFTINT) {
		dosoftint(rp);
		ASSERT(!interrupts_enabled());
		return;
	}

	/*
	 * Raise the interrupt priority.
	 */
	newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
#ifdef TRAPTRACE
	ttp->ttr_ipl = newipl;
#endif	/* TRAPTRACE */

	/*
	 * Bail if it is a spurious interrupt
	 */
	if (newipl == -1)
		return;
	cpu->cpu_pri = newipl;
	vector = rp->r_trapno;
#ifdef TRAPTRACE
	ttp->ttr_vector = vector;
#endif	/* TRAPTRACE */
	if (newipl > LOCK_LEVEL) {
		/*
		 * High priority interrupts run on this cpu's interrupt stack.
		 */
		if (hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
			newsp = cpu->cpu_intr_stack;
			switch_sp_and_call(newsp, dispatch_hilevel, vector, 0);
		} else {	/* already on the interrupt stack */
			dispatch_hilevel(vector, 0);
		}
		(void) hilevel_intr_epilog(cpu, newipl, oldipl, vector);
	} else {
		/*
		 * Run this interrupt in a separate thread.
		 */
		newsp = intr_thread_prolog(cpu, (caddr_t)rp, newipl);
		switch_sp_and_call(newsp, dispatch_hardint, vector, oldipl);
	}

#if !defined(__xpv)
	/*
	 * Deliver any pending soft interrupts.
	 */
	if (cpu->cpu_softinfo.st_pending)
		dosoftint(rp);
#endif	/* !__xpv */
}


/*
 * Common tasks always done by _sys_rtt, called with interrupts disabled.
 * Returns 1 if returning to userland, 0 if returning to system mode.
 */
int
sys_rtt_common(struct regs *rp)
{
	kthread_t *tp;
	extern void mutex_exit_critical_start();
	extern long mutex_exit_critical_size;
	extern void mutex_owner_running_critical_start();
	extern long mutex_owner_running_critical_size;

loop:

	/*
	 * Check if returning to user
	 */
	tp = CPU->cpu_thread;
	if (USERMODE(rp->r_cs)) {
		/*
		 * Check if AST pending.
		 */
		if (tp->t_astflag) {
			/*
			 * Let trap() handle the AST
			 */
			sti();
			rp->r_trapno = T_AST;
			trap(rp, (caddr_t)0, CPU->cpu_id);
			cli();
			goto loop;
		}

#if defined(__amd64)
		/*
		 * We are done if segment registers do not need updating.
		 */
		if (tp->t_lwp->lwp_pcb.pcb_rupdate == 0)
			return (1);

		if (update_sregs(rp, tp->t_lwp)) {
			/*
			 * 1 or more of the selectors is bad.
			 * Deliver a SIGSEGV.
			 */
			proc_t *p = ttoproc(tp);

			sti();
			mutex_enter(&p->p_lock);
			tp->t_lwp->lwp_cursig = SIGSEGV;
			mutex_exit(&p->p_lock);
			psig();
			tp->t_sig_check = 1;
			cli();
		}
		tp->t_lwp->lwp_pcb.pcb_rupdate = 0;

#endif	/* __amd64 */
		return (1);
	}

	/*
	 * Here if we are returning to supervisor mode.
	 * Check for a kernel preemption request.
	 */
	if (CPU->cpu_kprunrun && (rp->r_ps & PS_IE)) {

		/*
		 * Do nothing if already in kpreempt
		 */
		if (!tp->t_preempt_lk) {
			tp->t_preempt_lk = 1;
			sti();
			kpreempt(1);	/* asynchronous kpreempt call */
			cli();
			tp->t_preempt_lk = 0;
		}
	}

	/*
	 * If we interrupted the mutex_exit() critical region we must
	 * reset the PC back to the beginning to prevent missed wakeups.
	 * See the comments in mutex_exit() for details.
	 */
	if ((uintptr_t)rp->r_pc - (uintptr_t)mutex_exit_critical_start <
	    mutex_exit_critical_size) {
		rp->r_pc = (greg_t)mutex_exit_critical_start;
	}

	/*
	 * If we interrupted the mutex_owner_running() critical region we
	 * must reset the PC back to the beginning to prevent dereferencing
	 * of a freed thread pointer. See the comments in mutex_owner_running
	 * for details.
	 */
	if ((uintptr_t)rp->r_pc -
	    (uintptr_t)mutex_owner_running_critical_start <
	    mutex_owner_running_critical_size) {
		rp->r_pc = (greg_t)mutex_owner_running_critical_start;
	}

	return (0);
}

void
send_dirint(int cpuid, int int_level)
{
	(*send_dirintf)(cpuid, int_level);
}

/*
 * do_splx routine, takes new ipl to set
 * returns the old ipl.
 * We are careful not to set the priority lower than CPU->cpu_base_spl;
 * even though it seems we're raising the priority, it could be set
 * higher at any time by an interrupt routine, so we must block interrupts
 * and look at CPU->cpu_base_spl.
 */
int
do_splx(int newpri)
{
	ulong_t	flag;
	cpu_t	*cpu;
	int	curpri, basepri;

	flag = intr_clear();
	cpu = CPU;	/* ints are disabled, now safe to cache cpu ptr */
	curpri = cpu->cpu_m.mcpu_pri;
	basepri = cpu->cpu_base_spl;
	if (newpri < basepri)
		newpri = basepri;
	cpu->cpu_m.mcpu_pri = newpri;
	(*setspl)(newpri);
	/*
	 * If we are going to reenable interrupts see if new priority level
	 * allows pending softint delivery.
	 */
	if ((flag & PS_IE) &&
	    bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > newpri)
		fakesoftint();
	ASSERT(!interrupts_enabled());
	intr_restore(flag);
	return (curpri);
}

/*
 * Common spl raise routine, takes new ipl to set
 * returns the old ipl, will not lower ipl.
 */
int
splr(int newpri)
{
	ulong_t	flag;
	cpu_t	*cpu;
	int	curpri, basepri;

	flag = intr_clear();
	cpu = CPU;	/* ints are disabled, now safe to cache cpu ptr */
	curpri = cpu->cpu_m.mcpu_pri;
	/*
	 * Only do something if new priority is larger
	 */
	if (newpri > curpri) {
		basepri = cpu->cpu_base_spl;
		if (newpri < basepri)
			newpri = basepri;
		cpu->cpu_m.mcpu_pri = newpri;
		(*setspl)(newpri);
		/*
		 * See if new priority level allows pending softint delivery
		 */
		if ((flag & PS_IE) &&
		    bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > newpri)
			fakesoftint();
	}
	intr_restore(flag);
	return (curpri);
}

/*
 * Return the current CPU's interrupt priority level.
 */
int
getpil(void)
{
	return (CPU->cpu_m.mcpu_pri);
}

/*
 * Return non-zero if interrupts are currently enabled (PS_IE set in flags).
 */
int
interrupts_enabled(void)
{
	ulong_t	flag;

	flag = getflags();
	return ((flag & PS_IE) == PS_IE);
}

#ifdef DEBUG
void
assert_ints_enabled(void)
{
	ASSERT(!interrupts_unleashed || interrupts_enabled());
}
#endif	/* DEBUG */