/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/cpuvar.h>
#include <sys/regset.h>
#include <sys/psw.h>
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>
#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <sys/disp.h>
#include <vm/seg_kp.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/smp_impldefs.h>
#include <sys/pool_pset.h>
#include <sys/zone.h>
#include <sys/bitmap.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/ontrap.h>
#include <sys/x86_archext.h>
#include <sys/promif.h>
#include <vm/hat_i86.h>


/*
 * Set cpu's base SPL level to the highest active interrupt level
 */
void
set_base_spl(void)
{
	struct cpu *cpu = CPU;
	uint16_t active = (uint16_t)cpu->cpu_intr_actv;

	cpu->cpu_base_spl = active == 0 ? 0 : bsrw_insn(active);
}
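
/*
 * Example (illustrative only): cpu_intr_actv keeps one bit per active PIL,
 * and bsrw_insn() (bit-scan-reverse) returns the index of the highest set
 * bit.  If interrupts at PIL 4 and PIL 9 are both active, the low word of
 * cpu_intr_actv is 0x0210, bsrw_insn() returns 9, and 9 becomes the new
 * cpu_base_spl.  With no bits set the base SPL drops back to 0.
 */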

/*
 * Do all the work necessary to set up the cpu and thread structures
 * to dispatch a high-level interrupt.
 *
 * Returns 0 if we're -not- already on the high-level interrupt stack,
 * (and *must* switch to it), non-zero if we are already on that stack.
 *
 * Called with interrupts masked.
 * The 'pil' is already set to the appropriate level for rp->r_trapno.
 */
static int
hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil, struct regs *rp)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t mask;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	ASSERT(pil > LOCK_LEVEL);

	if (pil == CBE_HIGH_PIL) {
		cpu->cpu_profile_pil = oldpil;
		if (USERMODE(rp->r_cs)) {
			cpu->cpu_profile_pc = 0;
			cpu->cpu_profile_upc = rp->r_pc;
		} else {
			cpu->cpu_profile_pc = rp->r_pc;
			cpu->cpu_profile_upc = 0;
		}
	}

	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
	if (mask != 0) {
		int nestpil;

		/*
		 * We have interrupted another high-level interrupt.
		 * Load starting timestamp, compute interval, update
		 * cumulative counter.
		 */
		nestpil = bsrw_insn((uint16_t)mask);
		ASSERT(nestpil < pil);
		intrtime = now -
		    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
		mcpu->intrstat[nestpil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		/*
		 * Another high-level interrupt is active below this one, so
		 * there is no need to check for an interrupt thread.  That
		 * will be done by the lowest priority high-level interrupt
		 * active.
		 */
	} else {
		kthread_t *t = cpu->cpu_thread;

		/*
		 * See if we are interrupting a low-level interrupt thread.
		 * If so, account for its time slice only if its time stamp
		 * is non-zero.
		 */
		if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
			intrtime = now - t->t_intr_start;
			mcpu->intrstat[t->t_pil][0] += intrtime;
			cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
			t->t_intr_start = 0;
		}
	}

	/*
	 * Store starting timestamp in CPU structure for this PIL.
	 */
	mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;

	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);

	if (pil == 15) {
		/*
		 * To support reentrant level 15 interrupts, we maintain a
		 * recursion count in the top half of cpu_intr_actv.  Only
		 * when this count hits zero do we clear the PIL 15 bit from
		 * the lower half of cpu_intr_actv.
		 */
		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
		(*refcntp)++;
	}

	mask = cpu->cpu_intr_actv;

	cpu->cpu_intr_actv |= (1 << pil);

	return (mask & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
}
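
/*
 * Layout sketch of cpu_intr_actv as used above (descriptive only): on this
 * little-endian architecture the low 16 bits form a bitmask with one bit
 * per active PIL, while the upper 16 bits hold the PIL 15 recursion count
 * that hilevel_intr_prolog()/hilevel_intr_epilog() increment and decrement:
 *
 *	 31              16 15               0
 *	+------------------+-----------------+
 *	| PIL 15 refcount  | active PIL bits |
 *	+------------------+-----------------+
 */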

/*
 * Does most of the work of returning from a high level interrupt.
 *
 * Returns 0 if there are no more high level interrupts (in which
 * case we must switch back to the interrupted thread stack) or
 * non-zero if there are more (in which case we should stay on it).
 *
 * Called with interrupts masked
 */
static int
hilevel_intr_epilog(struct cpu *cpu, uint_t pil, uint_t oldpil, uint_t vecnum)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t mask;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	ASSERT(mcpu->mcpu_pri == pil);

	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));

	if (pil == 15) {
		/*
		 * To support reentrant level 15 interrupts, we maintain a
		 * recursion count in the top half of cpu_intr_actv.  Only
		 * when this count hits zero do we clear the PIL 15 bit from
		 * the lower half of cpu_intr_actv.
		 */
		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;

		ASSERT(*refcntp > 0);

		if (--(*refcntp) == 0)
			cpu->cpu_intr_actv &= ~(1 << pil);
	} else {
		cpu->cpu_intr_actv &= ~(1 << pil);
	}

	ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);

	intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	/*
	 * Check for lower-pil nested high-level interrupt beneath
	 * current one.  If so, place a starting timestamp in its
	 * pil_high_start entry.
	 */
	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
	if (mask != 0) {
		int nestpil;

		/*
		 * find PIL of nested interrupt
		 */
		nestpil = bsrw_insn((uint16_t)mask);
		ASSERT(nestpil < pil);
		mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
		/*
		 * (Another high-level interrupt is active below this one,
		 * so there is no need to check for an interrupt
		 * thread.  That will be done by the lowest priority
		 * high-level interrupt active.)
		 */
	} else {
		/*
		 * Check to see if there is a low-level interrupt active.
		 * If so, place a starting timestamp in the thread
		 * structure.
		 */
		kthread_t *t = cpu->cpu_thread;

		if (t->t_flag & T_INTR_THREAD)
			t->t_intr_start = now;
	}

	mcpu->mcpu_pri = oldpil;
	(void) (*setlvlx)(oldpil, vecnum);

	return (cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
}

/*
 * Set up the cpu, thread and interrupt thread structures for
 * executing an interrupt thread.  The new stack pointer of the
 * interrupt thread (which *must* be switched to) is returned.
 */
static caddr_t
intr_thread_prolog(struct cpu *cpu, caddr_t stackptr, uint_t pil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t, *volatile it;
	hrtime_t now = tsc_read();

	ASSERT(pil > 0);
	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
	cpu->cpu_intr_actv |= (1 << pil);

	/*
	 * Get set to run an interrupt thread.
	 * There should always be an interrupt thread, since we
	 * allocate one for each level on each CPU.
	 *
	 * t_intr_start could be zero due to cpu_intr_swtch_enter.
	 */
	t = cpu->cpu_thread;
	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
		hrtime_t intrtime = now - t->t_intr_start;
		mcpu->intrstat[t->t_pil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		t->t_intr_start = 0;
	}

	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);

	t->t_sp = (uintptr_t)stackptr;	/* mark stack in curthread for resume */

	/*
	 * unlink the interrupt thread off the cpu
	 *
	 * Note that the code in kcpc_overflow_intr -relies- on the
	 * ordering of events here - in particular that t->t_lwp of
	 * the interrupt thread is set to the pinned thread *before*
	 * curthread is changed.
	 */
	it = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it->t_link;
	it->t_intr = t;
	it->t_lwp = t->t_lwp;

	/*
	 * (threads on the interrupt thread free list could have state
	 * preset to TS_ONPROC, but it helps in debugging if
	 * they're TS_FREE.)
	 */
	it->t_state = TS_ONPROC;

	cpu->cpu_thread = it;		/* new curthread on this cpu */
	it->t_pil = (uchar_t)pil;
	it->t_pri = intr_pri + (pri_t)pil;
	it->t_intr_start = now;

	return (it->t_stk);
}
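
/*
 * Sketch of the bookkeeping done above (descriptive only): interrupt
 * threads are kept on a per-CPU LIFO free list linked through t_link, and
 * the interrupted thread is "pinned" underneath the interrupt thread via
 * t_intr:
 *
 *	it = cpu->cpu_intr_thread;		pop an interrupt thread
 *	cpu->cpu_intr_thread = it->t_link;
 *	it->t_intr = t;				pin the interrupted thread
 *	cpu->cpu_thread = it;			it is now curthread
 *
 * intr_thread_epilog() reverses this, pushing the thread back onto the
 * free list with it->t_link = cpu->cpu_intr_thread; cpu->cpu_intr_thread = it.
 */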

#ifdef DEBUG
int intr_thread_cnt;
#endif

/*
 * Called with interrupts disabled
 */
static void
intr_thread_epilog(struct cpu *cpu, uint_t vec, uint_t oldpil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t;
	kthread_t *it = cpu->cpu_thread;	/* curthread */
	uint_t pil, basespl;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	pil = it->t_pil;
	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(it->t_intr_start != 0);
	intrtime = now - it->t_intr_start;
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));
	cpu->cpu_intr_actv &= ~(1 << pil);

	/*
	 * If there is still an interrupted thread underneath this one
	 * then the interrupt was never blocked and the return is
	 * fairly simple.  Otherwise it isn't.
	 */
	if ((t = it->t_intr) == NULL) {
		/*
		 * The interrupted thread is no longer pinned underneath
		 * the interrupt thread.  This means the interrupt must
		 * have blocked, and the interrupted thread has been
		 * unpinned, and has probably been running around the
		 * system for a while.
		 *
		 * Since there is no longer a thread under this one, put
		 * this interrupt thread back on the CPU's free list and
		 * resume the idle thread which will dispatch the next
		 * thread to run.
		 */
#ifdef DEBUG
		intr_thread_cnt++;
#endif
		cpu->cpu_stats.sys.intrblk++;
		/*
		 * Set CPU's base SPL based on active interrupts bitmask
		 */
		set_base_spl();
		basespl = cpu->cpu_base_spl;
		mcpu->mcpu_pri = basespl;
		(*setlvlx)(basespl, vec);
		(void) splhigh();
		sti();
		it->t_state = TS_FREE;
		/*
		 * Return interrupt thread to pool
		 */
		it->t_link = cpu->cpu_intr_thread;
		cpu->cpu_intr_thread = it;
		swtch();
		panic("intr_thread_epilog: swtch returned");
		/*NOTREACHED*/
	}

	/*
	 * Return interrupt thread to the pool
	 */
	it->t_link = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it;
	it->t_state = TS_FREE;

	basespl = cpu->cpu_base_spl;
	pil = MAX(oldpil, basespl);
	mcpu->mcpu_pri = pil;
	(*setlvlx)(pil, vec);
	t->t_intr_start = now;
	cpu->cpu_thread = t;
}

/*
 * intr_get_time() is a resource for interrupt handlers to determine how
 * much time has been spent handling the current interrupt.  Such a function
 * is needed because higher level interrupts can arrive during the
 * processing of an interrupt.  intr_get_time() only returns time spent in the
 * current interrupt handler.
 *
 * The caller must be calling from an interrupt handler running at a pil
 * below or at lock level.  Timings are not provided for high-level
 * interrupts.
 *
 * The first time intr_get_time() is called while handling an interrupt,
 * it returns the time since the interrupt handler was invoked.  Subsequent
 * calls will return the time since the prior call to intr_get_time().  Time
 * is returned as ticks.  Use tsc_scalehrtime() to convert ticks to nsec.
 *
 * Theory Of Intrstat[][]:
 *
 * uint64_t intrstat[pil][0..1] is an array indexed by pil level, with two
 * uint64_ts per pil.
 *
 * intrstat[pil][0] is a cumulative count of the number of ticks spent
 * handling all interrupts at the specified pil on this CPU.  It is
 * exported via kstats to the user.
 *
 * intrstat[pil][1] is always a count of ticks less than or equal to the
 * value in [0].  The difference between [1] and [0] is the value returned
 * by a call to intr_get_time().  At the start of interrupt processing,
 * [0] and [1] will be equal (or nearly so).  As the interrupt consumes
 * time, [0] will increase, but [1] will remain the same.  A call to
 * intr_get_time() will return the difference, then update [1] to be the
 * same as [0].  Future calls will return the time since the last call.
 * Finally, when the interrupt completes, [1] is updated to the same as [0].
 *
 * Implementation:
 *
 * intr_get_time() works much like a higher level interrupt arriving.  It
 * "checkpoints" the timing information by incrementing intrstat[pil][0]
 * to include elapsed running time, and by setting t_intr_start to rdtsc.
 * It then sets the return value to intrstat[pil][0] - intrstat[pil][1],
 * and updates intrstat[pil][1] to be the same as the new value of
 * intrstat[pil][0].
 *
 * In the normal handling of interrupts, after an interrupt handler returns
 * and the code in intr_thread() updates intrstat[pil][0], it then sets
 * intrstat[pil][1] to the new value of intrstat[pil][0].  When [0] == [1],
 * the timings are reset, i.e. intr_get_time() will return [0] - [1] which
 * is 0.
 *
 * Whenever interrupts arrive on a CPU which is handling a lower pil
 * interrupt, they update the lower pil's [0] to show time spent in the
 * handler that they've interrupted.  This results in a growing discrepancy
 * between [0] and [1], which is returned the next time intr_get_time() is
 * called.  Time spent in the higher-pil interrupt will not be returned in
 * the next intr_get_time() call from the original interrupt, because
 * the higher-pil interrupt's time is accumulated in intrstat[higherpil][].
 */
uint64_t
intr_get_time(void)
{
	struct cpu *cpu;
	struct machcpu *mcpu;
	kthread_t *t;
	uint64_t time, delta, ret;
	uint_t pil;

	cli();
	cpu = CPU;
	mcpu = &cpu->cpu_m;
	t = cpu->cpu_thread;
	pil = t->t_pil;
	ASSERT((cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK) == 0);
	ASSERT(t->t_flag & T_INTR_THREAD);
	ASSERT(pil != 0);
	ASSERT(t->t_intr_start != 0);

	time = tsc_read();
	delta = time - t->t_intr_start;
	t->t_intr_start = time;

	time = mcpu->intrstat[pil][0] + delta;
	ret = time - mcpu->intrstat[pil][1];
	mcpu->intrstat[pil][0] = time;
	mcpu->intrstat[pil][1] = time;
	cpu->cpu_intracct[cpu->cpu_mstate] += delta;

	sti();
	return (ret);
}
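
/*
 * Usage sketch (illustrative only; handler_work() is a placeholder): a
 * low-level interrupt handler that wants to time one phase of its work
 * could do something like
 *
 *	(void) intr_get_time();			reset the checkpoint
 *	handler_work();
 *	ticks = intr_get_time();		ticks spent in handler_work()
 *	ns = (hrtime_t)ticks;
 *	tsc_scalehrtime(&ns);			convert ticks to nanoseconds
 *
 * per the contract described above: each call returns the time since the
 * previous call (or, for the first call, since the handler was invoked).
 */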

static caddr_t
dosoftint_prolog(
	struct cpu *cpu,
	caddr_t stackptr,
	uint32_t st_pending,
	uint_t oldpil)
{
	kthread_t *t, *volatile it;
	struct machcpu *mcpu = &cpu->cpu_m;
	uint_t pil;
	hrtime_t now;

top:
	ASSERT(st_pending == mcpu->mcpu_softinfo.st_pending);

	pil = bsrw_insn((uint16_t)st_pending);
	if (pil <= oldpil || pil <= cpu->cpu_base_spl)
		return (0);

	/*
	 * XX64	Sigh.
	 *
	 * This is a transliteration of the i386 assembler code for
	 * soft interrupts.  One question is "why does this need
	 * to be atomic?"  One possible race is -other- processors
	 * posting soft interrupts to us in set_pending() i.e. the
	 * CPU might get preempted just after the address computation,
	 * but just before the atomic transaction, so another CPU would
	 * actually set the original CPU's st_pending bit.  However,
	 * it looks like it would be simpler to disable preemption there.
	 * Are there other races for which preemption control doesn't work?
	 *
	 * The i386 assembler version -also- checks to see if the bit
	 * being cleared was actually set; if it wasn't, it rechecks
	 * for more.  This seems a bit strange, as the only code that
	 * ever clears the bit is -this- code running with interrupts
	 * disabled on -this- CPU.  This code would probably be cheaper:
	 *
	 *	atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending,
	 *	    ~(1 << pil));
	 *
	 * and t->t_preempt--/++ around set_pending() even cheaper,
	 * but at this point, correctness is critical, so we slavishly
	 * emulate the i386 port.
	 */
	if (atomic_btr32((uint32_t *)
	    &mcpu->mcpu_softinfo.st_pending, pil) == 0) {
		st_pending = mcpu->mcpu_softinfo.st_pending;
		goto top;
	}

	mcpu->mcpu_pri = pil;
	(*setspl)(pil);

	now = tsc_read();

	/*
	 * Get set to run interrupt thread.
	 * There should always be an interrupt thread since we
	 * allocate one for each level on the CPU.
	 */
	it = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it->t_link;

	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
	t = cpu->cpu_thread;
	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
		hrtime_t intrtime = now - t->t_intr_start;
		mcpu->intrstat[pil][0] += intrtime;
		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
		t->t_intr_start = 0;
	}

	/*
	 * Note that the code in kcpc_overflow_intr -relies- on the
	 * ordering of events here - in particular that t->t_lwp of
	 * the interrupt thread is set to the pinned thread *before*
	 * curthread is changed.
	 */
	it->t_lwp = t->t_lwp;
	it->t_state = TS_ONPROC;

	/*
	 * Push interrupted thread onto list from new thread.
	 * Set the new thread as the current one.
	 * Set interrupted thread's T_SP because if it is the idle thread,
	 * resume() may use that stack between threads.
	 */

	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
	t->t_sp = (uintptr_t)stackptr;

	it->t_intr = t;
	cpu->cpu_thread = it;

	/*
	 * Set bit for this pil in CPU's interrupt active bitmask.
	 */
	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
	cpu->cpu_intr_actv |= (1 << pil);

	/*
	 * Initialize thread priority level from intr_pri
	 */
	it->t_pil = (uchar_t)pil;
	it->t_pri = (pri_t)pil + intr_pri;
	it->t_intr_start = now;

	return (it->t_stk);
}

static void
dosoftint_epilog(struct cpu *cpu, uint_t oldpil)
{
	struct machcpu *mcpu = &cpu->cpu_m;
	kthread_t *t, *it;
	uint_t pil, basespl;
	hrtime_t intrtime;
	hrtime_t now = tsc_read();

	it = cpu->cpu_thread;
	pil = it->t_pil;

	cpu->cpu_stats.sys.intr[pil - 1]++;

	ASSERT(cpu->cpu_intr_actv & (1 << pil));
	cpu->cpu_intr_actv &= ~(1 << pil);
	intrtime = now - it->t_intr_start;
	mcpu->intrstat[pil][0] += intrtime;
	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

	/*
	 * If there is still an interrupted thread underneath this one
	 * then the interrupt was never blocked and the return is
	 * fairly simple.  Otherwise it isn't.
	 */
	if ((t = it->t_intr) == NULL) {
		/*
		 * Put thread back on the interrupt thread list.
		 * This was an interrupt thread, so set CPU's base SPL.
		 */
		set_base_spl();
		it->t_state = TS_FREE;
		it->t_link = cpu->cpu_intr_thread;
		cpu->cpu_intr_thread = it;
		(void) splhigh();
		sti();
		swtch();
		/*NOTREACHED*/
		panic("dosoftint_epilog: swtch returned");
	}
	it->t_link = cpu->cpu_intr_thread;
	cpu->cpu_intr_thread = it;
	it->t_state = TS_FREE;
	cpu->cpu_thread = t;
	if (t->t_flag & T_INTR_THREAD)
		t->t_intr_start = now;
	basespl = cpu->cpu_base_spl;
	pil = MAX(oldpil, basespl);
	mcpu->mcpu_pri = pil;
	(*setspl)(pil);
}


/*
 * Make the interrupted thread 't' be runnable.
 *
 * Since t->t_sp has already been saved, t->t_pc is all
 * that needs to be set in this function.
 *
 * Returns the interrupt level of the interrupt thread.
 */
int
intr_passivate(
	kthread_t *it,		/* interrupt thread */
	kthread_t *t)		/* interrupted thread */
{
	extern void _sys_rtt();

	ASSERT(it->t_flag & T_INTR_THREAD);
	ASSERT(SA(t->t_sp) == t->t_sp);

	t->t_pc = (uintptr_t)_sys_rtt;
	return (it->t_pil);
}

/*
 * Create interrupt kstats for this CPU.
 */
void
cpu_create_intrstat(cpu_t *cp)
{
	int		i;
	kstat_t		*intr_ksp;
	kstat_named_t	*knp;
	char		name[KSTAT_STRLEN];
	zoneid_t	zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;

	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);

	/*
	 * Initialize each PIL's named kstat
	 */
	if (intr_ksp != NULL) {
		intr_ksp->ks_update = cpu_kstat_intrstat_update;
		knp = (kstat_named_t *)intr_ksp->ks_data;
		intr_ksp->ks_private = cp;
		for (i = 0; i < PIL_MAX; i++) {
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
			    i + 1);
			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
			    i + 1);
			kstat_named_init(&knp[(i * 2) + 1], name,
			    KSTAT_DATA_UINT64);
		}
		kstat_install(intr_ksp);
	}
}
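
/*
 * The kstat created above appears under module "cpu", name "intrstat", with
 * one "level-N-time" / "level-N-count" pair per PIL; the time values are
 * converted from TSC ticks to nanoseconds by cpu_kstat_intrstat_update()
 * below.  Illustrative invocation (assuming the standard kstat(1M) command):
 *
 *	$ kstat -p cpu:0:intrstat
 */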

/*
 * Delete interrupt kstats for this CPU.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}

/*
 * Convert interrupt statistics from CPU ticks to nanoseconds and
 * update kstat.
 */
int
cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
{
	kstat_named_t *knp = ksp->ks_data;
	cpu_t *cpup = (cpu_t *)ksp->ks_private;
	int i;
	hrtime_t hrt;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	for (i = 0; i < PIL_MAX; i++) {
		hrt = (hrtime_t)cpup->cpu_m.intrstat[i + 1][0];
		tsc_scalehrtime(&hrt);
		knp[i * 2].value.ui64 = (uint64_t)hrt;
		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
	}

	return (0);
}

/*
 * An interrupt thread is ending a time slice, so compute the interval it
 * ran for and update the statistic for its PIL.
 */
void
cpu_intr_swtch_enter(kthread_id_t t)
{
	uint64_t	interval;
	uint64_t	start;
	cpu_t		*cpu;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

	/*
	 * We could be here with a zero timestamp. This could happen if:
	 * an interrupt thread which no longer has a pinned thread underneath
	 * it (i.e. it blocked at some point in its past) has finished running
	 * its handler. intr_thread() updated the interrupt statistic for its
	 * PIL and zeroed its timestamp. Since there was no pinned thread to
	 * return to, swtch() gets called and we end up here.
	 *
	 * Note that we use atomic ops below (cas64 and atomic_add_64), which
	 * we don't use in the functions above, because we're not called
	 * with interrupts blocked, but the epilog/prolog functions are.
	 */
	if (t->t_intr_start) {
		do {
			start = t->t_intr_start;
			interval = tsc_read() - start;
		} while (cas64(&t->t_intr_start, start, 0) != start);
		cpu = CPU;
		cpu->cpu_m.intrstat[t->t_pil][0] += interval;

		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
		    interval);
	} else
		ASSERT(t->t_intr == NULL);
}

/*
 * An interrupt thread is returning from swtch(). Place a starting timestamp
 * in its thread structure.
 */
void
cpu_intr_swtch_exit(kthread_id_t t)
{
	uint64_t ts;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

	do {
		ts = t->t_intr_start;
	} while (cas64(&t->t_intr_start, ts, tsc_read()) != ts);
}

/*
 * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
 */
/*ARGSUSED*/
static void
dispatch_hilevel(uint_t vector, uint_t arg2)
{
	sti();
	av_dispatch_autovect(vector);
	cli();
}

/*
 * Dispatch a soft interrupt
 */
/*ARGSUSED*/
static void
dispatch_softint(uint_t oldpil, uint_t arg2)
{
	struct cpu *cpu = CPU;

	sti();
	av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
	cli();

	/*
	 * Must run softint_epilog() on the interrupt thread stack, since
	 * there may not be a return from it if the interrupt thread blocked.
	 */
	dosoftint_epilog(cpu, oldpil);
}

/*
 * Dispatch a normal interrupt
 */
static void
dispatch_hardint(uint_t vector, uint_t oldipl)
{
	struct cpu *cpu = CPU;

	sti();
	av_dispatch_autovect(vector);
	cli();

	/*
	 * Must run intr_thread_epilog() on the interrupt thread stack, since
	 * there may not be a return from it if the interrupt thread blocked.
	 */
	intr_thread_epilog(cpu, vector, oldipl);
}

/*
 * Deliver any softints the current interrupt priority allows.
 * Called with interrupts disabled.
 */
void
dosoftint(struct regs *regs)
{
	struct cpu *cpu = CPU;
	int oldipl;
	caddr_t newsp;

	while (cpu->cpu_softinfo.st_pending) {
		oldipl = cpu->cpu_pri;
		newsp = dosoftint_prolog(cpu, (caddr_t)regs,
		    cpu->cpu_softinfo.st_pending, oldipl);
		/*
		 * If returned stack pointer is NULL, priority is too high
		 * to run any of the pending softints now.
		 * Break out and they will be run later.
		 */
		if (newsp == NULL)
			break;
		switch_sp_and_call(newsp, dispatch_softint, oldipl, 0);
	}
}

/*
 * Interrupt service routine, called with interrupts disabled.
 */
/*ARGSUSED*/
void
do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
{
	struct cpu *cpu = CPU;
	int newipl, oldipl = cpu->cpu_pri;
	uint_t vector;
	caddr_t newsp;

#ifdef TRAPTRACE
	ttp->ttr_marker = TT_INTERRUPT;
	ttp->ttr_ipl = 0xff;
	ttp->ttr_pri = oldipl;
	ttp->ttr_spl = cpu->cpu_base_spl;
	ttp->ttr_vector = 0xff;
#endif	/* TRAPTRACE */

	/*
	 * Handle any pending TLB flushing
	 */
	tlb_service();

	/*
	 * If it's a softint go do it now.
	 */
	if (rp->r_trapno == T_SOFTINT) {
		dosoftint(rp);
		ASSERT(!interrupts_enabled());
		return;
	}

	/*
	 * Raise the interrupt priority.
	 */
	newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
#ifdef TRAPTRACE
	ttp->ttr_ipl = newipl;
#endif	/* TRAPTRACE */

	/*
	 * Bail if it is a spurious interrupt
	 */
	if (newipl == -1)
		return;
	cpu->cpu_pri = newipl;
	vector = rp->r_trapno;
#ifdef TRAPTRACE
	ttp->ttr_vector = vector;
#endif	/* TRAPTRACE */
	if (newipl > LOCK_LEVEL) {
		/*
		 * High priority interrupts run on this cpu's interrupt stack.
		 */
		if (hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
			newsp = cpu->cpu_intr_stack;
			switch_sp_and_call(newsp, dispatch_hilevel, vector, 0);
		} else { /* already on the interrupt stack */
			dispatch_hilevel(vector, 0);
		}
		(void) hilevel_intr_epilog(cpu, newipl, oldipl, vector);
	} else {
		/*
		 * Run this interrupt in a separate thread.
		 */
		newsp = intr_thread_prolog(cpu, (caddr_t)rp, newipl);
		switch_sp_and_call(newsp, dispatch_hardint, vector, oldipl);
	}

	/*
	 * Deliver any pending soft interrupts.
	 */
	if (cpu->cpu_softinfo.st_pending)
		dosoftint(rp);
}
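
/*
 * Summary of the dispatch paths above (descriptive only): a T_SOFTINT trap
 * goes straight to dosoftint(); a spurious vector (setlvl returns -1) is
 * dropped; a PIL above LOCK_LEVEL is handled on the CPU's dedicated
 * interrupt stack via dispatch_hilevel(); everything else runs in an
 * interrupt thread via intr_thread_prolog()/dispatch_hardint().
 */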

/*
 * Common tasks always done by _sys_rtt, called with interrupts disabled.
 * Returns 1 if returning to userland, 0 if returning to system mode.
 */
int
sys_rtt_common(struct regs *rp)
{
	kthread_t *tp;
	extern void mutex_exit_critical_start();
	extern long mutex_exit_critical_size;

loop:

	/*
	 * Check if returning to user
	 */
	tp = CPU->cpu_thread;
	if (USERMODE(rp->r_cs)) {
		/*
		 * Check if AST pending.
		 */
		if (tp->t_astflag) {
			/*
			 * Let trap() handle the AST
			 */
			sti();
			rp->r_trapno = T_AST;
			trap(rp, (caddr_t)0, CPU->cpu_id);
			cli();
			goto loop;
		}

#if defined(__amd64)
		/*
		 * We are done if segment registers do not need updating.
		 */
		if ((tp->t_lwp->lwp_pcb.pcb_flags & RUPDATE_PENDING) == 0)
			return (1);

		if (update_sregs(rp, tp->t_lwp)) {
			/*
			 * 1 or more of the selectors is bad.
			 * Deliver a SIGSEGV.
			 */
			proc_t *p = ttoproc(tp);

			sti();
			mutex_enter(&p->p_lock);
			tp->t_lwp->lwp_cursig = SIGSEGV;
			mutex_exit(&p->p_lock);
			psig();
			tp->t_sig_check = 1;
			cli();
		}
		tp->t_lwp->lwp_pcb.pcb_flags &= ~RUPDATE_PENDING;

#endif	/* __amd64 */
		return (1);
	}

	/*
	 * Here if we are returning to supervisor mode.
	 * Check for a kernel preemption request.
	 */
	if (CPU->cpu_kprunrun && (rp->r_ps & PS_IE)) {

		/*
		 * Do nothing if already in kpreempt
		 */
		if (!tp->t_preempt_lk) {
			tp->t_preempt_lk = 1;
			sti();
			kpreempt(1); /* asynchronous kpreempt call */
			cli();
			tp->t_preempt_lk = 0;
		}
	}

	/*
	 * If we interrupted the mutex_exit() critical region we must
	 * reset the PC back to the beginning to prevent missed wakeups
	 * See the comments in mutex_exit() for details.
	 */
	if ((uintptr_t)rp->r_pc - (uintptr_t)mutex_exit_critical_start <
	    mutex_exit_critical_size) {
		rp->r_pc = (greg_t)mutex_exit_critical_start;
	}
	return (0);
}

void
send_dirint(int cpuid, int int_level)
{
	(*send_dirintf)(cpuid, int_level);
}

/*
 * do_splx routine: takes the new ipl to set and returns the old ipl.
 * We are careful not to set the priority lower than CPU->cpu_base_spl;
 * even though that may look like we are raising the priority the caller
 * asked for, the base SPL can be raised at any time by an interrupt
 * routine, so we must block interrupts and look at CPU->cpu_base_spl here.
 */
int
do_splx(int newpri)
{
	ulong_t	flag;
	cpu_t	*cpu;
	int	curpri, basepri;

	flag = intr_clear();
	cpu = CPU; /* ints are disabled, now safe to cache cpu ptr */
	curpri = cpu->cpu_m.mcpu_pri;
	basepri = cpu->cpu_base_spl;
	if (newpri < basepri)
		newpri = basepri;
	cpu->cpu_m.mcpu_pri = newpri;
	(*setspl)(newpri);
	/*
	 * If we are going to reenable interrupts see if new priority level
	 * allows pending softint delivery.
	 */
	if ((flag & PS_IE) &&
	    bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > newpri)
		fakesoftint();
	ASSERT(!interrupts_enabled());
	intr_restore(flag);
	return (curpri);
}
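
/*
 * Usage sketch (illustrative only): both do_splx() and splr() below return
 * the previous ipl so that a caller can bracket a critical section and then
 * restore the saved level, e.g.
 *
 *	int s = splr(newpri);		raise (never lower) the ipl
 *	...protected work...
 *	(void) do_splx(s);		restore the saved level
 */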

/*
 * Common spl raise routine, takes new ipl to set
 * returns the old ipl, will not lower ipl.
 */
int
splr(int newpri)
{
	ulong_t	flag;
	cpu_t	*cpu;
	int	curpri, basepri;

	flag = intr_clear();
	cpu = CPU; /* ints are disabled, now safe to cache cpu ptr */
	curpri = cpu->cpu_m.mcpu_pri;
	/*
	 * Only do something if new priority is larger
	 */
	if (newpri > curpri) {
		basepri = cpu->cpu_base_spl;
		if (newpri < basepri)
			newpri = basepri;
		cpu->cpu_m.mcpu_pri = newpri;
		(*setspl)(newpri);
		/*
		 * See if new priority level allows pending softint delivery
		 */
		if ((flag & PS_IE) &&
		    bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > newpri)
			fakesoftint();
	}
	intr_restore(flag);
	return (curpri);
}

int
getpil(void)
{
	return (CPU->cpu_m.mcpu_pri);
}

int
interrupts_enabled(void)
{
	ulong_t	flag;

	flag = getflags();
	return ((flag & PS_IE) == PS_IE);
}

#ifdef DEBUG
void
assert_ints_enabled(void)
{
	ASSERT(!interrupts_unleashed || interrupts_enabled());
}
#endif	/* DEBUG */