1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/kstat.h> 30 #include <sys/param.h> 31 #include <sys/stack.h> 32 #include <sys/regset.h> 33 #include <sys/thread.h> 34 #include <sys/proc.h> 35 #include <sys/procfs_isa.h> 36 #include <sys/kmem.h> 37 #include <sys/cpuvar.h> 38 #include <sys/systm.h> 39 #include <sys/machpcb.h> 40 #include <sys/machasi.h> 41 #include <sys/vis.h> 42 #include <sys/fpu/fpusystm.h> 43 #include <sys/cpu_module.h> 44 #include <sys/privregs.h> 45 #include <sys/archsystm.h> 46 #include <sys/atomic.h> 47 #include <sys/cmn_err.h> 48 #include <sys/time.h> 49 #include <sys/clock.h> 50 #include <sys/chip.h> 51 #include <sys/cmp.h> 52 #include <sys/platform_module.h> 53 #include <sys/bl.h> 54 #include <sys/nvpair.h> 55 #include <sys/kdi_impl.h> 56 #include <sys/machsystm.h> 57 #include <sys/sysmacros.h> 58 #include <sys/promif.h> 59 #include <sys/pool_pset.h> 60 #include <vm/seg_kmem.h> 61 62 int maxphys = MMU_PAGESIZE * 16; /* 128k */ 63 int klustsize = MMU_PAGESIZE * 16; /* 128k */ 64 65 /* 66 * Initialize kernel thread's stack. 67 */ 68 caddr_t 69 thread_stk_init(caddr_t stk) 70 { 71 kfpu_t *fp; 72 ulong_t align; 73 74 /* allocate extra space for floating point state */ 75 stk -= SA(sizeof (kfpu_t) + GSR_SIZE); 76 align = (uintptr_t)stk & 0x3f; 77 stk -= align; /* force v9_fpu to be 16 byte aligned */ 78 fp = (kfpu_t *)stk; 79 fp->fpu_fprs = 0; 80 81 stk -= SA(MINFRAME); 82 return (stk); 83 } 84 85 #define WIN32_SIZE (MAXWIN * sizeof (struct rwindow32)) 86 #define WIN64_SIZE (MAXWIN * sizeof (struct rwindow64)) 87 88 kmem_cache_t *wbuf32_cache; 89 kmem_cache_t *wbuf64_cache; 90 91 void 92 lwp_stk_cache_init(void) 93 { 94 /* 95 * Window buffers are allocated from the static arena 96 * because they are accessed at TL>0. We also must use 97 * KMC_NOHASH to prevent them from straddling page 98 * boundaries as they are accessed by physical address. 99 */ 100 wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE, 101 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH); 102 wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE, 103 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH); 104 } 105 106 /* 107 * Initialize lwp's kernel stack. 108 * Note that now that the floating point register save area (kfpu_t) 109 * has been broken out from machpcb and aligned on a 64 byte boundary so that 110 * we can do block load/stores to/from it, there are a couple of potential 111 * optimizations to save stack space. 1. The floating point register save 112 * area could be aligned on a 16 byte boundary, and the floating point code 113 * changed to (a) check the alignment and (b) use different save/restore 114 * macros depending upon the alignment. 2. The lwp_stk_init code below 115 * could be changed to calculate if less space would be wasted if machpcb 116 * was first instead of second. However there is a REGOFF macro used in 117 * locore, syscall_trap, machdep and mlsetup that assumes that the saved 118 * register area is a fixed distance from the %sp, and would have to be 119 * changed to a pointer or something...JJ said later. 120 */ 121 caddr_t 122 lwp_stk_init(klwp_t *lwp, caddr_t stk) 123 { 124 struct machpcb *mpcb; 125 kfpu_t *fp; 126 uintptr_t aln; 127 128 stk -= SA(sizeof (kfpu_t) + GSR_SIZE); 129 aln = (uintptr_t)stk & 0x3F; 130 stk -= aln; 131 fp = (kfpu_t *)stk; 132 stk -= SA(sizeof (struct machpcb)); 133 mpcb = (struct machpcb *)stk; 134 bzero(mpcb, sizeof (struct machpcb)); 135 bzero(fp, sizeof (kfpu_t) + GSR_SIZE); 136 lwp->lwp_regs = (void *)&mpcb->mpcb_regs; 137 lwp->lwp_fpu = (void *)fp; 138 mpcb->mpcb_fpu = fp; 139 mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q; 140 mpcb->mpcb_thread = lwp->lwp_thread; 141 mpcb->mpcb_wbcnt = 0; 142 if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) { 143 mpcb->mpcb_wstate = WSTATE_USER32; 144 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP); 145 } else { 146 mpcb->mpcb_wstate = WSTATE_USER64; 147 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP); 148 } 149 ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0); 150 mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf); 151 mpcb->mpcb_pa = va_to_pa(mpcb); 152 return (stk); 153 } 154 155 void 156 lwp_stk_fini(klwp_t *lwp) 157 { 158 struct machpcb *mpcb = lwptompcb(lwp); 159 160 /* 161 * there might be windows still in the wbuf due to unmapped 162 * stack, misaligned stack pointer, etc. We just free it. 163 */ 164 mpcb->mpcb_wbcnt = 0; 165 if (mpcb->mpcb_wstate == WSTATE_USER32) 166 kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf); 167 else 168 kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf); 169 mpcb->mpcb_wbuf = NULL; 170 mpcb->mpcb_wbuf_pa = -1; 171 } 172 173 174 /* 175 * Copy regs from parent to child. 176 */ 177 void 178 lwp_forkregs(klwp_t *lwp, klwp_t *clwp) 179 { 180 kthread_t *t, *pt = lwptot(lwp); 181 struct machpcb *mpcb = lwptompcb(clwp); 182 struct machpcb *pmpcb = lwptompcb(lwp); 183 kfpu_t *fp, *pfp = lwptofpu(lwp); 184 caddr_t wbuf; 185 uint_t wstate; 186 187 t = mpcb->mpcb_thread; 188 /* 189 * remember child's fp and wbuf since they will get erased during 190 * the bcopy. 191 */ 192 fp = mpcb->mpcb_fpu; 193 wbuf = mpcb->mpcb_wbuf; 194 wstate = mpcb->mpcb_wstate; 195 /* 196 * Don't copy mpcb_frame since we hand-crafted it 197 * in thread_load(). 198 */ 199 bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF); 200 mpcb->mpcb_thread = t; 201 mpcb->mpcb_fpu = fp; 202 fp->fpu_q = mpcb->mpcb_fpu_q; 203 204 /* 205 * It is theoretically possibly for the lwp's wstate to 206 * be different from its value assigned in lwp_stk_init, 207 * since lwp_stk_init assumed the data model of the process. 208 * Here, we took on the data model of the cloned lwp. 209 */ 210 if (mpcb->mpcb_wstate != wstate) { 211 if (wstate == WSTATE_USER32) { 212 kmem_cache_free(wbuf32_cache, wbuf); 213 wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP); 214 wstate = WSTATE_USER64; 215 } else { 216 kmem_cache_free(wbuf64_cache, wbuf); 217 wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP); 218 wstate = WSTATE_USER32; 219 } 220 } 221 222 mpcb->mpcb_pa = va_to_pa(mpcb); 223 mpcb->mpcb_wbuf = wbuf; 224 mpcb->mpcb_wbuf_pa = va_to_pa(wbuf); 225 226 ASSERT(mpcb->mpcb_wstate == wstate); 227 228 if (mpcb->mpcb_wbcnt != 0) { 229 bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf, 230 mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ? 231 sizeof (struct rwindow32) : sizeof (struct rwindow64))); 232 } 233 234 if (pt == curthread) 235 pfp->fpu_fprs = _fp_read_fprs(); 236 if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) { 237 if (pt == curthread && fpu_exists) { 238 save_gsr(clwp->lwp_fpu); 239 } else { 240 uint64_t gsr; 241 gsr = get_gsr(lwp->lwp_fpu); 242 set_gsr(gsr, clwp->lwp_fpu); 243 } 244 fp_fork(lwp, clwp); 245 } 246 } 247 248 /* 249 * Free lwp fpu regs. 250 */ 251 void 252 lwp_freeregs(klwp_t *lwp, int isexec) 253 { 254 kfpu_t *fp = lwptofpu(lwp); 255 256 if (lwptot(lwp) == curthread) 257 fp->fpu_fprs = _fp_read_fprs(); 258 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) 259 fp_free(fp, isexec); 260 } 261 262 /* 263 * This function is currently unused on sparc. 264 */ 265 /*ARGSUSED*/ 266 void 267 lwp_attach_brand_hdlrs(klwp_t *lwp) 268 {} 269 270 /* 271 * fill in the extra register state area specified with the 272 * specified lwp's platform-dependent non-floating-point extra 273 * register state information 274 */ 275 /* ARGSUSED */ 276 void 277 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp) 278 { 279 /* for sun4u nothing to do here, added for symmetry */ 280 } 281 282 /* 283 * fill in the extra register state area specified with the specified lwp's 284 * platform-dependent floating-point extra register state information. 285 * NOTE: 'lwp' might not correspond to 'curthread' since this is 286 * called from code in /proc to get the registers of another lwp. 287 */ 288 void 289 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp) 290 { 291 prxregset_t *xregs = (prxregset_t *)xrp; 292 kfpu_t *fp = lwptofpu(lwp); 293 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 294 uint64_t gsr; 295 296 /* 297 * fp_fksave() does not flush the GSR register into 298 * the lwp area, so do it now 299 */ 300 kpreempt_disable(); 301 if (ttolwp(curthread) == lwp && fpu_exists) { 302 fp->fpu_fprs = _fp_read_fprs(); 303 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 304 _fp_write_fprs(fprs); 305 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 306 } 307 save_gsr(fp); 308 } 309 gsr = get_gsr(fp); 310 kpreempt_enable(); 311 PRXREG_GSR(xregs) = gsr; 312 } 313 314 /* 315 * set the specified lwp's platform-dependent non-floating-point 316 * extra register state based on the specified input 317 */ 318 /* ARGSUSED */ 319 void 320 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp) 321 { 322 /* for sun4u nothing to do here, added for symmetry */ 323 } 324 325 /* 326 * set the specified lwp's platform-dependent floating-point 327 * extra register state based on the specified input 328 */ 329 void 330 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp) 331 { 332 prxregset_t *xregs = (prxregset_t *)xrp; 333 kfpu_t *fp = lwptofpu(lwp); 334 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 335 uint64_t gsr = PRXREG_GSR(xregs); 336 337 kpreempt_disable(); 338 set_gsr(gsr, lwptofpu(lwp)); 339 340 if ((lwp == ttolwp(curthread)) && fpu_exists) { 341 fp->fpu_fprs = _fp_read_fprs(); 342 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 343 _fp_write_fprs(fprs); 344 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 345 } 346 restore_gsr(lwptofpu(lwp)); 347 } 348 kpreempt_enable(); 349 } 350 351 /* 352 * fill in the sun4u asrs, ie, the lwp's platform-dependent 353 * non-floating-point extra register state information 354 */ 355 /* ARGSUSED */ 356 void 357 getasrs(klwp_t *lwp, asrset_t asr) 358 { 359 /* for sun4u nothing to do here, added for symmetry */ 360 } 361 362 /* 363 * fill in the sun4u asrs, ie, the lwp's platform-dependent 364 * floating-point extra register state information 365 */ 366 void 367 getfpasrs(klwp_t *lwp, asrset_t asr) 368 { 369 kfpu_t *fp = lwptofpu(lwp); 370 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 371 372 kpreempt_disable(); 373 if (ttolwp(curthread) == lwp) 374 fp->fpu_fprs = _fp_read_fprs(); 375 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) { 376 if (fpu_exists && ttolwp(curthread) == lwp) { 377 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 378 _fp_write_fprs(fprs); 379 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 380 } 381 save_gsr(fp); 382 } 383 asr[ASR_GSR] = (int64_t)get_gsr(fp); 384 } 385 kpreempt_enable(); 386 } 387 388 /* 389 * set the sun4u asrs, ie, the lwp's platform-dependent 390 * non-floating-point extra register state information 391 */ 392 /* ARGSUSED */ 393 void 394 setasrs(klwp_t *lwp, asrset_t asr) 395 { 396 /* for sun4u nothing to do here, added for symmetry */ 397 } 398 399 void 400 setfpasrs(klwp_t *lwp, asrset_t asr) 401 { 402 kfpu_t *fp = lwptofpu(lwp); 403 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 404 405 kpreempt_disable(); 406 if (ttolwp(curthread) == lwp) 407 fp->fpu_fprs = _fp_read_fprs(); 408 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) { 409 set_gsr(asr[ASR_GSR], fp); 410 if (fpu_exists && ttolwp(curthread) == lwp) { 411 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 412 _fp_write_fprs(fprs); 413 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 414 } 415 restore_gsr(fp); 416 } 417 } 418 kpreempt_enable(); 419 } 420 421 /* 422 * Create interrupt kstats for this CPU. 423 */ 424 void 425 cpu_create_intrstat(cpu_t *cp) 426 { 427 int i; 428 kstat_t *intr_ksp; 429 kstat_named_t *knp; 430 char name[KSTAT_STRLEN]; 431 zoneid_t zoneid; 432 433 ASSERT(MUTEX_HELD(&cpu_lock)); 434 435 if (pool_pset_enabled()) 436 zoneid = GLOBAL_ZONEID; 437 else 438 zoneid = ALL_ZONES; 439 440 intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc", 441 KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid); 442 443 /* 444 * Initialize each PIL's named kstat 445 */ 446 if (intr_ksp != NULL) { 447 intr_ksp->ks_update = cpu_kstat_intrstat_update; 448 knp = (kstat_named_t *)intr_ksp->ks_data; 449 intr_ksp->ks_private = cp; 450 for (i = 0; i < PIL_MAX; i++) { 451 (void) snprintf(name, KSTAT_STRLEN, "level-%d-time", 452 i + 1); 453 kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64); 454 (void) snprintf(name, KSTAT_STRLEN, "level-%d-count", 455 i + 1); 456 kstat_named_init(&knp[(i * 2) + 1], name, 457 KSTAT_DATA_UINT64); 458 } 459 kstat_install(intr_ksp); 460 } 461 } 462 463 /* 464 * Delete interrupt kstats for this CPU. 465 */ 466 void 467 cpu_delete_intrstat(cpu_t *cp) 468 { 469 kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES); 470 } 471 472 /* 473 * Convert interrupt statistics from CPU ticks to nanoseconds and 474 * update kstat. 475 */ 476 int 477 cpu_kstat_intrstat_update(kstat_t *ksp, int rw) 478 { 479 kstat_named_t *knp = ksp->ks_data; 480 cpu_t *cpup = (cpu_t *)ksp->ks_private; 481 int i; 482 483 if (rw == KSTAT_WRITE) 484 return (EACCES); 485 486 /* 487 * We use separate passes to copy and convert the statistics to 488 * nanoseconds. This assures that the snapshot of the data is as 489 * self-consistent as possible. 490 */ 491 492 for (i = 0; i < PIL_MAX; i++) { 493 knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0]; 494 knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i]; 495 } 496 497 for (i = 0; i < PIL_MAX; i++) { 498 knp[i * 2].value.ui64 = 499 (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64, 500 cpup->cpu_id); 501 } 502 503 return (0); 504 } 505 506 /* 507 * Called by common/os/cpu.c for psrinfo(1m) kstats 508 */ 509 char * 510 cpu_fru_fmri(cpu_t *cp) 511 { 512 return (cpunodes[cp->cpu_id].fru_fmri); 513 } 514 515 /* 516 * An interrupt thread is ending a time slice, so compute the interval it 517 * ran for and update the statistic for its PIL. 518 */ 519 void 520 cpu_intr_swtch_enter(kthread_id_t t) 521 { 522 uint64_t interval; 523 uint64_t start; 524 cpu_t *cpu; 525 526 ASSERT((t->t_flag & T_INTR_THREAD) != 0); 527 ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL); 528 529 /* 530 * We could be here with a zero timestamp. This could happen if: 531 * an interrupt thread which no longer has a pinned thread underneath 532 * it (i.e. it blocked at some point in its past) has finished running 533 * its handler. intr_thread() updated the interrupt statistic for its 534 * PIL and zeroed its timestamp. Since there was no pinned thread to 535 * return to, swtch() gets called and we end up here. 536 * 537 * It can also happen if an interrupt thread in intr_thread() calls 538 * preempt. It will have already taken care of updating stats. In 539 * this event, the interrupt thread will be runnable. 540 */ 541 if (t->t_intr_start) { 542 do { 543 start = t->t_intr_start; 544 interval = gettick_counter() - start; 545 } while (cas64(&t->t_intr_start, start, 0) != start); 546 cpu = CPU; 547 if (cpu->cpu_m.divisor > 1) 548 interval *= cpu->cpu_m.divisor; 549 cpu->cpu_m.intrstat[t->t_pil][0] += interval; 550 551 atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate], 552 interval); 553 } else 554 ASSERT(t->t_intr == NULL || t->t_state == TS_RUN); 555 } 556 557 558 /* 559 * An interrupt thread is returning from swtch(). Place a starting timestamp 560 * in its thread structure. 561 */ 562 void 563 cpu_intr_swtch_exit(kthread_id_t t) 564 { 565 uint64_t ts; 566 567 ASSERT((t->t_flag & T_INTR_THREAD) != 0); 568 ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL); 569 570 do { 571 ts = t->t_intr_start; 572 } while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts); 573 } 574 575 576 int 577 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class) 578 { 579 if (&plat_blacklist) 580 return (plat_blacklist(cmd, scheme, fmri, class)); 581 582 return (ENOTSUP); 583 } 584 585 int 586 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp) 587 { 588 extern void kdi_flush_caches(void); 589 size_t nread = 0; 590 uint32_t word; 591 int slop, i; 592 593 kdi_flush_caches(); 594 membar_enter(); 595 596 /* We might not begin on a word boundary. */ 597 if ((slop = addr & 3) != 0) { 598 word = ldphys(addr & ~3); 599 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++) 600 *buf++ = ((uchar_t *)&word)[i]; 601 addr = roundup(addr, 4); 602 } 603 604 while (nbytes > 0) { 605 word = ldphys(addr); 606 for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++) 607 *buf++ = ((uchar_t *)&word)[i]; 608 } 609 610 kdi_flush_caches(); 611 612 *ncopiedp = nread; 613 return (0); 614 } 615 616 int 617 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp) 618 { 619 extern void kdi_flush_caches(void); 620 size_t nwritten = 0; 621 uint32_t word; 622 int slop, i; 623 624 kdi_flush_caches(); 625 626 /* We might not begin on a word boundary. */ 627 if ((slop = addr & 3) != 0) { 628 word = ldphys(addr & ~3); 629 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++) 630 ((uchar_t *)&word)[i] = *buf++; 631 stphys(addr & ~3, word); 632 addr = roundup(addr, 4); 633 } 634 635 while (nbytes > 3) { 636 for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++) 637 ((uchar_t *)&word)[i] = *buf++; 638 stphys(addr, word); 639 addr += 4; 640 } 641 642 /* We might not end with a whole word. */ 643 if (nbytes > 0) { 644 word = ldphys(addr); 645 for (i = 0; nbytes > 0; i++, nbytes--, nwritten++) 646 ((uchar_t *)&word)[i] = *buf++; 647 stphys(addr, word); 648 } 649 650 membar_enter(); 651 kdi_flush_caches(); 652 653 *ncopiedp = nwritten; 654 return (0); 655 } 656 657 static void 658 kdi_kernpanic(struct regs *regs, uint_t tt) 659 { 660 sync_reg_buf = *regs; 661 sync_tt = tt; 662 663 sync_handler(); 664 } 665 666 static void 667 kdi_plat_call(void (*platfn)(void)) 668 { 669 if (platfn != NULL) { 670 prom_suspend_prepost(); 671 platfn(); 672 prom_resume_prepost(); 673 } 674 } 675 676 void 677 mach_kdi_init(kdi_t *kdi) 678 { 679 kdi->kdi_plat_call = kdi_plat_call; 680 kdi->mkdi_cpu_index = kdi_cpu_index; 681 kdi->mkdi_trap_vatotte = kdi_trap_vatotte; 682 kdi->mkdi_kernpanic = kdi_kernpanic; 683 } 684 685 686 /* 687 * get_cpu_mstate() is passed an array of timestamps, NCMSTATES 688 * long, and it fills in the array with the time spent on cpu in 689 * each of the mstates, where time is returned in nsec. 690 * 691 * No guarantee is made that the returned values in times[] will 692 * monotonically increase on sequential calls, although this will 693 * be true in the long run. Any such guarantee must be handled by 694 * the caller, if needed. This can happen if we fail to account 695 * for elapsed time due to a generation counter conflict, yet we 696 * did account for it on a prior call (see below). 697 * 698 * The complication is that the cpu in question may be updating 699 * its microstate at the same time that we are reading it. 700 * Because the microstate is only updated when the CPU's state 701 * changes, the values in cpu_intracct[] can be indefinitely out 702 * of date. To determine true current values, it is necessary to 703 * compare the current time with cpu_mstate_start, and add the 704 * difference to times[cpu_mstate]. 705 * 706 * This can be a problem if those values are changing out from 707 * under us. Because the code path in new_cpu_mstate() is 708 * performance critical, we have not added a lock to it. Instead, 709 * we have added a generation counter. Before beginning 710 * modifications, the counter is set to 0. After modifications, 711 * it is set to the old value plus one. 712 * 713 * get_cpu_mstate() will not consider the values of cpu_mstate 714 * and cpu_mstate_start to be usable unless the value of 715 * cpu_mstate_gen is both non-zero and unchanged, both before and 716 * after reading the mstate information. Note that we must 717 * protect against out-of-order loads around accesses to the 718 * generation counter. Also, this is a best effort approach in 719 * that we do not retry should the counter be found to have 720 * changed. 721 * 722 * cpu_intracct[] is used to identify time spent in each CPU 723 * mstate while handling interrupts. Such time should be reported 724 * against system time, and so is subtracted out from its 725 * corresponding cpu_acct[] time and added to 726 * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in 727 * %ticks, but acct time may be stored as %sticks, thus requiring 728 * different conversions before they can be compared. 729 */ 730 731 void 732 get_cpu_mstate(cpu_t *cpu, hrtime_t *times) 733 { 734 int i; 735 hrtime_t now, start; 736 uint16_t gen; 737 uint16_t state; 738 hrtime_t intracct[NCMSTATES]; 739 740 /* 741 * Load all volatile state under the protection of membar. 742 * cpu_acct[cpu_mstate] must be loaded to avoid double counting 743 * of (now - cpu_mstate_start) by a change in CPU mstate that 744 * arrives after we make our last check of cpu_mstate_gen. 745 */ 746 747 now = gethrtime_unscaled(); 748 gen = cpu->cpu_mstate_gen; 749 750 membar_consumer(); /* guarantee load ordering */ 751 start = cpu->cpu_mstate_start; 752 state = cpu->cpu_mstate; 753 for (i = 0; i < NCMSTATES; i++) { 754 intracct[i] = cpu->cpu_intracct[i]; 755 times[i] = cpu->cpu_acct[i]; 756 } 757 membar_consumer(); /* guarantee load ordering */ 758 759 if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start) 760 times[state] += now - start; 761 762 for (i = 0; i < NCMSTATES; i++) { 763 scalehrtime(×[i]); 764 intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id); 765 } 766 767 for (i = 0; i < NCMSTATES; i++) { 768 if (i == CMS_SYSTEM) 769 continue; 770 times[i] -= intracct[i]; 771 if (times[i] < 0) { 772 intracct[i] += times[i]; 773 times[i] = 0; 774 } 775 times[CMS_SYSTEM] += intracct[i]; 776 } 777 } 778