1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/kstat.h> 30 #include <sys/param.h> 31 #include <sys/stack.h> 32 #include <sys/regset.h> 33 #include <sys/thread.h> 34 #include <sys/proc.h> 35 #include <sys/procfs_isa.h> 36 #include <sys/kmem.h> 37 #include <sys/cpuvar.h> 38 #include <sys/systm.h> 39 #include <sys/machpcb.h> 40 #include <sys/machasi.h> 41 #include <sys/vis.h> 42 #include <sys/fpu/fpusystm.h> 43 #include <sys/cpu_module.h> 44 #include <sys/privregs.h> 45 #include <sys/archsystm.h> 46 #include <sys/atomic.h> 47 #include <sys/cmn_err.h> 48 #include <sys/time.h> 49 #include <sys/clock.h> 50 #include <sys/cmp.h> 51 #include <sys/platform_module.h> 52 #include <sys/bl.h> 53 #include <sys/nvpair.h> 54 #include <sys/kdi_impl.h> 55 #include <sys/machsystm.h> 56 #include <sys/sysmacros.h> 57 #include <sys/promif.h> 58 #include <sys/pool_pset.h> 59 #include <vm/seg_kmem.h> 60 61 int maxphys = MMU_PAGESIZE * 16; /* 128k */ 62 int klustsize = MMU_PAGESIZE * 16; /* 128k */ 63 64 /* 65 * Initialize kernel thread's stack. 66 */ 67 caddr_t 68 thread_stk_init(caddr_t stk) 69 { 70 kfpu_t *fp; 71 ulong_t align; 72 73 /* allocate extra space for floating point state */ 74 stk -= SA(sizeof (kfpu_t) + GSR_SIZE); 75 align = (uintptr_t)stk & 0x3f; 76 stk -= align; /* force v9_fpu to be 16 byte aligned */ 77 fp = (kfpu_t *)stk; 78 fp->fpu_fprs = 0; 79 80 stk -= SA(MINFRAME); 81 return (stk); 82 } 83 84 #define WIN32_SIZE (MAXWIN * sizeof (struct rwindow32)) 85 #define WIN64_SIZE (MAXWIN * sizeof (struct rwindow64)) 86 87 kmem_cache_t *wbuf32_cache; 88 kmem_cache_t *wbuf64_cache; 89 90 void 91 lwp_stk_cache_init(void) 92 { 93 /* 94 * Window buffers are allocated from the static arena 95 * because they are accessed at TL>0. We also must use 96 * KMC_NOHASH to prevent them from straddling page 97 * boundaries as they are accessed by physical address. 98 */ 99 wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE, 100 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH); 101 wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE, 102 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH); 103 } 104 105 /* 106 * Initialize lwp's kernel stack. 107 * Note that now that the floating point register save area (kfpu_t) 108 * has been broken out from machpcb and aligned on a 64 byte boundary so that 109 * we can do block load/stores to/from it, there are a couple of potential 110 * optimizations to save stack space. 1. The floating point register save 111 * area could be aligned on a 16 byte boundary, and the floating point code 112 * changed to (a) check the alignment and (b) use different save/restore 113 * macros depending upon the alignment. 2. The lwp_stk_init code below 114 * could be changed to calculate if less space would be wasted if machpcb 115 * was first instead of second. However there is a REGOFF macro used in 116 * locore, syscall_trap, machdep and mlsetup that assumes that the saved 117 * register area is a fixed distance from the %sp, and would have to be 118 * changed to a pointer or something...JJ said later. 119 */ 120 caddr_t 121 lwp_stk_init(klwp_t *lwp, caddr_t stk) 122 { 123 struct machpcb *mpcb; 124 kfpu_t *fp; 125 uintptr_t aln; 126 127 stk -= SA(sizeof (kfpu_t) + GSR_SIZE); 128 aln = (uintptr_t)stk & 0x3F; 129 stk -= aln; 130 fp = (kfpu_t *)stk; 131 stk -= SA(sizeof (struct machpcb)); 132 mpcb = (struct machpcb *)stk; 133 bzero(mpcb, sizeof (struct machpcb)); 134 bzero(fp, sizeof (kfpu_t) + GSR_SIZE); 135 lwp->lwp_regs = (void *)&mpcb->mpcb_regs; 136 lwp->lwp_fpu = (void *)fp; 137 mpcb->mpcb_fpu = fp; 138 mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q; 139 mpcb->mpcb_thread = lwp->lwp_thread; 140 mpcb->mpcb_wbcnt = 0; 141 if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) { 142 mpcb->mpcb_wstate = WSTATE_USER32; 143 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP); 144 } else { 145 mpcb->mpcb_wstate = WSTATE_USER64; 146 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP); 147 } 148 ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0); 149 mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf); 150 mpcb->mpcb_pa = va_to_pa(mpcb); 151 return (stk); 152 } 153 154 void 155 lwp_stk_fini(klwp_t *lwp) 156 { 157 struct machpcb *mpcb = lwptompcb(lwp); 158 159 /* 160 * there might be windows still in the wbuf due to unmapped 161 * stack, misaligned stack pointer, etc. We just free it. 162 */ 163 mpcb->mpcb_wbcnt = 0; 164 if (mpcb->mpcb_wstate == WSTATE_USER32) 165 kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf); 166 else 167 kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf); 168 mpcb->mpcb_wbuf = NULL; 169 mpcb->mpcb_wbuf_pa = -1; 170 } 171 172 173 /* 174 * Copy regs from parent to child. 175 */ 176 void 177 lwp_forkregs(klwp_t *lwp, klwp_t *clwp) 178 { 179 kthread_t *t, *pt = lwptot(lwp); 180 struct machpcb *mpcb = lwptompcb(clwp); 181 struct machpcb *pmpcb = lwptompcb(lwp); 182 kfpu_t *fp, *pfp = lwptofpu(lwp); 183 caddr_t wbuf; 184 uint_t wstate; 185 186 t = mpcb->mpcb_thread; 187 /* 188 * remember child's fp and wbuf since they will get erased during 189 * the bcopy. 190 */ 191 fp = mpcb->mpcb_fpu; 192 wbuf = mpcb->mpcb_wbuf; 193 wstate = mpcb->mpcb_wstate; 194 /* 195 * Don't copy mpcb_frame since we hand-crafted it 196 * in thread_load(). 197 */ 198 bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF); 199 mpcb->mpcb_thread = t; 200 mpcb->mpcb_fpu = fp; 201 fp->fpu_q = mpcb->mpcb_fpu_q; 202 203 /* 204 * It is theoretically possibly for the lwp's wstate to 205 * be different from its value assigned in lwp_stk_init, 206 * since lwp_stk_init assumed the data model of the process. 207 * Here, we took on the data model of the cloned lwp. 208 */ 209 if (mpcb->mpcb_wstate != wstate) { 210 if (wstate == WSTATE_USER32) { 211 kmem_cache_free(wbuf32_cache, wbuf); 212 wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP); 213 wstate = WSTATE_USER64; 214 } else { 215 kmem_cache_free(wbuf64_cache, wbuf); 216 wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP); 217 wstate = WSTATE_USER32; 218 } 219 } 220 221 mpcb->mpcb_pa = va_to_pa(mpcb); 222 mpcb->mpcb_wbuf = wbuf; 223 mpcb->mpcb_wbuf_pa = va_to_pa(wbuf); 224 225 ASSERT(mpcb->mpcb_wstate == wstate); 226 227 if (mpcb->mpcb_wbcnt != 0) { 228 bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf, 229 mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ? 230 sizeof (struct rwindow32) : sizeof (struct rwindow64))); 231 } 232 233 if (pt == curthread) 234 pfp->fpu_fprs = _fp_read_fprs(); 235 if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) { 236 if (pt == curthread && fpu_exists) { 237 save_gsr(clwp->lwp_fpu); 238 } else { 239 uint64_t gsr; 240 gsr = get_gsr(lwp->lwp_fpu); 241 set_gsr(gsr, clwp->lwp_fpu); 242 } 243 fp_fork(lwp, clwp); 244 } 245 } 246 247 /* 248 * Free lwp fpu regs. 249 */ 250 void 251 lwp_freeregs(klwp_t *lwp, int isexec) 252 { 253 kfpu_t *fp = lwptofpu(lwp); 254 255 if (lwptot(lwp) == curthread) 256 fp->fpu_fprs = _fp_read_fprs(); 257 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) 258 fp_free(fp, isexec); 259 } 260 261 /* 262 * This function is currently unused on sparc. 263 */ 264 /*ARGSUSED*/ 265 void 266 lwp_attach_brand_hdlrs(klwp_t *lwp) 267 {} 268 269 /* 270 * fill in the extra register state area specified with the 271 * specified lwp's platform-dependent non-floating-point extra 272 * register state information 273 */ 274 /* ARGSUSED */ 275 void 276 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp) 277 { 278 /* for sun4u nothing to do here, added for symmetry */ 279 } 280 281 /* 282 * fill in the extra register state area specified with the specified lwp's 283 * platform-dependent floating-point extra register state information. 284 * NOTE: 'lwp' might not correspond to 'curthread' since this is 285 * called from code in /proc to get the registers of another lwp. 286 */ 287 void 288 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp) 289 { 290 prxregset_t *xregs = (prxregset_t *)xrp; 291 kfpu_t *fp = lwptofpu(lwp); 292 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 293 uint64_t gsr; 294 295 /* 296 * fp_fksave() does not flush the GSR register into 297 * the lwp area, so do it now 298 */ 299 kpreempt_disable(); 300 if (ttolwp(curthread) == lwp && fpu_exists) { 301 fp->fpu_fprs = _fp_read_fprs(); 302 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 303 _fp_write_fprs(fprs); 304 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 305 } 306 save_gsr(fp); 307 } 308 gsr = get_gsr(fp); 309 kpreempt_enable(); 310 PRXREG_GSR(xregs) = gsr; 311 } 312 313 /* 314 * set the specified lwp's platform-dependent non-floating-point 315 * extra register state based on the specified input 316 */ 317 /* ARGSUSED */ 318 void 319 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp) 320 { 321 /* for sun4u nothing to do here, added for symmetry */ 322 } 323 324 /* 325 * set the specified lwp's platform-dependent floating-point 326 * extra register state based on the specified input 327 */ 328 void 329 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp) 330 { 331 prxregset_t *xregs = (prxregset_t *)xrp; 332 kfpu_t *fp = lwptofpu(lwp); 333 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 334 uint64_t gsr = PRXREG_GSR(xregs); 335 336 kpreempt_disable(); 337 set_gsr(gsr, lwptofpu(lwp)); 338 339 if ((lwp == ttolwp(curthread)) && fpu_exists) { 340 fp->fpu_fprs = _fp_read_fprs(); 341 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 342 _fp_write_fprs(fprs); 343 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 344 } 345 restore_gsr(lwptofpu(lwp)); 346 } 347 kpreempt_enable(); 348 } 349 350 /* 351 * fill in the sun4u asrs, ie, the lwp's platform-dependent 352 * non-floating-point extra register state information 353 */ 354 /* ARGSUSED */ 355 void 356 getasrs(klwp_t *lwp, asrset_t asr) 357 { 358 /* for sun4u nothing to do here, added for symmetry */ 359 } 360 361 /* 362 * fill in the sun4u asrs, ie, the lwp's platform-dependent 363 * floating-point extra register state information 364 */ 365 void 366 getfpasrs(klwp_t *lwp, asrset_t asr) 367 { 368 kfpu_t *fp = lwptofpu(lwp); 369 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 370 371 kpreempt_disable(); 372 if (ttolwp(curthread) == lwp) 373 fp->fpu_fprs = _fp_read_fprs(); 374 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) { 375 if (fpu_exists && ttolwp(curthread) == lwp) { 376 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 377 _fp_write_fprs(fprs); 378 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 379 } 380 save_gsr(fp); 381 } 382 asr[ASR_GSR] = (int64_t)get_gsr(fp); 383 } 384 kpreempt_enable(); 385 } 386 387 /* 388 * set the sun4u asrs, ie, the lwp's platform-dependent 389 * non-floating-point extra register state information 390 */ 391 /* ARGSUSED */ 392 void 393 setasrs(klwp_t *lwp, asrset_t asr) 394 { 395 /* for sun4u nothing to do here, added for symmetry */ 396 } 397 398 void 399 setfpasrs(klwp_t *lwp, asrset_t asr) 400 { 401 kfpu_t *fp = lwptofpu(lwp); 402 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 403 404 kpreempt_disable(); 405 if (ttolwp(curthread) == lwp) 406 fp->fpu_fprs = _fp_read_fprs(); 407 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) { 408 set_gsr(asr[ASR_GSR], fp); 409 if (fpu_exists && ttolwp(curthread) == lwp) { 410 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 411 _fp_write_fprs(fprs); 412 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 413 } 414 restore_gsr(fp); 415 } 416 } 417 kpreempt_enable(); 418 } 419 420 /* 421 * Create interrupt kstats for this CPU. 422 */ 423 void 424 cpu_create_intrstat(cpu_t *cp) 425 { 426 int i; 427 kstat_t *intr_ksp; 428 kstat_named_t *knp; 429 char name[KSTAT_STRLEN]; 430 zoneid_t zoneid; 431 432 ASSERT(MUTEX_HELD(&cpu_lock)); 433 434 if (pool_pset_enabled()) 435 zoneid = GLOBAL_ZONEID; 436 else 437 zoneid = ALL_ZONES; 438 439 intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc", 440 KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid); 441 442 /* 443 * Initialize each PIL's named kstat 444 */ 445 if (intr_ksp != NULL) { 446 intr_ksp->ks_update = cpu_kstat_intrstat_update; 447 knp = (kstat_named_t *)intr_ksp->ks_data; 448 intr_ksp->ks_private = cp; 449 for (i = 0; i < PIL_MAX; i++) { 450 (void) snprintf(name, KSTAT_STRLEN, "level-%d-time", 451 i + 1); 452 kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64); 453 (void) snprintf(name, KSTAT_STRLEN, "level-%d-count", 454 i + 1); 455 kstat_named_init(&knp[(i * 2) + 1], name, 456 KSTAT_DATA_UINT64); 457 } 458 kstat_install(intr_ksp); 459 } 460 } 461 462 /* 463 * Delete interrupt kstats for this CPU. 464 */ 465 void 466 cpu_delete_intrstat(cpu_t *cp) 467 { 468 kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES); 469 } 470 471 /* 472 * Convert interrupt statistics from CPU ticks to nanoseconds and 473 * update kstat. 474 */ 475 int 476 cpu_kstat_intrstat_update(kstat_t *ksp, int rw) 477 { 478 kstat_named_t *knp = ksp->ks_data; 479 cpu_t *cpup = (cpu_t *)ksp->ks_private; 480 int i; 481 482 if (rw == KSTAT_WRITE) 483 return (EACCES); 484 485 /* 486 * We use separate passes to copy and convert the statistics to 487 * nanoseconds. This assures that the snapshot of the data is as 488 * self-consistent as possible. 489 */ 490 491 for (i = 0; i < PIL_MAX; i++) { 492 knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0]; 493 knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i]; 494 } 495 496 for (i = 0; i < PIL_MAX; i++) { 497 knp[i * 2].value.ui64 = 498 (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64, 499 cpup->cpu_id); 500 } 501 502 return (0); 503 } 504 505 /* 506 * Called by common/os/cpu.c for psrinfo(1m) kstats 507 */ 508 char * 509 cpu_fru_fmri(cpu_t *cp) 510 { 511 return (cpunodes[cp->cpu_id].fru_fmri); 512 } 513 514 /* 515 * An interrupt thread is ending a time slice, so compute the interval it 516 * ran for and update the statistic for its PIL. 517 */ 518 void 519 cpu_intr_swtch_enter(kthread_id_t t) 520 { 521 uint64_t interval; 522 uint64_t start; 523 cpu_t *cpu; 524 525 ASSERT((t->t_flag & T_INTR_THREAD) != 0); 526 ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL); 527 528 /* 529 * We could be here with a zero timestamp. This could happen if: 530 * an interrupt thread which no longer has a pinned thread underneath 531 * it (i.e. it blocked at some point in its past) has finished running 532 * its handler. intr_thread() updated the interrupt statistic for its 533 * PIL and zeroed its timestamp. Since there was no pinned thread to 534 * return to, swtch() gets called and we end up here. 535 * 536 * It can also happen if an interrupt thread in intr_thread() calls 537 * preempt. It will have already taken care of updating stats. In 538 * this event, the interrupt thread will be runnable. 539 */ 540 if (t->t_intr_start) { 541 do { 542 start = t->t_intr_start; 543 interval = gettick_counter() - start; 544 } while (cas64(&t->t_intr_start, start, 0) != start); 545 cpu = CPU; 546 if (cpu->cpu_m.divisor > 1) 547 interval *= cpu->cpu_m.divisor; 548 cpu->cpu_m.intrstat[t->t_pil][0] += interval; 549 550 atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate], 551 interval); 552 } else 553 ASSERT(t->t_intr == NULL || t->t_state == TS_RUN); 554 } 555 556 557 /* 558 * An interrupt thread is returning from swtch(). Place a starting timestamp 559 * in its thread structure. 560 */ 561 void 562 cpu_intr_swtch_exit(kthread_id_t t) 563 { 564 uint64_t ts; 565 566 ASSERT((t->t_flag & T_INTR_THREAD) != 0); 567 ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL); 568 569 do { 570 ts = t->t_intr_start; 571 } while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts); 572 } 573 574 575 int 576 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class) 577 { 578 if (&plat_blacklist) 579 return (plat_blacklist(cmd, scheme, fmri, class)); 580 581 return (ENOTSUP); 582 } 583 584 int 585 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp) 586 { 587 extern void kdi_flush_caches(void); 588 size_t nread = 0; 589 uint32_t word; 590 int slop, i; 591 592 kdi_flush_caches(); 593 membar_enter(); 594 595 /* We might not begin on a word boundary. */ 596 if ((slop = addr & 3) != 0) { 597 word = ldphys(addr & ~3); 598 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++) 599 *buf++ = ((uchar_t *)&word)[i]; 600 addr = roundup(addr, 4); 601 } 602 603 while (nbytes > 0) { 604 word = ldphys(addr); 605 for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++) 606 *buf++ = ((uchar_t *)&word)[i]; 607 } 608 609 kdi_flush_caches(); 610 611 *ncopiedp = nread; 612 return (0); 613 } 614 615 int 616 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp) 617 { 618 extern void kdi_flush_caches(void); 619 size_t nwritten = 0; 620 uint32_t word; 621 int slop, i; 622 623 kdi_flush_caches(); 624 625 /* We might not begin on a word boundary. */ 626 if ((slop = addr & 3) != 0) { 627 word = ldphys(addr & ~3); 628 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++) 629 ((uchar_t *)&word)[i] = *buf++; 630 stphys(addr & ~3, word); 631 addr = roundup(addr, 4); 632 } 633 634 while (nbytes > 3) { 635 for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++) 636 ((uchar_t *)&word)[i] = *buf++; 637 stphys(addr, word); 638 addr += 4; 639 } 640 641 /* We might not end with a whole word. */ 642 if (nbytes > 0) { 643 word = ldphys(addr); 644 for (i = 0; nbytes > 0; i++, nbytes--, nwritten++) 645 ((uchar_t *)&word)[i] = *buf++; 646 stphys(addr, word); 647 } 648 649 membar_enter(); 650 kdi_flush_caches(); 651 652 *ncopiedp = nwritten; 653 return (0); 654 } 655 656 static void 657 kdi_kernpanic(struct regs *regs, uint_t tt) 658 { 659 sync_reg_buf = *regs; 660 sync_tt = tt; 661 662 sync_handler(); 663 } 664 665 static void 666 kdi_plat_call(void (*platfn)(void)) 667 { 668 if (platfn != NULL) { 669 prom_suspend_prepost(); 670 platfn(); 671 prom_resume_prepost(); 672 } 673 } 674 675 void 676 mach_kdi_init(kdi_t *kdi) 677 { 678 kdi->kdi_plat_call = kdi_plat_call; 679 kdi->mkdi_cpu_index = kdi_cpu_index; 680 kdi->mkdi_trap_vatotte = kdi_trap_vatotte; 681 kdi->mkdi_kernpanic = kdi_kernpanic; 682 } 683 684 685 /* 686 * get_cpu_mstate() is passed an array of timestamps, NCMSTATES 687 * long, and it fills in the array with the time spent on cpu in 688 * each of the mstates, where time is returned in nsec. 689 * 690 * No guarantee is made that the returned values in times[] will 691 * monotonically increase on sequential calls, although this will 692 * be true in the long run. Any such guarantee must be handled by 693 * the caller, if needed. This can happen if we fail to account 694 * for elapsed time due to a generation counter conflict, yet we 695 * did account for it on a prior call (see below). 696 * 697 * The complication is that the cpu in question may be updating 698 * its microstate at the same time that we are reading it. 699 * Because the microstate is only updated when the CPU's state 700 * changes, the values in cpu_intracct[] can be indefinitely out 701 * of date. To determine true current values, it is necessary to 702 * compare the current time with cpu_mstate_start, and add the 703 * difference to times[cpu_mstate]. 704 * 705 * This can be a problem if those values are changing out from 706 * under us. Because the code path in new_cpu_mstate() is 707 * performance critical, we have not added a lock to it. Instead, 708 * we have added a generation counter. Before beginning 709 * modifications, the counter is set to 0. After modifications, 710 * it is set to the old value plus one. 711 * 712 * get_cpu_mstate() will not consider the values of cpu_mstate 713 * and cpu_mstate_start to be usable unless the value of 714 * cpu_mstate_gen is both non-zero and unchanged, both before and 715 * after reading the mstate information. Note that we must 716 * protect against out-of-order loads around accesses to the 717 * generation counter. Also, this is a best effort approach in 718 * that we do not retry should the counter be found to have 719 * changed. 720 * 721 * cpu_intracct[] is used to identify time spent in each CPU 722 * mstate while handling interrupts. Such time should be reported 723 * against system time, and so is subtracted out from its 724 * corresponding cpu_acct[] time and added to 725 * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in 726 * %ticks, but acct time may be stored as %sticks, thus requiring 727 * different conversions before they can be compared. 728 */ 729 730 void 731 get_cpu_mstate(cpu_t *cpu, hrtime_t *times) 732 { 733 int i; 734 hrtime_t now, start; 735 uint16_t gen; 736 uint16_t state; 737 hrtime_t intracct[NCMSTATES]; 738 739 /* 740 * Load all volatile state under the protection of membar. 741 * cpu_acct[cpu_mstate] must be loaded to avoid double counting 742 * of (now - cpu_mstate_start) by a change in CPU mstate that 743 * arrives after we make our last check of cpu_mstate_gen. 744 */ 745 746 now = gethrtime_unscaled(); 747 gen = cpu->cpu_mstate_gen; 748 749 membar_consumer(); /* guarantee load ordering */ 750 start = cpu->cpu_mstate_start; 751 state = cpu->cpu_mstate; 752 for (i = 0; i < NCMSTATES; i++) { 753 intracct[i] = cpu->cpu_intracct[i]; 754 times[i] = cpu->cpu_acct[i]; 755 } 756 membar_consumer(); /* guarantee load ordering */ 757 758 if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start) 759 times[state] += now - start; 760 761 for (i = 0; i < NCMSTATES; i++) { 762 scalehrtime(×[i]); 763 intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id); 764 } 765 766 for (i = 0; i < NCMSTATES; i++) { 767 if (i == CMS_SYSTEM) 768 continue; 769 times[i] -= intracct[i]; 770 if (times[i] < 0) { 771 intracct[i] += times[i]; 772 times[i] = 0; 773 } 774 times[CMS_SYSTEM] += intracct[i]; 775 } 776 } 777