1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/kstat.h> 30 #include <sys/param.h> 31 #include <sys/stack.h> 32 #include <sys/regset.h> 33 #include <sys/thread.h> 34 #include <sys/proc.h> 35 #include <sys/procfs_isa.h> 36 #include <sys/kmem.h> 37 #include <sys/cpuvar.h> 38 #include <sys/systm.h> 39 #include <sys/machpcb.h> 40 #include <sys/machasi.h> 41 #include <sys/vis.h> 42 #include <sys/fpu/fpusystm.h> 43 #include <sys/cpu_module.h> 44 #include <sys/privregs.h> 45 #include <sys/archsystm.h> 46 #include <sys/atomic.h> 47 #include <sys/cmn_err.h> 48 #include <sys/time.h> 49 #include <sys/clock.h> 50 #include <sys/chip.h> 51 #include <sys/cmp.h> 52 #include <sys/platform_module.h> 53 #include <sys/bl.h> 54 #include <sys/nvpair.h> 55 #include <sys/kdi_impl.h> 56 #include <sys/machsystm.h> 57 #include <sys/sysmacros.h> 58 #include <sys/promif.h> 59 #include <sys/pool_pset.h> 60 #include <vm/seg_kmem.h> 61 62 int maxphys = MMU_PAGESIZE * 16; /* 128k */ 63 int klustsize = MMU_PAGESIZE * 16; /* 128k */ 64 65 /* 66 * Initialize kernel thread's stack. 67 */ 68 caddr_t 69 thread_stk_init(caddr_t stk) 70 { 71 kfpu_t *fp; 72 ulong_t align; 73 74 /* allocate extra space for floating point state */ 75 stk -= SA(sizeof (kfpu_t) + GSR_SIZE); 76 align = (uintptr_t)stk & 0x3f; 77 stk -= align; /* force v9_fpu to be 16 byte aligned */ 78 fp = (kfpu_t *)stk; 79 fp->fpu_fprs = 0; 80 81 stk -= SA(MINFRAME); 82 return (stk); 83 } 84 85 #define WIN32_SIZE (MAXWIN * sizeof (struct rwindow32)) 86 #define WIN64_SIZE (MAXWIN * sizeof (struct rwindow64)) 87 88 kmem_cache_t *wbuf32_cache; 89 kmem_cache_t *wbuf64_cache; 90 91 void 92 lwp_stk_cache_init(void) 93 { 94 wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE, 95 0, NULL, NULL, NULL, NULL, static_arena, 0); 96 wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE, 97 0, NULL, NULL, NULL, NULL, static_arena, 0); 98 } 99 100 /* 101 * Initialize lwp's kernel stack. 102 * Note that now that the floating point register save area (kfpu_t) 103 * has been broken out from machpcb and aligned on a 64 byte boundary so that 104 * we can do block load/stores to/from it, there are a couple of potential 105 * optimizations to save stack space. 1. The floating point register save 106 * area could be aligned on a 16 byte boundary, and the floating point code 107 * changed to (a) check the alignment and (b) use different save/restore 108 * macros depending upon the alignment. 2. The lwp_stk_init code below 109 * could be changed to calculate if less space would be wasted if machpcb 110 * was first instead of second. However there is a REGOFF macro used in 111 * locore, syscall_trap, machdep and mlsetup that assumes that the saved 112 * register area is a fixed distance from the %sp, and would have to be 113 * changed to a pointer or something...JJ said later. 114 */ 115 caddr_t 116 lwp_stk_init(klwp_t *lwp, caddr_t stk) 117 { 118 struct machpcb *mpcb; 119 kfpu_t *fp; 120 uintptr_t aln; 121 122 stk -= SA(sizeof (kfpu_t) + GSR_SIZE); 123 aln = (uintptr_t)stk & 0x3F; 124 stk -= aln; 125 fp = (kfpu_t *)stk; 126 stk -= SA(sizeof (struct machpcb)); 127 mpcb = (struct machpcb *)stk; 128 bzero(mpcb, sizeof (struct machpcb)); 129 bzero(fp, sizeof (kfpu_t) + GSR_SIZE); 130 lwp->lwp_regs = (void *)&mpcb->mpcb_regs; 131 lwp->lwp_fpu = (void *)fp; 132 mpcb->mpcb_fpu = fp; 133 mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q; 134 mpcb->mpcb_thread = lwp->lwp_thread; 135 mpcb->mpcb_wbcnt = 0; 136 if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) { 137 mpcb->mpcb_wstate = WSTATE_USER32; 138 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP); 139 } else { 140 mpcb->mpcb_wstate = WSTATE_USER64; 141 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP); 142 } 143 ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0); 144 mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf); 145 mpcb->mpcb_pa = va_to_pa(mpcb); 146 return (stk); 147 } 148 149 void 150 lwp_stk_fini(klwp_t *lwp) 151 { 152 struct machpcb *mpcb = lwptompcb(lwp); 153 154 /* 155 * there might be windows still in the wbuf due to unmapped 156 * stack, misaligned stack pointer, etc. We just free it. 157 */ 158 mpcb->mpcb_wbcnt = 0; 159 if (mpcb->mpcb_wstate == WSTATE_USER32) 160 kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf); 161 else 162 kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf); 163 mpcb->mpcb_wbuf = NULL; 164 mpcb->mpcb_wbuf_pa = -1; 165 } 166 167 168 /* 169 * Copy regs from parent to child. 170 */ 171 void 172 lwp_forkregs(klwp_t *lwp, klwp_t *clwp) 173 { 174 kthread_t *t, *pt = lwptot(lwp); 175 struct machpcb *mpcb = lwptompcb(clwp); 176 struct machpcb *pmpcb = lwptompcb(lwp); 177 kfpu_t *fp, *pfp = lwptofpu(lwp); 178 caddr_t wbuf; 179 uint_t wstate; 180 181 t = mpcb->mpcb_thread; 182 /* 183 * remember child's fp and wbuf since they will get erased during 184 * the bcopy. 185 */ 186 fp = mpcb->mpcb_fpu; 187 wbuf = mpcb->mpcb_wbuf; 188 wstate = mpcb->mpcb_wstate; 189 /* 190 * Don't copy mpcb_frame since we hand-crafted it 191 * in thread_load(). 192 */ 193 bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF); 194 mpcb->mpcb_thread = t; 195 mpcb->mpcb_fpu = fp; 196 fp->fpu_q = mpcb->mpcb_fpu_q; 197 198 /* 199 * It is theoretically possibly for the lwp's wstate to 200 * be different from its value assigned in lwp_stk_init, 201 * since lwp_stk_init assumed the data model of the process. 202 * Here, we took on the data model of the cloned lwp. 203 */ 204 if (mpcb->mpcb_wstate != wstate) { 205 if (wstate == WSTATE_USER32) { 206 kmem_cache_free(wbuf32_cache, wbuf); 207 wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP); 208 wstate = WSTATE_USER64; 209 } else { 210 kmem_cache_free(wbuf64_cache, wbuf); 211 wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP); 212 wstate = WSTATE_USER32; 213 } 214 } 215 216 mpcb->mpcb_pa = va_to_pa(mpcb); 217 mpcb->mpcb_wbuf = wbuf; 218 mpcb->mpcb_wbuf_pa = va_to_pa(wbuf); 219 220 ASSERT(mpcb->mpcb_wstate == wstate); 221 222 if (mpcb->mpcb_wbcnt != 0) { 223 bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf, 224 mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ? 225 sizeof (struct rwindow32) : sizeof (struct rwindow64))); 226 } 227 228 if (pt == curthread) 229 pfp->fpu_fprs = _fp_read_fprs(); 230 if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) { 231 if (pt == curthread && fpu_exists) { 232 save_gsr(clwp->lwp_fpu); 233 } else { 234 uint64_t gsr; 235 gsr = get_gsr(lwp->lwp_fpu); 236 set_gsr(gsr, clwp->lwp_fpu); 237 } 238 fp_fork(lwp, clwp); 239 } 240 } 241 242 /* 243 * Free lwp fpu regs. 244 */ 245 void 246 lwp_freeregs(klwp_t *lwp, int isexec) 247 { 248 kfpu_t *fp = lwptofpu(lwp); 249 250 if (lwptot(lwp) == curthread) 251 fp->fpu_fprs = _fp_read_fprs(); 252 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) 253 fp_free(fp, isexec); 254 } 255 256 /* 257 * fill in the extra register state area specified with the 258 * specified lwp's platform-dependent non-floating-point extra 259 * register state information 260 */ 261 /* ARGSUSED */ 262 void 263 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp) 264 { 265 /* for sun4u nothing to do here, added for symmetry */ 266 } 267 268 /* 269 * fill in the extra register state area specified with the specified lwp's 270 * platform-dependent floating-point extra register state information. 271 * NOTE: 'lwp' might not correspond to 'curthread' since this is 272 * called from code in /proc to get the registers of another lwp. 273 */ 274 void 275 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp) 276 { 277 prxregset_t *xregs = (prxregset_t *)xrp; 278 kfpu_t *fp = lwptofpu(lwp); 279 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 280 uint64_t gsr; 281 282 /* 283 * fp_fksave() does not flush the GSR register into 284 * the lwp area, so do it now 285 */ 286 kpreempt_disable(); 287 if (ttolwp(curthread) == lwp && fpu_exists) { 288 fp->fpu_fprs = _fp_read_fprs(); 289 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 290 _fp_write_fprs(fprs); 291 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 292 } 293 save_gsr(fp); 294 } 295 gsr = get_gsr(fp); 296 kpreempt_enable(); 297 PRXREG_GSR(xregs) = gsr; 298 } 299 300 /* 301 * set the specified lwp's platform-dependent non-floating-point 302 * extra register state based on the specified input 303 */ 304 /* ARGSUSED */ 305 void 306 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp) 307 { 308 /* for sun4u nothing to do here, added for symmetry */ 309 } 310 311 /* 312 * set the specified lwp's platform-dependent floating-point 313 * extra register state based on the specified input 314 */ 315 void 316 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp) 317 { 318 prxregset_t *xregs = (prxregset_t *)xrp; 319 kfpu_t *fp = lwptofpu(lwp); 320 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 321 uint64_t gsr = PRXREG_GSR(xregs); 322 323 kpreempt_disable(); 324 set_gsr(gsr, lwptofpu(lwp)); 325 326 if ((lwp == ttolwp(curthread)) && fpu_exists) { 327 fp->fpu_fprs = _fp_read_fprs(); 328 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 329 _fp_write_fprs(fprs); 330 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 331 } 332 restore_gsr(lwptofpu(lwp)); 333 } 334 kpreempt_enable(); 335 } 336 337 /* 338 * fill in the sun4u asrs, ie, the lwp's platform-dependent 339 * non-floating-point extra register state information 340 */ 341 /* ARGSUSED */ 342 void 343 getasrs(klwp_t *lwp, asrset_t asr) 344 { 345 /* for sun4u nothing to do here, added for symmetry */ 346 } 347 348 /* 349 * fill in the sun4u asrs, ie, the lwp's platform-dependent 350 * floating-point extra register state information 351 */ 352 void 353 getfpasrs(klwp_t *lwp, asrset_t asr) 354 { 355 kfpu_t *fp = lwptofpu(lwp); 356 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 357 358 kpreempt_disable(); 359 if (ttolwp(curthread) == lwp) 360 fp->fpu_fprs = _fp_read_fprs(); 361 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) { 362 if (fpu_exists && ttolwp(curthread) == lwp) { 363 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 364 _fp_write_fprs(fprs); 365 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 366 } 367 save_gsr(fp); 368 } 369 asr[ASR_GSR] = (int64_t)get_gsr(fp); 370 } 371 kpreempt_enable(); 372 } 373 374 /* 375 * set the sun4u asrs, ie, the lwp's platform-dependent 376 * non-floating-point extra register state information 377 */ 378 /* ARGSUSED */ 379 void 380 setasrs(klwp_t *lwp, asrset_t asr) 381 { 382 /* for sun4u nothing to do here, added for symmetry */ 383 } 384 385 void 386 setfpasrs(klwp_t *lwp, asrset_t asr) 387 { 388 kfpu_t *fp = lwptofpu(lwp); 389 uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL); 390 391 kpreempt_disable(); 392 if (ttolwp(curthread) == lwp) 393 fp->fpu_fprs = _fp_read_fprs(); 394 if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) { 395 set_gsr(asr[ASR_GSR], fp); 396 if (fpu_exists && ttolwp(curthread) == lwp) { 397 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { 398 _fp_write_fprs(fprs); 399 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs; 400 } 401 restore_gsr(fp); 402 } 403 } 404 kpreempt_enable(); 405 } 406 407 /* 408 * Create interrupt kstats for this CPU. 409 */ 410 void 411 cpu_create_intrstat(cpu_t *cp) 412 { 413 int i; 414 kstat_t *intr_ksp; 415 kstat_named_t *knp; 416 char name[KSTAT_STRLEN]; 417 zoneid_t zoneid; 418 419 ASSERT(MUTEX_HELD(&cpu_lock)); 420 421 if (pool_pset_enabled()) 422 zoneid = GLOBAL_ZONEID; 423 else 424 zoneid = ALL_ZONES; 425 426 intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc", 427 KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid); 428 429 /* 430 * Initialize each PIL's named kstat 431 */ 432 if (intr_ksp != NULL) { 433 intr_ksp->ks_update = cpu_kstat_intrstat_update; 434 knp = (kstat_named_t *)intr_ksp->ks_data; 435 intr_ksp->ks_private = cp; 436 for (i = 0; i < PIL_MAX; i++) { 437 (void) snprintf(name, KSTAT_STRLEN, "level-%d-time", 438 i + 1); 439 kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64); 440 (void) snprintf(name, KSTAT_STRLEN, "level-%d-count", 441 i + 1); 442 kstat_named_init(&knp[(i * 2) + 1], name, 443 KSTAT_DATA_UINT64); 444 } 445 kstat_install(intr_ksp); 446 } 447 } 448 449 /* 450 * Delete interrupt kstats for this CPU. 451 */ 452 void 453 cpu_delete_intrstat(cpu_t *cp) 454 { 455 kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES); 456 } 457 458 /* 459 * Convert interrupt statistics from CPU ticks to nanoseconds and 460 * update kstat. 461 */ 462 int 463 cpu_kstat_intrstat_update(kstat_t *ksp, int rw) 464 { 465 kstat_named_t *knp = ksp->ks_data; 466 cpu_t *cpup = (cpu_t *)ksp->ks_private; 467 int i; 468 469 if (rw == KSTAT_WRITE) 470 return (EACCES); 471 472 /* 473 * We use separate passes to copy and convert the statistics to 474 * nanoseconds. This assures that the snapshot of the data is as 475 * self-consistent as possible. 476 */ 477 478 for (i = 0; i < PIL_MAX; i++) { 479 knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0]; 480 knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i]; 481 } 482 483 for (i = 0; i < PIL_MAX; i++) { 484 knp[i * 2].value.ui64 = 485 (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64, 486 cpup->cpu_id); 487 } 488 489 return (0); 490 } 491 492 /* 493 * Called by common/os/cpu.c for psrinfo(1m) kstats 494 */ 495 char * 496 cpu_fru_fmri(cpu_t *cp) 497 { 498 return (cpunodes[cp->cpu_id].fru_fmri); 499 } 500 501 /* 502 * An interrupt thread is ending a time slice, so compute the interval it 503 * ran for and update the statistic for its PIL. 504 */ 505 void 506 cpu_intr_swtch_enter(kthread_id_t t) 507 { 508 uint64_t interval; 509 uint64_t start; 510 cpu_t *cpu; 511 512 ASSERT((t->t_flag & T_INTR_THREAD) != 0); 513 ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL); 514 515 /* 516 * We could be here with a zero timestamp. This could happen if: 517 * an interrupt thread which no longer has a pinned thread underneath 518 * it (i.e. it blocked at some point in its past) has finished running 519 * its handler. intr_thread() updated the interrupt statistic for its 520 * PIL and zeroed its timestamp. Since there was no pinned thread to 521 * return to, swtch() gets called and we end up here. 522 * 523 * It can also happen if an interrupt thread in intr_thread() calls 524 * preempt. It will have already taken care of updating stats. In 525 * this event, the interrupt thread will be runnable. 526 */ 527 if (t->t_intr_start) { 528 do { 529 start = t->t_intr_start; 530 interval = gettick_counter() - start; 531 } while (cas64(&t->t_intr_start, start, 0) != start); 532 cpu = CPU; 533 if (cpu->cpu_m.divisor > 1) 534 interval *= cpu->cpu_m.divisor; 535 cpu->cpu_m.intrstat[t->t_pil][0] += interval; 536 537 atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate], 538 interval); 539 } else 540 ASSERT(t->t_intr == NULL || t->t_state == TS_RUN); 541 } 542 543 544 /* 545 * An interrupt thread is returning from swtch(). Place a starting timestamp 546 * in its thread structure. 547 */ 548 void 549 cpu_intr_swtch_exit(kthread_id_t t) 550 { 551 uint64_t ts; 552 553 ASSERT((t->t_flag & T_INTR_THREAD) != 0); 554 ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL); 555 556 do { 557 ts = t->t_intr_start; 558 } while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts); 559 } 560 561 562 int 563 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class) 564 { 565 if (&plat_blacklist) 566 return (plat_blacklist(cmd, scheme, fmri, class)); 567 568 return (ENOTSUP); 569 } 570 571 int 572 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp) 573 { 574 extern void kdi_flush_caches(void); 575 size_t nread = 0; 576 uint32_t word; 577 int slop, i; 578 579 kdi_flush_caches(); 580 membar_enter(); 581 582 /* We might not begin on a word boundary. */ 583 if ((slop = addr & 3) != 0) { 584 word = ldphys(addr & ~3); 585 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++) 586 *buf++ = ((uchar_t *)&word)[i]; 587 addr = roundup(addr, 4); 588 } 589 590 while (nbytes > 0) { 591 word = ldphys(addr); 592 for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++) 593 *buf++ = ((uchar_t *)&word)[i]; 594 } 595 596 kdi_flush_caches(); 597 598 *ncopiedp = nread; 599 return (0); 600 } 601 602 int 603 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp) 604 { 605 extern void kdi_flush_caches(void); 606 size_t nwritten = 0; 607 uint32_t word; 608 int slop, i; 609 610 kdi_flush_caches(); 611 612 /* We might not begin on a word boundary. */ 613 if ((slop = addr & 3) != 0) { 614 word = ldphys(addr & ~3); 615 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++) 616 ((uchar_t *)&word)[i] = *buf++; 617 stphys(addr & ~3, word); 618 addr = roundup(addr, 4); 619 } 620 621 while (nbytes > 3) { 622 for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++) 623 ((uchar_t *)&word)[i] = *buf++; 624 stphys(addr, word); 625 addr += 4; 626 } 627 628 /* We might not end with a whole word. */ 629 if (nbytes > 0) { 630 word = ldphys(addr); 631 for (i = 0; nbytes > 0; i++, nbytes--, nwritten++) 632 ((uchar_t *)&word)[i] = *buf++; 633 stphys(addr, word); 634 } 635 636 membar_enter(); 637 kdi_flush_caches(); 638 639 *ncopiedp = nwritten; 640 return (0); 641 } 642 643 static void 644 kdi_kernpanic(struct regs *regs, uint_t tt) 645 { 646 sync_reg_buf = *regs; 647 sync_tt = tt; 648 649 sync_handler(); 650 } 651 652 static void 653 kdi_plat_call(void (*platfn)(void)) 654 { 655 if (platfn != NULL) { 656 prom_suspend_prepost(); 657 platfn(); 658 prom_resume_prepost(); 659 } 660 } 661 662 void 663 mach_kdi_init(kdi_t *kdi) 664 { 665 kdi->kdi_plat_call = kdi_plat_call; 666 kdi->mkdi_cpu_index = kdi_cpu_index; 667 kdi->mkdi_trap_vatotte = kdi_trap_vatotte; 668 kdi->mkdi_kernpanic = kdi_kernpanic; 669 } 670 671 672 /* 673 * get_cpu_mstate() is passed an array of timestamps, NCMSTATES 674 * long, and it fills in the array with the time spent on cpu in 675 * each of the mstates, where time is returned in nsec. 676 * 677 * No guarantee is made that the returned values in times[] will 678 * monotonically increase on sequential calls, although this will 679 * be true in the long run. Any such guarantee must be handled by 680 * the caller, if needed. This can happen if we fail to account 681 * for elapsed time due to a generation counter conflict, yet we 682 * did account for it on a prior call (see below). 683 * 684 * The complication is that the cpu in question may be updating 685 * its microstate at the same time that we are reading it. 686 * Because the microstate is only updated when the CPU's state 687 * changes, the values in cpu_intracct[] can be indefinitely out 688 * of date. To determine true current values, it is necessary to 689 * compare the current time with cpu_mstate_start, and add the 690 * difference to times[cpu_mstate]. 691 * 692 * This can be a problem if those values are changing out from 693 * under us. Because the code path in new_cpu_mstate() is 694 * performance critical, we have not added a lock to it. Instead, 695 * we have added a generation counter. Before beginning 696 * modifications, the counter is set to 0. After modifications, 697 * it is set to the old value plus one. 698 * 699 * get_cpu_mstate() will not consider the values of cpu_mstate 700 * and cpu_mstate_start to be usable unless the value of 701 * cpu_mstate_gen is both non-zero and unchanged, both before and 702 * after reading the mstate information. Note that we must 703 * protect against out-of-order loads around accesses to the 704 * generation counter. Also, this is a best effort approach in 705 * that we do not retry should the counter be found to have 706 * changed. 707 * 708 * cpu_intracct[] is used to identify time spent in each CPU 709 * mstate while handling interrupts. Such time should be reported 710 * against system time, and so is subtracted out from its 711 * corresponding cpu_acct[] time and added to 712 * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in 713 * %ticks, but acct time may be stored as %sticks, thus requiring 714 * different conversions before they can be compared. 715 */ 716 717 void 718 get_cpu_mstate(cpu_t *cpu, hrtime_t *times) 719 { 720 int i; 721 hrtime_t now, start; 722 uint16_t gen; 723 uint16_t state; 724 hrtime_t intracct[NCMSTATES]; 725 726 /* 727 * Load all volatile state under the protection of membar. 728 * cpu_acct[cpu_mstate] must be loaded to avoid double counting 729 * of (now - cpu_mstate_start) by a change in CPU mstate that 730 * arrives after we make our last check of cpu_mstate_gen. 731 */ 732 733 now = gethrtime_unscaled(); 734 gen = cpu->cpu_mstate_gen; 735 736 membar_consumer(); /* guarantee load ordering */ 737 start = cpu->cpu_mstate_start; 738 state = cpu->cpu_mstate; 739 for (i = 0; i < NCMSTATES; i++) { 740 intracct[i] = cpu->cpu_intracct[i]; 741 times[i] = cpu->cpu_acct[i]; 742 } 743 membar_consumer(); /* guarantee load ordering */ 744 745 if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start) 746 times[state] += now - start; 747 748 for (i = 0; i < NCMSTATES; i++) { 749 scalehrtime(×[i]); 750 intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id); 751 } 752 753 for (i = 0; i < NCMSTATES; i++) { 754 if (i == CMS_SYSTEM) 755 continue; 756 times[i] -= intracct[i]; 757 if (times[i] < 0) { 758 intracct[i] += times[i]; 759 times[i] = 0; 760 } 761 times[CMS_SYSTEM] += intracct[i]; 762 } 763 } 764