/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#if !defined(lint)
#include "assym.h"
#endif	/* !lint */

/*
 * General assembly language routines.
 * It is the intent of this file to contain routines that are
 * specific to cpu architecture.
 */

/*
 * WARNING: If you add a fast trap handler which can be invoked by a
 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
 * instead of "done" instruction to return back to the user mode. See
 * comments for the "fast_trap_done" entry point for more information.
 */
#define	FAST_TRAP_DONE	\
	ba,a	fast_trap_done

/*
 * Override GET_NATIVE_TIME for the cpu module code.  This is not
 * guaranteed to be exactly one instruction, be careful of using
 * the macro in delay slots.
 *
 * Do not use any instruction that modifies condition codes as the
 * caller may depend on these to remain unchanged across the macro.
*/
#if defined(CHEETAH) || defined(OLYMPUS_C)

#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rd	STICK, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rd	STICK, reg;		\
	add	reg, delta, reg;	\
	wr	reg, STICK
#define	RD_TICKCMPR(out, scr)		\
	rd	STICK_COMPARE, out
#define	WR_TICKCMPR(in, scr1, scr2, label) \
	wr	in, STICK_COMPARE

#elif defined(HUMMINGBIRD)
#include <sys/spitregs.h>

/*
 * the current hummingbird version of %stick and %stick_cmp
 * were both implemented as (2) 32-bit locations in ASI_IO space;
 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
 *
 * 64-bit opcodes are required, but move only 32-bits:
 *
 * ldxa [phys]ASI_IO, %dst	reads  the low 32-bits from phys into %dst
 * stxa %src, [phys]ASI_IO	writes the low 32-bits from %src into phys
 *
 * reg equivalent		[phys]ASI_IO
 * ------------------		---------------
 * %stick_cmp  low-32		0x1FE.0000.F060
 * %stick_cmp high-32		0x1FE.0000.F068
 * %stick      low-32		0x1FE.0000.F070
 * %stick     high-32		0x1FE.0000.F078
 */
#define	HSTC_LOW	0x60			/* stick_cmp low  32-bits */
#define	HSTC_HIGH	0x68			/* stick_cmp high 32-bits */
#define	HST_LOW		0x70			/* stick low  32-bits */
#define	HST_HIGH	0x78			/* stick high 32-bits */
#define	HST_DIFF	0x08			/* low<-->high diff */

/*
 * Any change in the number of instructions in SETL41()
 * will affect SETL41_OFF
 */
#define	SETL41(reg, byte) \
	sethi	%hi(0x1FE00000), reg;		/* 0000.0000.1FE0.0000 */ \
	or	reg, 0xF, reg;			/* 0000.0000.1FE0.000F */ \
	sllx	reg, 12, reg;			/* 0000.01FE.0000.F000 */ \
	or	reg, byte, reg;			/* 0000.01FE.0000.F0xx */

/*
 * SETL41_OFF is used to calculate the relative PC value when a
 * branch instruction needs to go over SETL41() macro.  It is the
 * size in bytes of the four instructions SETL41() expands to.
 */
#define	SETL41_OFF	16

/*
 * reading stick requires 2 loads, and there could be an intervening
 * low-to-high 32-bit rollover resulting in a return value that is
 * off by about
* (2 ^ 32); this rare case is prevented by re-reading
 * the low-32 bits after the high-32 and verifying the "after" value
 * is >= the "before" value; if not, increment the high-32 value.
 *
 * this method is limited to 1 rollover, and based on the fixed
 * stick-frequency (5555555), requires the loads to complete within
 * 773 seconds; incrementing the high-32 value will not overflow for
 * about 52644 years.
 *
 * writing stick requires 2 stores; if the old/new low-32 value is
 * near 0xffffffff, there could be another rollover (also rare).
 * to prevent this, we first write a 0 to the low-32, then write
 * new values to the high-32 then the low-32.
 *
 * When we detect a carry in the lower %stick register, we need to
 * read HST_HIGH again. However at the point where we detect this,
 * we need to rebuild the register address HST_HIGH. This involves more
 * than one instruction and a branch is unavoidable. However, most of
 * the time, there is no carry. So we take the penalty of a branch
 * instruction only when there is carry (less frequent).
 *
 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
 * addr already points to HST_LOW.
 *
 * NOTE: this method requires disabling interrupts before using
 * DELTA_NATIVE_TIME.
*/
/*
 * Hummingbird GET_NATIVE_TIME(out, scr, tmp): reads low, high, then low
 * again and assembles out = (high << 32) | low.  If the second low read
 * is smaller than the first, the brlz restarts the whole sequence at
 * SETL41().  The branch displacements here and in DELTA_NATIVE_TIME() are
 * hand-computed from the instruction count, so any change to either
 * sequence must be reflected in them.
 */
#define	GET_NATIVE_TIME(out, scr, tmp)	\
	SETL41(scr, HST_LOW);		\
	ldxa	[scr]ASI_IO, tmp;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, out;	\
	dec	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sub	scr, tmp, tmp;		\
	brlz,pn tmp, .-(SETL41_OFF+24); \
	sllx	out, 32, out;		\
	or	out, scr, out
#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
	SETL41(addr, HST_LOW);		\
	ldxa	[addr]ASI_IO, tmp;	\
	inc	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, high;	\
	dec	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, low;	\
	sub	low, tmp, tmp;		\
	brlz,pn tmp, .-24;		\
	sllx	high, 32, high;		\
	or	high, low, high;	\
	add	high, delta, high;	\
	srl	high, 0, low;		\
	srlx	high, 32, high;		\
	stxa	%g0, [addr]ASI_IO;	\
	inc	HST_DIFF, addr;		\
	stxa	high, [addr]ASI_IO;	\
	dec	HST_DIFF, addr;		\
	stxa	low, [addr]ASI_IO
#define	RD_TICKCMPR(out, scr)		\
	SETL41(scr, HSTC_LOW);		\
	ldxa	[scr]ASI_IO, out;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sllx	scr, 32, scr;		\
	or	scr, out, out
#define	WR_TICKCMPR(in, scra, scrd, label) \
	SETL41(scra, HSTC_HIGH);	\
	srlx	in, 32, scrd;		\
	stxa	scrd, [scra]ASI_IO;	\
	dec	HST_DIFF, scra;		\
	stxa	in, [scra]ASI_IO

#else	/* !CHEETAH && !OLYMPUS_C && !HUMMINGBIRD */

#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rdpr	%tick, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rdpr	%tick, reg;		\
	add	reg, delta, reg;	\
	wrpr	reg, %tick
#define	RD_TICKCMPR(out, scr)		\
	rd	TICK_COMPARE, out
#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
/*
 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
 * The failure occurs only when the following instruction decodes to wr or
 * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
 * with a read, thus stalling the pipe and keeping following instructions
 * from causing data corruption.
* Aligning to a quadword will ensure these
 * two instructions are not split due to i$ misses.
 */
#define	WR_TICKCMPR(cmpr,scr1,scr2,label)	\
	ba,a	.bb_errata_1.label		;\
	.align	64				;\
.bb_errata_1.label:				;\
	wr	cmpr, TICK_COMPARE		;\
	rd	TICK_COMPARE, %g0
#else	/* BB_ERRATA_1 */
#define	WR_TICKCMPR(in,scr1,scr2,label)		\
	wr	in, TICK_COMPARE
#endif	/* BB_ERRATA_1 */

#endif	/* !CHEETAH && !OLYMPUS_C && !HUMMINGBIRD */

#include <sys/clock.h>

#if defined(lint)
#include <sys/types.h>
#include <sys/scb.h>
#include <sys/systm.h>
#include <sys/regset.h>
#include <sys/sunddi.h>
#include <sys/lockstat.h>
#endif	/* lint */


#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/intreg.h>
#include <sys/psr_compat.h>
#include <sys/isa_defs.h>
#include <sys/dditypes.h>
#include <sys/intr.h>

#if !defined(lint)
#include "assym.h"
#endif	/* !lint */

#if defined(lint)

uint_t
get_impl(void)
{ return (0); }

#else	/* lint */

	/*
	 * get_impl: returns in %o0 whatever GET_CPU_IMPL() produces.
	 */
	ENTRY(get_impl)
	GET_CPU_IMPL(%o0)
	retl
	nop
	SET_SIZE(get_impl)

#endif	/* lint */

#if defined(lint)
/*
 * Softint generated when counter field of tick reg matches value field
 * of tick_cmpr reg
 */
/*ARGSUSED*/
void
tickcmpr_set(uint64_t clock_cycles)
{}

#else	/* lint */

	/*
	 * tickcmpr_set: program the tick compare register with %o0.  After
	 * each write the current native time is re-read with its top bit
	 * shifted off; if the value written is not greater than that time,
	 * the write is retried with (time + step).  The step register starts
	 * at 8 and is doubled in the delay slot of the exit branch, i.e.
	 * before every retry.
	 */
	ENTRY_NP(tickcmpr_set)
	! get 64-bit clock_cycles interval
	mov	%o0, %o2
	mov	8, %o3			! A reasonable initial step size
1:
	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR

	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
	sllx	%o0, 1, %o0		!   value we wrote was in the future.
	srlx	%o0, 1, %o0

	cmp	%o2, %o0		! If the value we wrote was in the
	bg,pt	%xcc, 2f		!   future, then blow out of here.
	sllx	%o3, 1, %o3		!
! If not, then double our step size,
	ba,pt	%xcc, 1b		!   and take another lap.
	add	%o0, %o3, %o2		!
2:
	retl
	nop
	SET_SIZE(tickcmpr_set)

#endif	/* lint */

#if defined(lint)

void
tickcmpr_disable(void)
{}

#else	/* lint */

	/*
	 * tickcmpr_disable: writes (1 << TICKINT_DIS_SHFT) to the tick
	 * compare register.
	 */
	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0
	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
	retl
	nop
	SET_SIZE(tickcmpr_disable)

#endif	/* lint */

#if defined(lint)

/*
 * tick_write_delta() increments %tick by the specified delta.  This should
 * only be called after a CPR event to assure that gethrtime() continues to
 * increase monotonically.  Obviously, writing %tick needs to be done very
 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
 * this reason, we make sure we're i-cache hot before actually writing to
 * %tick.
 */
/*ARGSUSED*/
void
tick_write_delta(uint64_t delta)
{}

#else	/* lint */

#ifdef DEBUG
	.seg	".text"
tick_write_panic:
	.asciz	"tick_write_delta: interrupts already disabled on entry"
#endif	/* DEBUG */

	ENTRY_NP(tick_write_delta)
	rdpr	%pstate, %g1
#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			! aren't already disabled.
	sethi	%hi(tick_write_panic), %o1
	save	%sp, -SA(MINFRAME), %sp	! get a new window to preserve caller
	call	panic
	or	%i1, %lo(tick_write_panic), %o0	! %i1 is the pre-save %o1
#endif	/* DEBUG */
0:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
	mov	%o0, %o2
	ba	0f			! Branch to cache line-aligned instr.
	nop
	.align	16
0:	nop				! The next 3 instructions are now hot.
	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick

	retl				! Return
	wrpr	%g0, %g1, %pstate	!
! delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)
#endif	/* lint */

#if defined(lint)
/*
 *  return 1 if disabled
 */

int
tickcmpr_disabled(void)
{ return (0); }

#else	/* lint */

	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1, %o0)
	retl
	srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: %o0 = disable bit
	SET_SIZE(tickcmpr_disabled)

#endif	/* lint */

/*
 * Get current tick
 */
#if defined(lint)

u_longlong_t
gettick(void)
{ return (0); }

u_longlong_t
randtick(void)
{ return (0); }

#else	/* lint */

	ENTRY(gettick)
	ALTENTRY(randtick)
	GET_NATIVE_TIME(%o0, %o2, %o3)
	retl
	nop
	SET_SIZE(randtick)
	SET_SIZE(gettick)

#endif	/* lint */


/*
 * Return the counter portion of the tick register.
 */

#if defined(lint)

uint64_t
gettick_counter(void)
{ return(0); }

#else	/* lint */

	ENTRY_NP(gettick_counter)
	rdpr	%tick, %o0
	sllx	%o0, 1, %o0
	retl
	srlx	%o0, 1, %o0		! shake off npt bit
	SET_SIZE(gettick_counter)
#endif	/* lint */

/*
 * Provide a C callable interface to the trap that reads the hi-res timer.
 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
*/

#if defined(lint)

hrtime_t
gethrtime(void)
{
	return ((hrtime_t)0);
}

hrtime_t
gethrtime_unscaled(void)
{
	return ((hrtime_t)0);
}

hrtime_t
gethrtime_max(void)
{
	return ((hrtime_t)0);
}

void
scalehrtime(hrtime_t *hrt)
{
	*hrt = 0;
}

void
gethrestime(timespec_t *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = 0;
}

time_t
gethrestime_sec(void)
{
	return (0);
}

void
gethrestime_lasttick(timespec_t *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = 0;
}

/*ARGSUSED*/
void
hres_tick(void)
{
}

void
panic_hres_tick(void)
{
}

#else	/* lint */

	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
							! %g1 = hrtime
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime)

	/*
	 * gethrtime_unscaled: returns the native time as read, with no
	 * conversion applied.
	 */
	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_unscaled)

	/*
	 * gethrtime_waitfree / dtrace_gethrtime: reads the native time and
	 * converts it with NATIVE_TIME_TO_NSEC().
	 */
	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
	retl
	mov	%g1, %o0
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)

	/*
	 * gethrtime_max: NATIVE_TIME_MAX() converted with
	 * NATIVE_TIME_TO_NSEC(); if that result is negative, the value
	 * (-1 >> 1) is returned instead.
	 */
	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	! hrtime_t's are signed, max hrtime_t must be positive
	mov	-1, %o2
	brlz,a	%g1, 1f
	srlx	%o2, 1, %g1
1:
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_max)

	/*
	 * scalehrtime: converts the 64-bit value that %o0 points to, in
	 * place, with NATIVE_TIME_TO_NSEC().
	 */
	ENTRY(scalehrtime)
	ldx	[%o0], %o1
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	stx	%o1, [%o0]
	SET_SIZE(scalehrtime)

/*
 * Fast trap to return a timestamp, uses trap window, leaves traps
 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRTIME trap.
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	!
! %g1 = hrtime
	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)

/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 *
 * Labels 2, 3 and 4 are defined inside the expansion, so a caller must not
 * rely on its own numeric labels 2-4 across an invocation.
 */
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
	srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
	ba	3f;			/* go convert to sec/nsec */	\
	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
	bl,pt	%xcc, 4f;		/* if not, we're done */	\
	nop;				/* delay: do nothing :( */	\
	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
	ba,a	3b;			/* check >= billion again */	\
4:

	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]
	retl
	stn	%o2, [%o0 + CLONGSIZE]
	SET_SIZE(gethrestime)

/*
 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 * seconds.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					!
! %o0 current hrestime seconds
	nop
	SET_SIZE(gethrestime_sec)

/*
 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
	membar	#LoadLoad			! Load of lock must complete
	andn	%o2, 1, %o2			! Mask off lowest bit
	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
	add	%o1, %lo(hrestime), %o4
	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
	membar	#LoadLoad			! All loads must complete
	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
	cmp	%o3, %o2			! If lock is locked or has
	bne	0b				!   changed, retry.
	stn	%g1, [%o0]			! Delay: store seconds
	retl
	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)

/*
 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRESTIME trap.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)

/*
 * Fast trap to return lwp virtual time, uses trap window, leaves traps
 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *	%o0, %o1 = return lwp virtual time
 *	%g2 = CPU/thread
 *	%g3 = lwp
 *	%g1 = scratch
 *	%g5 = scratch
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5, %g1, %g2)	!
! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3

	/*
	 * Subtract start time of current microstate from time
	 * of day to get increment for lwp virtual time.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add current value of ms_acct[LMS_USER]
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)



	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


/*
 * hres_tick: takes the lock byte at hres_lock + HRES_LOCK_OFFSET by
 * spinning with ldstub, records the current native time in hres_last_tick,
 * advances hrtime_base, applies any pending hrestime_adj and updates
 * hrestime.  The lock is released by incrementing the hres_lock word.
 * Panics if the newly computed time is behind the stored hrtime_base.
 */
	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! get a new window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
7:	tst	%l5
	bz,pt	%xcc, 8f			! if we got it, drive on
	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5
	bz,a,pn	%xcc, 7b
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
	ba,pt	%xcc, 9b
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
	! convert native time to nsecs
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1			! get accurate nsec delta

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0
	bg,pn	%xcc, 9f
	nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base

	!
	! apply adjustment, if any
	!
ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f
						! hrestime_adj == 0 ?
						! yes, skip adjustments
	clr	%l5				! delay: set adj to zero
	tst	%l0				! is hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f			! yes, go handle positive case
	srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj

	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f			! yes, use current adj
	neg	%l5				! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	mov	%l0, %l5			! no, so set adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
	mov	%l0, %l5			! delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0			! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2
	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3			! hrestime.nsec += adj
	add	%i3, %i1, %i3			! hrestime.nsec += nslt

	set	NANOSEC, %l5			! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
	sethi	%hi(one_sec), %i1		! delay
	add	%i2, 0x1, %i2			! hrestime.tv_sec++
	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1				! release lock
	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock

	ret
	restore

9:
	!
	! release hres_lock
	!
ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)

#endif	/* lint */

#if !defined(lint) && !defined(__lint)

	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	or	%o0, %lo(kstat_q_panic_msg), %o0
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)

#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 *	QOP	add or sub; applied to the old queue length to get the new one
 *	QBR	branch-on-zero op tested against the OLD queue length
 *	QZERO	target of QBR; the store of the new length sits in its
 *		delay slot and so happens on both paths
 *	QRETURN	text emitted immediately before the final store, so that
 *		when it ends in retl the lastupdate store is the delay slot
 *	QTYPE	prefix pasted onto CNT, LASTUPDATE, TIME and LENTIME to
 *		form the field offsets
 *
 * On entry %o0 is the base the offsets are applied to and %g1 is the
 * current time.  Clobbers %o1-%o5.
 */
#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */

#if !defined(DEBUG)
/*
 * same as KSTAT_Q_UPDATE but without:
 * QBR     %o1, QZERO;
 * to be used only with non-debug build. mimics ASSERT() behaviour.
*/
#define	KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* save qlen */		\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
#endif

/*
 * The routines below all read the native time into %g1 and then update
 * the wait (KSTAT_IO_W) and/or run (KSTAT_IO_R) queue fields relative to
 * %o0.  The "enter" forms skip the time accounting when the old queue
 * length was zero; on DEBUG builds the "exit" forms branch to
 * kstat_q_panic when the old queue length was zero.
 */
	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)

	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
#else
	KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
#endif
	SET_SIZE(kstat_waitq_exit)

	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)

	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
#else
	KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
#endif
	SET_SIZE(kstat_runq_exit)

	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
#else
	KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
#endif
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)

	.align 16
ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
#else
	KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
#endif
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)

#endif	/* !(lint || __lint) */

#ifdef lint

int64_t timedelta;
hrtime_t hres_last_tick;
volatile timestruc_t hrestime;
int64_t hrestime_adj;
volatile int hres_lock;
uint_t nsec_scale;
hrtime_t hrtime_base;
int traptrace_use_stick;

#else /* lint */
	/*
	 *  -- WARNING --
	 *
	 * The following variables MUST be together on a 128-byte boundary.
	 * In addition to the primary performance motivation (having them all
	 * on the same cache line(s)), code here and in the GET*TIME() macros
	 * assumes that they all have the same high 22 address bits (so
	 * there's only one sethi).
	 */
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift

	/* XXX - above comment claims 128-bytes is necessary */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* 2 longs */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0
nsec_scale:
	.word	0
hrtime_base:
	.word	0, 0
traptrace_use_stick:
	.word	0
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT

#endif	/* lint */


/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 * Delay by spinning.
 *
 * delay for n microseconds.
* numbers <= 0 delay 1 usec
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 *
 * For OPL platforms that support the "sleep" instruction, we
 * conditionally (ifdef'ed) insert a "sleep" instruction in
 * the loop. Note that theoretically we should have moved (duplicated)
 * the code down to spitfire/us3/opl specific asm files - but this
 * is a lot of code duplication just to add one "sleep" instruction.
 * We chose less code duplication for this.
 */

#if defined(lint)

/*ARGSUSED*/
void
drv_usecwait(clock_t n)
{}

/*ARGSUSED*/
void
usec_delay(int n)
{}

#else	/* lint */

	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f
	mov	1, %o0			! delay (annulled if n > 0): n = 1
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! Scale usec to ticks
	inc	%o1			! We don't start on a tick edge
	GET_NATIVE_TIME(%o2, %o3, %o4)
	add	%o1, %o2, %o1

1:
#ifdef	_OPL
	.word 0x81b01060		! insert "sleep" instruction
#endif /* _OPL */			! use byte code for now
	cmp	%o1, %o2
	GET_NATIVE_TIME(%o2, %o3, %o4)	! must leave the condition codes alone
	bgeu,pt	%xcc, 1b
	nop
	retl
	nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)
#endif	/* lint */

#if defined(lint)

/* ARGSUSED */
void
pil14_interrupt(int level)
{}

#else	/* lint */

/*
 * Level-14 interrupt prologue.
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%pil, %g6			! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	!
! if not, record user PC
	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
1:	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
	SET_SIZE(pil14_interrupt)

	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled.  If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure.  We'll then check TICKINT
	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
	! interrupt.  In this case, TICK_COMPARE may have been rewritten
	! recently; we'll compare %o5 to the current time to verify that it's
	! in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	RD_TICKCMPR(%o5, %g1)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f
	nop

	rdpr	%pstate, %g5
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate		! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req ! preserves %o5 and %g5
	mov	PIL_14, %o0

	! Check SOFTINT for TICKINT/STICKINT
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f
	wrpr	%g0, %g5, %pstate		! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! Now that we've cleared TICKINT, we can reread %tick and confirm
	! that the value we programmed is still in the future.  If it isn't,
	! we need to reprogram TICK_COMPARE to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
	sllx	%o0, 1, %o0			! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	wrpr	%g0, %g5, %pstate		!
! delay: enable vec intr

	!
	! If we're here, then we have programmed TICK_COMPARE with a %tick
	! which is in the past; we'll now load an initial step size, and loop
	! until we've managed to program TICK_COMPARE to fire in the future.
	!
	mov	8, %o4				! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5			! Add the step
	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
	sllx	%o0, 1, %o0			! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
	ba	1b				! No, try again.
	sllx	%o4, 1, %o4			!    delay: double step size

2:	ba	current_thread_complete
	nop
	SET_SIZE(tick_rtt)

#endif	/* lint */

#if defined(lint)

/* ARGSUSED */
void
pil15_interrupt(int level)
{}

#else	/* lint */

/*
 * Level-15 interrupt prologue.
 */
	ENTRY_NP(pil15_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_CPCPROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_CPCPROFILE_UPC]	! if not, record user PC
	ba	pil15_epilogue			! must be large-disp branch
	stn	%g0, [%g1 + CPU_CPCPROFILE_PC]	! zero kernel PC
1:	ba	pil15_epilogue			! must be large-disp branch
	stn	%g0, [%g1 + CPU_CPCPROFILE_UPC]	!
! zero user PC
	SET_SIZE(pil15_interrupt)

#endif	/* lint */

#if defined(lint) || defined(__lint)

/* ARGSUSED */
uint64_t
find_cpufrequency(volatile uchar_t *clock_ptr)
{
	return (0);
}

#else	/* lint */

/*
 * find_cpufrequency(clock_ptr)
 *
 * Measure how many native-time ticks elapse across one interval of the
 * byte at clock_ptr (%o0), which the comments below treat as a seconds
 * register.  With interrupts disabled, spin until the byte changes and
 * keep the latest time sample (%o3), spin until it changes again and keep
 * the latest sample (%o5), then return %o5 - %o3 in %o0.
 *
 * A newly read value of zero is treated as a minute rollover, after which
 * the measurement is restarted.  DEBUG kernels panic if interrupts are
 * already disabled on entry.
 */
#ifdef DEBUG
	.seg	".text"
find_cpufreq_panic:
	.asciz	"find_cpufrequency: interrupts already disabled on entry"
#endif	/* DEBUG */

	ENTRY_NP(find_cpufrequency)
	rdpr	%pstate, %g1		! %g1 = entry %pstate, restored later

#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			! are currently enabled
	sethi	%hi(find_cpufreq_panic), %o1
	call	panic
	or	%o1, %lo(find_cpufreq_panic), %o0
#endif	/* DEBUG */

0:
	! wrpr writes the XOR of its two source operands, so this clears
	! PSTATE_IE only because IE is set in %g1 (checked above when DEBUG).
	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
3:
	ldub	[%o0], %o1		! Read the number of seconds
	mov	%o1, %o2		! remember initial value in %o2
1:
	GET_NATIVE_TIME(%o3, %g4, %g5)	! %o3 = start sample, refreshed each pass
	cmp	%o1, %o2		! did the seconds register roll over?
	be,pt	%icc, 1b		! branch back if unchanged
	ldub	[%o0], %o2		!   delay: load the new seconds val

	brz,pn	%o2, 3b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	mov	%o2, %o4		!   delay: store init. val. in %o4
2:
	GET_NATIVE_TIME(%o5, %g4, %g5)	! %o5 = end sample, refreshed each pass
	cmp	%o2, %o4		! did the seconds register roll over?
	be,pt	%icc, 2b		! branch back if unchanged
	ldub	[%o0], %o4		!   delay: load the new seconds val

	brz,pn	%o4, 0b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	wrpr	%g0, %g1, %pstate	!   delay: re-enable interrupts
					!   (runs on the retry path too; 0b
					!   then disables them again)

	retl
	sub	%o5, %o3, %o0		! return the difference in ticks
	SET_SIZE(find_cpufrequency)

#endif	/* lint */

#if defined(lint)
/*
 * Prefetch a page_t for write or read, this assumes a linear
 * scan of sequential page_t's.
 */
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{}

/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{}
#else	/* lint */

#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)
	!
	! On US-III, the prefetch instruction queue is 8 entries deep.
	! Also, prefetches for write put data in the E$, which has
	! lines of 512 bytes for an 8MB cache. Each E$ line is further
	! subblocked into 64 byte chunks.
	!
	! Since prefetch can only bring in 64 bytes at a time (See Sparc
	! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
	! then 2 prefetches are required in order to bring an entire
	! page into the E$.
	!
	! Since the prefetch queue is 8 entries deep, we currently can
	! only have 4 prefetches for page_t's outstanding. Thus, we
	! prefetch n+4 ahead of where we are now:
	!
	!      4 * sizeof(page_t)     -> 512
	!      4 * sizeof(page_t) +64 -> 576
	!
	! Example
	! =======
	! contiguous page array in memory...
	!
	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
	! ^         ^         ^         ^         ^    ^
	! pp                                      |    pp+4*sizeof(page)+64
	!                                         |
	!                                         pp+4*sizeof(page)
	!
	!  Prefetch
	!   Queue
	! +-------+<--- In this iteration, we're working with pp (AAA1),
	! |Preftch|     but we enqueue prefetch for addr = XXX1
	! | XXX1  |
	! +-------+<--- this queue slot will be a prefetch instruction
	! |Preftch|     for addr = pp + 4*sizeof(page_t) + 64 (or second
	! | XXX2  |     half of page XXX)
	! +-------+
	! |Preftch|<-+- The next time around this function, we'll be
	! | YYY1  |  |  working with pp = BBB1, but will be enqueueing
	! +-------+  |  prefetches for both halves of page YYY,
	! |Preftch|  |  while both halves of page XXX are in transit,
	! | YYY2  |<-+  making their way into the E$.
	! +-------+
	! |Preftch|
	! | ZZZ1  |
	! +-------+
	! .       .
	! :       :
	!
	!  E$
	! +============================================...
	! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
	! +============================================...
	! |      |      |      |      |      |      |
	! +============================================...
	! .
	! :
	!
	! So we should expect the first four page accesses to stall
	! while we warm up the cache, after which most of the pages
	! will have their pp ready in the E$.
	!
	! Also note that if sizeof(page_t) grows beyond 128, then
	! we'll need an additional prefetch to get an entire page
	! into the E$, thus reducing the number of outstanding page
	! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
	! etc.
	!
	! Cheetah+
	! ========
	! On Cheetah+ we use "#n_write" prefetches as these avoid
	! unnecessary RTS->RTO bus transaction state change, and
	! just issues RTO transaction. (See pp.77 of Cheetah+ Delta
	! PRM). On Cheetah, #n_write prefetches are reflected with
	! RTS->RTO state transition regardless.
	!
#define	STRIDE1 512
#define	STRIDE2 576

	!
	! Build-time guard tying STRIDE1 to the 4-entries-ahead scheme
	! described above.
	! NOTE(review): PAGE_SIZE is compared against the 128-byte page_t
	! figures above, so it is presumably the assym.h value for
	! sizeof (page_t) rather than the MMU page size -- confirm.
	!
#if	STRIDE1 != (PAGE_SIZE * 4)
#error	"STRIDE1 != (PAGE_SIZE * 4)"
#endif	/* STRIDE1 != (PAGE_SIZE * 4) */

	!
	! prefetch_page_w(pp): issue write prefetches for the addresses
	! pp (%o0) + STRIDE1 and pp + STRIDE2; the second prefetch sits in
	! the delay slot of retl.
	!
	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! Note on CHEETAH to prefetch for read, we really use #one_write.
	! This fetches to E$ (general use) rather than P$ (floating point use).
	!
	! prefetch_page_r(pp): same two addresses as prefetch_page_w, with
	! the second prefetch again in the delay slot of retl.
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #one_write
	retl
	prefetch	[%o0+STRIDE2], #one_write
	SET_SIZE(prefetch_page_r)

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

	!
	! UltraSparcII can have up to 3 prefetches outstanding.
	! A page_t is 128 bytes (2 prefetches of 64 bytes each)
	! So prefetch for pp + 1, which is
	!
	!       pp + sizeof(page_t)
	!
! and
	!       pp + sizeof(page_t) + 64
	!
#define	STRIDE1	128
#define	STRIDE2	192

	!
	! Build-time guard: STRIDE1 must equal PAGE_SIZE for the
	! one-entry-ahead scheme described above.
	! NOTE(review): PAGE_SIZE is presumably the assym.h value for
	! sizeof (page_t), matching the 128 bytes quoted above -- confirm.
	!
#if	STRIDE1 != PAGE_SIZE
#error	"STRIDE1 != PAGE_SIZE"
#endif	/* STRIDE1 != PAGE_SIZE */

	!
	! prefetch_page_w(pp): write prefetches for pp (%o0) + STRIDE1 and
	! pp + STRIDE2; the second one executes in the retl delay slot.
	!
	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! prefetch_page_r(pp): read prefetches for the same two addresses.
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_reads
	retl
	prefetch	[%o0+STRIDE2], #n_reads
	SET_SIZE(prefetch_page_r)

#elif defined(OLYMPUS_C)
	!
	! Prefetch strides for Olympus-C
	!

#define STRIDE1	0x440
#define STRIDE2	0x640

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! NOTE(review): the read variant below issues #n_writes, exactly
	! like prefetch_page_w above.  Nothing visible here says whether
	! that is deliberate for Olympus-C -- confirm before changing.
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_r)
#else	/* OLYMPUS_C */

#error "You need to fix this for your new cpu type."

#endif	/* OLYMPUS_C */

#endif	/* lint */

#if defined(lint)
/*
 * Prefetch struct smap for write.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{}
#else	/* lint */

/*
 * PREFETCH_Q_LEN is the per-CPU-family constant used below to compute
 * SMAP_STRIDE, the distance prefetch_smap_w reaches from its argument.
 */
#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)

#define	PREFETCH_Q_LEN 8

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

#define	PREFETCH_Q_LEN 3

#elif defined(OLYMPUS_C)
	!
	! Use length of one for now.
	!
#define	PREFETCH_Q_LEN	1

#else	/* OLYMPUS_C */

#error You need to fix this for your new cpu type.

#endif	/* OLYMPUS_C */

#include <vm/kpm.h>

#ifdef	SEGKPM_SUPPORT

#define	SMAP_SIZE	72
#define	SMAP_STRIDE	(((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)

#else	/* SEGKPM_SUPPORT */

	!
	! The hardware will prefetch the 64 byte cache aligned block
	!
! that contains the address specified in the prefetch instruction.
	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
	! per pass will suffice as long as we prefetch far enough ahead to
	! make sure we don't stall for the cases where the smap object
	! spans multiple hardware prefetch blocks.  Let's prefetch as far
	! ahead as the hardware will allow.
	!
	! The smap array is processed with decreasing address pointers.
	!
#define	SMAP_SIZE	48
#define	SMAP_STRIDE	(PREFETCH_Q_LEN * SMAP_SIZE)

#endif	/* SEGKPM_SUPPORT */

	!
	! prefetch_smap_w(smp): issue a single write prefetch for the
	! address SMAP_STRIDE bytes below smp (%o0).  The prefetch executes
	! in the delay slot of retl.
	!
	ENTRY(prefetch_smap_w)
	retl
	prefetch	[%o0-SMAP_STRIDE], #n_writes
	SET_SIZE(prefetch_smap_w)

#endif	/* lint */

#if defined(lint) || defined(__lint)

/* ARGSUSED */
uint64_t
getidsr(void)
{ return 0; }

#else	/* lint */

/*
 * getidsr()
 *
 * Return in %o0 the 64-bit value loaded from address 0 (%g0) of
 * ASI_INTR_DISPATCH_STATUS.  The load executes in the delay slot of retl.
 */
	ENTRY_NP(getidsr)
	retl
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
	SET_SIZE(getidsr)

#endif	/* lint */
