/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#if !defined(lint)
#include "assym.h"
#endif	/* !lint */

/*
 * General assembly language routines.
 * It is the intent of this file to contain routines that are
 * specific to cpu architecture.
 */

/*
 * WARNING: If you add a fast trap handler which can be invoked by a
 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
 * instead of the "done" instruction to return back to user mode. See
 * comments for the "fast_trap_done" entry point for more information.
 */
#define	FAST_TRAP_DONE	\
	ba,a	fast_trap_done

/*
 * Override GET_NATIVE_TIME for the cpu module code.  This is not
 * guaranteed to be exactly one instruction, so be careful about using
 * the macro in delay slots.
 *
 * Do not use any instruction that modifies condition codes, as the
 * caller may depend on these to remain unchanged across the macro.
 */
#if defined(CHEETAH) || defined(OLYMPUS_C)

#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rd	STICK, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rd	STICK, reg;		\
	add	reg, delta, reg;	\
	wr	reg, STICK
#define	RD_TICKCMPR(out, scr)	\
	rd	STICK_COMPARE, out
#define	WR_TICKCMPR(in, scr1, scr2, label) \
	wr	in, STICK_COMPARE

#elif defined(HUMMINGBIRD)
#include <sys/spitregs.h>

/*
 * the current hummingbird version of %stick and %stick_cmp
 * were both implemented as (2) 32-bit locations in ASI_IO space;
 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
 *
 * 64-bit opcodes are required, but move only 32-bits:
 *
 * ldxa [phys]ASI_IO, %dst	reads  the low 32-bits from phys into %dst
 * stxa %src, [phys]ASI_IO	writes the low 32-bits from %src into phys
 *
 * reg equivalent		[phys]ASI_IO
 * ------------------		---------------
 * %stick_cmp  low-32		0x1FE.0000.F060
 * %stick_cmp  high-32		0x1FE.0000.F068
 * %stick      low-32		0x1FE.0000.F070
 * %stick      high-32		0x1FE.0000.F078
 */
#define	HSTC_LOW	0x60		/* stick_cmp low  32-bits */
#define	HSTC_HIGH	0x68		/* stick_cmp high 32-bits */
#define	HST_LOW		0x70		/* stick low  32-bits */
#define	HST_HIGH	0x78		/* stick high 32-bits */
#define	HST_DIFF	0x08		/* low<-->high diff */

/*
 * Any change in the number of instructions in SETL41()
 * will affect SETL41_OFF
 */
#define	SETL41(reg, byte) \
	sethi	%hi(0x1FE00000), reg;	/* 0000.0000.1FE0.0000 */	\
	or	reg, 0xF, reg;		/* 0000.0000.1FE0.000F */	\
	sllx	reg, 12, reg;		/* 0000.01FE.0000.F000 */	\
	or	reg, byte, reg;		/* 0000.01FE.0000.F0xx */

/*
 * SETL41_OFF is used to calculate the relative PC value when a
 * branch instruction needs to go over the SETL41() macro
 */
#define	SETL41_OFF	16

/*
 * reading stick requires 2 loads, and there could be an intervening
 * low-to-high 32-bit rollover resulting in a return value that is
 * off by about (2 ^ 32); this rare case is prevented by re-reading
 * the low-32 bits after the high-32 and verifying the "after" value
 * is >= the "before" value; if not, increment the high-32 value.
 *
 * this method is limited to 1 rollover, and based on the fixed
 * stick-frequency (5555555), requires the loads to complete within
 * 773 seconds; incrementing the high-32 value will not overflow for
 * about 52644 years.
 *
 * writing stick requires 2 stores; if the old/new low-32 value is
 * near 0xffffffff, there could be another rollover (also rare).
 * to prevent this, we first write a 0 to the low-32, then write
 * new values to the high-32 then the low-32.
 *
 * When we detect a carry in the lower %stick register, we need to
 * read HST_HIGH again.  However, at the point where we detect this,
 * we need to rebuild the register address HST_HIGH.  This takes more
 * than one instruction, so a branch is unavoidable.  Most of the
 * time there is no carry, though, so we pay the branch penalty only
 * in the (less frequent) carry case.
 *
 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
 * addr already points to HST_LOW.
 *
 * NOTE: this method requires disabling interrupts before using
 * DELTA_NATIVE_TIME.
 */
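
/*
 * For illustration only (not assembled): the rollover-safe read that
 * GET_NATIVE_TIME() implements below is, in effect, the following C,
 * where hbird_read32() is a hypothetical stand-in for a 32-bit ldxa
 * from ASI_IO (the helper name and prototype are assumptions):
 *
 *	#include <stdint.h>
 *
 *	extern uint32_t hbird_read32(uint64_t phys);	// hypothetical
 *
 *	uint64_t
 *	hbird_gettick(void)
 *	{
 *		uint32_t before, hi, after;
 *
 *		do {
 *			before = hbird_read32(0x1FE0000F070);	// %stick lo
 *			hi     = hbird_read32(0x1FE0000F078);	// %stick hi
 *			after  = hbird_read32(0x1FE0000F070);	// re-read lo
 *		} while (after < before);	// low half rolled over; retry
 *		return ((uint64_t)hi << 32 | after);
 *	}
 *
 * The write side avoids the corresponding hazard by ordering its
 * stores: zero the low half first, then write the high half, then
 * the low half.
 */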
#define	GET_NATIVE_TIME(out, scr, tmp)	\
	SETL41(scr, HST_LOW);		\
	ldxa	[scr]ASI_IO, tmp;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, out;	\
	dec	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sub	scr, tmp, tmp;		\
	brlz,pn	tmp, .-(SETL41_OFF+24);	\
	sllx	out, 32, out;		\
	or	out, scr, out
#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp)	\
	SETL41(addr, HST_LOW);		\
	ldxa	[addr]ASI_IO, tmp;	\
	inc	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, high;	\
	dec	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, low;	\
	sub	low, tmp, tmp;		\
	brlz,pn	tmp, .-24;		\
	sllx	high, 32, high;		\
	or	high, low, high;	\
	add	high, delta, high;	\
	srl	high, 0, low;		\
	srlx	high, 32, high;		\
	stxa	%g0, [addr]ASI_IO;	\
	inc	HST_DIFF, addr;		\
	stxa	high, [addr]ASI_IO;	\
	dec	HST_DIFF, addr;		\
	stxa	low, [addr]ASI_IO
#define	RD_TICKCMPR(out, scr)		\
	SETL41(scr, HSTC_LOW);		\
	ldxa	[scr]ASI_IO, out;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sllx	scr, 32, scr;		\
	or	scr, out, out
#define	WR_TICKCMPR(in, scra, scrd, label)	\
	SETL41(scra, HSTC_HIGH);	\
	srlx	in, 32, scrd;		\
	stxa	scrd, [scra]ASI_IO;	\
	dec	HST_DIFF, scra;		\
	stxa	in, [scra]ASI_IO

#else	/* !CHEETAH && !HUMMINGBIRD */

#define	GET_NATIVE_TIME(out, scr1, scr2)	\
	rdpr	%tick, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3)	\
	rdpr	%tick, reg;		\
	add	reg, delta, reg;	\
	wrpr	reg, %tick
#define	RD_TICKCMPR(out, scr)	\
	rd	TICK_COMPARE, out
#ifdef BB_ERRATA_1	/* writes to TICK_COMPARE may fail */
/*
 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
 * The failure occurs only when the following instruction decodes to wr or
 * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
 * with a read, thus stalling the pipe and keeping following instructions
 * from causing data corruption.  Aligning to a quadword will ensure these
 * two instructions are not split due to i$ misses.
 */
#define	WR_TICKCMPR(cmpr, scr1, scr2, label)	\
	ba,a	.bb_errata_1.label	;\
	.align	64			;\
.bb_errata_1.label:			;\
	wr	cmpr, TICK_COMPARE	;\
	rd	TICK_COMPARE, %g0
#else	/* BB_ERRATA_1 */
#define	WR_TICKCMPR(in, scr1, scr2, label)	\
	wr	in, TICK_COMPARE
#endif	/* BB_ERRATA_1 */

#endif	/* !CHEETAH && !HUMMINGBIRD */

#include <sys/clock.h>

#if defined(lint)
#include <sys/types.h>
#include <sys/scb.h>
#include <sys/systm.h>
#include <sys/regset.h>
#include <sys/sunddi.h>
#include <sys/lockstat.h>
#endif	/* lint */


#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/intreg.h>
#include <sys/psr_compat.h>
#include <sys/isa_defs.h>
#include <sys/dditypes.h>
#include <sys/intr.h>

#if !defined(lint)
#include "assym.h"
#endif	/* !lint */

#if defined(lint)

uint_t
get_impl(void)
{ return (0); }

#else	/* lint */

	ENTRY(get_impl)
	GET_CPU_IMPL(%o0)
	retl
	nop
	SET_SIZE(get_impl)

#endif	/* lint */

#if defined(lint)
/*
 * Softint generated when counter field of tick reg matches value field
 * of tick_cmpr reg
 */
/*ARGSUSED*/
void
tickcmpr_set(uint64_t clock_cycles)
{}

#else	/* lint */

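/*
 * The retry loop below is, in effect, the following C (an illustrative
 * sketch only; rd_tick() and wr_tick_cmpr() are hypothetical stand-ins
 * for the GET_NATIVE_TIME() and WR_TICKCMPR() macros):
 *
 *	#include <stdint.h>
 *
 *	extern uint64_t rd_tick(void);		// hypothetical
 *	extern void wr_tick_cmpr(uint64_t);	// hypothetical
 *
 *	void
 *	tickcmpr_set(uint64_t when)
 *	{
 *		uint64_t step = 8;	// a reasonable initial step size
 *
 *		for (;;) {
 *			wr_tick_cmpr(when);	// program the compare reg
 *			uint64_t now = rd_tick() << 1 >> 1; // clear NPT bit
 *			if (when > now)		// still in the future: done
 *				break;
 *			step <<= 1;		// fell behind; double the step
 *			when = now + step;	// and retry further out
 *		}
 *	}
 */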
	ENTRY_NP(tickcmpr_set)		! get 64-bit clock_cycles interval
	mov	%o0, %o2
	mov	8, %o3			! A reasonable initial step size
1:
	WR_TICKCMPR(%o2, %o4, %o5, __LINE__)	! Write to TICK_CMPR

	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
	sllx	%o0, 1, %o0		! value we wrote was in the future.
	srlx	%o0, 1, %o0

	cmp	%o2, %o0		! If the value we wrote was in the
	bg,pt	%xcc, 2f		! future, then blow out of here.
	sllx	%o3, 1, %o3		! If not, then double our step size,
	ba,pt	%xcc, 1b		! and take another lap.
	add	%o0, %o3, %o2
2:
	retl
	nop
	SET_SIZE(tickcmpr_set)

#endif	/* lint */

#if defined(lint)

void
tickcmpr_disable(void)
{}

#else	/* lint */

	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0
	WR_TICKCMPR(%o0, %o4, %o5, __LINE__)	! Write to TICK_CMPR
	retl
	nop
	SET_SIZE(tickcmpr_disable)

#endif	/* lint */

#if defined(lint)

/*
 * tick_write_delta() increments %tick by the specified delta.  This should
 * only be called after a CPR event to assure that gethrtime() continues to
 * increase monotonically.  Obviously, writing %tick needs to be done very
 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
 * this reason, we make sure we're i-cache hot before actually writing to
 * %tick.
 */
/*ARGSUSED*/
void
tick_write_delta(uint64_t delta)
{}

#else	/* lint */

#ifdef DEBUG
	.seg	".text"
tick_write_panic:
	.asciz	"tick_write_delta: interrupts already disabled on entry"
#endif	/* DEBUG */

	ENTRY_NP(tick_write_delta)
	rdpr	%pstate, %g1
#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			! aren't already disabled.
	sethi	%hi(tick_write_panic), %o1
	save	%sp, -SA(MINFRAME), %sp	! get a new window to preserve caller
	call	panic
	or	%i1, %lo(tick_write_panic), %o0
#endif	/* DEBUG */
0:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
	mov	%o0, %o2
	ba	0f			! Branch to cache line-aligned instr.
	nop
	.align	16
0:	nop				! The next 3 instructions are now hot.
	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick

	retl				! Return
	wrpr	%g0, %g1, %pstate	! delay: Re-enable interrupts
#endif	/* lint */

#if defined(lint)
/*
 * return 1 if disabled
 */

int
tickcmpr_disabled(void)
{ return (0); }

#else	/* lint */

	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1, %o0)
	retl
	srlx	%g1, TICKINT_DIS_SHFT, %o0
	SET_SIZE(tickcmpr_disabled)

#endif	/* lint */

/*
 * Get current tick
 */
#if defined(lint)

u_longlong_t
gettick(void)
{ return (0); }

#else	/* lint */

	ENTRY(gettick)
	GET_NATIVE_TIME(%o0, %o2, %o3)
	retl
	nop
	SET_SIZE(gettick)

#endif	/* lint */


/*
 * Return the counter portion of the tick register.
 */

#if defined(lint)

uint64_t
gettick_counter(void)
{ return (0); }

#else	/* lint */

	ENTRY_NP(gettick_counter)
	rdpr	%tick, %o0
	sllx	%o0, 1, %o0
	retl
	srlx	%o0, 1, %o0		! shake off npt bit
	SET_SIZE(gettick_counter)
#endif	/* lint */

/*
 * Provide a C callable interface to the trap that reads the hi-res timer.
 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
 */
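
/*
 * A note on the conversion used throughout this file (illustrative,
 * not assembled): NATIVE_TIME_TO_NSEC() and NATIVE_TIME_TO_NSEC_SCALE()
 * (see <sys/clock.h>) turn a %tick/%stick count into nanoseconds with a
 * fixed-point multiply and shift, along the lines of:
 *
 *	#include <stdint.h>
 *
 *	// scale and shift are placeholders for the platform's
 *	// nsec_scale and NSEC_SHIFT; the exact fixed-point layout
 *	// is defined in <sys/clock.h>, not here.
 *	uint64_t
 *	native_to_nsec(uint64_t ticks, uint64_t scale, int shift)
 *	{
 *		return ((ticks * scale) >> shift);
 *	}
 *
 * with scale precomputed so that scale ~= (NANOSEC << shift) / tick_freq,
 * i.e. (ticks * scale) >> shift ~= ticks * NANOSEC / tick_freq.  This
 * avoids a divide on every timestamp.
 */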

#if defined(lint)

hrtime_t
gethrtime(void)
{
	return ((hrtime_t)0);
}

hrtime_t
gethrtime_unscaled(void)
{
	return ((hrtime_t)0);
}

hrtime_t
gethrtime_max(void)
{
	return ((hrtime_t)0);
}

void
scalehrtime(hrtime_t *hrt)
{
	*hrt = 0;
}

void
gethrestime(timespec_t *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = 0;
}

time_t
gethrestime_sec(void)
{
	return (0);
}

void
gethrestime_lasttick(timespec_t *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = 0;
}

/*ARGSUSED*/
void
hres_tick(void)
{
}

void
panic_hres_tick(void)
{
}

#else	/* lint */

	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
							! %g1 = hrtime
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime)

	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1, %o2, %o3)	! %g1 = native time
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_unscaled)

	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1, %o2, %o3)	! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
	retl
	mov	%g1, %o0
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)

	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	! hrtime_t's are signed, max hrtime_t must be positive
	mov	-1, %o2
	brlz,a	%g1, 1f
	srlx	%o2, 1, %g1
1:
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_max)

	ENTRY(scalehrtime)
	ldx	[%o0], %o1
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	stx	%o1, [%o0]
	SET_SIZE(scalehrtime)

/*
 * Fast trap to return a timestamp, uses trap window, leaves traps
 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRTIME trap.
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)

/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 */
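
/*
 * Illustrative sketch (not assembled) of what CONV_HRESTIME() below
 * computes: add the nanoseconds elapsed since the last tick, fold in
 * at most 1/16th (ADJ_SHIFT) of that interval's worth of hrestime_adj,
 * then normalize tv_nsec into [0, NANOSEC).  The C signature is
 * hypothetical:
 *
 *	void
 *	conv_hrestime(timestruc_t *hrest, int64_t adj, int64_t nslt)
 *	{
 *		hrest->tv_nsec += nslt;		// nsec since last tick
 *		if (adj != 0) {
 *			int64_t lim = nslt >> ADJ_SHIFT; // max adj per call
 *
 *			if (adj > 0)
 *				hrest->tv_nsec += (adj > lim) ? lim : adj;
 *			else
 *				hrest->tv_nsec += (adj < -lim) ? -lim : adj;
 *		}
 *		while (hrest->tv_nsec >= NANOSEC) {	// normalize
 *			hrest->tv_sec++;
 *			hrest->tv_nsec -= NANOSEC;
 *		}
 *	}
 */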
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \
	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
	srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
	ba	3f;			/* go convert to sec/nsec */	\
	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */ \
	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
	bl,pt	%xcc, 4f;		/* if not, we're done */	\
	nop;				/* delay: do nothing :( */	\
	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
	ba,a	3b;			/* check >= billion again */	\
4:

	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]
	retl
	stn	%o2, [%o0 + CLONGSIZE]
	SET_SIZE(gethrestime)

/*
 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 * seconds.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					! %o0 current hrestime seconds
	nop
	SET_SIZE(gethrestime_sec)

/*
 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 * and gethrestime_sec(): no conversion is required.  gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
	membar	#LoadLoad			! Load of lock must complete
	andn	%o2, 1, %o2			! Mask off lowest bit
	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
	add	%o1, %lo(hrestime), %o4
	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
	membar	#LoadLoad			! All loads must complete
	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
	cmp	%o3, %o2			! If lock is locked or has
	bne	0b				! changed, retry.
	stn	%g1, [%o0]			! Delay: store seconds
	retl
	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)
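
/*
 * Illustrative sketch (not assembled) of the lock protocol that
 * gethrestime_lasttick() above follows: snapshot hres_lock, mask off
 * the "locked" low bit, copy hrestime, then retry if the lock word was
 * held or changed underneath us.  (Writers acquire the low bit with
 * ldstub and release it by incrementing the word, so every update
 * changes the value observed here.)  The function name below is
 * hypothetical:
 *
 *	extern volatile int hres_lock;
 *	extern volatile timestruc_t hrestime;
 *
 *	void
 *	gethrestime_lasttick_sketch(timespec_t *tp)
 *	{
 *		int lock;
 *
 *		do {
 *			lock = hres_lock & ~1;	// wait-free snapshot
 *			// membar #LoadLoad on each side of these loads
 *			tp->tv_sec = hrestime.tv_sec;
 *			tp->tv_nsec = hrestime.tv_nsec;
 *		} while (hres_lock != lock);	// held or changed: retry
 *	}
 */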

/*
 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRESTIME trap.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)

/*
 * Fast trap to return lwp virtual time, uses trap window, leaves traps
 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *	%o0, %o1 = return lwp virtual time
 *	%o2 = CPU/thread
 *	%o3 = lwp
 *	%g1 = scratch
 *	%g5 = scratch
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)		! CPU struct ptr to %g2
	ldn	[%g2 + CPU_THREAD], %g2	! thread pointer to %g2
	ldn	[%g2 + T_LWP], %g3	! lwp pointer to %g3

	/*
	 * Subtract start time of current microstate from time
	 * of day to get increment for lwp virtual time.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add current value of ms_acct[LMS_USER]
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

	srl	%g5, 0, %o1		! %o1 = lo32(%g5)
	srlx	%g5, 32, %o0		! %o0 = hi32(%g5)

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)


	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! get a new window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
7:	tst	%l5
	bz,pt	%xcc, 8f		! if we got it, drive on
	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5
	bz,a,pn	%xcc, 7b
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
	ba,pt	%xcc, 9b
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
	stx	%l0, [%l4 + %lo(hres_last_tick)] ! prev = current
	! convert native time to nsecs
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1		! get accurate nsec delta

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0
	bg,pn	%xcc, 9f
	nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base

	!
	! apply adjustment, if any
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f
					! hrestime_adj == 0 ?
					! yes, skip adjustments
	clr	%l5			! delay: set adj to zero
	tst	%l0			! is hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f		! yes, go handle positive case
	srl	%i1, ADJ_SHIFT, %l5	! delay: %l5 = adj

	addcc	%l0, %l5, %g0		! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f		! yes, use current adj
	neg	%l5			! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	mov	%l0, %l5		! no, so set adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0		! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f		! yes, set adj = hrestime_adj
	mov	%l0, %l5		! delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0		! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2
	ldn	[%l2], %i2		! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3		! hrestime.nsec += adj
	add	%i3, %i1, %i3		! hrestime.nsec += nslt

	set	NANOSEC, %l5		! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f		! if hrestime.tv_nsec < NANOSEC
	sethi	%hi(one_sec), %i1	! delay
	add	%i2, 0x1, %i2		! hrestime.tv_sec++
	sub	%i3, %l5, %i3		! hrestime.tv_nsec -= NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]	! store the new hrestime

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1			! release lock
	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock

	ret
	restore

9:
	!
	! release hres_lock
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)

#endif	/* lint */
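
/*
 * Illustrative sketch (not assembled) of the accounting that
 * KSTAT_Q_UPDATE() below performs, shown here for the wait queue
 * (field names are from kstat_io(9S); the run-queue variants use
 * rcnt/rtime/rlentime/rlastupdate).  The helper signature is
 * hypothetical:
 *
 *	#include <sys/kstat.h>
 *
 *	void
 *	kstat_waitq_update(kstat_io_t *kiop, hrtime_t now, int delta)
 *	{
 *		uint_t qlen = kiop->wcnt;	// queue length before update
 *
 *		kiop->wcnt = qlen + delta;	// delta is +1 or -1
 *		if (qlen != 0) {		// stats accrue only while busy
 *			hrtime_t dt = now - kiop->wlastupdate;
 *
 *			kiop->wtime += dt;		// busy time
 *			kiop->wlentime += qlen * dt;	// integral of qlen dt
 *		}
 *		kiop->wlastupdate = now;
 *	}
 *
 * (Removing an entry from an empty queue is a bug; the assembly
 * branches to kstat_q_panic() in that case.)
 */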

#if !defined(lint) && !defined(__lint)

	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	or	%o0, %lo(kstat_q_panic_msg), %o0
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)

#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */

	.align	16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)

	.align	16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_exit)

	.align	16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)

	.align	16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_exit)

	.align	16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)

	.align	16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)

#endif	/* !(lint || __lint) */

#ifdef lint

int64_t timedelta;
hrtime_t hres_last_tick;
volatile timestruc_t hrestime;
int64_t hrestime_adj;
volatile int hres_lock;
uint_t nsec_scale;
hrtime_t hrtime_base;
int traptrace_use_stick;

#else	/* lint */
	/*
	 *  -- WARNING --
	 *
	 * The following variables MUST be together on a 128-byte boundary.
	 * In addition to the primary performance motivation (having them all
	 * on the same cache line(s)), code here and in the GET*TIME() macros
	 * assumes that they all have the same high 22 address bits (so
	 * there's only one sethi).
	 */
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift

	/* XXX - above comment claims 128-bytes is necessary */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* 2 longs */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0
nsec_scale:
	.word	0
hrtime_base:
	.word	0, 0
traptrace_use_stick:
	.word	0
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT

#endif	/* lint */


/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 * Delay by spinning.
 *
 * delay for n microseconds.  numbers <= 0 delay 1 usec
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 *
 * For OPL platforms that support the "sleep" instruction, we
 * conditionally (ifdef'ed) insert a "sleep" instruction in
 * the loop.  Note that theoretically we should have moved (duplicated)
 * the code down to the spitfire/us3/opl specific asm files - but that
 * is a lot of code duplication just to add one "sleep" instruction.
 * We chose less code duplication for this.
 */

#if defined(lint)

/*ARGSUSED*/
void
drv_usecwait(clock_t n)
{}

/*ARGSUSED*/
void
usec_delay(int n)
{}

#else	/* lint */
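
/*
 * Illustrative sketch (not assembled) of the spin loop below.
 * sticks_per_usec is the real kernel variable used here; rd_tick()
 * is a hypothetical stand-in for GET_NATIVE_TIME(), and the 32-bit
 * type is assumed from the lduw that loads the scale:
 *
 *	#include <stdint.h>
 *
 *	extern uint32_t sticks_per_usec;	// type assumed
 *	extern uint64_t rd_tick(void);		// hypothetical
 *
 *	void
 *	drv_usecwait(long n)
 *	{
 *		uint64_t target;
 *
 *		if (n <= 0)
 *			n = 1;			// minimum delay of 1 usec
 *		// +1 because we don't start on a tick edge
 *		target = rd_tick() + (uint64_t)n * sticks_per_usec + 1;
 *		while (rd_tick() <= target)
 *			;			// spin (optionally "sleep")
 *	}
 */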
	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f
	mov	1, %o0
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! Scale usec to ticks
	inc	%o1			! We don't start on a tick edge
	GET_NATIVE_TIME(%o2, %o3, %o4)
	add	%o1, %o2, %o1

1:
#ifdef	_OPL
	.word	0x81b01060		! insert "sleep" instruction
#endif	/* _OPL */			! use byte code for now
	cmp	%o1, %o2
	GET_NATIVE_TIME(%o2, %o3, %o4)
	bgeu,pt	%xcc, 1b
	nop
	retl
	nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)
#endif	/* lint */

#if defined(lint)

/* ARGSUSED */
void
pil14_interrupt(int level)
{}

#else	/* lint */

/*
 * Level-14 interrupt prologue.
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%pil, %g6			! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
1:	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
	SET_SIZE(pil14_interrupt)

	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled.  If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure.  We'll then check TICKINT
	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
	! interrupt.  In this case, TICK_COMPARE may have been rewritten
	! recently; we'll compare %o5 to the current time to verify that it's
	! in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	RD_TICKCMPR(%o5, %g1)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f
	nop

	rdpr	%pstate, %g5
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate	! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req	! preserves %o5 and %g5
	mov	PIL_14, %o0

	! Check SOFTINT for TICKINT/STICKINT
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f
	wrpr	%g0, %g5, %pstate	! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! Now that we've cleared TICKINT, we can reread %tick and confirm
	! that the value we programmed is still in the future.  If it isn't,
	! we need to reprogram TICK_COMPARE to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0, %g1, %g2)	! %o0 = tick
	sllx	%o0, 1, %o0		! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0		! In the future?
	bg,a,pt	%xcc, 2f		! Yes, drive on.
	wrpr	%g0, %g5, %pstate	! delay: enable vec intr

	!
	! If we're here, then we have programmed TICK_COMPARE with a %tick
	! which is in the past; we'll now load an initial step size, and loop
	! until we've managed to program TICK_COMPARE to fire in the future.
	!
	mov	8, %o4			! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5		! Add the step
	WR_TICKCMPR(%o5, %g1, %g2, __LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0, %g1, %g2)	! %o0 = tick
	sllx	%o0, 1, %o0		! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0		! In the future?
	bg,a,pt	%xcc, 2f		! Yes, drive on.
	wrpr	%g0, %g5, %pstate	! delay: enable vec intr
	ba	1b			! No, try again.
	sllx	%o4, 1, %o4		! delay: double step size

2:	ba	current_thread_complete
	nop
	SET_SIZE(tick_rtt)

#endif	/* lint */

#if defined(lint)

/* ARGSUSED */
void
pil15_interrupt(int level)
{}

#else	/* lint */

/*
 * Level-15 interrupt prologue.
 */
	ENTRY_NP(pil15_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_CPCPROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_CPCPROFILE_UPC]	! if not, record user PC
	ba	pil15_epilogue			! must be large-disp branch
	stn	%g0, [%g1 + CPU_CPCPROFILE_PC]	! zero kernel PC
1:	ba	pil15_epilogue			! must be large-disp branch
	stn	%g0, [%g1 + CPU_CPCPROFILE_UPC]	! zero user PC
	SET_SIZE(pil15_interrupt)

#endif	/* lint */

#if defined(lint) || defined(__lint)

/* ARGSUSED */
uint64_t
find_cpufrequency(volatile uchar_t *clock_ptr)
{
	return (0);
}

#else	/* lint */
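
/*
 * Illustrative sketch (not assembled) of the measurement below: poll
 * the TOD chip's seconds register (*clock_ptr) until it ticks, sample
 * the native counter, poll across one more full second, sample again,
 * and return the difference: native ticks per second.  rd_tick() is a
 * hypothetical stand-in for GET_NATIVE_TIME():
 *
 *	#include <stdint.h>
 *
 *	extern uint64_t rd_tick(void);		// hypothetical
 *
 *	uint64_t
 *	find_cpufrequency_sketch(volatile unsigned char *clock_ptr)
 *	{
 *		unsigned char s0, s1;
 *		uint64_t t0, t1;
 *
 *	again:
 *		s0 = *clock_ptr;
 *		do {				// wait for a seconds edge
 *			t0 = rd_tick();
 *		} while ((s1 = *clock_ptr) == s0);
 *		if (s1 == 0)			// minute rollover: the last
 *			goto again;		// second may be inaccurate
 *		do {				// one full second elapses
 *			t1 = rd_tick();
 *		} while ((s0 = *clock_ptr) == s1);
 *		if (s0 == 0)
 *			goto again;		// minute rollover; retry
 *		return (t1 - t0);		// native ticks per second
 *	}
 */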

#ifdef DEBUG
	.seg	".text"
find_cpufreq_panic:
	.asciz	"find_cpufrequency: interrupts already disabled on entry"
#endif	/* DEBUG */

	ENTRY_NP(find_cpufrequency)
	rdpr	%pstate, %g1

#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			! are currently enabled
	sethi	%hi(find_cpufreq_panic), %o1
	call	panic
	or	%o1, %lo(find_cpufreq_panic), %o0
#endif	/* DEBUG */

0:
	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
3:
	ldub	[%o0], %o1		! Read the number of seconds
	mov	%o1, %o2		! remember initial value in %o2
1:
	GET_NATIVE_TIME(%o3, %g4, %g5)
	cmp	%o1, %o2		! did the seconds register roll over?
	be,pt	%icc, 1b		! branch back if unchanged
	ldub	[%o0], %o2		! delay: load the new seconds val

	brz,pn	%o2, 3b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	mov	%o2, %o4		! delay: store init. val. in %o4
2:
	GET_NATIVE_TIME(%o5, %g4, %g5)
	cmp	%o2, %o4		! did the seconds register roll over?
	be,pt	%icc, 2b		! branch back if unchanged
	ldub	[%o0], %o4		! delay: load the new seconds val

	brz,pn	%o4, 0b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	wrpr	%g0, %g1, %pstate	! delay: re-enable interrupts

	retl
	sub	%o5, %o3, %o0		! return the difference in ticks
	SET_SIZE(find_cpufrequency)

#endif	/* lint */

#if defined(lint)
/*
 * Prefetch a page_t for write or read, this assumes a linear
 * scan of sequential page_t's.
 */
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{}

/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{}
#else	/* lint */

#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)
	!
	! On US-III, the prefetch instruction queue is 8 entries deep.
	! Also, prefetches for write put data in the E$, which has
	! lines of 512 bytes for an 8MB cache.  Each E$ line is further
	! subblocked into 64 byte chunks.
	!
	! Since prefetch can only bring in 64 bytes at a time (see SPARC
	! V9 Architecture Manual, p. 204) and a page_t is 128 bytes,
	! 2 prefetches are required in order to bring an entire
	! page into the E$.
	!
	! Since the prefetch queue is 8 entries deep, we currently can
	! only have 4 prefetches for page_t's outstanding.  Thus, we
	! prefetch n+4 ahead of where we are now:
	!
	!	4 * sizeof(page_t)      -> 512
	!	4 * sizeof(page_t) + 64 -> 576
	!
	! Example
	! =======
	! contiguous page array in memory...
	!
	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
	!  ^                                       ^    ^
	!  pp                                      |    pp + 4*sizeof(page)+64
	!                                          |
	!                                          pp + 4*sizeof(page)
	!
	!	Prefetch
	!	Queue
	!	+-------+<--- In this iteration, we're working with pp (AAA1),
	!	|Preftch|     but we enqueue prefetch for addr = XXX1
	!	| XXX1  |
	!	+-------+<--- this queue slot will be a prefetch instruction
	!	|Preftch|     for addr = pp + 4*sizeof(page_t) + 64 (or second
	!	| XXX2  |     half of page XXX)
	!	+-------+
	!	|Preftch|<-+- The next time around this function, we'll be
	!	| YYY1  |  |  working with pp = BBB1, but will be enqueueing
	!	+-------+  |  prefetches for both halves of page YYY,
	!	|Preftch|  |  while both halves of page XXX make their
	!	| YYY2  |<-+  way into the E$.
	!	+-------+
	!	|Preftch|
	!	| ZZZ1  |
	!	+-------+
	!	.       .
	!	:       :
	!
	!	E$
	!	+============================================...
	!	| XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
	!	+============================================...
	!	|      |      |      |      |      |      |
	!	+============================================...
	!	.
	!	:
	!
	! So we should expect the first four page accesses to stall
	! while we warm up the cache, after which most of the pages
	! will have their pp ready in the E$.
	!
	! Also note that if sizeof(page_t) grows beyond 128, then
	! we'll need an additional prefetch to get an entire page
	! into the E$, thus reducing the number of outstanding page
	! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
	! etc.
	!
	! Cheetah+
	! ========
	! On Cheetah+ we use "#n_write" prefetches, as these avoid an
	! unnecessary RTS->RTO bus transaction state change and just
	! issue an RTO transaction.  (See p. 77 of the Cheetah+ Delta
	! PRM.)  On Cheetah, #n_write prefetches are reflected with an
	! RTS->RTO state transition regardless.
	!
#define	STRIDE1	512
#define	STRIDE2	576

#if	STRIDE1 != (PAGE_SIZE * 4)
#error	"STRIDE1 != (PAGE_SIZE * 4)"
#endif	/* STRIDE1 != (PAGE_SIZE * 4) */

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! Note on CHEETAH to prefetch for read, we really use #one_write.
	! This fetches to E$ (general use) rather than P$ (floating point use).
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #one_write
	retl
	prefetch	[%o0+STRIDE2], #one_write
	SET_SIZE(prefetch_page_r)
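
	!
	! Illustrative calling pattern (C sketch, not assembled): a linear
	! scan that prefetches four pages ahead of the page_t it is
	! currently touching; process_page() is a hypothetical stand-in
	! for the caller's real work:
	!
	!	for (pp = first; pp < last; pp++) {
	!		prefetch_page_w(pp);	/- enqueues pp + 4 pages ahead
	!		process_page(pp);	/- pp itself was prefetched
	!					/- four iterations ago and
	!					/- should be in the E$ by now
	!	}
	!
	! ("/-" stands in for C comment markers, which can't nest here.)
	!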

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

	!
	! UltraSparcII can have up to 3 prefetches outstanding.
	! A page_t is 128 bytes (2 prefetches of 64 bytes each)
	! So prefetch for pp + 1, which is
	!
	!	pp + sizeof(page_t)
	! and
	!	pp + sizeof(page_t) + 64
	!
#define	STRIDE1	128
#define	STRIDE2	192

#if	STRIDE1 != PAGE_SIZE
#error	"STRIDE1 != PAGE_SIZE"
#endif	/* STRIDE1 != PAGE_SIZE */

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_reads
	retl
	prefetch	[%o0+STRIDE2], #n_reads
	SET_SIZE(prefetch_page_r)

#elif defined(OLYMPUS_C)
	!
	! Prefetch strides for Olympus-C
	!

#define	STRIDE1	0x440
#define	STRIDE2	0x640

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_r)
#else	/* OLYMPUS_C */

#error "You need to fix this for your new cpu type."

#endif	/* OLYMPUS_C */

#endif	/* lint */

#if defined(lint)
/*
 * Prefetch struct smap for write.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{}
#else	/* lint */

#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)

#define	PREFETCH_Q_LEN	8

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

#define	PREFETCH_Q_LEN	3

#elif defined(OLYMPUS_C)
	!
	! Use length of one for now.
	!
#define	PREFETCH_Q_LEN	1

#else	/* OLYMPUS_C */

#error	You need to fix this for your new cpu type.

#endif	/* OLYMPUS_C */

#include <vm/kpm.h>

#ifdef	SEGKPM_SUPPORT

#define	SMAP_SIZE	72
#define	SMAP_STRIDE	(((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
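
/*
 * Worked example of the stride arithmetic above (illustrative): with
 * PREFETCH_Q_LEN = 8 and SMAP_SIZE = 72, ((8 * 64) / 72) * 64 =
 * 7 * 64 = 448, i.e. the prefetch target is 448 bytes (about six
 * smaps) away in the scan direction, rounded down to a 64-byte
 * cache-line multiple.
 */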

#else	/* SEGKPM_SUPPORT */

	!
	! The hardware will prefetch the 64 byte cache aligned block
	! that contains the address specified in the prefetch instruction.
	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
	! per pass will suffice as long as we prefetch far enough ahead to
	! make sure we don't stall for the cases where the smap object
	! spans multiple hardware prefetch blocks.  Let's prefetch as far
	! ahead as the hardware will allow.
	!
	! The smap array is processed with decreasing address pointers.
	!
#define	SMAP_SIZE	48
#define	SMAP_STRIDE	(PREFETCH_Q_LEN * SMAP_SIZE)

#endif	/* SEGKPM_SUPPORT */

	ENTRY(prefetch_smap_w)
	retl
	prefetch	[%o0-SMAP_STRIDE], #n_writes
	SET_SIZE(prefetch_smap_w)

#endif	/* lint */

#if defined(lint) || defined(__lint)

/* ARGSUSED */
uint64_t
getidsr(void)
{ return (0); }

#else	/* lint */

	ENTRY_NP(getidsr)
	retl
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
	SET_SIZE(getidsr)

#endif	/* lint */