/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#if !defined(lint)
#include "assym.h"
#endif	/* !lint */

/*
 * General assembly language routines.
 * It is the intent of this file to contain routines that are
 * specific to cpu architecture.
 */

/*
 * WARNING: If you add a fast trap handler which can be invoked by a
 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
 * instead of the "done" instruction to return back to user mode.  See
 * the comments for the "fast_trap_done" entry point for more information.
 *
 * The macro expands to a single annulled branch-always, so no delay-slot
 * instruction is needed (or executed) after it.
 */
#define	FAST_TRAP_DONE	\
	ba,a	fast_trap_done

/*
 * Override GET_NATIVE_TIME for the cpu module code.  This is not
 * guaranteed to be exactly one instruction, be careful of using
 * the macro in delay slots.
 *
 * Do not use any instruction that modifies condition codes as the
 * caller may depend on these to remain unchanged across the macro.
*/
#if defined(CHEETAH) || defined(OLYMPUS_C)

/*
 * Cheetah / Olympus-C: the native time base is read and written directly
 * through the STICK and STICK_COMPARE registers.  The scratch-register
 * and label arguments are unused here; they exist so that every variant
 * of these macros takes the same argument list.
 */
#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rd	STICK, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rd	STICK, reg;		\
	add	reg, delta, reg;	\
	wr	reg, STICK
#define	RD_TICKCMPR(out, scr)		\
	rd	STICK_COMPARE, out
#define	WR_TICKCMPR(in, scr1, scr2, label) \
	wr	in, STICK_COMPARE

#elif defined(HUMMINGBIRD)
#include <sys/spitregs.h>

/*
 * the current hummingbird version of %stick and %stick_cmp
 * were both implemented as (2) 32-bit locations in ASI_IO space;
 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
 *
 * 64-bit opcodes are required, but move only 32-bits:
 *
 * ldxa [phys]ASI_IO, %dst	reads  the low 32-bits from phys into %dst
 * stxa %src, [phys]ASI_IO	writes the low 32-bits from %src into phys
 *
 * reg equivalent		[phys]ASI_IO
 * ------------------		---------------
 * %stick_cmp  low-32		0x1FE.0000.F060
 * %stick_cmp high-32		0x1FE.0000.F068
 * %stick      low-32		0x1FE.0000.F070
 * %stick     high-32		0x1FE.0000.F078
 */
#define	HSTC_LOW	0x60			/* stick_cmp low 32-bits */
#define	HSTC_HIGH	0x68			/* stick_cmp high 32-bits */
#define	HST_LOW		0x70			/* stick low 32-bits */
#define	HST_HIGH	0x78			/* stick high 32-bits */
#define	HST_DIFF	0x08			/* low<-->high diff */

/*
 * SETL41(reg, byte) builds the 41-bit physical address 0x1FE.0000.F0xx
 * in reg, where xx is one of the HST*_LOW / HST*_HIGH offsets above.
 *
 * Any change in the number of instructions in SETL41()
 * will affect SETL41_OFF
 */
#define	SETL41(reg, byte) \
	sethi	%hi(0x1FE00000), reg;		/* 0000.0000.1FE0.0000 */ \
	or	reg, 0xF, reg;			/* 0000.0000.1FE0.000F */ \
	sllx	reg, 12, reg;			/* 0000.01FE.0000.F000 */ \
	or	reg, byte, reg;			/* 0000.01FE.0000.F0xx */

/*
 * SETL41_OFF is used to calculate the relative PC value when a
 * branch instruction needs to branch back over the SETL41() macro.
 * It is the size of SETL41() in bytes: 4 instructions of 4 bytes each.
 */
#define	SETL41_OFF	16

/*
 * reading stick requires 2 loads, and there could be an intervening
 * low-to-high 32-bit rollover resulting in a return value that is
 * off by
 * about (2 ^ 32); this rare case is prevented by re-reading
 * the low-32 bits after the high-32 and verifying the "after" value
 * is >= the "before" value; if not, the whole value is read again
 * (see below).
 *
 * this method is limited to 1 rollover, and based on the fixed
 * stick-frequency (5555555), requires the loads to complete within
 * 773 seconds; incrementing the high-32 value will not overflow for
 * about 52644 years.
 *
 * writing stick requires 2 stores; if the old/new low-32 value is
 * near 0xffffffff, there could be another rollover (also rare).
 * to prevent this, we first write a 0 to the low-32, then write
 * new values to the high-32 then the low-32.
 *
 * When we detect a carry in the lower %stick register, we need to
 * read HST_HIGH again.  However, at the point where we detect this,
 * we need to rebuild the register address HST_HIGH.  This involves more
 * than one instruction and a branch is unavoidable.  However, most of
 * the time, there is no carry.  So we take the penalty of a branch
 * instruction only when there is a carry (less frequent).
 *
 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
 * addr already points to HST_LOW.
 *
 * NOTE: this method requires disabling interrupts before using
 * DELTA_NATIVE_TIME.
*/

/*
 * GET_NATIVE_TIME(out, scr, tmp) -- Hummingbird.
 * Reads low-32 (into tmp), high-32 (into out), then low-32 again (into
 * scr).  If the second low-32 value is smaller than the first, the
 * brlz branches back to the first instruction of SETL41() and the whole
 * read is retried; the branch is the 7th instruction after SETL41(),
 * hence the displacement of SETL41_OFF+24 bytes.  The sllx sits in the
 * branch delay slot and is harmless on the retry path because out is
 * reloaded.  Result: out = (high << 32) | low.  No condition codes are
 * modified.
 */
#define	GET_NATIVE_TIME(out, scr, tmp)	\
	SETL41(scr, HST_LOW);		\
	ldxa	[scr]ASI_IO, tmp;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, out;	\
	dec	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sub	scr, tmp, tmp;		\
	brlz,pn tmp, .-(SETL41_OFF+24); \
	sllx	out, 32, out;		\
	or	out, scr, out

/*
 * DELTA_NATIVE_TIME(delta, addr, high, low, tmp) -- Hummingbird.
 * Reads %stick as above (retrying from the first ldxa, 24 bytes back,
 * since addr still points at HST_LOW), adds delta, then writes the
 * result back: 0 to low-32 first, then the new high-32, then the new
 * low-32.  Interrupts must be disabled by the caller.
 */
#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
	SETL41(addr, HST_LOW);		\
	ldxa	[addr]ASI_IO, tmp;	\
	inc	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, high;	\
	dec	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, low;	\
	sub	low, tmp, tmp;		\
	brlz,pn tmp, .-24;		\
	sllx	high, 32, high;		\
	or	high, low, high;	\
	add	high, delta, high;	\
	srl	high, 0, low;		\
	srlx	high, 32, high;		\
	stxa	%g0, [addr]ASI_IO;	\
	inc	HST_DIFF, addr;		\
	stxa	high, [addr]ASI_IO;	\
	dec	HST_DIFF, addr;		\
	stxa	low, [addr]ASI_IO

/*
 * RD_TICKCMPR(out, scr) -- Hummingbird.
 * Reads the low-32 then the high-32 half of %stick_cmp and combines them
 * into out = (high << 32) | low.  scr is clobbered.
 */
#define	RD_TICKCMPR(out, scr)		\
	SETL41(scr, HSTC_LOW);		\
	ldxa	[scr]ASI_IO, out;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sllx	scr, 32, scr;		\
	or	scr, out, out

/*
 * WR_TICKCMPR(in, scra, scrd, label) -- Hummingbird.
 * Writes the high-32 half of in to %stick_cmp first, then the low-32
 * half.  scra and scrd are clobbered; label is unused in this variant.
 */
#define	WR_TICKCMPR(in, scra, scrd, label) \
	SETL41(scra, HSTC_HIGH);	\
	srlx	in, 32, scrd;		\
	stxa	scrd, [scra]ASI_IO;	\
	dec	HST_DIFF, scra;		\
	stxa	in, [scra]ASI_IO

#else	/* !CHEETAH && !OLYMPUS_C && !HUMMINGBIRD */

/*
 * Default variant: the native time base is %tick (privileged register,
 * accessed with rdpr/wrpr) and the compare register is TICK_COMPARE.
 * The scratch-register arguments are unused.
 */
#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rdpr	%tick, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rdpr	%tick, reg;		\
	add	reg, delta, reg;	\
	wrpr	reg, %tick
#define	RD_TICKCMPR(out, scr)		\
	rd	TICK_COMPARE, out
#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
/*
 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
 * The failure occurs only when the following instruction decodes to wr or
 * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
 * with a read, thus stalling the pipe and keeping following instructions
 * from causing data corruption.
* Aligning to a quadword will ensure these
 * two instructions are not split due to i$ misses.
 *
 * The label argument must be unique per use (callers pass __LINE__) since
 * it is pasted into the local branch target name.
 */
#define WR_TICKCMPR(cmpr,scr1,scr2,label)	\
	ba,a	.bb_errata_1.label		;\
	.align	64				;\
.bb_errata_1.label:				;\
	wr	cmpr, TICK_COMPARE		;\
	rd	TICK_COMPARE, %g0
#else	/* BB_ERRATA_1 */
#define	WR_TICKCMPR(in,scr1,scr2,label)		\
	wr	in, TICK_COMPARE
#endif	/* BB_ERRATA_1 */

#endif	/* !CHEETAH && !OLYMPUS_C && !HUMMINGBIRD */

#include <sys/clock.h>

#if defined(lint)
#include <sys/types.h>
#include <sys/scb.h>
#include <sys/systm.h>
#include <sys/regset.h>
#include <sys/sunddi.h>
#include <sys/lockstat.h>
#endif	/* lint */


#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/intreg.h>
#include <sys/psr_compat.h>
#include <sys/isa_defs.h>
#include <sys/dditypes.h>
#include <sys/intr.h>

#if !defined(lint)
#include "assym.h"
#endif	/* !lint */

/*
 * get_impl() returns whatever GET_CPU_IMPL() leaves in %o0
 * (presumably the CPU implementation number -- see GET_CPU_IMPL).
 */
#if defined(lint)

uint_t
get_impl(void)
{ return (0); }

#else	/* lint */

	ENTRY(get_impl)
	GET_CPU_IMPL(%o0)
	retl
	nop
	SET_SIZE(get_impl)

#endif	/* lint */

#if defined(lint)
/*
 * Softint generated when counter field of tick reg matches value field
 * of tick_cmpr reg
 *
 * tickcmpr_set() programs the compare register with clock_cycles and then
 * re-reads the native time (with bit 63 cleared).  If the programmed value
 * is already in the past, it is replaced by "now + step", with the step
 * doubling on every lap, until the programmed value is in the future.
 */
/*ARGSUSED*/
void
tickcmpr_set(uint64_t clock_cycles)
{}

#else	/* lint */

	ENTRY_NP(tickcmpr_set)
	! get 64-bit clock_cycles interval
	mov	%o0, %o2
	mov	8, %o3			! A reasonable initial step size
1:
	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR

	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
	sllx	%o0, 1, %o0		!   value we wrote was in the future.
	srlx	%o0, 1, %o0

	cmp	%o2, %o0		! If the value we wrote was in the
	bg,pt	%xcc, 2f		!   future, then blow out of here.
	sllx	%o3, 1, %o3		! If not, then double our step size,
	ba,pt	%xcc, 1b		!   and take another lap.
	add	%o0, %o3, %o2		!
2:
	retl
	nop
	SET_SIZE(tickcmpr_set)

#endif	/* lint */

/*
 * tickcmpr_disable() writes a value with only bit TICKINT_DIS_SHFT set
 * to the compare register.
 */
#if defined(lint)

void
tickcmpr_disable(void)
{}

#else	/* lint */

	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0
	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
	retl
	nop
	SET_SIZE(tickcmpr_disable)

#endif	/* lint */

#if defined(lint)

/*
 * tick_write_delta() increments %tick by the specified delta.  This should
 * only be called after a CPR event to assure that gethrtime() continues to
 * increase monotonically.  Obviously, writing %tick needs to be done very
 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
 * this reason, we make sure we're i-cache hot before actually writing to
 * %tick.
 *
 * Interrupts are disabled around the read/modify/write and the caller's
 * %pstate is restored on return.  On DEBUG kernels the routine panics if
 * interrupts are already disabled on entry.
 */
/*ARGSUSED*/
void
tick_write_delta(uint64_t delta)
{}

#else	/* lint */

#ifdef DEBUG
	.seg	".text"
tick_write_panic:
	.asciz	"tick_write_delta: interrupts already disabled on entry"
#endif	/* DEBUG */

	ENTRY_NP(tick_write_delta)
	rdpr	%pstate, %g1
#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			! aren't already disabled.
	sethi	%hi(tick_write_panic), %o1
	save	%sp, -SA(MINFRAME), %sp	! get a new window to preserve caller
	call	panic
	or	%i1, %lo(tick_write_panic), %o0	! %i1 is the pre-save %o1
#endif	/* DEBUG */
0:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
	mov	%o0, %o2
	ba	0f			! Branch to cache line-aligned instr.
	nop
	.align	16
0:	nop				! The next 3 instructions are now hot.
	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick

	retl				! Return
	wrpr	%g0, %g1, %pstate	!     delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)
#endif	/* lint */

#if defined(lint)
/*
 * return 1 if disabled
 */

int
tickcmpr_disabled(void)
{ return (0); }

#else	/* lint */

	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1, %o0)
	retl
	srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: return the disable bit
	SET_SIZE(tickcmpr_disabled)

#endif	/* lint */

/*
 * Get current tick
 */
#if defined(lint)

u_longlong_t
gettick(void)
{ return (0); }

#else	/* lint */

	ENTRY(gettick)
	GET_NATIVE_TIME(%o0, %o2, %o3)
	retl
	nop
	SET_SIZE(gettick)

#endif	/* lint */


/*
 * Return the counter portion of the tick register.
 */

#if defined(lint)

uint64_t
gettick_counter(void)
{ return(0); }

#else	/* lint */

	ENTRY_NP(gettick_counter)
	rdpr	%tick, %o0
	sllx	%o0, 1, %o0
	retl
	srlx	%o0, 1, %o0		! shake off npt bit
	SET_SIZE(gettick_counter)
#endif	/* lint */

/*
 * Provide a C callable interface to the trap that reads the hi-res timer.
 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
 */

#if defined(lint)

hrtime_t
gethrtime(void)
{
	return ((hrtime_t)0);
}

hrtime_t
gethrtime_unscaled(void)
{
	return ((hrtime_t)0);
}

hrtime_t
gethrtime_max(void)
{
	return ((hrtime_t)0);
}

void
scalehrtime(hrtime_t *hrt)
{
	*hrt = 0;
}

void
gethrestime(timespec_t *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = 0;
}

time_t
gethrestime_sec(void)
{
	return (0);
}

void
gethrestime_lasttick(timespec_t *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = 0;
}

/*ARGSUSED*/
void
hres_tick(void)
{
}

void
panic_hres_tick(void)
{
}

#else	/* lint */

	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
							! %g1 = hrtime
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime)

	/* Returns the raw native time, without conversion to nanoseconds. */
	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_unscaled)

	/*
	 * Reads the native time and scales it with NATIVE_TIME_TO_NSEC();
	 * unlike gethrtime() it does not go through GET_HRTIME().
	 */
	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
	retl
	mov	%g1, %o0
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)

	/*
	 * Returns NATIVE_TIME_MAX scaled to nanoseconds, clamped to the
	 * largest positive 64-bit value if the scaled result is negative.
	 */
	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	! hrtime_t's are signed, max hrtime_t must be positive
	mov	-1, %o2
	brlz,a	%g1, 1f
	srlx	%o2, 1, %g1		! delay (annulled unless negative)
1:
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_max)

	/* Converts the native-time value at *hrt to nanoseconds in place. */
	ENTRY(scalehrtime)
	ldx	[%o0], %o1
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	stx	%o1, [%o0]
	SET_SIZE(scalehrtime)

/*
 * Fast trap to return a timestamp, uses trap window, leaves traps
 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRTIME trap.
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)

/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
*
 * Arguments (all registers):
 *	hrestsec	in/out: hrestime seconds
 *	hrestnsec	in/out: hrestime nanoseconds
 *	adj		hrestime_adj (clobbered)
 *	nslt		nanoseconds since the last tick (clobbered)
 *	nano		NANOSEC
 *
 * nslt is added to hrestnsec, then an adjustment limited in magnitude to
 * nslt >> ADJ_SHIFT (and to adj itself) is applied, and finally one
 * second is carried out of hrestnsec if it has reached nano.
 * Uses local labels 2, 3 and 4 and modifies the condition codes.
 */
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
	srl	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
	ba	3f;			/* go convert to sec/nsec */	\
	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
	bl,pt	%xcc, 4f;		/* if not, we're done */	\
	nop;				/* delay: do nothing :( */	\
	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
4:

/*
 * gethrestime(tp): stores the current hrestime seconds at [tp] and
 * nanoseconds at [tp + CLONGSIZE].
 */
	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]
	retl
	stn	%o2, [%o0 + CLONGSIZE]		! delay: store nanoseconds
	SET_SIZE(gethrestime)

/*
 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 * seconds.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					! %o0 current hrestime seconds
	nop
	SET_SIZE(gethrestime_sec)

/*
 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h.
* (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
	membar	#LoadLoad			! Load of lock must complete
	andn	%o2, 1, %o2			! Mask off lowest bit
	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
	add	%o1, %lo(hrestime), %o4
	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
	membar	#LoadLoad			! All loads must complete
	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
	cmp	%o3, %o2			! If lock is locked or has
	bne	0b				!   changed, retry.
	stn	%g1, [%o0]			! Delay: store seconds
	retl
	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)

/*
 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRESTIME trap.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)

/*
 * Fast trap to return lwp virtual time, uses trap window, leaves traps
 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *	%o0, %o1 = return lwp virtual time
 *	%o2 = CPU/thread
 *	%o3 = lwp
 *	%g1 = scratch
 *	%g5 = scratch
 *
 * NOTE(review): the code below keeps the CPU/thread pointer in %g2 and
 * the lwp pointer in %g3, not %o2/%o3 as listed above -- confirm and
 * update the table.
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3

	/*
	 * Subtract start time of current microstate from time
	 * of day to get increment for lwp virtual time.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add current value of ms_acct[LMS_USER]
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)



	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


/*
 * hres_tick(): takes hres_lock, records the current native time in
 * hres_last_tick, advances hrtime_base to the current time in
 * nanoseconds, applies a bounded slice of hrestime_adj to hrestime
 * (updating timedelta and hrestime_adj), carries nanoseconds into seconds
 * (setting one_sec when that happens), and releases the lock by
 * incrementing hres_lock.  Panics if the newly computed time is behind
 * hrtime_base.
 */
	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! get a new window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
7:	tst	%l5
	bz,pt	%xcc, 8f			! if we got it, drive on
	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5				! spin reading until lock looks free
	bz,a,pn	%xcc, 7b
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
	ba,pt	%xcc, 9b
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
	! convert native time to nsecs
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1			! get accurate nsec delta

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0
	bg,pn	%xcc, 9f			! time went backwards: panic
	nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base

	!
	! apply adjustment, if any
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f
						! hrestime_adj == 0 ?
						! yes, skip adjustments
	clr	%l5				! delay: set adj to zero
	tst	%l0				! is hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f			! yes, go handle positive case
	srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj

	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f			! yes, use current adj
	neg	%l5				! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	mov	%l0, %l5			! no, so set adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
	mov	%l0, %l5			! delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0			! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2
	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3			! hrestime.nsec += adj
	add	%i3, %i1, %i3			! hrestime.nsec += nslt

	set	NANOSEC, %l5			! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
	sethi	%hi(one_sec), %i1		! delay
	add	%i2, 0x1, %i2			! hrestime.tv_sec++
	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1				! release lock
	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock

	ret
	restore

9:
	!
	! release hres_lock
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)

#endif	/* lint */

#if !defined(lint) && !defined(__lint)

	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	/* Panic target used by the queue-exit paths when the old qlen is 0. */
	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	or	%o0, %lo(kstat_q_panic_msg), %o0
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)

#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 * Updates one queue's statistics in the structure addressed by %o0, using
 * the current native time in %g1.  Clobbers %o1-%o5.
 *
 *	QOP	"add" or "sub": applied to the old queue length
 *	QBR	branch used to test the old queue length for zero
 *	QZERO	branch target taken when the old queue length is zero
 *	QRETURN	text emitted just before the final store, e.g. "1:retl"
 *		so that the store lands in the delay slot of the return
 *	QTYPE	field-offset prefix; it is pasted onto CNT, LASTUPDATE,
 *		TIME and LENTIME with an empty comment, which must stay
 *		exactly as written
 *
 * When the old length is non-zero, the time since LASTUPDATE is added to
 * TIME and (old length * that time) is added to LENTIME.
 */
#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */

	/* Wait queue: one more entry. */
	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)

	/* Wait queue: one entry fewer; panics if the queue was empty. */
	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_exit)

	/* Run queue: one more entry. */
	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)

	/* Run queue: one entry fewer; panics if the queue was empty. */
	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_exit)

	/* Move one entry from the wait queue to the run queue. */
	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)

	/* Move one entry from the run queue back to the wait queue. */
	.align 16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)

#endif	/* !(lint || __lint) */

#ifdef lint

int64_t timedelta;
hrtime_t hres_last_tick;
timestruc_t hrestime;
int64_t hrestime_adj;
int hres_lock;
uint_t nsec_scale;
hrtime_t hrtime_base;
int traptrace_use_stick;

#else	/* lint */
	/*
	 *  -- WARNING --
	 *
	 * The following variables MUST be together on a 128-byte boundary.
	 * In addition to the primary performance motivation (having them all
	 * on the same cache line(s)), code here and in the GET*TIME() macros
	 * assumes that they all have the same high 22 address bits (so
	 * there's only one sethi).
*/
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift

	/* XXX - above comment claims 128-bytes is necessary */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* 2 longs */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0
nsec_scale:
	.word	0
hrtime_base:
	.word	0, 0
traptrace_use_stick:
	.word	0
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT

#endif	/* lint */


/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 * Delay by spinning.
 *
 * delay for n microseconds.  numbers <= 0 delay 1 usec
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 *
 * For OPL platforms that support the "sleep" instruction, we
 * conditionally (ifdef'ed) insert a "sleep" instruction in
 * the loop.  Note that theoretically we should have moved (duplicated)
 * the code down to spitfire/us3/opl specific asm files - but this
 * is a lot of code duplication just to add one "sleep" instruction.
 * We chose less code duplication for this.
 *
 * Note that the loop issues its cmp before GET_NATIVE_TIME() and
 * branches on the result afterwards, so it relies on GET_NATIVE_TIME()
 * leaving the condition codes untouched.
 */

#if defined(lint)

/*ARGSUSED*/
void
drv_usecwait(clock_t n)
{}

/*ARGSUSED*/
void
usec_delay(int n)
{}

#else	/* lint */

	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f
	mov	1, %o0			! delay (annulled unless n <= 0)
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! Scale usec to ticks
	inc	%o1			! We don't start on a tick edge
	GET_NATIVE_TIME(%o2, %o3, %o4)
	add	%o1, %o2, %o1		! %o1 = time at which to stop

1:
#ifdef	_OPL
	.word 0x81b01060		! insert "sleep" instruction
#endif /* _OPL */			! use byte code for now
	cmp	%o1, %o2
	GET_NATIVE_TIME(%o2, %o3, %o4)
	bgeu,pt	%xcc, 1b
	nop
	retl
	nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)
#endif	/* lint */

#if defined(lint)

/* ARGSUSED */
void
pil14_interrupt(int level)
{}

#else	/* lint */

/*
 * Level-14 interrupt prologue.
 *
 * Records the interrupted PIL and the trapped PC in the CPU structure
 * (as the kernel PC if the trap came from privileged mode, otherwise as
 * the user PC, zeroing the other one) and continues in
 * pil_interrupt_common.
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%pil, %g6			! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
1:	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
	SET_SIZE(pil14_interrupt)

	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled.  If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure.  We'll then check TICKINT
	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
	! interrupt.  In this case, TICK_COMPARE may have been rewritten
	! recently; we'll compare %o5 to the current time to verify that it's
	! in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	RD_TICKCMPR(%o5, %g1)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f
	nop

	rdpr	%pstate, %g5
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate		! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ld	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req ! preserves %o5 and %g5
	mov	PIL_14, %o0

	! Check SOFTINT for TICKINT/STICKINT
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f
	wrpr	%g0, %g5, %pstate		! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! Now that we've cleared TICKINT, we can reread %tick and confirm
	! that the value we programmed is still in the future.  If it isn't,
	! we need to reprogram TICK_COMPARE to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
	sllx	%o0, 1, %o0			! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	wrpr	%g0, %g5, %pstate		!   delay: enable vec intr

	!
	! If we're here, then we have programmed TICK_COMPARE with a %tick
	! which is in the past; we'll now load an initial step size, and loop
	! until we've managed to program TICK_COMPARE to fire in the future.
	!
	mov	8, %o4				! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5			! Add the step
	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
	sllx	%o0, 1, %o0			! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
	ba	1b				! No, try again.
	sllx	%o4, 1, %o4			!    delay: double step size

2:	ba	current_thread_complete
	nop
	SET_SIZE(tick_rtt)

#endif	/* lint */

/*
 * find_cpufrequency(clock_ptr) returns the number of native-time ticks
 * that elapse between two successive changes of the byte at *clock_ptr
 * (the seconds register of a clock, per the comments below).  The
 * measurement is restarted whenever the newly read value is zero.
 * Interrupts are disabled while measuring and the caller's %pstate is
 * restored before returning; on DEBUG kernels the routine panics if
 * interrupts are already disabled on entry.
 */
#if defined(lint) || defined(__lint)

/* ARGSUSED */
uint64_t
find_cpufrequency(volatile uchar_t *clock_ptr)
{
	return (0);
}

#else	/* lint */

#ifdef DEBUG
	.seg	".text"
find_cpufreq_panic:
	.asciz	"find_cpufrequency: interrupts already disabled on entry"
#endif	/* DEBUG */

	ENTRY_NP(find_cpufrequency)
	rdpr	%pstate, %g1

#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			! are currently enabled
	sethi	%hi(find_cpufreq_panic), %o1
	call	panic
	or	%o1, %lo(find_cpufreq_panic), %o0
#endif	/* DEBUG */

0:
	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
3:
	ldub	[%o0], %o1		! Read the number of seconds
	mov	%o1, %o2		! remember initial value in %o2
1:
	GET_NATIVE_TIME(%o3, %g4, %g5)
	cmp	%o1, %o2		! did the seconds register roll over?
	be,pt	%icc, 1b		! branch back if unchanged
	ldub	[%o0], %o2		!   delay: load the new seconds val

	brz,pn	%o2, 3b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	mov	%o2, %o4		!   delay: store init. val. in %o4
2:
	GET_NATIVE_TIME(%o5, %g4, %g5)
	cmp	%o2, %o4		! did the seconds register roll over?
	be,pt	%icc, 2b		! branch back if unchanged
	ldub	[%o0], %o4		!   delay: load the new seconds val

	brz,pn	%o4, 0b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	wrpr	%g0, %g1, %pstate	!   delay: re-enable interrupts

	retl
	sub	%o5, %o3, %o0		! return the difference in ticks
	SET_SIZE(find_cpufrequency)

#endif	/* lint */

#if defined(lint)
/*
 * Prefetch a page_t for write or read, this assumes a linear
 * scan of sequential page_t's.
*/
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{
}

/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
}
#else	/* lint */

#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)
	!
	! On US-III, the prefetch instruction queue is 8 entries deep.
	! Also, prefetches for write put data in the E$, which has
	! lines of 512 bytes for an 8MB cache. Each E$ line is further
	! subblocked into 64 byte chunks.
	!
	! Since a prefetch can only bring in 64 bytes at a time (See Sparc
	! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
	! 2 prefetches are required in order to bring an entire
	! page_t into the E$.
	!
	! Since the prefetch queue is 8 entries deep, we can currently
	! only keep prefetches for 4 page_t structures outstanding.
	! Thus, we prefetch n+4 ahead of where we are now:
	!
	!      4 * sizeof(page_t)     -> 512
	!      4 * sizeof(page_t) +64 -> 576
	!
	! Example
	! =======
	! contiguous page array in memory...
	!
	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
	! ^         ^         ^         ^         ^    ^
	! pp                                      |    pp+4*sizeof(page)+64
	!                                         |
	!                                         pp+4*sizeof(page)
	!
	!  Prefetch
	!   Queue
	! +-------+<--- In this iteration, we are working with pp (AAA1),
	! |Preftch|     but we enqueue a prefetch for addr = XXX1
	! | XXX1  |
	! +-------+<--- this queue slot will hold a prefetch instruction
	! |Preftch|     for addr = pp + 4*sizeof(page_t) + 64 (the second
	! | XXX2  |     half of page XXX)
	! +-------+
	! |Preftch|<-+- The next time around this function, we will be
	! | YYY1  |  |  working with pp = BBB1, but will be enqueueing
	! +-------+  |  prefetches for both halves of page YYY,
	! |Preftch|  |  while both halves of page XXX are in transit,
	! | YYY2  |<-+  making their way into the E$.
	! +-------+
	! |Preftch|
	! | ZZZ1  |
	! +-------+
	! .       .
	! :       :
	!
	!  E$
	! +============================================...
	! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
	! +============================================...
	! |      |      |      |      |      |      |
	! +============================================...
	! .
	! :
	!
	! So we should expect the first four page accesses to stall
	! while we warm up the cache, after which most of the pages
	! will have their pp ready in the E$.
	!
	! Also note that if sizeof(page_t) grows beyond 128, then
	! we will need an additional prefetch to get an entire page_t
	! into the E$, thus reducing the number of outstanding page
	! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
	! etc.
	!
	! Cheetah+
	! ========
	! On Cheetah+ we use "#n_writes" prefetches as these avoid an
	! unnecessary RTS->RTO bus transaction state change, and
	! just issue an RTO transaction. (See pp.77 of Cheetah+ Delta
	! PRM). On Cheetah, #n_writes prefetches are reflected with
	! RTS->RTO state transition regardless.
	!
#define	STRIDE1 512
#define	STRIDE2 576

	! The strides above are only right while four page_t fit in STRIDE1.
#if	STRIDE1 != (PAGE_SIZE * 4)
#error	"STRIDE1 != (PAGE_SIZE * 4)"
#endif	/* STRIDE1 != (PAGE_SIZE * 4) */

	!
	! prefetch_page_w(pp): queue #n_writes prefetches for the two
	! 64-byte halves at pp + STRIDE1 and pp + STRIDE2 (pp is in %o0).
	! The second prefetch sits in the delay slot of the return.
	!
	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! prefetch_page_r(pp): same addresses as prefetch_page_w.
	!
	! Note that on CHEETAH, to prefetch for read, we really use
	! #one_write.  This fetches to E$ (general use) rather than P$
	! (floating point use).
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #one_write
	retl
	prefetch	[%o0+STRIDE2], #one_write
	SET_SIZE(prefetch_page_r)

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

	!
	! UltraSparcII can have up to 3 prefetches outstanding.
	! A page_t is 128 bytes (2 prefetches of 64 bytes each)
	! So prefetch for pp + 1, which is
	!
	!	pp + sizeof(page_t)
! and
	!	pp + sizeof(page_t) + 64
	!
#define	STRIDE1	128
#define	STRIDE2	192

	! The strides above are only right while one page_t fills STRIDE1.
#if	STRIDE1 != PAGE_SIZE
#error	"STRIDE1 != PAGE_SIZE"
#endif	/* STRIDE1 != PAGE_SIZE */

	!
	! prefetch_page_w(pp): queue #n_writes prefetches for pp + STRIDE1
	! and pp + STRIDE2 (pp is in %o0); the second one sits in the delay
	! slot of the return.
	!
	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! prefetch_page_r(pp): same addresses, using #n_reads prefetches.
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_reads
	retl
	prefetch	[%o0+STRIDE2], #n_reads
	SET_SIZE(prefetch_page_r)

#elif defined(OLYMPUS_C)
	!
	! Prefetch strides for Olympus-C
	!

#define	STRIDE1	512
#define	STRIDE2	640

	!
	! prefetch_page_w(pp): queue #n_writes prefetches for pp + STRIDE1
	! and pp + STRIDE2 (pp is in %o0).
	!
	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! prefetch_page_r(pp): identical instruction sequence to the write
	! variant above.
	! NOTE(review): the read variant issues #n_writes here, unlike the
	! other cpu types - confirm this is intended.
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_r)
#else	/* OLYMPUS_C */

#error "You need to fix this for your new cpu type."

#endif	/* OLYMPUS_C */

#endif	/* lint */

#if defined(lint)
/*
 * Prefetch struct smap for write.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
}
#else	/* lint */

	!
	! PREFETCH_Q_LEN is the per-cpu-type queue length that SMAP_STRIDE
	! is derived from below.
	!
#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)

#define	PREFETCH_Q_LEN 8

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

#define	PREFETCH_Q_LEN 3

#elif defined(OLYMPUS_C)
	!
	! (TBD) Use length of one for now.
#define	PREFETCH_Q_LEN	1

#else	/* OLYMPUS_C */

#error You need to fix this for your new cpu type.

#endif	/* OLYMPUS_C */

#include <vm/kpm.h>

#if defined(SEGKPM_SUPPORT)

#define	SMAP_SIZE	72
#define	SMAP_STRIDE	(((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)

#else	/* SEGKPM_SUPPORT */

	!
	! The hardware will prefetch the 64 byte cache aligned block
! that contains the address specified in the prefetch instruction.
	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
	! per pass will suffice as long as we prefetch far enough ahead to
	! make sure we do not stall for the cases where the smap object
	! spans multiple hardware prefetch blocks.  So prefetch as far
	! ahead as the hardware will allow.
	!
	! The smap array is processed with decreasing address pointers.
	!
#define	SMAP_SIZE	48
#define	SMAP_STRIDE	(PREFETCH_Q_LEN * SMAP_SIZE)

#endif	/* SEGKPM_SUPPORT */

	!
	! prefetch_smap_w(smp): issue one #n_writes prefetch SMAP_STRIDE
	! bytes below smp (%o0).  The prefetch sits in the delay slot of
	! the return.
	!
	ENTRY(prefetch_smap_w)
	retl
	prefetch [%o0-SMAP_STRIDE], #n_writes
	SET_SIZE(prefetch_smap_w)

#endif	/* lint */

#if defined(lint) || defined(__lint)

/*
 * Lint-only stand-in; the real routine is the assembly version below.
 */
/* ARGSUSED */
uint64_t
getidsr(void)
{
	return (0);
}

#else	/* lint */

	!
	! getidsr(): return the 64-bit value loaded from address 0 (%g0)
	! in ASI_INTR_DISPATCH_STATUS.  The load sits in the delay slot of
	! the return and lands directly in %o0.
	!
	ENTRY_NP(getidsr)
	retl
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
	SET_SIZE(getidsr)

#endif	/* lint */