1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22/* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27#pragma ident "%Z%%M% %I% %E% SMI" 28 29#if !defined(lint) 30#include "assym.h" 31#endif /* !lint */ 32 33/* 34 * General assembly language routines. 35 * It is the intent of this file to contain routines that are 36 * specific to cpu architecture. 37 */ 38 39/* 40 * WARNING: If you add a fast trap handler which can be invoked by a 41 * non-privileged user, you may have to use the FAST_TRAP_DONE macro 42 * instead of "done" instruction to return back to the user mode. See 43 * comments for the "fast_trap_done" entry point for more information. 44 */ 45#define FAST_TRAP_DONE \ 46 ba,a fast_trap_done 47 48/* 49 * Override GET_NATIVE_TIME for the cpu module code. This is not 50 * guaranteed to be exactly one instruction, be careful of using 51 * the macro in delay slots. 52 * 53 * Do not use any instruction that modifies condition codes as the 54 * caller may depend on these to remain unchanged across the macro. 55 */ 56#if defined(CHEETAH) 57 58#define GET_NATIVE_TIME(out, scr1, scr2) \ 59 rd STICK, out 60#define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \ 61 rd STICK, reg; \ 62 add reg, delta, reg; \ 63 wr reg, STICK 64#define RD_TICKCMPR(out, scr) \ 65 rd STICK_COMPARE, out 66#define WR_TICKCMPR(in, scr1, scr2, label) \ 67 wr in, STICK_COMPARE 68 69#elif defined(HUMMINGBIRD) 70#include <sys/spitregs.h> 71 72/* 73 * the current hummingbird version of %stick and %stick_cmp 74 * were both implemented as (2) 32-bit locations in ASI_IO space; 75 * the hdwr should support atomic r/w; meanwhile: ugly alert! ... 76 * 77 * 64-bit opcodes are required, but move only 32-bits: 78 * 79 * ldxa [phys]ASI_IO, %dst reads the low 32-bits from phys into %dst 80 * stxa %src, [phys]ASI_IO writes the low 32-bits from %src into phys 81 * 82 * reg equivalent [phys]ASI_IO 83 * ------------------ --------------- 84 * %stick_cmp low-32 0x1FE.0000.F060 85 * %stick_cmp high-32 0x1FE.0000.F068 86 * %stick low-32 0x1FE.0000.F070 87 * %stick high-32 0x1FE.0000.F078 88 */ 89#define HSTC_LOW 0x60 /* stick_cmp low 32-bits */ 90#define HSTC_HIGH 0x68 /* stick_cmp high 32-bits */ 91#define HST_LOW 0x70 /* stick low 32-bits */ 92#define HST_HIGH 0x78 /* stick high 32-bits */ 93#define HST_DIFF 0x08 /* low<-->high diff */ 94 95/* 96 * Any change in the number of instructions in SETL41() 97 * will affect SETL41_OFF 98 */ 99#define SETL41(reg, byte) \ 100 sethi %hi(0x1FE00000), reg; /* 0000.0000.1FE0.0000 */ \ 101 or reg, 0xF, reg; /* 0000.0000.1FE0.000F */ \ 102 sllx reg, 12, reg; /* 0000.01FE.0000.F000 */ \ 103 or reg, byte, reg; /* 0000.01FE.0000.F0xx */ 104 105/* 106 * SETL41_OFF is used to calulate the relative PC value when a 107 * branch instruction needs to go over SETL41() macro 108 */ 109#define SETL41_OFF 16 110 111/* 112 * reading stick requires 2 loads, and there could be an intervening 113 * low-to-high 32-bit rollover resulting in a return value that is 114 * off by about (2 ^ 32); this rare case is prevented by re-reading 115 * the low-32 bits after the high-32 and verifying the "after" value 116 * is >= the "before" value; if not, increment the high-32 value. 117 * 118 * this method is limited to 1 rollover, and based on the fixed 119 * stick-frequency (5555555), requires the loads to complete within 120 * 773 seconds; incrementing the high-32 value will not overflow for 121 * about 52644 years. 122 * 123 * writing stick requires 2 stores; if the old/new low-32 value is 124 * near 0xffffffff, there could be another rollover (also rare). 125 * to prevent this, we first write a 0 to the low-32, then write 126 * new values to the high-32 then the low-32. 127 * 128 * When we detect a carry in the lower %stick register, we need to 129 * read HST_HIGH again. However at the point where we detect this, 130 * we need to rebuild the register address HST_HIGH.This involves more 131 * than one instructions and a branch is unavoidable. However, most of 132 * the time, there is no carry. So we take the penalty of a branch 133 * instruction only when there is carry (less frequent). 134 * 135 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41(). 136 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since 137 * addr already points to HST_LOW. 138 * 139 * NOTE: this method requires disabling interrupts before using 140 * DELTA_NATIVE_TIME. 141 */ 142#define GET_NATIVE_TIME(out, scr, tmp) \ 143 SETL41(scr, HST_LOW); \ 144 ldxa [scr]ASI_IO, tmp; \ 145 inc HST_DIFF, scr; \ 146 ldxa [scr]ASI_IO, out; \ 147 dec HST_DIFF, scr; \ 148 ldxa [scr]ASI_IO, scr; \ 149 sub scr, tmp, tmp; \ 150 brlz,pn tmp, .-(SETL41_OFF+24); \ 151 sllx out, 32, out; \ 152 or out, scr, out 153#define DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \ 154 SETL41(addr, HST_LOW); \ 155 ldxa [addr]ASI_IO, tmp; \ 156 inc HST_DIFF, addr; \ 157 ldxa [addr]ASI_IO, high; \ 158 dec HST_DIFF, addr; \ 159 ldxa [addr]ASI_IO, low; \ 160 sub low, tmp, tmp; \ 161 brlz,pn tmp, .-24; \ 162 sllx high, 32, high; \ 163 or high, low, high; \ 164 add high, delta, high; \ 165 srl high, 0, low; \ 166 srlx high, 32, high; \ 167 stxa %g0, [addr]ASI_IO; \ 168 inc HST_DIFF, addr; \ 169 stxa high, [addr]ASI_IO; \ 170 dec HST_DIFF, addr; \ 171 stxa low, [addr]ASI_IO 172#define RD_TICKCMPR(out, scr) \ 173 SETL41(scr, HSTC_LOW); \ 174 ldxa [scr]ASI_IO, out; \ 175 inc HST_DIFF, scr; \ 176 ldxa [scr]ASI_IO, scr; \ 177 sllx scr, 32, scr; \ 178 or scr, out, out 179#define WR_TICKCMPR(in, scra, scrd, label) \ 180 SETL41(scra, HSTC_HIGH); \ 181 srlx in, 32, scrd; \ 182 stxa scrd, [scra]ASI_IO; \ 183 dec HST_DIFF, scra; \ 184 stxa in, [scra]ASI_IO 185 186#else /* !CHEETAH && !HUMMINGBIRD */ 187 188#define GET_NATIVE_TIME(out, scr1, scr2) \ 189 rdpr %tick, out 190#define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \ 191 rdpr %tick, reg; \ 192 add reg, delta, reg; \ 193 wrpr reg, %tick 194#define RD_TICKCMPR(out, scr) \ 195 rd TICK_COMPARE, out 196#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */ 197/* 198 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules. 199 * The failure occurs only when the following instruction decodes to wr or 200 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE 201 * with a read, thus stalling the pipe and keeping following instructions 202 * from causing data corruption. Aligning to a quadword will ensure these 203 * two instructions are not split due to i$ misses. 204 */ 205#define WR_TICKCMPR(cmpr,scr1,scr2,label) \ 206 ba,a .bb_errata_1.label ;\ 207 .align 64 ;\ 208.bb_errata_1.label: ;\ 209 wr cmpr, TICK_COMPARE ;\ 210 rd TICK_COMPARE, %g0 211#else /* BB_ERRATA_1 */ 212#define WR_TICKCMPR(in,scr1,scr2,label) \ 213 wr in, TICK_COMPARE 214#endif /* BB_ERRATA_1 */ 215 216#endif /* !CHEETAH && !HUMMINGBIRD */ 217 218#include <sys/clock.h> 219 220#if defined(lint) 221#include <sys/types.h> 222#include <sys/scb.h> 223#include <sys/systm.h> 224#include <sys/regset.h> 225#include <sys/sunddi.h> 226#include <sys/lockstat.h> 227#endif /* lint */ 228 229 230#include <sys/asm_linkage.h> 231#include <sys/privregs.h> 232#include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */ 233#include <sys/machthread.h> 234#include <sys/clock.h> 235#include <sys/intreg.h> 236#include <sys/psr_compat.h> 237#include <sys/isa_defs.h> 238#include <sys/dditypes.h> 239#include <sys/intr.h> 240 241#if !defined(lint) 242#include "assym.h" 243#endif /* !lint */ 244 245#if defined(lint) 246 247uint_t 248get_impl(void) 249{ return (0); } 250 251#else /* lint */ 252 253 ENTRY(get_impl) 254 GET_CPU_IMPL(%o0) 255 retl 256 nop 257 SET_SIZE(get_impl) 258 259#endif /* lint */ 260 261#if defined(lint) 262/* 263 * Softint generated when counter field of tick reg matches value field 264 * of tick_cmpr reg 265 */ 266/*ARGSUSED*/ 267void 268tickcmpr_set(uint64_t clock_cycles) 269{} 270 271#else /* lint */ 272 273 ENTRY_NP(tickcmpr_set) 274 ! get 64-bit clock_cycles interval 275 mov %o0, %o2 276 mov 8, %o3 ! A reasonable initial step size 2771: 278 WR_TICKCMPR(%o2,%o4,%o5,__LINE__) ! Write to TICK_CMPR 279 280 GET_NATIVE_TIME(%o0, %o4, %o5) ! Read %tick to confirm the 281 sllx %o0, 1, %o0 ! value we wrote was in the future. 282 srlx %o0, 1, %o0 283 284 cmp %o2, %o0 ! If the value we wrote was in the 285 bg,pt %xcc, 2f ! future, then blow out of here. 286 sllx %o3, 1, %o3 ! If not, then double our step size, 287 ba,pt %xcc, 1b ! and take another lap. 288 add %o0, %o3, %o2 ! 2892: 290 retl 291 nop 292 SET_SIZE(tickcmpr_set) 293 294#endif /* lint */ 295 296#if defined(lint) 297 298void 299tickcmpr_disable(void) 300{} 301 302#else /* lint */ 303 304 ENTRY_NP(tickcmpr_disable) 305 mov 1, %g1 306 sllx %g1, TICKINT_DIS_SHFT, %o0 307 WR_TICKCMPR(%o0,%o4,%o5,__LINE__) ! Write to TICK_CMPR 308 retl 309 nop 310 SET_SIZE(tickcmpr_disable) 311 312#endif /* lint */ 313 314#if defined(lint) 315 316/* 317 * tick_write_delta() increments %tick by the specified delta. This should 318 * only be called after a CPR event to assure that gethrtime() continues to 319 * increase monotonically. Obviously, writing %tick needs to de done very 320 * carefully to avoid introducing unnecessary %tick skew across CPUs. For 321 * this reason, we make sure we're i-cache hot before actually writing to 322 * %tick. 323 */ 324/*ARGSUSED*/ 325void 326tick_write_delta(uint64_t delta) 327{} 328 329#else /* lint */ 330 331#ifdef DEBUG 332 .seg ".text" 333tick_write_panic: 334 .asciz "tick_write_delta: interrupts already disabled on entry" 335#endif /* DEBUG */ 336 337 ENTRY_NP(tick_write_delta) 338 rdpr %pstate, %g1 339#ifdef DEBUG 340 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts 341 bnz 0f ! aren't already disabled. 342 sethi %hi(tick_write_panic), %o1 343 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller 344 call panic 345 or %i1, %lo(tick_write_panic), %o0 346#endif /* DEBUG */ 3470: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts 348 mov %o0, %o2 349 ba 0f ! Branch to cache line-aligned instr. 350 nop 351 .align 16 3520: nop ! The next 3 instructions are now hot. 353 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick 354 355 retl ! Return 356 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts 357#endif /* lint */ 358 359#if defined(lint) 360/* 361 * return 1 if disabled 362 */ 363 364int 365tickcmpr_disabled(void) 366{ return (0); } 367 368#else /* lint */ 369 370 ENTRY_NP(tickcmpr_disabled) 371 RD_TICKCMPR(%g1, %o0) 372 retl 373 srlx %g1, TICKINT_DIS_SHFT, %o0 374 SET_SIZE(tickcmpr_disabled) 375 376#endif /* lint */ 377 378/* 379 * Get current tick 380 */ 381#if defined(lint) 382 383u_longlong_t 384gettick(void) 385{ return (0); } 386 387#else /* lint */ 388 389 ENTRY(gettick) 390 GET_NATIVE_TIME(%o0, %o2, %o3) 391 retl 392 nop 393 SET_SIZE(gettick) 394 395#endif /* lint */ 396 397 398/* 399 * Return the counter portion of the tick register. 400 */ 401 402#if defined(lint) 403 404uint64_t 405gettick_counter(void) 406{ return(0); } 407 408#else /* lint */ 409 410 ENTRY_NP(gettick_counter) 411 rdpr %tick, %o0 412 sllx %o0, 1, %o0 413 retl 414 srlx %o0, 1, %o0 ! shake off npt bit 415 SET_SIZE(gettick_counter) 416#endif /* lint */ 417 418/* 419 * Provide a C callable interface to the trap that reads the hi-res timer. 420 * Returns 64-bit nanosecond timestamp in %o0 and %o1. 421 */ 422 423#if defined(lint) 424 425hrtime_t 426gethrtime(void) 427{ 428 return ((hrtime_t)0); 429} 430 431hrtime_t 432gethrtime_unscaled(void) 433{ 434 return ((hrtime_t)0); 435} 436 437hrtime_t 438gethrtime_max(void) 439{ 440 return ((hrtime_t)0); 441} 442 443void 444scalehrtime(hrtime_t *hrt) 445{ 446 *hrt = 0; 447} 448 449void 450gethrestime(timespec_t *tp) 451{ 452 tp->tv_sec = 0; 453 tp->tv_nsec = 0; 454} 455 456time_t 457gethrestime_sec(void) 458{ 459 return (0); 460} 461 462void 463gethrestime_lasttick(timespec_t *tp) 464{ 465 tp->tv_sec = 0; 466 tp->tv_nsec = 0; 467} 468 469/*ARGSUSED*/ 470void 471hres_tick(void) 472{ 473} 474 475void 476panic_hres_tick(void) 477{ 478} 479 480#else /* lint */ 481 482 ENTRY_NP(gethrtime) 483 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2) 484 ! %g1 = hrtime 485 retl 486 mov %g1, %o0 487 SET_SIZE(gethrtime) 488 489 ENTRY_NP(gethrtime_unscaled) 490 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time 491 retl 492 mov %g1, %o0 493 SET_SIZE(gethrtime_unscaled) 494 495 ENTRY_NP(gethrtime_waitfree) 496 ALTENTRY(dtrace_gethrtime) 497 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time 498 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3) 499 retl 500 mov %g1, %o0 501 SET_SIZE(dtrace_gethrtime) 502 SET_SIZE(gethrtime_waitfree) 503 504 ENTRY(gethrtime_max) 505 NATIVE_TIME_MAX(%g1) 506 NATIVE_TIME_TO_NSEC(%g1, %o0, %o1) 507 508 ! hrtime_t's are signed, max hrtime_t must be positive 509 mov -1, %o2 510 brlz,a %g1, 1f 511 srlx %o2, 1, %g1 5121: 513 retl 514 mov %g1, %o0 515 SET_SIZE(gethrtime_max) 516 517 ENTRY(scalehrtime) 518 ldx [%o0], %o1 519 NATIVE_TIME_TO_NSEC(%o1, %o2, %o3) 520 retl 521 stx %o1, [%o0] 522 SET_SIZE(scalehrtime) 523 524/* 525 * Fast trap to return a timestamp, uses trap window, leaves traps 526 * disabled. Returns a 64-bit nanosecond timestamp in %o0 and %o1. 527 * 528 * This is the handler for the ST_GETHRTIME trap. 529 */ 530 531 ENTRY_NP(get_timestamp) 532 GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2) ! %g1 = hrtime 533 srlx %g1, 32, %o0 ! %o0 = hi32(%g1) 534 srl %g1, 0, %o1 ! %o1 = lo32(%g1) 535 FAST_TRAP_DONE 536 SET_SIZE(get_timestamp) 537 538/* 539 * Macro to convert GET_HRESTIME() bits into a timestamp. 540 * 541 * We use two separate macros so that the platform-dependent GET_HRESTIME() 542 * can be as small as possible; CONV_HRESTIME() implements the generic part. 543 */ 544#define CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \ 545 brz,pt adj, 3f; /* no adjustments, it's easy */ \ 546 add hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \ 547 brlz,pn adj, 2f; /* if hrestime_adj negative */ \ 548 srl nslt, ADJ_SHIFT, nslt; /* delay: nslt >>= 4 */ \ 549 subcc adj, nslt, %g0; /* hrestime_adj - nslt/16 */ \ 550 movg %xcc, nslt, adj; /* adj by min(adj, nslt/16) */ \ 551 ba 3f; /* go convert to sec/nsec */ \ 552 add hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \ 5532: addcc adj, nslt, %g0; /* hrestime_adj + nslt/16 */ \ 554 bge,a,pt %xcc, 3f; /* is adj less negative? */ \ 555 add hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */ \ 556 sub hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \ 5573: cmp hrestnsec, nano; /* more than a billion? */ \ 558 bl,pt %xcc, 4f; /* if not, we're done */ \ 559 nop; /* delay: do nothing :( */ \ 560 add hrestsec, 1, hrestsec; /* hrest.tv_sec++; */ \ 561 sub hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \ 5624: 563 564 ENTRY_NP(gethrestime) 565 GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4) 566 CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5) 567 stn %o1, [%o0] 568 retl 569 stn %o2, [%o0 + CLONGSIZE] 570 SET_SIZE(gethrestime) 571 572/* 573 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime 574 * seconds. 575 */ 576 ENTRY_NP(gethrestime_sec) 577 GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4) 578 CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5) 579 retl ! %o0 current hrestime seconds 580 nop 581 SET_SIZE(gethrestime_sec) 582 583/* 584 * Returns the hrestime on the last tick. This is simpler than gethrestime() 585 * and gethrestime_sec(): no conversion is required. gethrestime_lasttick() 586 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME, 587 * outlined in detail in clock.h. (Unlike GET_HRESTIME/GET_HRTIME, we don't 588 * rely on load dependencies to effect the membar #LoadLoad, instead declaring 589 * it explicitly.) 590 */ 591 ENTRY_NP(gethrestime_lasttick) 592 sethi %hi(hres_lock), %o1 5930: 594 lduw [%o1 + %lo(hres_lock)], %o2 ! Load lock value 595 membar #LoadLoad ! Load of lock must complete 596 andn %o2, 1, %o2 ! Mask off lowest bit 597 ldn [%o1 + %lo(hrestime)], %g1 ! Seconds. 598 add %o1, %lo(hrestime), %o4 599 ldn [%o4 + CLONGSIZE], %g2 ! Nanoseconds. 600 membar #LoadLoad ! All loads must complete 601 lduw [%o1 + %lo(hres_lock)], %o3 ! Reload lock value 602 cmp %o3, %o2 ! If lock is locked or has 603 bne 0b ! changed, retry. 604 stn %g1, [%o0] ! Delay: store seconds 605 retl 606 stn %g2, [%o0 + CLONGSIZE] ! Delay: store nanoseconds 607 SET_SIZE(gethrestime_lasttick) 608 609/* 610 * Fast trap for gettimeofday(). Returns a timestruc_t in %o0 and %o1. 611 * 612 * This is the handler for the ST_GETHRESTIME trap. 613 */ 614 615 ENTRY_NP(get_hrestime) 616 GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3) 617 CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3) 618 FAST_TRAP_DONE 619 SET_SIZE(get_hrestime) 620 621/* 622 * Fast trap to return lwp virtual time, uses trap window, leaves traps 623 * disabled. Returns a 64-bit number in %o0:%o1, which is the number 624 * of nanoseconds consumed. 625 * 626 * This is the handler for the ST_GETHRVTIME trap. 627 * 628 * Register usage: 629 * %o0, %o1 = return lwp virtual time 630 * %o2 = CPU/thread 631 * %o3 = lwp 632 * %g1 = scratch 633 * %g5 = scratch 634 */ 635 ENTRY_NP(get_virtime) 636 GET_NATIVE_TIME(%g5, %g1, %g2) ! %g5 = native time in ticks 637 CPU_ADDR(%g2, %g3) ! CPU struct ptr to %g2 638 ldn [%g2 + CPU_THREAD], %g2 ! thread pointer to %g2 639 ldn [%g2 + T_LWP], %g3 ! lwp pointer to %g3 640 641 /* 642 * Subtract start time of current microstate from time 643 * of day to get increment for lwp virtual time. 644 */ 645 ldx [%g3 + LWP_STATE_START], %g1 ! ms_state_start 646 sub %g5, %g1, %g5 647 648 /* 649 * Add current value of ms_acct[LMS_USER] 650 */ 651 ldx [%g3 + LWP_ACCT_USER], %g1 ! ms_acct[LMS_USER] 652 add %g5, %g1, %g5 653 NATIVE_TIME_TO_NSEC(%g5, %g1, %o0) 654 655 srl %g5, 0, %o1 ! %o1 = lo32(%g5) 656 srlx %g5, 32, %o0 ! %o0 = hi32(%g5) 657 658 FAST_TRAP_DONE 659 SET_SIZE(get_virtime) 660 661 662 663 .seg ".text" 664hrtime_base_panic: 665 .asciz "hrtime_base stepping back" 666 667 668 ENTRY_NP(hres_tick) 669 save %sp, -SA(MINFRAME), %sp ! get a new window 670 671 sethi %hi(hrestime), %l4 672 ldstub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5 ! try locking 6737: tst %l5 674 bz,pt %xcc, 8f ! if we got it, drive on 675 ld [%l4 + %lo(nsec_scale)], %l5 ! delay: %l5 = scaling factor 676 ldub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5 6779: tst %l5 678 bz,a,pn %xcc, 7b 679 ldstub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5 680 ba,pt %xcc, 9b 681 ldub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5 6828: 683 membar #StoreLoad|#StoreStore 684 685 ! 686 ! update hres_last_tick. %l5 has the scaling factor (nsec_scale). 687 ! 688 ldx [%l4 + %lo(hrtime_base)], %g1 ! load current hrtime_base 689 GET_NATIVE_TIME(%l0, %l3, %l6) ! current native time 690 stx %l0, [%l4 + %lo(hres_last_tick)]! prev = current 691 ! convert native time to nsecs 692 NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT) 693 694 sub %l0, %g1, %i1 ! get accurate nsec delta 695 696 ldx [%l4 + %lo(hrtime_base)], %l1 697 cmp %l1, %l0 698 bg,pn %xcc, 9f 699 nop 700 701 stx %l0, [%l4 + %lo(hrtime_base)] ! update hrtime_base 702 703 ! 704 ! apply adjustment, if any 705 ! 706 ldx [%l4 + %lo(hrestime_adj)], %l0 ! %l0 = hrestime_adj 707 brz %l0, 2f 708 ! hrestime_adj == 0 ? 709 ! yes, skip adjustments 710 clr %l5 ! delay: set adj to zero 711 tst %l0 ! is hrestime_adj >= 0 ? 712 bge,pt %xcc, 1f ! yes, go handle positive case 713 srl %i1, ADJ_SHIFT, %l5 ! delay: %l5 = adj 714 715 addcc %l0, %l5, %g0 ! hrestime_adj < -adj ? 716 bl,pt %xcc, 2f ! yes, use current adj 717 neg %l5 ! delay: %l5 = -adj 718 ba,pt %xcc, 2f 719 mov %l0, %l5 ! no, so set adj = hrestime_adj 7201: 721 subcc %l0, %l5, %g0 ! hrestime_adj < adj ? 722 bl,a,pt %xcc, 2f ! yes, set adj = hrestime_adj 723 mov %l0, %l5 ! delay: adj = hrestime_adj 7242: 725 ldx [%l4 + %lo(timedelta)], %l0 ! %l0 = timedelta 726 sub %l0, %l5, %l0 ! timedelta -= adj 727 728 stx %l0, [%l4 + %lo(timedelta)] ! store new timedelta 729 stx %l0, [%l4 + %lo(hrestime_adj)] ! hrestime_adj = timedelta 730 731 or %l4, %lo(hrestime), %l2 732 ldn [%l2], %i2 ! %i2:%i3 = hrestime sec:nsec 733 ldn [%l2 + CLONGSIZE], %i3 734 add %i3, %l5, %i3 ! hrestime.nsec += adj 735 add %i3, %i1, %i3 ! hrestime.nsec += nslt 736 737 set NANOSEC, %l5 ! %l5 = NANOSEC 738 cmp %i3, %l5 739 bl,pt %xcc, 5f ! if hrestime.tv_nsec < NANOSEC 740 sethi %hi(one_sec), %i1 ! delay 741 add %i2, 0x1, %i2 ! hrestime.tv_sec++ 742 sub %i3, %l5, %i3 ! hrestime.tv_nsec - NANOSEC 743 mov 0x1, %l5 744 st %l5, [%i1 + %lo(one_sec)] 7455: 746 stn %i2, [%l2] 747 stn %i3, [%l2 + CLONGSIZE] ! store the new hrestime 748 749 membar #StoreStore 750 751 ld [%l4 + %lo(hres_lock)], %i1 752 inc %i1 ! release lock 753 st %i1, [%l4 + %lo(hres_lock)] ! clear hres_lock 754 755 ret 756 restore 757 7589: 759 ! 760 ! release hres_lock 761 ! 762 ld [%l4 + %lo(hres_lock)], %i1 763 inc %i1 764 st %i1, [%l4 + %lo(hres_lock)] 765 766 sethi %hi(hrtime_base_panic), %o0 767 call panic 768 or %o0, %lo(hrtime_base_panic), %o0 769 770 SET_SIZE(hres_tick) 771 772#endif /* lint */ 773 774#if !defined(lint) && !defined(__lint) 775 776 .seg ".text" 777kstat_q_panic_msg: 778 .asciz "kstat_q_exit: qlen == 0" 779 780 ENTRY(kstat_q_panic) 781 save %sp, -SA(MINFRAME), %sp 782 sethi %hi(kstat_q_panic_msg), %o0 783 call panic 784 or %o0, %lo(kstat_q_panic_msg), %o0 785 /*NOTREACHED*/ 786 SET_SIZE(kstat_q_panic) 787 788#define BRZPN brz,pn 789#define BRZPT brz,pt 790 791#define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \ 792 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \ 793 QOP %o1, 1, %o2; /* %o2 = new qlen */ \ 794 QBR %o1, QZERO; /* done if qlen == 0 */ \ 795 st %o2, [%o0 + QTYPE/**/CNT]; /* delay: save qlen */ \ 796 ldx [%o0 + QTYPE/**/LASTUPDATE], %o3; \ 797 ldx [%o0 + QTYPE/**/TIME], %o4; /* %o4 = old time */ \ 798 ldx [%o0 + QTYPE/**/LENTIME], %o5; /* %o5 = old lentime */ \ 799 sub %g1, %o3, %o2; /* %o2 = time delta */ \ 800 mulx %o1, %o2, %o3; /* %o3 = cur lentime */ \ 801 add %o4, %o2, %o4; /* %o4 = new time */ \ 802 add %o5, %o3, %o5; /* %o5 = new lentime */ \ 803 stx %o4, [%o0 + QTYPE/**/TIME]; /* save time */ \ 804 stx %o5, [%o0 + QTYPE/**/LENTIME]; /* save lentime */ \ 805QRETURN; \ 806 stx %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */ 807 808 .align 16 809 ENTRY(kstat_waitq_enter) 810 GET_NATIVE_TIME(%g1, %g2, %g3) 811 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W) 812 SET_SIZE(kstat_waitq_enter) 813 814 .align 16 815 ENTRY(kstat_waitq_exit) 816 GET_NATIVE_TIME(%g1, %g2, %g3) 817 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W) 818 SET_SIZE(kstat_waitq_exit) 819 820 .align 16 821 ENTRY(kstat_runq_enter) 822 GET_NATIVE_TIME(%g1, %g2, %g3) 823 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R) 824 SET_SIZE(kstat_runq_enter) 825 826 .align 16 827 ENTRY(kstat_runq_exit) 828 GET_NATIVE_TIME(%g1, %g2, %g3) 829 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R) 830 SET_SIZE(kstat_runq_exit) 831 832 .align 16 833 ENTRY(kstat_waitq_to_runq) 834 GET_NATIVE_TIME(%g1, %g2, %g3) 835 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W) 836 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R) 837 SET_SIZE(kstat_waitq_to_runq) 838 839 .align 16 840 ENTRY(kstat_runq_back_to_waitq) 841 GET_NATIVE_TIME(%g1, %g2, %g3) 842 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R) 843 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W) 844 SET_SIZE(kstat_runq_back_to_waitq) 845 846#endif /* !(lint || __lint) */ 847 848#ifdef lint 849 850int64_t timedelta; 851hrtime_t hres_last_tick; 852timestruc_t hrestime; 853int64_t hrestime_adj; 854int hres_lock; 855uint_t nsec_scale; 856hrtime_t hrtime_base; 857int traptrace_use_stick; 858 859#else /* lint */ 860 /* 861 * -- WARNING -- 862 * 863 * The following variables MUST be together on a 128-byte boundary. 864 * In addition to the primary performance motivation (having them all 865 * on the same cache line(s)), code here and in the GET*TIME() macros 866 * assumes that they all have the same high 22 address bits (so 867 * there's only one sethi). 868 */ 869 .seg ".data" 870 .global timedelta, hres_last_tick, hrestime, hrestime_adj 871 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick 872 .global nsec_shift, adj_shift 873 874 /* XXX - above comment claims 128-bytes is necessary */ 875 .align 64 876timedelta: 877 .word 0, 0 /* int64_t */ 878hres_last_tick: 879 .word 0, 0 /* hrtime_t */ 880hrestime: 881 .nword 0, 0 /* 2 longs */ 882hrestime_adj: 883 .word 0, 0 /* int64_t */ 884hres_lock: 885 .word 0 886nsec_scale: 887 .word 0 888hrtime_base: 889 .word 0, 0 890traptrace_use_stick: 891 .word 0 892nsec_shift: 893 .word NSEC_SHIFT 894adj_shift: 895 .word ADJ_SHIFT 896 897#endif /* lint */ 898 899 900/* 901 * drv_usecwait(clock_t n) [DDI/DKI - section 9F] 902 * usec_delay(int n) [compatibility - should go one day] 903 * Delay by spinning. 904 * 905 * delay for n microseconds. numbers <= 0 delay 1 usec 906 * 907 * With UltraSPARC-III the combination of supporting mixed-speed CPUs 908 * and variable clock rate for power management requires that we 909 * use %stick to implement this routine. 910 */ 911 912#if defined(lint) 913 914/*ARGSUSED*/ 915void 916drv_usecwait(clock_t n) 917{} 918 919/*ARGSUSED*/ 920void 921usec_delay(int n) 922{} 923 924#else /* lint */ 925 926 ENTRY(drv_usecwait) 927 ALTENTRY(usec_delay) 928 brlez,a,pn %o0, 0f 929 mov 1, %o0 9300: 931 sethi %hi(sticks_per_usec), %o1 932 lduw [%o1 + %lo(sticks_per_usec)], %o1 933 mulx %o1, %o0, %o1 ! Scale usec to ticks 934 inc %o1 ! We don't start on a tick edge 935 GET_NATIVE_TIME(%o2, %o3, %o4) 936 add %o1, %o2, %o1 937 9381: cmp %o1, %o2 939 GET_NATIVE_TIME(%o2, %o3, %o4) 940 bgeu,pt %xcc, 1b 941 nop 942 retl 943 nop 944 SET_SIZE(usec_delay) 945 SET_SIZE(drv_usecwait) 946#endif /* lint */ 947 948#if defined(lint) 949 950/* ARGSUSED */ 951void 952pil14_interrupt(int level) 953{} 954 955#else /* lint */ 956 957/* 958 * Level-14 interrupt prologue. 959 */ 960 ENTRY_NP(pil14_interrupt) 961 CPU_ADDR(%g1, %g2) 962 rdpr %pil, %g6 ! %g6 = interrupted PIL 963 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL 964 rdpr %tstate, %g6 965 rdpr %tpc, %g5 966 btst TSTATE_PRIV, %g6 ! trap from supervisor mode? 967 bnz,a,pt %xcc, 1f 968 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC 969 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC 970 ba pil_interrupt_common ! must be large-disp branch 971 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC 9721: ba pil_interrupt_common ! must be large-disp branch 973 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC 974 SET_SIZE(pil14_interrupt) 975 976 ENTRY_NP(tick_rtt) 977 ! 978 ! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is 979 ! disabled. If TICK_COMPARE is enabled, we know that we need to 980 ! reenqueue the interrupt request structure. We'll then check TICKINT 981 ! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE 982 ! interrupt. In this case, TICK_COMPARE may have been rewritten 983 ! recently; we'll compare %o5 to the current time to verify that it's 984 ! in the future. 985 ! 986 ! Note that %o5 is live until after 1f. 987 ! XXX - there is a subroutine call while %o5 is live! 988 ! 989 RD_TICKCMPR(%o5, %g1) 990 srlx %o5, TICKINT_DIS_SHFT, %g1 991 brnz,pt %g1, 2f 992 nop 993 994 rdpr %pstate, %g5 995 andn %g5, PSTATE_IE, %g1 996 wrpr %g0, %g1, %pstate ! Disable vec interrupts 997 998 sethi %hi(cbe_level14_inum), %o1 999 ld [%o1 + %lo(cbe_level14_inum)], %o1 1000 call intr_enqueue_req ! preserves %o5 and %g5 1001 mov PIL_14, %o0 1002 1003 ! Check SOFTINT for TICKINT/STICKINT 1004 rd SOFTINT, %o4 1005 set (TICK_INT_MASK | STICK_INT_MASK), %o0 1006 andcc %o4, %o0, %g0 1007 bz,a,pn %icc, 2f 1008 wrpr %g0, %g5, %pstate ! Enable vec interrupts 1009 1010 ! clear TICKINT/STICKINT 1011 wr %o0, CLEAR_SOFTINT 1012 1013 ! 1014 ! Now that we've cleared TICKINT, we can reread %tick and confirm 1015 ! that the value we programmed is still in the future. If it isn't, 1016 ! we need to reprogram TICK_COMPARE to fire as soon as possible. 1017 ! 1018 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick 1019 sllx %o0, 1, %o0 ! Clear the DIS bit 1020 srlx %o0, 1, %o0 1021 cmp %o5, %o0 ! In the future? 1022 bg,a,pt %xcc, 2f ! Yes, drive on. 1023 wrpr %g0, %g5, %pstate ! delay: enable vec intr 1024 1025 ! 1026 ! If we're here, then we have programmed TICK_COMPARE with a %tick 1027 ! which is in the past; we'll now load an initial step size, and loop 1028 ! until we've managed to program TICK_COMPARE to fire in the future. 1029 ! 1030 mov 8, %o4 ! 8 = arbitrary inital step 10311: add %o0, %o4, %o5 ! Add the step 1032 WR_TICKCMPR(%o5,%g1,%g2,__LINE__) ! Write to TICK_CMPR 1033 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick 1034 sllx %o0, 1, %o0 ! Clear the DIS bit 1035 srlx %o0, 1, %o0 1036 cmp %o5, %o0 ! In the future? 1037 bg,a,pt %xcc, 2f ! Yes, drive on. 1038 wrpr %g0, %g5, %pstate ! delay: enable vec intr 1039 ba 1b ! No, try again. 1040 sllx %o4, 1, %o4 ! delay: double step size 1041 10422: ba current_thread_complete 1043 nop 1044 SET_SIZE(tick_rtt) 1045 1046#endif /* lint */ 1047 1048#if defined(lint) || defined(__lint) 1049 1050/* ARGSUSED */ 1051uint64_t 1052find_cpufrequency(volatile uchar_t *clock_ptr) 1053{ 1054 return (0); 1055} 1056 1057#else /* lint */ 1058 1059#ifdef DEBUG 1060 .seg ".text" 1061find_cpufreq_panic: 1062 .asciz "find_cpufrequency: interrupts already disabled on entry" 1063#endif /* DEBUG */ 1064 1065 ENTRY_NP(find_cpufrequency) 1066 rdpr %pstate, %g1 1067 1068#ifdef DEBUG 1069 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts 1070 bnz 0f ! are currently enabled 1071 sethi %hi(find_cpufreq_panic), %o1 1072 call panic 1073 or %o1, %lo(find_cpufreq_panic), %o0 1074#endif /* DEBUG */ 1075 10760: 1077 wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts 10783: 1079 ldub [%o0], %o1 ! Read the number of seconds 1080 mov %o1, %o2 ! remember initial value in %o2 10811: 1082 GET_NATIVE_TIME(%o3, %g4, %g5) 1083 cmp %o1, %o2 ! did the seconds register roll over? 1084 be,pt %icc, 1b ! branch back if unchanged 1085 ldub [%o0], %o2 ! delay: load the new seconds val 1086 1087 brz,pn %o2, 3b ! if the minutes just rolled over, 1088 ! the last second could have been 1089 ! inaccurate; try again. 1090 mov %o2, %o4 ! delay: store init. val. in %o2 10912: 1092 GET_NATIVE_TIME(%o5, %g4, %g5) 1093 cmp %o2, %o4 ! did the seconds register roll over? 1094 be,pt %icc, 2b ! branch back if unchanged 1095 ldub [%o0], %o4 ! delay: load the new seconds val 1096 1097 brz,pn %o4, 0b ! if the minutes just rolled over, 1098 ! the last second could have been 1099 ! inaccurate; try again. 1100 wrpr %g0, %g1, %pstate ! delay: re-enable interrupts 1101 1102 retl 1103 sub %o5, %o3, %o0 ! return the difference in ticks 1104 SET_SIZE(find_cpufrequency) 1105 1106#endif /* lint */ 1107 1108#if defined(lint) 1109/* 1110 * Prefetch a page_t for write or read, this assumes a linear 1111 * scan of sequential page_t's. 1112 */ 1113/*ARGSUSED*/ 1114void 1115prefetch_page_w(void *pp) 1116{} 1117 1118/*ARGSUSED*/ 1119void 1120prefetch_page_r(void *pp) 1121{} 1122#else /* lint */ 1123 1124#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \ 1125 defined(SERRANO) 1126 ! 1127 ! On US-III, the prefetch instruction queue is 8 entries deep. 1128 ! Also, prefetches for write put data in the E$, which has 1129 ! lines of 512 bytes for an 8MB cache. Each E$ line is further 1130 ! subblocked into 64 byte chunks. 1131 ! 1132 ! Since prefetch can only bring in 64 bytes at a time (See Sparc 1133 ! v9 Architecture Manual pp.204) and a page_t is 128 bytes, 1134 ! then 2 prefetches are required in order to bring an entire 1135 ! page into the E$. 1136 ! 1137 ! Since the prefetch queue is 8 entries deep, we currently can 1138 ! only have 4 prefetches for page_t's outstanding. Thus, we 1139 ! prefetch n+4 ahead of where we are now: 1140 ! 1141 ! 4 * sizeof(page_t) -> 512 1142 ! 4 * sizeof(page_t) +64 -> 576 1143 ! 1144 ! Example 1145 ! ======= 1146 ! contiguous page array in memory... 1147 ! 1148 ! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|... 1149 ! ^ ^ ^ ^ ^ ^ 1150 ! pp | pp+4*sizeof(page)+64 1151 ! | 1152 ! pp+4*sizeof(page) 1153 ! 1154 ! Prefetch 1155 ! Queue 1156 ! +-------+<--- In this iteration, we're working with pp (AAA1), 1157 ! |Preftch| but we enqueue prefetch for addr = XXX1 1158 ! | XXX1 | 1159 ! +-------+<--- this queue slot will be a prefetch instruction for 1160 ! |Preftch| for addr = pp + 4*sizeof(page_t) + 64 (or second 1161 ! | XXX2 | half of page XXX) 1162 ! +-------+ 1163 ! |Preftch|<-+- The next time around this function, we'll be 1164 ! | YYY1 | | working with pp = BBB1, but will be enqueueing 1165 ! +-------+ | prefetches to for both halves of page YYY, 1166 ! |Preftch| | while both halves of page XXX are in transit 1167 ! | YYY2 |<-+ make their way into the E$. 1168 ! +-------+ 1169 ! |Preftch| 1170 ! | ZZZ1 | 1171 ! +-------+ 1172 ! . . 1173 ! : : 1174 ! 1175 ! E$ 1176 ! +============================================... 1177 ! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 | 1178 ! +============================================... 1179 ! | | | | | | | 1180 ! +============================================... 1181 ! . 1182 ! : 1183 ! 1184 ! So we should expect the first four page accesses to stall 1185 ! while we warm up the cache, afterwhich, most of the pages 1186 ! will have their pp ready in the E$. 1187 ! 1188 ! Also note that if sizeof(page_t) grows beyond 128, then 1189 ! we'll need an additional prefetch to get an entire page 1190 ! into the E$, thus reducing the number of outstanding page 1191 ! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots) 1192 ! etc. 1193 ! 1194 ! Cheetah+ 1195 ! ======== 1196 ! On Cheetah+ we use "#n_write" prefetches as these avoid 1197 ! unnecessary RTS->RTO bus transaction state change, and 1198 ! just issues RTO transaction. (See pp.77 of Cheetah+ Delta 1199 ! PRM). On Cheetah, #n_write prefetches are reflected with 1200 ! RTS->RTO state transition regardless. 1201 ! 1202#define STRIDE1 512 1203#define STRIDE2 576 1204 1205#if STRIDE1 != (PAGE_SIZE * 4) 1206#error "STRIDE1 != (PAGE_SIZE * 4)" 1207#endif /* STRIDE1 != (PAGE_SIZE * 4) */ 1208 1209 ENTRY(prefetch_page_w) 1210 prefetch [%o0+STRIDE1], #n_writes 1211 retl 1212 prefetch [%o0+STRIDE2], #n_writes 1213 SET_SIZE(prefetch_page_w) 1214 1215 ! 1216 ! Note on CHEETAH to prefetch for read, we really use #one_write. 1217 ! This fetches to E$ (general use) rather than P$ (floating point use). 1218 ! 1219 ENTRY(prefetch_page_r) 1220 prefetch [%o0+STRIDE1], #one_write 1221 retl 1222 prefetch [%o0+STRIDE2], #one_write 1223 SET_SIZE(prefetch_page_r) 1224 1225#elif defined(SPITFIRE) || defined(HUMMINGBIRD) 1226 1227 ! 1228 ! UltraSparcII can have up to 3 prefetches outstanding. 1229 ! A page_t is 128 bytes (2 prefetches of 64 bytes each) 1230 ! So prefetch for pp + 1, which is 1231 ! 1232 ! pp + sizeof(page_t) 1233 ! and 1234 ! pp + sizeof(page_t) + 64 1235 ! 1236#define STRIDE1 128 1237#define STRIDE2 192 1238 1239#if STRIDE1 != PAGE_SIZE 1240#error "STRIDE1 != PAGE_SIZE" 1241#endif /* STRIDE1 != PAGE_SIZE */ 1242 1243 ENTRY(prefetch_page_w) 1244 prefetch [%o0+STRIDE1], #n_writes 1245 retl 1246 prefetch [%o0+STRIDE2], #n_writes 1247 SET_SIZE(prefetch_page_w) 1248 1249 ENTRY(prefetch_page_r) 1250 prefetch [%o0+STRIDE1], #n_reads 1251 retl 1252 prefetch [%o0+STRIDE2], #n_reads 1253 SET_SIZE(prefetch_page_r) 1254#else /* SPITFIRE || HUMMINGBIRD */ 1255 1256#error "You need to fix this for your new cpu type." 1257 1258#endif /* SPITFIRE || HUMMINGBIRD */ 1259 1260#endif /* lint */ 1261 1262#if defined(lint) 1263/* 1264 * Prefetch struct smap for write. 1265 */ 1266/*ARGSUSED*/ 1267void 1268prefetch_smap_w(void *smp) 1269{} 1270#else /* lint */ 1271 1272#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \ 1273 defined(SERRANO) 1274 1275#define PREFETCH_Q_LEN 8 1276 1277#elif defined(SPITFIRE) || defined(HUMMINGBIRD) 1278 1279#define PREFETCH_Q_LEN 3 1280 1281#else /* SPITFIRE || HUMMINGBIRD */ 1282 1283#error You need to fix this for your new cpu type. 1284 1285#endif /* SPITFIRE || HUMMINGBIRD */ 1286 1287#include <vm/kpm.h> 1288 1289#ifdef SEGKPM_SUPPORT 1290 1291#define SMAP_SIZE 72 1292#define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64) 1293 1294#else /* SEGKPM_SUPPORT */ 1295 1296 ! 1297 ! The hardware will prefetch the 64 byte cache aligned block 1298 ! that contains the address specified in the prefetch instruction. 1299 ! Since the size of the smap struct is 48 bytes, issuing 1 prefetch 1300 ! per pass will suffice as long as we prefetch far enough ahead to 1301 ! make sure we don't stall for the cases where the smap object 1302 ! spans multiple hardware prefetch blocks. Let's prefetch as far 1303 ! ahead as the hardware will allow. 1304 ! 1305 ! The smap array is processed with decreasing address pointers. 1306 ! 1307#define SMAP_SIZE 48 1308#define SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE) 1309 1310#endif /* SEGKPM_SUPPORT */ 1311 1312 ENTRY(prefetch_smap_w) 1313 retl 1314 prefetch [%o0-SMAP_STRIDE], #n_writes 1315 SET_SIZE(prefetch_smap_w) 1316 1317#endif /* lint */ 1318 1319#if defined(lint) || defined(__lint) 1320 1321/* ARGSUSED */ 1322uint64_t 1323getidsr(void) 1324{ return 0; } 1325 1326#else /* lint */ 1327 1328 ENTRY_NP(getidsr) 1329 retl 1330 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %o0 1331 SET_SIZE(getidsr) 1332 1333#endif /* lint */ 1334