/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "assym.h"

/*
 * General assembly language routines.
 * This file is intended to contain routines that are
 * specific to the cpu architecture.
 */

/*
 * WARNING: If you add a fast trap handler which can be invoked by a
 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
 * instead of the "done" instruction to return to user mode. See the
 * comments for the "fast_trap_done" entry point for more information.
 */
#define	FAST_TRAP_DONE	\
	ba,a	fast_trap_done

/*
 * Override GET_NATIVE_TIME for the cpu module code. This is not
 * guaranteed to be exactly one instruction, so be careful about using
 * the macro in delay slots.
 *
 * Do not use any instruction that modifies condition codes, as the
 * caller may depend on these remaining unchanged across the macro.
 */
#if defined(CHEETAH) || defined(OLYMPUS_C)

#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rd	STICK, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rd	STICK, reg;		\
	add	reg, delta, reg;	\
	wr	reg, STICK
#define	RD_TICKCMPR(out, scr)	\
	rd	STICK_COMPARE, out
#define	WR_TICKCMPR(in, scr1, scr2, label) \
	wr	in, STICK_COMPARE

#elif defined(HUMMINGBIRD)
#include <sys/spitregs.h>

/*
 * The current hummingbird version of %stick and %stick_cmp
 * are both implemented as two 32-bit locations in ASI_IO space;
 * the hardware should support atomic r/w; meanwhile: ugly alert! ...
 *
 * 64-bit opcodes are required, but they move only 32 bits:
 *
 * ldxa [phys]ASI_IO, %dst	reads the low 32 bits from phys into %dst
 * stxa %src, [phys]ASI_IO	writes the low 32 bits from %src into phys
 *
 * reg equivalent		[phys]ASI_IO
 * ------------------		---------------
 * %stick_cmp low-32		0x1FE.0000.F060
 * %stick_cmp high-32		0x1FE.0000.F068
 * %stick low-32		0x1FE.0000.F070
 * %stick high-32		0x1FE.0000.F078
 */
#define	HSTC_LOW	0x60	/* stick_cmp low 32-bits */
#define	HSTC_HIGH	0x68	/* stick_cmp high 32-bits */
#define	HST_LOW		0x70	/* stick low 32-bits */
#define	HST_HIGH	0x78	/* stick high 32-bits */
#define	HST_DIFF	0x08	/* low<-->high diff */

/*
 * Any change in the number of instructions in SETL41()
 * will affect SETL41_OFF.
 */
#define	SETL41(reg, byte) \
	sethi	%hi(0x1FE00000), reg;	/* 0000.0000.1FE0.0000 */	\
	or	reg, 0xF, reg;		/* 0000.0000.1FE0.000F */	\
	sllx	reg, 12, reg;		/* 0000.01FE.0000.F000 */	\
	or	reg, byte, reg;		/* 0000.01FE.0000.F0xx */

/*
 * SETL41_OFF is used to calculate the relative PC value when a
 * branch instruction needs to go over the SETL41() macro.
 */
#define	SETL41_OFF	16

/*
 * Reading %stick requires two loads, and there could be an intervening
 * low-to-high 32-bit rollover resulting in a return value that is
 * off by about (2 ^ 32); this rare case is prevented by re-reading
 * the low-32 bits after the high-32 and verifying the "after" value
 * is >= the "before" value; if not, the high-32 value must be re-read.
 *
 * This method is limited to 1 rollover, and based on the fixed
 * stick-frequency (5555555), requires the loads to complete within
 * 773 seconds; incrementing the high-32 value will not overflow for
 * about 52644 years.
 *
 * Writing %stick requires two stores; if the old/new low-32 value is
 * near 0xffffffff, there could be another rollover (also rare).
 * To prevent this, we first write a 0 to the low-32, then write
 * new values to the high-32 and then the low-32.
 *
 * When we detect a carry in the lower %stick register, we need to
 * read HST_HIGH again. However, at the point where we detect this,
 * we need to rebuild the register address HST_HIGH. This involves more
 * than one instruction, so a branch is unavoidable. However, most of
 * the time there is no carry, so we take the penalty of a branch
 * instruction only when there is a carry (less frequent).
 *
 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
 * addr already points to HST_LOW.
 *
 * NOTE: this method requires disabling interrupts before using
 * DELTA_NATIVE_TIME.
 */
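
	!
	! For illustration only: a rough C sketch of the rollover-safe
	! read implemented by GET_NATIVE_TIME() below.  rd_io32() is a
	! hypothetical stand-in for the ldxa/ASI_IO access; on a detected
	! rollover the assembly simply branches back and re-reads.
	!
	!	uint64_t
	!	hbird_stick_read(void)
	!	{
	!		uint32_t before, hi, after;
	!
	!		do {
	!			before = rd_io32(HST_LOW);
	!			hi = rd_io32(HST_HIGH);
	!			after = rd_io32(HST_LOW);
	!		} while (after < before);	/* low half rolled */
	!
	!		return (((uint64_t)hi << 32) | after);
	!	}
	!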
#define	GET_NATIVE_TIME(out, scr, tmp)	\
	SETL41(scr, HST_LOW);		\
	ldxa	[scr]ASI_IO, tmp;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, out;	\
	dec	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sub	scr, tmp, tmp;		\
	brlz,pn	tmp, .-(SETL41_OFF+24);	\
	sllx	out, 32, out;		\
	or	out, scr, out
#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp)	\
	SETL41(addr, HST_LOW);		\
	ldxa	[addr]ASI_IO, tmp;	\
	inc	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, high;	\
	dec	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, low;	\
	sub	low, tmp, tmp;		\
	brlz,pn	tmp, .-24;		\
	sllx	high, 32, high;		\
	or	high, low, high;	\
	add	high, delta, high;	\
	srl	high, 0, low;		\
	srlx	high, 32, high;		\
	stxa	%g0, [addr]ASI_IO;	\
	inc	HST_DIFF, addr;		\
	stxa	high, [addr]ASI_IO;	\
	dec	HST_DIFF, addr;		\
	stxa	low, [addr]ASI_IO
#define	RD_TICKCMPR(out, scr)		\
	SETL41(scr, HSTC_LOW);		\
	ldxa	[scr]ASI_IO, out;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sllx	scr, 32, scr;		\
	or	scr, out, out
#define	WR_TICKCMPR(in, scra, scrd, label)	\
	SETL41(scra, HSTC_HIGH);	\
	srlx	in, 32, scrd;		\
	stxa	scrd, [scra]ASI_IO;	\
	dec	HST_DIFF, scra;		\
	stxa	in, [scra]ASI_IO

#else	/* !CHEETAH && !HUMMINGBIRD */

#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rdpr	%tick, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rdpr	%tick, reg;		\
	add	reg, delta, reg;	\
	wrpr	reg, %tick
#define	RD_TICKCMPR(out, scr)		\
	rd	TICK_COMPARE, out
#ifdef BB_ERRATA_1	/* writes to TICK_COMPARE may fail */
/*
 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
 * The failure occurs only when the following instruction decodes to wr or
 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
 * with a read, thus stalling the pipe and keeping following instructions
 * from causing data corruption. Aligning to a quadword will ensure these
 * two instructions are not split due to i$ misses.
 */
#define	WR_TICKCMPR(cmpr, scr1, scr2, label)	\
	ba,a	.bb_errata_1.label		;\
	.align	64				;\
.bb_errata_1.label:				;\
	wr	cmpr, TICK_COMPARE		;\
	rd	TICK_COMPARE, %g0
#else	/* BB_ERRATA_1 */
#define	WR_TICKCMPR(in, scr1, scr2, label)	\
	wr	in, TICK_COMPARE
#endif	/* BB_ERRATA_1 */

#endif	/* !CHEETAH && !HUMMINGBIRD */

#include <sys/clock.h>


#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/intreg.h>
#include <sys/psr_compat.h>
#include <sys/isa_defs.h>
#include <sys/dditypes.h>
#include <sys/intr.h>

#include "assym.h"

	ENTRY(get_impl)
	GET_CPU_IMPL(%o0)
	retl
	nop
	SET_SIZE(get_impl)

/*
 * Softint generated when counter field of tick reg matches value field
 * of tick_cmpr reg
 */
	ENTRY_NP(tickcmpr_set)
	! get 64-bit clock_cycles interval
	mov	%o0, %o2
	mov	8, %o3			! A reasonable initial step size
1:
	WR_TICKCMPR(%o2, %o4, %o5, __LINE__)	! Write to TICK_CMPR

	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
	sllx	%o0, 1, %o0		! value we wrote was in the future.
	srlx	%o0, 1, %o0

	cmp	%o2, %o0		! If the value we wrote was in the
	bg,pt	%xcc, 2f		! future, then blow out of here.
	sllx	%o3, 1, %o3		! If not, then double our step size,
	ba,pt	%xcc, 1b		! and take another lap.
	add	%o0, %o3, %o2		!
2:
	retl
	nop
	SET_SIZE(tickcmpr_set)
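
	!
	! For illustration only: a rough C rendering of the loop above.
	! wr_tickcmpr() and rd_tick() are hypothetical stand-ins for the
	! WR_TICKCMPR() and GET_NATIVE_TIME() macros.
	!
	!	void
	!	tickcmpr_set(uint64_t when)
	!	{
	!		uint64_t step = 8, now;
	!
	!		for (;;) {
	!			wr_tickcmpr(when);
	!			now = rd_tick() & ~(1ULL << 63); /* drop NPT */
	!			if ((int64_t)when > (int64_t)now)
	!				break;		/* still in the future */
	!			step <<= 1;		/* double the step, */
	!			when = now + step;	/* take another lap */
	!		}
	!	}
	!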
	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0
	WR_TICKCMPR(%o0, %o4, %o5, __LINE__)	! Write to TICK_CMPR
	retl
	nop
	SET_SIZE(tickcmpr_disable)

#ifdef DEBUG
	.seg	".text"
tick_write_panic:
	.asciz	"tick_write_delta: interrupts already disabled on entry"
#endif	/* DEBUG */

/*
 * tick_write_delta() increments %tick by the specified delta. This should
 * only be called after a CPR event to assure that gethrtime() continues to
 * increase monotonically. Obviously, writing %tick needs to be done very
 * carefully to avoid introducing unnecessary %tick skew across CPUs. For
 * this reason, we make sure we're i-cache hot before actually writing to
 * %tick.
 */
	ENTRY_NP(tick_write_delta)
	rdpr	%pstate, %g1
#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			! aren't already disabled.
	sethi	%hi(tick_write_panic), %o1
	save	%sp, -SA(MINFRAME), %sp	! get a new window to preserve caller
	call	panic
	or	%i1, %lo(tick_write_panic), %o0
#endif	/* DEBUG */
0:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
	mov	%o0, %o2
	ba	0f			! Branch to cache line-aligned instr.
	nop
	.align	16
0:	nop				! The next 3 instructions are now hot.
	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick

	retl				! Return
	wrpr	%g0, %g1, %pstate	! delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)

	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1, %o0)
	retl
	srlx	%g1, TICKINT_DIS_SHFT, %o0
	SET_SIZE(tickcmpr_disabled)

/*
 * Get current tick
 */
	ENTRY(gettick)
	ALTENTRY(randtick)
	GET_NATIVE_TIME(%o0, %o2, %o3)
	retl
	nop
	SET_SIZE(randtick)
	SET_SIZE(gettick)


/*
 * Return the counter portion of the tick register.
 */
	ENTRY_NP(gettick_counter)
	rdpr	%tick, %o0
	sllx	%o0, 1, %o0
	retl
	srlx	%o0, 1, %o0		! shake off npt bit
	SET_SIZE(gettick_counter)
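
	!
	! Bit 63 of %tick is the NPT (non-privileged trap) bit, not part
	! of the counter, so the sllx/srlx pair above is equivalent to
	! this C expression:
	!
	!	counter = tick & ~(1ULL << 63);
	!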
/*
 * Provide a C callable interface to the trap that reads the hi-res timer.
 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
 */
	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
							! %g1 = hrtime
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime)

	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1, %o2, %o3)		! %g1 = native time
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_unscaled)

	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1, %o2, %o3)		! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
	retl
	mov	%g1, %o0
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)

	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	! hrtime_t's are signed, max hrtime_t must be positive
	mov	-1, %o2
	brlz,a	%g1, 1f
	srlx	%o2, 1, %g1
1:
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_max)

	ENTRY(scalehrtime)
	ldx	[%o0], %o1
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	stx	%o1, [%o0]
	SET_SIZE(scalehrtime)

/*
 * Fast trap to return a timestamp, uses trap window, leaves traps
 * disabled. Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRTIME trap.
 */
	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)

/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 */
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \
	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
	srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
	ba	3f;			/* go convert to sec/nsec */	\
	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
	bl,pt	%xcc, 4f;		/* if not, we're done */	\
	nop;				/* delay: do nothing :( */	\
	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
	ba,a	3b;			/* check >= billion again */	\
4:

	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]
	retl
	stn	%o2, [%o0 + CLONGSIZE]
	SET_SIZE(gethrestime)

/*
 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 * seconds.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					! %o0 current hrestime seconds
	nop
	SET_SIZE(gethrestime_sec)

/*
 * Returns the hrestime on the last tick. This is simpler than gethrestime()
 * and gethrestime_sec(): no conversion is required. gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h. (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
	membar	#LoadLoad			! Load of lock must complete
	andn	%o2, 1, %o2			! Mask off lowest bit
	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
	add	%o1, %lo(hrestime), %o4
	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
	membar	#LoadLoad			! All loads must complete
	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
	cmp	%o3, %o2			! If lock is locked or has
	bne	0b				! changed, retry.
	stn	%g1, [%o0]			! Delay: store seconds
	retl
	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)
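
	!
	! For illustration only: the snapshot loop above in rough C -- a
	! seqlock-style reader.  The low bit of hres_lock is the "writer
	! active" bit; masking it off and re-checking the whole word
	! catches both an in-progress and a completed hres_tick().
	!
	!	do {
	!		old = hres_lock & ~1;
	!		membar_consumer();		/* #LoadLoad */
	!		ts.tv_sec = hrestime.tv_sec;
	!		ts.tv_nsec = hrestime.tv_nsec;
	!		membar_consumer();		/* #LoadLoad */
	!	} while (hres_lock != old);		/* locked or changed */
	!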
/*
 * Fast trap for gettimeofday(). Returns a timestruc_t in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRESTIME trap.
 */
	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)

/*
 * Fast trap to return lwp virtual time, uses trap window, leaves traps
 * disabled. Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *	%o0, %o1 = return lwp virtual time
 *	%o2 = CPU/thread
 *	%o3 = lwp
 *	%g1 = scratch
 *	%g5 = scratch
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)		! CPU struct ptr to %g2
	ldn	[%g2 + CPU_THREAD], %g2	! thread pointer to %g2
	ldn	[%g2 + T_LWP], %g3	! lwp pointer to %g3

	/*
	 * Subtract start time of current microstate from time
	 * of day to get increment for lwp virtual time.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add current value of ms_acct[LMS_USER]
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

	srl	%g5, 0, %o1		! %o1 = lo32(%g5)
	srlx	%g5, 32, %o0		! %o0 = hi32(%g5)

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)
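
	!
	! For illustration only: the computation above in rough C.
	! rd_tick() and native_to_nsec() are hypothetical stand-ins for
	! GET_NATIVE_TIME() and NATIVE_TIME_TO_NSEC(); the lwp_mstate
	! member names follow the LWP_STATE_START/LWP_ACCT_USER offsets.
	!
	!	klwp_t *lwp = curthread->t_lwp;
	!	hrtime_t t = rd_tick();
	!
	!	t -= lwp->lwp_mstate.ms_state_start;	/* time in current */
	!						/* microstate */
	!	t += lwp->lwp_mstate.ms_acct[LMS_USER];	/* plus accumulated */
	!	return (native_to_nsec(t));		/* user time */
	!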
	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! get a new window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
7:	tst	%l5
	bz,pt	%xcc, 8f		! if we got it, drive on
	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5
	bz,a,pn	%xcc, 7b
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
	ba,pt	%xcc, 9b
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! update hres_last_tick. %l5 has the scaling factor (nsec_scale).
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
	stx	%l0, [%l4 + %lo(hres_last_tick)] ! prev = current
	! convert native time to nsecs
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1		! get accurate nsec delta

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0
	bg,pn	%xcc, 9f
	nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base

	!
	! apply adjustment, if any
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f
					! hrestime_adj == 0 ?
					! yes, skip adjustments
	clr	%l5			! delay: set adj to zero
	tst	%l0			! is hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f		! yes, go handle positive case
	srl	%i1, ADJ_SHIFT, %l5	! delay: %l5 = adj

	addcc	%l0, %l5, %g0		! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f		! yes, use current adj
	neg	%l5			! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	mov	%l0, %l5		! no, so set adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0		! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f		! yes, set adj = hrestime_adj
	mov	%l0, %l5		! delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0		! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2
	ldn	[%l2], %i2		! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3		! hrestime.nsec += adj
	add	%i3, %i1, %i3		! hrestime.nsec += nslt

	set	NANOSEC, %l5		! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f		! if hrestime.tv_nsec < NANOSEC
	sethi	%hi(one_sec), %i1	! delay
	add	%i2, 0x1, %i2		! hrestime.tv_sec++
	sub	%i3, %l5, %i3		! hrestime.tv_nsec - NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]	! store the new hrestime

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1			! release lock
	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock

	ret
	restore

9:
	!
	! release hres_lock
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)

	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	or	%o0, %lo(kstat_q_panic_msg), %o0
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)

#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE##CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE##CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE##LASTUPDATE], %o3;				\
	ldx	[%o0 + QTYPE##TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE##LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE##TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE##LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE##LASTUPDATE];	/* lastupdate = now */
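
	!
	! For illustration only: one KSTAT_Q_UPDATE() expansion in rough C.
	! %g1 holds the current native time ("now"); kq is a hypothetical
	! struct mirroring the QTYPE##CNT/TIME/LENTIME/LASTUPDATE offsets.
	!
	!	delta = now - kq->lastupdate;
	!	if (kq->cnt != 0) {		/* skip stats while idle */
	!		kq->time += delta;	/* time queue was busy */
	!		kq->lentime += (hrtime_t)kq->cnt * delta;
	!	}
	!	kq->cnt += 1;			/* or -= 1 on exit */
	!	kq->lastupdate = now;
	!
	! lentime is the integral of queue length over time, from which
	! average queue length and wait times can later be derived.
	!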
#if !defined(DEBUG)
/*
 * Same as KSTAT_Q_UPDATE, but without:
 *	QBR	%o1, QZERO;
 * For use only in non-debug builds; mimics ASSERT() behaviour.
 */
#define	KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE##CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	st	%o2, [%o0 + QTYPE##CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE##LASTUPDATE], %o3;				\
	ldx	[%o0 + QTYPE##TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE##LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE##TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE##LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE##LASTUPDATE];	/* lastupdate = now */
#endif

	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)

	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
#else
	KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
#endif
	SET_SIZE(kstat_waitq_exit)

	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)

	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
#else
	KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
#endif
	SET_SIZE(kstat_runq_exit)

	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
#else
	KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
#endif
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)

	.align 16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
#else
	KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
#endif
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)

	/*
	 * -- WARNING --
	 *
	 * The following variables MUST be together on a 128-byte boundary.
	 * In addition to the primary performance motivation (having them all
	 * on the same cache line(s)), code here and in the GET*TIME() macros
	 * assumes that they all have the same high 22 address bits (so
	 * there's only one sethi).
	 */
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift

	/* XXX - above comment claims 128-bytes is necessary */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* 2 longs */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0
nsec_scale:
	.word	0
hrtime_base:
	.word	0, 0
traptrace_use_stick:
	.word	0
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT


/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 * Delay by spinning.
 *
 * Delay for n microseconds; values <= 0 delay 1 usec.
 *
 * With UltraSPARC-III, the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 *
 * For OPL platforms that support the "sleep" instruction, we
 * conditionally (ifdef'ed) insert a "sleep" instruction in
 * the loop. Note that theoretically we should have moved (duplicated)
 * the code down to the spitfire/us3/opl specific asm files - but this
 * is a lot of code duplication just to add one "sleep" instruction.
 * We chose less code duplication for this.
 */
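
	!
	! For illustration only: a rough C rendering of the spin loop
	! below.  rd_tick() is a hypothetical stand-in for the
	! GET_NATIVE_TIME() macro; sticks_per_usec is the global that the
	! assembly loads.
	!
	!	if (n <= 0)
	!		n = 1;			/* minimum delay: 1 usec */
	!	end = rd_tick() + (uint64_t)sticks_per_usec * n + 1;
	!	while (rd_tick() <= end)	/* +1: we don't start on */
	!		;			/* a tick edge */
	!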
	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f
	mov	1, %o0
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! Scale usec to ticks
	inc	%o1			! We don't start on a tick edge
	GET_NATIVE_TIME(%o2, %o3, %o4)
	add	%o1, %o2, %o1

1:
#ifdef	_OPL
	.word	0x81b01060		! insert "sleep" instruction
#endif	/* _OPL */			! use byte code for now
	cmp	%o1, %o2
	GET_NATIVE_TIME(%o2, %o3, %o4)
	bgeu,pt	%xcc, 1b
	nop
	retl
	nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)

/*
 * Level-14 interrupt prologue.
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%pil, %g6		! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6	! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
1:	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
	SET_SIZE(pil14_interrupt)

	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled. If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure. We'll then check TICKINT
	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
	! interrupt. In this case, TICK_COMPARE may have been rewritten
	! recently; we'll compare %o5 to the current time to verify that it's
	! in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	RD_TICKCMPR(%o5, %g1)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f
	nop

	rdpr	%pstate, %g5
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate	! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req	! preserves %o5 and %g5
	mov	PIL_14, %o0

	! Check SOFTINT for TICKINT/STICKINT
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f
	wrpr	%g0, %g5, %pstate	! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! Now that we've cleared TICKINT, we can reread %tick and confirm
	! that the value we programmed is still in the future. If it isn't,
	! we need to reprogram TICK_COMPARE to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0, %g1, %g2)	! %o0 = tick
	sllx	%o0, 1, %o0		! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0		! In the future?
	bg,a,pt	%xcc, 2f		! Yes, drive on.
	wrpr	%g0, %g5, %pstate	! delay: enable vec intr

	!
	! If we're here, then we have programmed TICK_COMPARE with a %tick
	! value that is in the past; we'll now load an initial step size and
	! loop until we've managed to program TICK_COMPARE to fire in the
	! future.
	!
	mov	8, %o4			! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5		! Add the step
	WR_TICKCMPR(%o5, %g1, %g2, __LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0, %g1, %g2)	! %o0 = tick
	sllx	%o0, 1, %o0		! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0		! In the future?
	bg,a,pt	%xcc, 2f		! Yes, drive on.
	wrpr	%g0, %g5, %pstate	! delay: enable vec intr
	ba	1b			! No, try again.
	sllx	%o4, 1, %o4		! delay: double step size

2:	ba	current_thread_complete
	nop
	SET_SIZE(tick_rtt)

/*
 * Level-15 interrupt prologue.
 */
	ENTRY_NP(pil15_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6	! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_CPCPROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_CPCPROFILE_UPC]	! if not, record user PC
	ba	pil15_epilogue			! must be large-disp branch
	stn	%g0, [%g1 + CPU_CPCPROFILE_PC]	! zero kernel PC
1:	ba	pil15_epilogue			! must be large-disp branch
	stn	%g0, [%g1 + CPU_CPCPROFILE_UPC]	! zero user PC
	SET_SIZE(pil15_interrupt)

#ifdef DEBUG
	.seg	".text"
find_cpufreq_panic:
	.asciz	"find_cpufrequency: interrupts already disabled on entry"
#endif	/* DEBUG */

	ENTRY_NP(find_cpufrequency)
	rdpr	%pstate, %g1

#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			! are currently enabled
	sethi	%hi(find_cpufreq_panic), %o1
	call	panic
	or	%o1, %lo(find_cpufreq_panic), %o0
#endif	/* DEBUG */

0:
	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
3:
	ldub	[%o0], %o1		! Read the number of seconds
	mov	%o1, %o2		! remember initial value in %o2
1:
	GET_NATIVE_TIME(%o3, %g4, %g5)
	cmp	%o1, %o2		! did the seconds register roll over?
	be,pt	%icc, 1b		! branch back if unchanged
	ldub	[%o0], %o2		! delay: load the new seconds val

	brz,pn	%o2, 3b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	mov	%o2, %o4		! delay: store initial value in %o4
2:
	GET_NATIVE_TIME(%o5, %g4, %g5)
	cmp	%o2, %o4		! did the seconds register roll over?
	be,pt	%icc, 2b		! branch back if unchanged
	ldub	[%o0], %o4		! delay: load the new seconds val

	brz,pn	%o4, 0b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	wrpr	%g0, %g1, %pstate	! delay: re-enable interrupts

	retl
	sub	%o5, %o3, %o0		! return the difference in ticks
	SET_SIZE(find_cpufrequency)
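
	!
	! For illustration only: a rough C rendering of the measurement
	! loop above.  secs stands for the byte-wide seconds register
	! that %o0 points at; the return value is ticks-per-second.
	!
	!	restart:
	!		s = *secs;
	!		while (*secs == s)		/* wait for an edge */
	!			;
	!		t0 = rd_tick();
	!		if ((s = *secs) == 0)		/* minute rollover; */
	!			goto restart;		/* last sec suspect */
	!		while (*secs == s)		/* next edge */
	!			;
	!		t1 = rd_tick();
	!		if (*secs == 0)
	!			goto restart;
	!		return (t1 - t0);
	!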
#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)
	!
	! On US-III, the prefetch instruction queue is 8 entries deep.
	! Also, prefetches for write put data in the E$, which has
	! lines of 512 bytes for an 8MB cache. Each E$ line is further
	! subblocked into 64 byte chunks.
	!
	! Since prefetch can only bring in 64 bytes at a time (see Sparc
	! v9 Architecture Manual, p. 204) and a page_t is 128 bytes,
	! 2 prefetches are required in order to bring an entire
	! page into the E$.
	!
	! Since the prefetch queue is 8 entries deep, we currently can
	! only have 4 prefetches for page_t's outstanding. Thus, we
	! prefetch n+4 ahead of where we are now:
	!
	!	4 * sizeof(page_t)	-> 512
	!	4 * sizeof(page_t) + 64	-> 576
	!
	! Example
	! =======
	! contiguous page array in memory...
	!
	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
	!  ^                                      ^    ^
	!  pp                                     |    pp+4*sizeof(page)+64
	!                                         |
	!                                         pp+4*sizeof(page)
	!
	!	Prefetch
	!	Queue
	!	+-------+<--- In this iteration, we're working with pp (AAA1),
	!	|Preftch|     but we enqueue prefetch for addr = XXX1
	!	| XXX1  |
	!	+-------+<--- this queue slot will be a prefetch instruction
	!	|Preftch|     for addr = pp + 4*sizeof(page_t) + 64 (or second
	!	| XXX2  |     half of page XXX)
	!	+-------+
	!	|Preftch|<-+- The next time around this function, we'll be
	!	| YYY1  |  |  working with pp = BBB1, but will be enqueueing
	!	+-------+  |  prefetches for both halves of page YYY,
	!	|Preftch|  |  while both halves of page XXX are in transit,
	!	| YYY2  |<-+  making their way into the E$.
	!	+-------+
	!	|Preftch|
	!	| ZZZ1  |
	!	+-------+
	!	.	.
	!	:	:
	!
	!	E$
	!	+============================================...
	!	| XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
	!	+============================================...
	!	|      |      |      |      |      |      |
	!	+============================================...
	!	.
	!	:
	!
	! So we should expect the first four page accesses to stall
	! while we warm up the cache, after which most of the pages
	! will have their pp ready in the E$.
	!
	! Also note that if sizeof(page_t) grows beyond 128, then
	! we'll need an additional prefetch to get an entire page
	! into the E$, thus reducing the number of outstanding page
	! prefetches to 2 (i.e. 3 prefetches/page = 6 queue slots),
	! etc.
	!
	! Cheetah+
	! ========
	! On Cheetah+ we use "#n_write" prefetches, as these avoid an
	! unnecessary RTS->RTO bus transaction state change and just
	! issue an RTO transaction (see p. 77 of the Cheetah+ Delta
	! PRM). On Cheetah, #n_write prefetches are reflected with an
	! RTS->RTO state transition regardless.
	!
#define	STRIDE1	512
#define	STRIDE2	576

#if	STRIDE1 != (PAGE_SIZE * 4)
#error	"STRIDE1 != (PAGE_SIZE * 4)"
#endif	/* STRIDE1 != (PAGE_SIZE * 4) */

/*
 * Prefetch a page_t for write or read, this assumes a linear
 * scan of sequential page_t's.
 */
	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! Note on CHEETAH to prefetch for read, we really use #one_write.
	! This fetches to E$ (general use) rather than P$ (floating point use).
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #one_write
	retl
	prefetch	[%o0+STRIDE2], #one_write
	SET_SIZE(prefetch_page_r)

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

	!
	! UltraSparcII can have up to 3 prefetches outstanding.
	! A page_t is 128 bytes (2 prefetches of 64 bytes each),
	! so prefetch for pp + 1, which is
	!
	!	pp + sizeof(page_t)
	! and
	!	pp + sizeof(page_t) + 64
	!
#define	STRIDE1	128
#define	STRIDE2	192

#if	STRIDE1 != PAGE_SIZE
#error	"STRIDE1 != PAGE_SIZE"
#endif	/* STRIDE1 != PAGE_SIZE */

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_reads
	retl
	prefetch	[%o0+STRIDE2], #n_reads
	SET_SIZE(prefetch_page_r)

#elif defined(OLYMPUS_C)
	!
	! Prefetch strides for Olympus-C
	!

#define	STRIDE1	0x440
#define	STRIDE2	0x640

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_r)
#else	/* OLYMPUS_C */

#error "You need to fix this for your new cpu type."

#endif	/* OLYMPUS_C */
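
	!
	! For illustration only: the intended calling pattern is a linear
	! scan of sequential page_t's, where each call primes the pages
	! that will be needed a few iterations later (hypothetical caller
	! sketch):
	!
	!	for (pp = first; pp < last; pp++) {
	!		prefetch_page_w(pp);	/* primes pp+STRIDE1/STRIDE2 */
	!		/* ... touch/initialize fields of *pp ... */
	!	}
	!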
#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)

#define	PREFETCH_Q_LEN	8

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

#define	PREFETCH_Q_LEN	3

#elif defined(OLYMPUS_C)
	!
	! Use length of one for now.
	!
#define	PREFETCH_Q_LEN	1

#else	/* OLYMPUS_C */

#error You need to fix this for your new cpu type.

#endif	/* OLYMPUS_C */

#include <vm/kpm.h>

#ifdef	SEGKPM_SUPPORT

#define	SMAP_SIZE	72
#define	SMAP_STRIDE	(((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)

#else	/* SEGKPM_SUPPORT */

	!
	! The hardware will prefetch the 64 byte cache aligned block
	! that contains the address specified in the prefetch instruction.
	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
	! per pass will suffice as long as we prefetch far enough ahead to
	! make sure we don't stall for the cases where the smap object
	! spans multiple hardware prefetch blocks. Let's prefetch as far
	! ahead as the hardware will allow.
	!
	! The smap array is processed with decreasing address pointers.
	!
#define	SMAP_SIZE	48
#define	SMAP_STRIDE	(PREFETCH_Q_LEN * SMAP_SIZE)

#endif	/* SEGKPM_SUPPORT */

/*
 * Prefetch struct smap for write.
 */
	ENTRY(prefetch_smap_w)
	retl
	prefetch	[%o0-SMAP_STRIDE], #n_writes
	SET_SIZE(prefetch_smap_w)

	ENTRY_NP(getidsr)
	retl
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
	SET_SIZE(getidsr)