/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
 */

	.file	"memcpy.s"

/*
 * memcpy(s1, s2, len)
 *
 * Copy s2 to s1, always copy n bytes.
 * Note: this does not work for overlapped copies, bcopy() does
 *
 * Fast assembler language version of the following C-program for memcpy
 * which represents the `standard' for the C-library.
 *
 *	void *
 *	memcpy(void *s, const void *s0, size_t n)
 *	{
 *		if (n != 0) {
 *			char *s1 = s;
 *			const char *s2 = s0;
 *			do {
 *				*s1++ = *s2++;
 *			} while (--n != 0);
 *		}
 *		return (s);
 *	}
 */

#include <sys/asm_linkage.h>
#include <sys/sun4asi.h>
#include <sys/trap.h>

	ANSI_PRAGMA_WEAK(memmove,function)
	ANSI_PRAGMA_WEAK(memcpy,function)

	ENTRY(memmove)
	cmp	%o1, %o0	! if from address is >= to use forward copy
	bgeu	%ncc, forcpy	! else use backward if ...
	sub	%o0, %o1, %o4	! get difference of two addresses
	cmp	%o2, %o4	! compare size and difference of addresses
	bleu	%ncc, forcpy	! if size is bigger, do overlapped copy
	nop
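	!
	! In C terms the test above is roughly (dst, src and len live in
	! %o0, %o1 and %o2; a sketch of the logic, not a literal translation):
	!
	!	if (src >= dst || (size_t)(dst - src) >= len)
	!		use the forward copy (forcpy);
	!	else
	!		copy backwards, high address to low (.ovbc);
	!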
	!
	! an overlapped copy that must be done "backwards"
	!
.ovbc:
	mov	%o0, %o5		! save dest address for return val
	add	%o1, %o2, %o1		! get to end of source space
	add	%o0, %o2, %o0		! get to end of destination space

.chksize:
	cmp	%o2, 8
	bgeu,pn	%ncc, .dbalign
	nop

.byte:
1:	deccc	%o2			! decrement count
	blu,pn	%ncc, exit		! loop until done
	dec	%o0			! decrement to address
	dec	%o1			! decrement from address
	ldub	[%o1], %o3		! read a byte
	ba	1b			! loop until done
	stb	%o3, [%o0]		! write byte

.dbalign:
	andcc	%o0, 7, %o3
	bz	%ncc, .dbbck
	nop
	dec	%o1
	dec	%o0
	dec	%o2
	ldub	[%o1], %o3
	ba	.chksize
	stb	%o3, [%o0]

.dbbck:
	rd	%fprs, %o3		! o3 = fprs

	! if fprs.fef == 0, set it. Checking it requires 2 instructions.
	! So set it anyway, without checking.
	wr	%g0, 0x4, %fprs		! fprs.fef = 1

	alignaddr %o1, %g0, %g1		! align src
	ldd	[%g1], %d0		! get first 8 byte block
	sub	%g1, 8, %g1
	andn	%o2, 7, %o4
	sub	%o1, %o4, %o1

2:
	sub	%o0, 8, %o0		! since we are at the end
					! when we first enter the loop
	ldd	[%g1], %d2
	faligndata %d2, %d0, %d8	! extract 8 bytes out
	std	%d8, [%o0]		! store it

	sub	%g1, 8, %g1
	sub	%o2, 8, %o2		! 8 less bytes to copy
	cmp	%o2, 8			! or do we have < 8 bytes
	bgeu,pt	%ncc, 2b
	fmovd	%d2, %d0

	and	%o3, 0x4, %o3		! fprs.du = fprs.dl = 0
	ba	.byte
	wr	%o3, %g0, %fprs		! fprs = o3 - restore fprs

	SET_SIZE(memmove)

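	!
	! memcpy() below picks a strategy by size; as a summary of the
	! code that follows:
	!
	!	count <= 32			byte copy (.bytecp)
	!	count < 384			align src, then dst, to 8 bytes
	!					and copy with ldd/faligndata/std
	!					(.dwcpy)
	!	count >= 384 and
	!	abs(dst - src) >= 256		64-byte block ldda/stda copy
	!					(blkcpy)
	!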
	ENTRY(memcpy)
	ENTRY(__align_cpy_1)
forcpy:
	mov	%o0, %o5		! save dest address for return val

	cmp	%o2, 32			! for small counts copy bytes
	bgu,a	%ncc, .alignsrc
	andcc	%o1, 7, %o3		! is src aligned on a 8 byte bound

.bytecp:
	! Do byte copy
	tst	%o2
	bleu,a,pn %ncc, exit
	nop

1:	ldub	[%o1], %o4
	inc	%o1
	inc	%o0
	deccc	%o2
	bgu	%ncc, 1b
	stb	%o4, [%o0 - 1]

exit:
	retl
	mov	%o5, %o0

.alignsrc:
	bz	%ncc, .bigcpy		! src already double aligned
	sub	%o3, 8, %o3
	neg	%o3			! bytes till src double aligned

	sub	%o2, %o3, %o2		! update o2 with new count

	! Copy %o3 bytes till double aligned

2:	ldub	[%o1], %o4
	inc	%o1
	inc	%o0
	deccc	%o3
	bgu	%ncc, 2b
	stb	%o4, [%o0 - 1]

	! Now Source (%o1) is double word aligned

.bigcpy:				! >= 17 bytes to copy
	andcc	%o0, 7, %o3		! is dst aligned on a 8 byte bound
	bz	%ncc, .blkchk		! already double aligned
	sub	%o3, 8, %o3
	neg	%o3			! bytes till double aligned

	sub	%o2, %o3, %o2		! update o2 with new count

	! Copy %o3 bytes till double aligned

3:	ldub	[%o1], %o4
	inc	%o1
	inc	%o0
	deccc	%o3
	bgu	%ncc, 3b
	stb	%o4, [%o0 - 1]

	! Now Destination (%o0) is double word aligned
.blkchk:
	cmp	%o2, 384		! if cnt < 256 + 128 - no Block ld/st
	bgeu,a	%ncc, blkcpy		! do double word copy
	subcc	%o0, %o1, %o4		! %o4 = dest - src

	! double word copy - using ldd and faligndata. Copies up to
	! 8 byte multiple count and does byte copy for the residual.
.dwcpy:
	rd	%fprs, %o3		! o3 = fprs

	! if fprs.fef == 0, set it. Checking it requires 2 instructions.
	! So set it anyway, without checking.
	wr	%g0, 0x4, %fprs		! fprs.fef = 1
	andn	%o2, 7, %o4		! o4 has 8 byte aligned cnt
	sub	%o4, 8, %o4
	alignaddr %o1, %g0, %g1
	ldd	[%g1], %d0
	add	%g1, 8, %g1
4:
	ldd	[%g1], %d2
	add	%g1, 8, %g1
	sub	%o2, 8, %o2
	subcc	%o4, 8, %o4
	faligndata %d0, %d2, %d8
	std	%d8, [%o0]
	add	%o1, 8, %o1
	bz,pn	%ncc, .residcp
	add	%o0, 8, %o0
	ldd	[%g1], %d0
	add	%g1, 8, %g1
	sub	%o2, 8, %o2
	subcc	%o4, 8, %o4
	faligndata %d2, %d0, %d8
	std	%d8, [%o0]
	add	%o1, 8, %o1
	bgu,pn	%ncc, 4b
	add	%o0, 8, %o0

.residcp:				! Do byte copy
	tst	%o2
	bz,a,pn	%ncc, dwexit
	nop

5:	ldub	[%o1], %o4
	inc	%o1
	inc	%o0
	deccc	%o2
	bgu	%ncc, 5b
	stb	%o4, [%o0 - 1]

dwexit:
	and	%o3, 0x4, %o3		! fprs.du = fprs.dl = 0
	wr	%o3, %g0, %fprs		! fprs = o3 - restore fprs
	retl
	mov	%o5, %o0

blkcpy:
	! subcc %o0, %o1, %o4		! in delay slot of branch
	bneg,a,pn %ncc, 1f		! %o4 = abs(%o4)
	neg	%o4
1:
	/*
	 * Compare against 256 since we should be checking block addresses
	 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
	 * src = dest + (64 * 3) + 63.
	 */
	cmp	%o4, 256		! if smaller than 3 blocks skip
	blu,pn	%ncc, .dwcpy		! and do it the slower way
	andcc	%o0, 63, %o3

	save	%sp, -SA(MINFRAME), %sp
	rd	%fprs, %l3		! l3 = fprs

	! if fprs.fef == 0, set it. Checking it requires 2 instructions.
	! So set it anyway, without checking.
	wr	%g0, 0x4, %fprs		! fprs.fef = 1

	bz,pn	%ncc, blalign		! now block aligned
	sub	%i3, 64, %i3
	neg	%i3			! bytes till block aligned
	sub	%i2, %i3, %i2		! update %i2 with new count

	! Copy %i3 bytes till dst is block (64 byte) aligned. use
	! double word copies.

	alignaddr %i1, %g0, %g1
	ldd	[%g1], %d0
	add	%g1, 8, %g1
6:
	ldd	[%g1], %d2
	add	%g1, 8, %g1
	subcc	%i3, 8, %i3
	faligndata %d0, %d2, %d8
	std	%d8, [%i0]
	add	%i1, 8, %i1
	bz,pn	%ncc, blalign
	add	%i0, 8, %i0
	ldd	[%g1], %d0
	add	%g1, 8, %g1
	subcc	%i3, 8, %i3
	faligndata %d2, %d0, %d8
	std	%d8, [%i0]
	add	%i1, 8, %i1
	bgu,pn	%ncc, 6b
	add	%i0, 8, %i0

blalign:
	membar	#StoreLoad
	! %i2 = total length
	! %i3 = blocks	(length - 64) / 64
	! %i4 = doubles remaining  (length - blocks)
	sub	%i2, 64, %i3
	andn	%i3, 63, %i3
	sub	%i2, %i3, %i4
	andn	%i4, 7, %i4
	sub	%i4, 16, %i4
	sub	%i2, %i4, %i2
	sub	%i2, %i3, %i2
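	! The arithmetic above, restated (len is the value of %i2 on entry
	! to blalign):
	!	blocks  = (len - 64) & ~63;		/* %i3 */
	!	doubles = ((len - blocks) & ~7) - 16;	/* %i4 */
	!	bytes   = len - blocks - doubles;	/* %i2, trailing bytes */
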
	andn	%i1, 0x3F, %l7		! blk aligned address
	alignaddr %i1, %g0, %g0		! gen %gsr

	srl	%i1, 3, %l5		! bits 3,4,5 are now least sig in %l5
	andcc	%l5, 7, %l6		! mask everything except bits 1,2,3
	add	%i1, %i4, %i1
	add	%i1, %i3, %i1

	be,a	%ncc, 1f		! branch taken if src is 64-byte aligned
	ldda	[%l7]ASI_BLK_P, %d0

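	! src is not 64-byte aligned: load individually only those doubles
	! of the first source block that are needed, starting with the one
	! containing src.  The call writes its own address into %o7; adding
	! 4 * %l6 and jumping to %o7 + 16 enters the ldd sequence below at
	! the ldd for double number %l6 of the block.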
	call	.+8			! get the address of this instruction in %o7
	sll	%l6, 2, %l4
	add	%o7, %l4, %o7
	jmp	%o7 + 16		! jump to the starting ldd instruction
	nop
	ldd	[%l7+8], %d2
	ldd	[%l7+16], %d4
	ldd	[%l7+24], %d6
	ldd	[%l7+32], %d8
	ldd	[%l7+40], %d10
	ldd	[%l7+48], %d12
	ldd	[%l7+56], %d14
1:
	add	%l7, 64, %l7
	ldda	[%l7]ASI_BLK_P, %d16
	add	%l7, 64, %l7
	ldda	[%l7]ASI_BLK_P, %d32
	add	%l7, 64, %l7
	sub	%i3, 128, %i3

	! switch statement to get us to the right 8 byte blk within a
	! 64 byte block

	cmp	%l6, 4
	bgeu,a	hlf
	cmp	%l6, 6
	cmp	%l6, 2
	bgeu,a	sqtr
	nop
	cmp	%l6, 1
	be,a	seg1
	nop
	ba	seg0
	nop
sqtr:
	be,a	seg2
	nop
	ba,a	seg3
	nop

hlf:
	bgeu,a	fqtr
	nop
	cmp	%l6, 5
	be,a	seg5
	nop
	ba	seg4
	nop
fqtr:
	be,a	seg6
	nop
	ba	seg7
	nop

#define	FALIGN_D0			\
	faligndata %d0, %d2, %d48	;\
	faligndata %d2, %d4, %d50	;\
	faligndata %d4, %d6, %d52	;\
	faligndata %d6, %d8, %d54	;\
	faligndata %d8, %d10, %d56	;\
	faligndata %d10, %d12, %d58	;\
	faligndata %d12, %d14, %d60	;\
	faligndata %d14, %d16, %d62

#define	FALIGN_D16			\
	faligndata %d16, %d18, %d48	;\
	faligndata %d18, %d20, %d50	;\
	faligndata %d20, %d22, %d52	;\
	faligndata %d22, %d24, %d54	;\
	faligndata %d24, %d26, %d56	;\
	faligndata %d26, %d28, %d58	;\
	faligndata %d28, %d30, %d60	;\
	faligndata %d30, %d32, %d62

#define	FALIGN_D32			\
	faligndata %d32, %d34, %d48	;\
	faligndata %d34, %d36, %d50	;\
	faligndata %d36, %d38, %d52	;\
	faligndata %d38, %d40, %d54	;\
	faligndata %d40, %d42, %d56	;\
	faligndata %d42, %d44, %d58	;\
	faligndata %d44, %d46, %d60	;\
	faligndata %d46, %d0, %d62

seg0:
	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
	FALIGN_D0
	ldda	[%l7]ASI_BLK_P, %d0
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	add	%i0, 64, %i0
	! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
	FALIGN_D16
	ldda	[%l7]ASI_BLK_P, %d16
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	add	%i0, 64, %i0
	! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
	FALIGN_D32
	ldda	[%l7]ASI_BLK_P, %d32
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 2f
	add	%i0, 64, %i0
	ba,a,pt	%ncc, seg0

0:
	FALIGN_D16
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D32
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd0
	add	%i0, 64, %i0

1:
	FALIGN_D32
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D0
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd16
	add	%i0, 64, %i0

2:
	FALIGN_D0
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D16
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd32
	add	%i0, 64, %i0

#define	FALIGN_D2			\
	faligndata %d2, %d4, %d48	;\
	faligndata %d4, %d6, %d50	;\
	faligndata %d6, %d8, %d52	;\
	faligndata %d8, %d10, %d54	;\
	faligndata %d10, %d12, %d56	;\
	faligndata %d12, %d14, %d58	;\
	faligndata %d14, %d16, %d60	;\
	faligndata %d16, %d18, %d62

#define	FALIGN_D18			\
	faligndata %d18, %d20, %d48	;\
	faligndata %d20, %d22, %d50	;\
	faligndata %d22, %d24, %d52	;\
	faligndata %d24, %d26, %d54	;\
	faligndata %d26, %d28, %d56	;\
	faligndata %d28, %d30, %d58	;\
	faligndata %d30, %d32, %d60	;\
	faligndata %d32, %d34, %d62

#define	FALIGN_D34			\
	faligndata %d34, %d36, %d48	;\
	faligndata %d36, %d38, %d50	;\
	faligndata %d38, %d40, %d52	;\
	faligndata %d40, %d42, %d54	;\
	faligndata %d42, %d44, %d56	;\
	faligndata %d44, %d46, %d58	;\
	faligndata %d46, %d0, %d60	;\
	faligndata %d0, %d2, %d62

seg1:
	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
	FALIGN_D2
	ldda	[%l7]ASI_BLK_P, %d0
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	add	%i0, 64, %i0
	! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
	FALIGN_D18
	ldda	[%l7]ASI_BLK_P, %d16
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	add	%i0, 64, %i0
	! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
	FALIGN_D34
	ldda	[%l7]ASI_BLK_P, %d32
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 2f
	add	%i0, 64, %i0
	ba,a,pt	%ncc, seg1
0:
	FALIGN_D18
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D34
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd2
	add	%i0, 64, %i0

1:
	FALIGN_D34
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D2
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd18
	add	%i0, 64, %i0

2:
	FALIGN_D2
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D18
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd34
	add	%i0, 64, %i0

#define	FALIGN_D4			\
	faligndata %d4, %d6, %d48	;\
	faligndata %d6, %d8, %d50	;\
	faligndata %d8, %d10, %d52	;\
	faligndata %d10, %d12, %d54	;\
	faligndata %d12, %d14, %d56	;\
	faligndata %d14, %d16, %d58	;\
	faligndata %d16, %d18, %d60	;\
	faligndata %d18, %d20, %d62

#define	FALIGN_D20			\
	faligndata %d20, %d22, %d48	;\
	faligndata %d22, %d24, %d50	;\
	faligndata %d24, %d26, %d52	;\
	faligndata %d26, %d28, %d54	;\
	faligndata %d28, %d30, %d56	;\
	faligndata %d30, %d32, %d58	;\
	faligndata %d32, %d34, %d60	;\
	faligndata %d34, %d36, %d62

#define	FALIGN_D36			\
	faligndata %d36, %d38, %d48	;\
	faligndata %d38, %d40, %d50	;\
	faligndata %d40, %d42, %d52	;\
	faligndata %d42, %d44, %d54	;\
	faligndata %d44, %d46, %d56	;\
	faligndata %d46, %d0, %d58	;\
	faligndata %d0, %d2, %d60	;\
	faligndata %d2, %d4, %d62

seg2:
	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
	FALIGN_D4
	ldda	[%l7]ASI_BLK_P, %d0
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	add	%i0, 64, %i0
	! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
	FALIGN_D20
	ldda	[%l7]ASI_BLK_P, %d16
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	add	%i0, 64, %i0
	! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
	FALIGN_D36
	ldda	[%l7]ASI_BLK_P, %d32
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 2f
	add	%i0, 64, %i0
	ba,a,pt	%ncc, seg2

0:
	FALIGN_D20
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D36
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd4
	add	%i0, 64, %i0

1:
	FALIGN_D36
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D4
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd20
	add	%i0, 64, %i0

2:
	FALIGN_D4
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D20
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd36
	add	%i0, 64, %i0

#define	FALIGN_D6			\
	faligndata %d6, %d8, %d48	;\
	faligndata %d8, %d10, %d50	;\
	faligndata %d10, %d12, %d52	;\
	faligndata %d12, %d14, %d54	;\
	faligndata %d14, %d16, %d56	;\
	faligndata %d16, %d18, %d58	;\
	faligndata %d18, %d20, %d60	;\
	faligndata %d20, %d22, %d62

#define	FALIGN_D22			\
	faligndata %d22, %d24, %d48	;\
	faligndata %d24, %d26, %d50	;\
	faligndata %d26, %d28, %d52	;\
	faligndata %d28, %d30, %d54	;\
	faligndata %d30, %d32, %d56	;\
	faligndata %d32, %d34, %d58	;\
	faligndata %d34, %d36, %d60	;\
	faligndata %d36, %d38, %d62

#define	FALIGN_D38			\
	faligndata %d38, %d40, %d48	;\
	faligndata %d40, %d42, %d50	;\
	faligndata %d42, %d44, %d52	;\
	faligndata %d44, %d46, %d54	;\
	faligndata %d46, %d0, %d56	;\
	faligndata %d0, %d2, %d58	;\
	faligndata %d2, %d4, %d60	;\
	faligndata %d4, %d6, %d62

seg3:
	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
	FALIGN_D6
	ldda	[%l7]ASI_BLK_P, %d0
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	add	%i0, 64, %i0
	! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
	FALIGN_D22
	ldda	[%l7]ASI_BLK_P, %d16
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	add	%i0, 64, %i0
	! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
	FALIGN_D38
	ldda	[%l7]ASI_BLK_P, %d32
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 2f
	add	%i0, 64, %i0
	ba,a,pt	%ncc, seg3

0:
	FALIGN_D22
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D38
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd6
	add	%i0, 64, %i0

1:
	FALIGN_D38
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D6
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd22
	add	%i0, 64, %i0

2:
	FALIGN_D6
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D22
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd38
	add	%i0, 64, %i0

#define	FALIGN_D8			\
	faligndata %d8, %d10, %d48	;\
	faligndata %d10, %d12, %d50	;\
	faligndata %d12, %d14, %d52	;\
	faligndata %d14, %d16, %d54	;\
	faligndata %d16, %d18, %d56	;\
	faligndata %d18, %d20, %d58	;\
	faligndata %d20, %d22, %d60	;\
	faligndata %d22, %d24, %d62

#define	FALIGN_D24			\
	faligndata %d24, %d26, %d48	;\
	faligndata %d26, %d28, %d50	;\
	faligndata %d28, %d30, %d52	;\
	faligndata %d30, %d32, %d54	;\
	faligndata %d32, %d34, %d56	;\
	faligndata %d34, %d36, %d58	;\
	faligndata %d36, %d38, %d60	;\
	faligndata %d38, %d40, %d62

#define	FALIGN_D40			\
	faligndata %d40, %d42, %d48	;\
	faligndata %d42, %d44, %d50	;\
	faligndata %d44, %d46, %d52	;\
	faligndata %d46, %d0, %d54	;\
	faligndata %d0, %d2, %d56	;\
	faligndata %d2, %d4, %d58	;\
	faligndata %d4, %d6, %d60	;\
	faligndata %d6, %d8, %d62

seg4:
	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
	FALIGN_D8
	ldda	[%l7]ASI_BLK_P, %d0
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	add	%i0, 64, %i0
	! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
	FALIGN_D24
	ldda	[%l7]ASI_BLK_P, %d16
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	add	%i0, 64, %i0
	! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
	FALIGN_D40
	ldda	[%l7]ASI_BLK_P, %d32
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 2f
	add	%i0, 64, %i0
	ba,a,pt	%ncc, seg4

0:
	FALIGN_D24
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D40
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd8
	add	%i0, 64, %i0

1:
	FALIGN_D40
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D8
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd24
	add	%i0, 64, %i0

2:
	FALIGN_D8
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D24
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd40
	add	%i0, 64, %i0

#define	FALIGN_D10			\
	faligndata %d10, %d12, %d48	;\
	faligndata %d12, %d14, %d50	;\
	faligndata %d14, %d16, %d52	;\
	faligndata %d16, %d18, %d54	;\
	faligndata %d18, %d20, %d56	;\
	faligndata %d20, %d22, %d58	;\
	faligndata %d22, %d24, %d60	;\
	faligndata %d24, %d26, %d62

#define	FALIGN_D26			\
	faligndata %d26, %d28, %d48	;\
	faligndata %d28, %d30, %d50	;\
	faligndata %d30, %d32, %d52	;\
	faligndata %d32, %d34, %d54	;\
	faligndata %d34, %d36, %d56	;\
	faligndata %d36, %d38, %d58	;\
	faligndata %d38, %d40, %d60	;\
	faligndata %d40, %d42, %d62

#define	FALIGN_D42			\
	faligndata %d42, %d44, %d48	;\
	faligndata %d44, %d46, %d50	;\
	faligndata %d46, %d0, %d52	;\
	faligndata %d0, %d2, %d54	;\
	faligndata %d2, %d4, %d56	;\
	faligndata %d4, %d6, %d58	;\
	faligndata %d6, %d8, %d60	;\
	faligndata %d8, %d10, %d62

seg5:
	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
	FALIGN_D10
	ldda	[%l7]ASI_BLK_P, %d0
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	add	%i0, 64, %i0
	! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
	FALIGN_D26
	ldda	[%l7]ASI_BLK_P, %d16
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	add	%i0, 64, %i0
	! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
	FALIGN_D42
	ldda	[%l7]ASI_BLK_P, %d32
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 2f
	add	%i0, 64, %i0
	ba,a,pt	%ncc, seg5

0:
	FALIGN_D26
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D42
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd10
	add	%i0, 64, %i0

1:
	FALIGN_D42
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D10
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd26
	add	%i0, 64, %i0

2:
	FALIGN_D10
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D26
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd42
	add	%i0, 64, %i0

#define	FALIGN_D12			\
	faligndata %d12, %d14, %d48	;\
	faligndata %d14, %d16, %d50	;\
	faligndata %d16, %d18, %d52	;\
	faligndata %d18, %d20, %d54	;\
	faligndata %d20, %d22, %d56	;\
	faligndata %d22, %d24, %d58	;\
	faligndata %d24, %d26, %d60	;\
	faligndata %d26, %d28, %d62

#define	FALIGN_D28			\
	faligndata %d28, %d30, %d48	;\
	faligndata %d30, %d32, %d50	;\
	faligndata %d32, %d34, %d52	;\
	faligndata %d34, %d36, %d54	;\
	faligndata %d36, %d38, %d56	;\
	faligndata %d38, %d40, %d58	;\
	faligndata %d40, %d42, %d60	;\
	faligndata %d42, %d44, %d62

#define	FALIGN_D44			\
	faligndata %d44, %d46, %d48	;\
	faligndata %d46, %d0, %d50	;\
	faligndata %d0, %d2, %d52	;\
	faligndata %d2, %d4, %d54	;\
	faligndata %d4, %d6, %d56	;\
	faligndata %d6, %d8, %d58	;\
	faligndata %d8, %d10, %d60	;\
	faligndata %d10, %d12, %d62

seg6:
	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
	FALIGN_D12
	ldda	[%l7]ASI_BLK_P, %d0
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	add	%i0, 64, %i0
	! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
	FALIGN_D28
	ldda	[%l7]ASI_BLK_P, %d16
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	add	%i0, 64, %i0
	! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
	FALIGN_D44
	ldda	[%l7]ASI_BLK_P, %d32
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 2f
	add	%i0, 64, %i0
	ba,a,pt	%ncc, seg6

0:
	FALIGN_D28
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D44
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd12
	add	%i0, 64, %i0

1:
	FALIGN_D44
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D12
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd28
	add	%i0, 64, %i0

2:
	FALIGN_D12
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D28
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd44
	add	%i0, 64, %i0

#define	FALIGN_D14			\
	faligndata %d14, %d16, %d48	;\
	faligndata %d16, %d18, %d50	;\
	faligndata %d18, %d20, %d52	;\
	faligndata %d20, %d22, %d54	;\
	faligndata %d22, %d24, %d56	;\
	faligndata %d24, %d26, %d58	;\
	faligndata %d26, %d28, %d60	;\
	faligndata %d28, %d30, %d62

#define	FALIGN_D30			\
	faligndata %d30, %d32, %d48	;\
	faligndata %d32, %d34, %d50	;\
	faligndata %d34, %d36, %d52	;\
	faligndata %d36, %d38, %d54	;\
	faligndata %d38, %d40, %d56	;\
	faligndata %d40, %d42, %d58	;\
	faligndata %d42, %d44, %d60	;\
	faligndata %d44, %d46, %d62

#define	FALIGN_D46			\
	faligndata %d46, %d0, %d48	;\
	faligndata %d0, %d2, %d50	;\
	faligndata %d2, %d4, %d52	;\
	faligndata %d4, %d6, %d54	;\
	faligndata %d6, %d8, %d56	;\
	faligndata %d8, %d10, %d58	;\
	faligndata %d10, %d12, %d60	;\
	faligndata %d12, %d14, %d62

seg7:
	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
	FALIGN_D14
	ldda	[%l7]ASI_BLK_P, %d0
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	add	%i0, 64, %i0
	! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
	FALIGN_D30
	ldda	[%l7]ASI_BLK_P, %d16
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	add	%i0, 64, %i0
	! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
	FALIGN_D46
	ldda	[%l7]ASI_BLK_P, %d32
	stda	%d48, [%i0]ASI_BLK_P
	add	%l7, 64, %l7
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 2f
	add	%i0, 64, %i0
	ba,a,pt	%ncc, seg7

0:
	FALIGN_D30
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D46
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd14
	add	%i0, 64, %i0

1:
	FALIGN_D46
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D14
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd30
	add	%i0, 64, %i0

2:
	FALIGN_D14
	stda	%d48, [%i0]ASI_BLK_P
	add	%i0, 64, %i0
	membar	#Sync
	FALIGN_D30
	stda	%d48, [%i0]ASI_BLK_P
	ba,pt	%ncc, blkd46
	add	%i0, 64, %i0

	!
	! dribble out the last partial block
	!
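	! Each blkdN below stores one more double that is already staged in
	! the FP registers and falls through to the next; blkd14, blkd30 and
	! blkd46 instead move the last staged double to %d0 and join blkleft,
	! which keeps fetching doubles with ldd until %i4 is exhausted.  The
	! remaining %i2 bytes are finished by the byte loop at blkdone.
	!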
blkd0:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d0, %d2, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd2:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d2, %d4, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd4:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d4, %d6, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd6:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d6, %d8, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd8:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d8, %d10, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd10:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d10, %d12, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd12:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d12, %d14, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd14:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	fsrc1	%d14, %d0
	ba,a,pt	%ncc, blkleft

blkd16:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d16, %d18, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd18:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d18, %d20, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd20:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d20, %d22, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd22:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d22, %d24, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd24:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d24, %d26, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd26:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d26, %d28, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd28:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d28, %d30, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd30:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	fsrc1	%d30, %d0
	ba,a,pt	%ncc, blkleft
blkd32:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d32, %d34, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd34:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d34, %d36, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd36:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d36, %d38, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd38:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d38, %d40, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd40:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d40, %d42, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd42:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d42, %d44, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd44:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	faligndata %d44, %d46, %d48
	std	%d48, [%i0]
	add	%i0, 8, %i0
blkd46:
	subcc	%i4, 8, %i4
	blu,pn	%ncc, blkdone
	fsrc1	%d46, %d0

blkleft:
	ldd	[%l7], %d2
	add	%l7, 8, %l7
	subcc	%i4, 8, %i4
	faligndata %d0, %d2, %d8
	std	%d8, [%i0]
	blu,pn	%ncc, blkdone
	add	%i0, 8, %i0
	ldd	[%l7], %d0
	add	%l7, 8, %l7
	subcc	%i4, 8, %i4
	faligndata %d2, %d0, %d8
	std	%d8, [%i0]
	bgeu,pt	%ncc, blkleft
	add	%i0, 8, %i0

blkdone:
	tst	%i2
	bz,pt	%ncc, blkexit
	and	%l3, 0x4, %l3		! fprs.du = fprs.dl = 0

7:	ldub	[%i1], %i4
	inc	%i1
	inc	%i0
	deccc	%i2
	bgu	%ncc, 7b
	stb	%i4, [%i0 - 1]

blkexit:
	and	%l3, 0x4, %l3		! fprs.du = fprs.dl = 0
	wr	%l3, %g0, %fprs		! fprs = l3 - restore fprs.fef
	membar	#StoreLoad|#StoreStore
	ret
	restore	%i5, %g0, %o0

	SET_SIZE(memcpy)
	SET_SIZE(__align_cpy_1)