/* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy.
 *
 * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
 * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
 */

/* Build configuration.
 *
 * In a kernel build VISEntry/VISExit and ASI_BLK_P come from the
 * included headers.  Otherwise they are defined here: VISEntry saves
 * %fprs in %o5 and writes FPRS_FEF to %fprs, VISExit writes
 * (%o5 & FPRS_FEF) back to %fprs.
 *
 * GLOBAL_SPARE names the extra global scratch register (%g7 in the
 * kernel, %g5 otherwise).
 */
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	g7
#else
#define GLOBAL_SPARE	g5
#define ASI_BLK_P 0xf0
#define FPRS_FEF  0x04
#ifdef MEMCPY_DEBUG
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
		     clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
#endif

/* Overridable hooks.  A file that includes this one may pre-define any
 * of these to wrap the loads, stores and return value; the defaults
 * below emit the plain instruction / value.
 */
#ifndef EX_LD
#define EX_LD(x)	x
#endif

#ifndef EX_ST
#define EX_ST(x)	x
#endif

#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef LOAD_BLK
#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

#ifndef STORE_BLK
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	memcpy
#endif

#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

/* Realign nine consecutive source doublewords f1..f9 into the eight
 * output doublewords %f48-%f62 (8 instructions).
 */
#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)		\
	faligndata		%f1, %f2, %f48;			\
	faligndata		%f2, %f3, %f50;			\
	faligndata		%f3, %f4, %f52;			\
	faligndata		%f4, %f5, %f54;			\
	faligndata		%f5, %f6, %f56;			\
	faligndata		%f6, %f7, %f58;			\
	faligndata		%f7, %f8, %f60;			\
	faligndata		%f8, %f9, %f62;

/* One step of the block loop (6 instructions): block-load the next
 * 64 source bytes into fdest, block-store fsrc to dest, advance src,
 * subtract 64 from len and leave through jmptgt when len reaches zero.
 * The dest advance sits in the branch delay slot, so it happens on
 * both paths.
 */
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)	\
	EX_LD(LOAD_BLK(%src, %fdest));				\
	EX_ST(STORE_BLK(%fsrc, %dest));				\
	add			%src, 0x40, %src;		\
	subcc			%len, 0x40, %len;		\
	be,pn			%xcc, jmptgt;			\
	 add			%dest, 0x40, %dest;

#define LOOP_CHUNK1(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
#define LOOP_CHUNK2(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
#define LOOP_CHUNK3(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)

/* Block-store fsrc to dest, advance dest by 64, then membar #Sync
 * (3 instructions).
 */
#define DO_SYNC			membar	#Sync;
#define STORE_SYNC(dest, fsrc)				\
	EX_ST(STORE_BLK(%fsrc, %dest));			\
	add			%dest, 0x40, %dest;	\
	DO_SYNC

/* Block-store fsrc to dest, advance dest by 64, then branch to target
 * (4 instructions, including the nop in the delay slot).
 */
#define STORE_JUMP(dest, fsrc, target)			\
	EX_ST(STORE_BLK(%fsrc, %dest));			\
	add			%dest, 0x40, %dest;	\
	ba,pt			%xcc, target;		\
	 nop;

/* Tail step: if fewer than 8 bytes remain in 'left' go to 95f,
 * otherwise store one realigned doubleword built from f0/f1.  The
 * faligndata is in the delay slot and executes on both paths.
 */
#define FINISH_VISCHUNK(dest, f0, f1, left)	\
	subcc			%left, 8, %left;\
	bl,pn			%xcc, 95f;	\
	 faligndata		%f0, %f1, %f48;	\
	EX_ST(STORE(std, %f48, %dest));		\
	add			%dest, 8, %dest;

/* Tail step for the last doubleword of a register group: if fewer
 * than 8 bytes remain go to 95f; the delay slot copies f0 into f1.
 * Execution then falls through to whatever follows the macro.
 */
#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
	subcc			%left, 8, %left;	\
	bl,pn			%xcc, 95f;		\
	 fsrc2			%f0, %f1;

/* As UNEVEN_VISCHUNK_LAST, but continue at 93f instead of falling
 * through.
 */
#define UNEVEN_VISCHUNK(dest, f0, f1, left)		\
	UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
	ba,a,pt			%xcc, 93f;

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
	.align		64

	/* FUNC_NAME(dst, src, len): copy 'len' bytes from 'src' to 'dst'.
	 *
	 * In:   %o0 = dst, %o1 = src, %o2 = len
	 * Out:  %o0 = EX_RETVAL(dst); the incoming dst is kept in %o4.
	 *
	 * Software trap 5 is raised when (len >> 31) is non-zero.
	 *
	 * Paths, selected by length:
	 *   len == 0          return at once (85:)
	 *   0 < len < 16      word loop when dst, src and len are all
	 *                     multiples of 4, else byte loop (80:, 90:)
	 *   16 <= len < 320   64-bit integer copy; a shift-and-merge
	 *                     loop is used when src is not 8-byte
	 *                     aligned once dst is (70:, 75:, 8:)
	 *   len >= 320        VIS block loads/stores through %f0-%f62
	 *
	 * Scratch: %o3, %o5, %g1, %g2, %g3, %GLOBAL_SPARE, icc/xcc.
	 *
	 * Layout convention: an instruction indented by one extra space
	 * sits in the delay slot of the branch above it.
	 */
	.globl		FUNC_NAME
	.type		FUNC_NAME,#function
FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
	srlx		%o2, 31, %g2
	cmp		%g2, 0
	tne		%xcc, 5
	PREAMBLE
	mov		%o0, %o4
	cmp		%o2, 0
	be,pn		%XCC, 85f
	 or		%o0, %o1, %o3
	cmp		%o2, 16
	blu,a,pn	%XCC, 80f
	 or		%o3, %o2, %o3

	cmp		%o2, (5 * 64)
	blu,pt		%XCC, 70f
	 andcc		%o3, 0x7, %g0

	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  */
	VISEntry

	/* Is 'dst' already aligned on a 64-byte boundary? */
	andcc		%o0, 0x3f, %g2
	be,pt		%XCC, 2f

	/* Compute abs((dst & 0x3f) - 0x40) into %g2.  This is the number
	 * of bytes to copy to make 'dst' 64-byte aligned.  We pre-
	 * subtract this from 'len'.
	 *
	 * The first instruction below is the delay slot of the branch
	 * above, so %GLOBAL_SPARE = dst - src is set on both paths.
	 */
	 sub		%o0, %o1, %GLOBAL_SPARE
	sub		%g2, 0x40, %g2
	sub		%g0, %g2, %g2
	sub		%o2, %g2, %o2
	andcc		%g2, 0x7, %g1
	be,pt		%icc, 2f
	 and		%g2, 0x38, %g2

	/* Copy %g1 single bytes; the store address is src + (dst - src). */
1:	subcc		%g1, 0x1, %g1
	EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
	EX_ST(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
	bgu,pt		%XCC, 1b
	 add		%o1, 0x1, %o1

	add		%o1, %GLOBAL_SPARE, %o0

	/* %g2 = bytes (a multiple of 8) still needed to reach the 64-byte
	 * boundary.  %g1 = src & 7 is remembered and alignaddr rounds
	 * %o1 down to a doubleword boundary; both happen even when the
	 * branch to 3f is taken.
	 */
2:	cmp		%g2, 0x0
	and		%o1, 0x7, %g1
	be,pt		%icc, 3f
	 alignaddr	%o1, %g0, %o1

	/* Copy %g2 bytes, 8 at a time, alternating %f4/%f6 as the older
	 * and newer source doubleword.
	 */
	EX_LD(LOAD(ldd, %o1, %f4))
1:	EX_LD(LOAD(ldd, %o1 + 0x8, %f6))
	add		%o1, 0x8, %o1
	subcc		%g2, 0x8, %g2
	faligndata	%f4, %f6, %f0
	EX_ST(STORE(std, %f0, %o0))
	be,pn		%icc, 3f
	 add		%o0, 0x8, %o0

	EX_LD(LOAD(ldd, %o1 + 0x8, %f4))
	add		%o1, 0x8, %o1
	subcc		%g2, 0x8, %g2
	faligndata	%f6, %f4, %f0
	EX_ST(STORE(std, %f0, %o0))
	bne,pt		%icc, 1b
	 add		%o0, 0x8, %o0

	/* Destination is 64-byte aligned.
	 *
	 * Values set up below:
	 *   %GLOBAL_SPARE = (len - 0x40) & ~0x3f, less the 0x80 taken
	 *                   off after the preloads; the block loop
	 *                   counts it down by 0x40 per step
	 *   %g2           = ((%o1 + %g1) >> 3) & 7, used to pick the
	 *                   loop instance
	 *   %g3           = ((len - %GLOBAL_SPARE) & ~7) - 8, counted
	 *                   down by the doubleword tail (40: - 63:, 93:)
	 *   %o2           = bytes left for the final byte loop (95:)
	 *   %g1           = source address used by that byte loop
	 *   %o1           = rounded down to a 64-byte boundary for the
	 *                   block loads
	 */
3:
	membar		  #LoadStore | #StoreStore | #StoreLoad

	subcc		%o2, 0x40, %GLOBAL_SPARE
	add		%o1, %g1, %g1
	andncc		%GLOBAL_SPARE, (0x40 - 1), %GLOBAL_SPARE
	srl		%g1, 3, %g2
	sub		%o2, %GLOBAL_SPARE, %g3
	andn		%o1, (0x40 - 1), %o1
	and		%g2, 7, %g2
	andncc		%g3, 0x7, %g3
	fsrc2		%f0, %f2
	sub		%g3, 0x8, %g3
	sub		%o2, %GLOBAL_SPARE, %o2

	add		%g1, %GLOBAL_SPARE, %g1
	subcc		%o2, %g3, %o2

	/* Preload three 64-byte blocks into %f0-%f14, %f16-%f30 and
	 * %f32-%f46.
	 */
	EX_LD(LOAD_BLK(%o1, %f0))
	add		%o1, 0x40, %o1
	add		%g1, %g3, %g1
	EX_LD(LOAD_BLK(%o1, %f16))
	add		%o1, 0x40, %o1
	sub		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
	EX_LD(LOAD_BLK(%o1, %f32))
	add		%o1, 0x40, %o1

	/* There are 8 instances of the unrolled loop,
	 * one for each possible alignment of the
	 * source buffer.  Each loop instance is 452
	 * bytes.
	 *
	 * 452 bytes is 113 instructions: 3 * (8 + 6) + 2 for the loop
	 * body plus 3 * (8 + 3 + 8 + 4) for the three exits.  The
	 * sequence below computes %g2 * 452 as ((%g2 * 7) * 16 + %g2) * 4
	 * and jumps that far past the first instance.  Anything that
	 * changes the number of instructions an instance assembles to
	 * (including overrides of EX_LD/EX_ST/LOAD_BLK/STORE_BLK)
	 * breaks this dispatch.
	 */
	sll		%g2, 3, %o3
	sub		%o3, %g2, %o3
	sllx		%o3, 4, %o3
	add		%o3, %g2, %o3
	sllx		%o3, 2, %g2
1:	rd		%pc, %o3
	add		%o3, %lo(1f - 1b), %o3
	jmpl		%o3 + %g2, %g0
	 nop

	/* In every instance the closing "ba 1b+4" re-enters the loop at
	 * its second instruction; the first faligndata of the next
	 * iteration is issued from the delay slot instead.  Local labels
	 * 1:/2:/3: after the loop body are the exits taken from
	 * LOOP_CHUNK1/2/3 respectively.
	 */
	.align		64
1:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f0, %f2, %f48
1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_SYNC(o0, f48)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_JUMP(o0, f48, 40f)
2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_SYNC(o0, f48)
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_JUMP(o0, f48, 48f)
3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_SYNC(o0, f48)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_JUMP(o0, f48, 56f)

1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f2, %f4, %f48
1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_SYNC(o0, f48)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_JUMP(o0, f48, 41f)
2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_SYNC(o0, f48)
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_JUMP(o0, f48, 49f)
3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_SYNC(o0, f48)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_JUMP(o0, f48, 57f)

1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f4, %f6, %f48
1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_SYNC(o0, f48)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_JUMP(o0, f48, 42f)
2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_SYNC(o0, f48)
	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_JUMP(o0, f48, 50f)
3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_SYNC(o0, f48)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_JUMP(o0, f48, 58f)

1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f6, %f8, %f48
1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_SYNC(o0, f48)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_JUMP(o0, f48, 43f)
2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_SYNC(o0, f48)
	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_JUMP(o0, f48, 51f)
3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_SYNC(o0, f48)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_JUMP(o0, f48, 59f)

1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f8, %f10, %f48
1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_SYNC(o0, f48)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_JUMP(o0, f48, 44f)
2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_SYNC(o0, f48)
	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_JUMP(o0, f48, 52f)
3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_SYNC(o0, f48)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_JUMP(o0, f48, 60f)

1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f10, %f12, %f48
1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_SYNC(o0, f48)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_JUMP(o0, f48, 45f)
2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_SYNC(o0, f48)
	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_JUMP(o0, f48, 53f)
3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_SYNC(o0, f48)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_JUMP(o0, f48, 61f)

1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f12, %f14, %f48
1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_SYNC(o0, f48)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_JUMP(o0, f48, 46f)
2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_SYNC(o0, f48)
	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_JUMP(o0, f48, 54f)
3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_SYNC(o0, f48)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_JUMP(o0, f48, 62f)

1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f14, %f16, %f48
1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_SYNC(o0, f48)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_JUMP(o0, f48, 47f)
2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_SYNC(o0, f48)
	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_JUMP(o0, f48, 55f)
3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_SYNC(o0, f48)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_JUMP(o0, f48, 63f)

	/* Doubleword tail.  The loop exits enter this table at the entry
	 * matching the next unconsumed source register and fall through
	 * from one entry to the next.  Entries 47, 55 and 63 are the
	 * last register of their group and hand over to the load loop
	 * at 93: (63 by falling through).
	 */
40:	FINISH_VISCHUNK(o0, f0,  f2,  g3)
41:	FINISH_VISCHUNK(o0, f2,  f4,  g3)
42:	FINISH_VISCHUNK(o0, f4,  f6,  g3)
43:	FINISH_VISCHUNK(o0, f6,  f8,  g3)
44:	FINISH_VISCHUNK(o0, f8,  f10, g3)
45:	FINISH_VISCHUNK(o0, f10, f12, g3)
46:	FINISH_VISCHUNK(o0, f12, f14, g3)
47:	UNEVEN_VISCHUNK(o0, f14, f0,  g3)
48:	FINISH_VISCHUNK(o0, f16, f18, g3)
49:	FINISH_VISCHUNK(o0, f18, f20, g3)
50:	FINISH_VISCHUNK(o0, f20, f22, g3)
51:	FINISH_VISCHUNK(o0, f22, f24, g3)
52:	FINISH_VISCHUNK(o0, f24, f26, g3)
53:	FINISH_VISCHUNK(o0, f26, f28, g3)
54:	FINISH_VISCHUNK(o0, f28, f30, g3)
55:	UNEVEN_VISCHUNK(o0, f30, f0,  g3)
56:	FINISH_VISCHUNK(o0, f32, f34, g3)
57:	FINISH_VISCHUNK(o0, f34, f36, g3)
58:	FINISH_VISCHUNK(o0, f36, f38, g3)
59:	FINISH_VISCHUNK(o0, f38, f40, g3)
60:	FINISH_VISCHUNK(o0, f40, f42, g3)
61:	FINISH_VISCHUNK(o0, f42, f44, g3)
62:	FINISH_VISCHUNK(o0, f44, f46, g3)
63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0,  g3)

	/* Load one doubleword at a time, alternating %f2/%f0 as the
	 * newest source doubleword, and store the realigned result
	 * until %g3 goes negative.
	 */
93:	EX_LD(LOAD(ldd, %o1, %f2))
	add		%o1, 8, %o1
	subcc		%g3, 8, %g3
	faligndata	%f0, %f2, %f8
	EX_ST(STORE(std, %f8, %o0))
	bl,pn		%xcc, 95f
	 add		%o0, 8, %o0
	EX_LD(LOAD(ldd, %o1, %f0))
	add		%o1, 8, %o1
	subcc		%g3, 8, %g3
	faligndata	%f2, %f0, %f8
	EX_ST(STORE(std, %f8, %o0))
	bge,pt		%xcc, 93b
	 add		%o0, 8, %o0

	/* Final bytes: switch %o1 to the byte-exact source address kept
	 * in %g1 and copy the remaining %o2 bytes one at a time.
	 */
95:	brz,pt		%o2, 2f
	 mov		%g1, %o1

1:	EX_LD(LOAD(ldub, %o1, %o3))
	add		%o1, 1, %o1
	subcc		%o2, 1, %o2
	EX_ST(STORE(stb, %o3, %o0))
	bne,pt		%xcc, 1b
	 add		%o0, 1, %o0

2:	membar		#StoreLoad | #StoreStore
	VISExit
	retl
	 mov		EX_RETVAL(%o4), %o0

	.align		64
70:	/* 16 <= len < (5 * 64) */
	/* Condition codes come from "andcc (dst | src), 7" in the delay
	 * slot of the branch that got us here; %o3 becomes dst - src.
	 */
	bne,pn		%XCC, 75f
	 sub		%o0, %o1, %o3

	/* dst and src both 8-byte aligned: copy 16 bytes per iteration,
	 * storing through src + (dst - src).
	 */
72:	andn		%o2, 0xf, %GLOBAL_SPARE
	and		%o2, 0xf, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
	EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
	subcc		%GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
	EX_ST(STORE(stx, %o5, %o1 + %o3))
	add		%o1, 0x8, %o1
	EX_ST(STORE(stx, %g1, %o1 + %o3))
	bgu,pt		%XCC, 1b
	 add		%o1, 0x8, %o1
	/* Fewer than 16 bytes left: optional 8-byte piece, optional
	 * 4-byte piece, then any remainder via the byte loop at 90:.
	 */
73:	andcc		%o2, 0x8, %g0
	be,pt		%XCC, 1f
	 nop
	EX_LD(LOAD(ldx, %o1, %o5))
	sub		%o2, 0x8, %o2
	EX_ST(STORE(stx, %o5, %o1 + %o3))
	add		%o1, 0x8, %o1
1:	andcc		%o2, 0x4, %g0
	be,pt		%XCC, 1f
	 nop
	EX_LD(LOAD(lduw, %o1, %o5))
	sub		%o2, 0x4, %o2
	EX_ST(STORE(stw, %o5, %o1 + %o3))
	add		%o1, 0x4, %o1
1:	cmp		%o2, 0
	be,pt		%XCC, 85f
	 nop
	ba,pt		%xcc, 90f
	 nop

	/* dst and/or src not 8-byte aligned.  If dst is misaligned, copy
	 * 8 - (dst & 7) single bytes first so that dst becomes aligned.
	 */
75:	andcc		%o0, 0x7, %g1
	sub		%g1, 0x8, %g1
	be,pn		%icc, 2f
	 sub		%g0, %g1, %g1
	sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1, %o5))
	subcc		%g1, 1, %g1
	EX_ST(STORE(stb, %o5, %o1 + %o3))
	bgu,pt		%icc, 1b
	 add		%o1, 1, %o1

	/* %o0 = current dst.  If src is now 8-byte aligned as well,
	 * rejoin the aligned code; otherwise %g1 = (src & 7) * 8 is the
	 * bit shift for the merge loop at 8:.
	 */
2:	add		%o1, %o3, %o0
	andcc		%o1, 0x7, %g1
	bne,pt		%icc, 8f
	 sll		%g1, 3, %g1

	cmp		%o2, 16
	bgeu,pt		%icc, 72b
	 nop
	ba,a,pt		%xcc, 73b

	/* Shift-and-merge copy: read aligned source doublewords and
	 * build each destination doubleword from the previous word
	 * shifted left by %g1 bits and the next word shifted right by
	 * %o3 = 64 - %g1 bits.
	 */
8:	mov		64, %o3
	andn		%o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1, %g2))
	sub		%o3, %g1, %o3
	andn		%o2, 0x7, %GLOBAL_SPARE
	sllx		%g2, %g1, %g2
1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
	subcc		%GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
	add		%o1, 0x8, %o1
	srlx		%g3, %o3, %o5
	or		%o5, %g2, %o5
	EX_ST(STORE(stx, %o5, %o0))
	add		%o0, 0x8, %o0
	bgu,pt		%icc, 1b
	 sllx		%g3, %g1, %g2

	/* Turn the bit shift back into a byte offset, restore the
	 * byte-exact src and finish the last (len & 7) bytes at 90:.
	 */
	srl		%g1, 3, %g1
	andcc		%o2, 0x7, %o2
	be,pn		%icc, 85f
	 add		%o1, %g1, %o1
	ba,pt		%xcc, 90f
	 sub		%o0, %o1, %o3

	.align		64
80:	/* 0 < len < 16 */
	/* %o3 = dst | src | len on entry; use the word loop only when
	 * all three are multiples of 4.  %o3 becomes dst - src.
	 */
	andcc		%o3, 0x3, %g0
	bne,pn		%XCC, 90f
	 sub		%o0, %o1, %o3

1:	EX_LD(LOAD(lduw, %o1, %g1))
	subcc		%o2, 4, %o2
	EX_ST(STORE(stw, %g1, %o1 + %o3))
	bgu,pt		%XCC, 1b
	 add		%o1, 4, %o1

85:	retl
	 mov		EX_RETVAL(%o4), %o0

	/* Byte loop: copies %o2 (> 0) bytes; %o3 must hold dst - src. */
	.align		32
90:	EX_LD(LOAD(ldub, %o1, %g1))
	subcc		%o2, 1, %o2
	EX_ST(STORE(stb, %g1, %o1 + %o3))
	bgu,pt		%XCC, 90b
	 add		%o1, 1, %o1
	retl
	 mov		EX_RETVAL(%o4), %o0

	.size		FUNC_NAME, .-FUNC_NAME