/* SPDX-License-Identifier: GPL-2.0 */
/* NG4memcpy.S: Niagara-4 optimized memcpy.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

/* This file is a template: every macro below is only defined when the
 * includer has not already provided it, so the same instruction stream
 * can be reused with different load/store/fault-handling wrappers.
 */

#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF  0x04

/* On T4 it is very expensive to access ASRs like %fprs and
 * %asi, avoiding a read or a write can save ~50 cycles.
 */
#define FPU_ENTER			\
	rd	%fprs, %o5;		\
	andcc	%o5, FPRS_FEF, %g0;	\
	be,a,pn	%icc, 999f;		\
	 wr	%g0, FPRS_FEF, %fprs;	\
	999:

#ifdef MEMCPY_DEBUG
#define VISEntryHalf FPU_ENTER; \
		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf FPU_ENTER
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif

#define GLOBAL_SPARE	%g5
#endif

#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI	0x80		/* ASI_P */
#endif
#endif

/* When the includer supplies neither EX_LD nor EX_ST this is treated as
 * a non-user copy, which selects the VISEntryHalfFast/VISExitHalfFast
 * variants further down.
 */
#if !defined(EX_LD) && !defined(EX_ST)
#define NON_USER_COPY
#endif

/* EX_*(x, y): x is the memory access, y names a fault fixup routine
 * defined outside this file.  The defaults below drop y entirely.
 * NOTE(review): judging by their names, the fixups report the number of
 * bytes left uncopied as "%o2 + <counter register> + <constant>" --
 * confirm against the file that defines the memcpy_retl_* symbols.
 */
#ifndef EX_LD
#define EX_LD(x,y)	x
#endif
#ifndef EX_LD_FP
#define EX_LD_FP(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif
#ifndef EX_ST_FP
#define EX_ST_FP(x,y)	x
#endif


#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr)	type src, [addr]
#else
#define STORE(type,src,addr)	type##a src, [addr] %asi
#endif
#endif

#ifndef STORE_INIT
#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	NG4memcpy
#endif
#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif
	.align		64

	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
/* FUNC_NAME(dst, src, len)
 *
 * In:    %o0 = dst, %o1 = src, %o2 = len
 * Out:   %o0 = EX_RETVAL(original dst); the original dst is parked in
 *        %o3 on entry and handed back at .Lexit.
 * Uses:  %g1, %g2, %g3, GLOBAL_SPARE, %o4, %o5 as scratch, plus
 *        %f0-%f30 on the source-unaligned large path.
 *
 * A length with any bit at or above bit 31 set raises trap 5.
 *
 * Dispatch on len:
 *        0          -> .Lexit
 *        1 .. 3     -> .Ltiny    (straight-line byte copies)
 *        4 .. 19    -> .Lsmall   (word loop, or byte loop if unaligned)
 *        20 .. 127  -> .Lmedium  (32-byte / 8-byte integer loops)
 *        >= 128     -> .Llarge   (64-byte blocks per iteration)
 *
 * Layout note: an instruction indented by one extra space sits in the
 * delay slot of the branch directly above it.
 */
FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
#ifdef MEMCPY_DEBUG
	wr		%g0, 0x80, %asi
#endif
	/* Trap if len >> 31 is non-zero. */
	srlx		%o2, 31, %g2
	cmp		%g2, 0
	tne		%XCC, 5
	PREAMBLE
	mov		%o0, %o3		/* keep original dst for the return value */
	brz,pn		%o2, .Lexit
	 cmp		%o2, 3
	ble,pn		%icc, .Ltiny
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall
	 or		%o0, %o1, %g2		/* low bits of dst|src: combined alignment */
	cmp		%o2, 128
	bl,pn		%icc, .Lmedium
	 nop

.Llarge:/* len >= 0x80 */
	/* First get dest 8 byte aligned. */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1		/* %g1 = bytes needed to align dst to 8 */
	brz,pt		%g1, 51f
	 sub		%o2, %g1, %o2		/* %o2 excludes the alignment bytes */

	/* Byte loop; %g1 counts down the remaining alignment bytes. */
1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)

51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	/* Check if we can use the straight fully aligned
	 * loop, or we require the alignaddr/faligndata variant.
	 */
	andcc		%o1, 0x7, %o5
	bne,pn		%icc, .Llarge_src_unaligned
	 sub		%g0, %o0, %g1

	/* Legitimize the use of initializing stores by getting dest
	 * to be 64-byte aligned.
	 */
	and		%g1, 0x3f, %g1		/* %g1 = bytes needed to align dst to 64 */
	brz,pt		%g1, .Llarge_aligned
	 sub		%o2, %g1, %o2

	/* 8 bytes per iteration until dst reaches a 64-byte boundary. */
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
	add		%o1, 8, %o1
	subcc		%g1, 8, %g1
	add		%o0, 8, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stx, %g2, %o0 - 0x08), memcpy_retl_o2_plus_g1_plus_8)

.Llarge_aligned:
	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
	andn		%o2, 0x3f, %o4		/* %o4 = bytes handled in 64-byte blocks */
	sub		%o2, %o4, %o2		/* %o2 = tail bytes left afterwards */

	/* One 64-byte block per iteration, loads interleaved with
	 * STORE_INIT stores.  %o4 is decremented early (second
	 * instruction pair), so every later fixup adds back the part of
	 * the current block that has not been stored yet: 64, 56, ... 8.
	 */
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o4)
	add		%o1, 0x40, %o1
	EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memcpy_retl_o2_plus_o4)
	subcc		%o4, 0x40, %o4
	EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memcpy_retl_o2_plus_o4_plus_64)
	EX_ST(STORE_INIT(%g1, %o0), memcpy_retl_o2_plus_o4_plus_64)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_56)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memcpy_retl_o2_plus_o4_plus_48)
	EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_48)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memcpy_retl_o2_plus_o4_plus_40)
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_40)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_32)
	EX_ST(STORE_INIT(%o5, %o0), memcpy_retl_o2_plus_o4_plus_32)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_24)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_16)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_8)
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	/* Order the STORE_INIT stores ahead of everything that follows. */
	membar		#StoreLoad | #StoreStore

	/* Hand the tail (%o2 < 64) to the small/medium code. */
	brz,pn		%o2, .Lexit
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	 nop
	ba,a,pt		%icc, .Lmedium_noprefetch

.Lexit:	retl
	 mov		EX_RETVAL(%o3), %o0

.Llarge_src_unaligned:
	/* dst is 8-byte aligned here, src is not.  In the NON_USER_COPY
	 * build the VIS entry macro is given .Lmedium_vis_entry_fail as
	 * its bail-out label.
	 */
#ifdef NON_USER_COPY
	VISEntryHalfFast(.Lmedium_vis_entry_fail)
#else
	VISEntryHalf
#endif
	andn		%o2, 0x3f, %o4		/* %o4 = bytes handled in 64-byte blocks */
	sub		%o2, %o4, %o2		/* %o2 = tail bytes left afterwards */
	alignaddr	%o1, %g0, %g1		/* %g1 = src rounded down to 8; sets the faligndata offset */
	add		%o1, %o4, %o1		/* advance the real src past the block region up front */
	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), memcpy_retl_o2_plus_o4)
	/* Each iteration loads nine doublewords (%f0 is carried over from
	 * the previous pass, and reloaded for the next one) and merges
	 * neighbouring pairs into eight aligned doublewords %f16-%f30.
	 */
1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), memcpy_retl_o2_plus_o4)
	subcc		%o4, 0x40, %o4
	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), memcpy_retl_o2_plus_o4_plus_64)
	faligndata	%f0, %f2, %f16
	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), memcpy_retl_o2_plus_o4_plus_64)
	faligndata	%f2, %f4, %f18
	add		%g1, 0x40, %g1
	faligndata	%f4, %f6, %f20
	faligndata	%f6, %f8, %f22
	faligndata	%f8, %f10, %f24
	faligndata	%f10, %f12, %f26
	faligndata	%f12, %f14, %f28
	faligndata	%f14, %f0, %f30
	EX_ST_FP(STORE(std, %f16, %o0 + 0x00), memcpy_retl_o2_plus_o4_plus_64)
	EX_ST_FP(STORE(std, %f18, %o0 + 0x08), memcpy_retl_o2_plus_o4_plus_56)
	EX_ST_FP(STORE(std, %f20, %o0 + 0x10), memcpy_retl_o2_plus_o4_plus_48)
	EX_ST_FP(STORE(std, %f22, %o0 + 0x18), memcpy_retl_o2_plus_o4_plus_40)
	EX_ST_FP(STORE(std, %f24, %o0 + 0x20), memcpy_retl_o2_plus_o4_plus_32)
	EX_ST_FP(STORE(std, %f26, %o0 + 0x28), memcpy_retl_o2_plus_o4_plus_24)
	EX_ST_FP(STORE(std, %f28, %o0 + 0x30), memcpy_retl_o2_plus_o4_plus_16)
	EX_ST_FP(STORE(std, %f30, %o0 + 0x38), memcpy_retl_o2_plus_o4_plus_8)
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
#ifdef NON_USER_COPY
	VISExitHalfFast
#else
	VISExitHalf
#endif
	/* Hand the tail (%o2 < 64) to the small/medium code. */
	brz,pn		%o2, .Lexit
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	 nop
	ba,a,pt		%icc, .Lmedium_unaligned

#ifdef NON_USER_COPY
.Lmedium_vis_entry_fail:
	/* Recompute the dst|src alignment word that .Lmedium tests. */
	 or		%o0, %o1, %g2
#endif
.Lmedium:
	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
	andcc		%g2, 0x7, %g0		/* are both dst and src 8-byte aligned? */
	bne,pn		%icc, .Lmedium_unaligned
	 nop
.Lmedium_noprefetch:
	andncc		%o2, 0x20 - 1, %o5	/* %o5 = bytes handled in 32-byte chunks */
	be,pn		%icc, 2f
	 sub		%o2, %o5, %o2		/* %o2 = bytes left after those chunks */
	/* 32 bytes per iteration.  %o5 is decremented before the stores,
	 * so a store fixup adds back the not-yet-stored part of the
	 * chunk: 32, 24, 16, 8 for the four stores in order.
	 */
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5)
	add		%o1, 0x20, %o1
	subcc		%o5, 0x20, %o5
	EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32)
	EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24)
	/* Third store: 16 bytes of the chunk are already written, so 16
	 * remain.  This previously named the _plus_24 fixup, overstating
	 * the uncopied length by 8 bytes.
	 * NOTE(review): confirm memcpy_retl_o2_plus_o5_plus_16 is provided
	 * alongside the other memcpy_retl_* handlers.
	 */
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_16)
	EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8)
	bne,pt		%icc, 1b
	 add		%o0, 0x20, %o0
2:	andcc		%o2, 0x18, %o5		/* %o5 = remaining whole 8-byte words (0..24 bytes) */
	be,pt		%icc, 3f
	 sub		%o2, %o5, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
	add		%o1, 0x08, %o1
	add		%o0, 0x08, %o0
	subcc		%o5, 0x08, %o5
	bne,pt		%icc, 1b
	 EX_ST(STORE(stx, %g1, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
	/* At most 7 bytes left: one optional 4-byte word, then .Ltiny. */
3:	brz,pt		%o2, .Lexit
	 cmp		%o2, 0x04
	bl,pn		%icc, .Ltiny
	 nop
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2)
	add		%o1, 0x04, %o1
	add		%o0, 0x04, %o0
	subcc		%o2, 0x04, %o2
	bne,pn		%icc, .Ltiny
	 EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_4)
	ba,a,pt		%icc, .Lexit
.Lmedium_unaligned:
	/* First get dest 8 byte aligned. */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1		/* %g1 = bytes needed to align dst to 8 */
	brz,pt		%g1, 2f
	 sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
2:
	/* dst is now 8-byte aligned.  If src is too, use the aligned
	 * loops; otherwise build each output word from two neighbouring
	 * aligned source words with shifts.
	 */
	and		%o1, 0x7, %g1
	brz,pn		%g1, .Lmedium_noprefetch
	 sll		%g1, 3, %g1		/* %g1 = src misalignment in bits */
	mov		64, %g2
	sub		%g2, %g1, %g2		/* %g2 = 64 - %g1, the complementary shift */
	andn		%o1, 0x7, %o1		/* round src down to an 8-byte boundary */
	EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2)
	sllx		%o4, %g1, %o4		/* %o4 = carried-over part of the output word */
	andn		%o2, 0x08 - 1, %o5	/* %o5 = bytes handled as whole words */
	sub		%o2, %o5, %o2		/* %o2 = trailing bytes (< 8) */
1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5)
	add		%o1, 0x08, %o1
	subcc		%o5, 0x08, %o5
	srlx		%g3, %g2, GLOBAL_SPARE
	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8)
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	 sllx		%g3, %g1, %o4		/* carry for the next output word */
	srl		%g1, 3, %g1		/* back from bits to bytes */
	add		%o1, %g1, %o1		/* restore the true (unaligned) src pointer */
	brz,pn		%o2, .Lexit
	 nop
	/* NOTE(review): this branch is not annulled and has no explicit
	 * delay-slot instruction, so the first instruction at .Ltiny
	 * below occupies its delay slot.
	 */
	ba,pt		%icc, .Lsmall_unaligned

.Ltiny:
	/* Copies 1 to 3 bytes; the %o2 decrements decide where to stop. */
	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x00), memcpy_retl_o2_plus_1)
	EX_LD(LOAD(ldub, %o1 + 0x01, %g1), memcpy_retl_o2)
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x01), memcpy_retl_o2_plus_1)
	EX_LD(LOAD(ldub, %o1 + 0x02, %g1), memcpy_retl_o2)
	ba,pt		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x02), memcpy_retl_o2)

.Lsmall:
	/* %g2 = dst | src from the entry code.  Use the word loop only
	 * when both pointers are 4-byte aligned.
	 */
	andcc		%g2, 0x3, %g0
	bne,pn		%icc, .Lsmall_unaligned
	 andn		%o2, 0x4 - 1, %o5	/* %o5 = bytes handled as whole words */
	sub		%o2, %o5, %o2		/* %o2 = trailing bytes (< 4) */
1:
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
	add		%o1, 0x04, %o1
	subcc		%o5, 0x04, %o5
	add		%o0, 0x04, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
	brz,pt		%o2, .Lexit
	 nop
	ba,a,pt		%icc, .Ltiny

.Lsmall_unaligned:
	/* Plain byte loop; %o2 counts the bytes still to copy. */
1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
	add		%o1, 1, %o1
	add		%o0, 1, %o0
	subcc		%o2, 1, %o2
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g1, %o0 - 0x01), memcpy_retl_o2_plus_1)
	ba,a,pt		%icc, .Lexit
	 nop
	.size		FUNC_NAME, .-FUNC_NAME