/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

/*
 * Endian-dependent helpers used by the unaligned-source paths (CASE 1-3).
 *
 * Those paths keep the source bytes that were read but not yet stored in a
 * "carry" register (r5) and combine it with every freshly loaded word:
 *
 *   SHIFT_1    moves the new word so that its leading bytes fill the free
 *              part of the carry (the two are then OR-ed and stored).
 *   SHIFT_2    keeps the left-over bytes of the new word as the next carry.
 *   MERGE_1/2  build the initial 3-byte carry of CASE 1 out of one halfword
 *              and one byte (MERGE_2 expands to nothing on little endian).
 *   EXTRACT_1  isolates the halfword of the final CASE 1 carry.
 *   EXTRACT_2  moves the last byte of the final CASE 1 carry to bits 7..0.
 *              The big-endian variant ignores IMM and always shifts by 8.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Width of one load/store in the aligned loop (CASE 0), which is unrolled
 * four times:
 *   with CONFIG_ARC_HAS_LL64     8-byte ldd/std, 32 bytes per iteration
 *   without                      4-byte ld/st,   16 bytes per iteration
 * ZOLSHFT turns a byte count into an iteration count, ZOLAND masks the
 * bytes that are left for the trailing byte loop.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * memcpy: copy r2 bytes from the address in r1 to the address in r0.
 *
 * In:   r0 = destination, r1 = source, r2 = byte count
 * Out:  r0 is never written, so it still holds the destination on return
 *
 * Registers written by this routine:
 *   r1        advanced through the source
 *   r2        decremented to the number of bytes still to be copied
 *   r3        running destination pointer (starts as a copy of r0)
 *   r4        alignment remainder of dest, then of src; data in CASE 0
 *   r5        byte data, and the carry of unstored source bytes in CASE 1-3
 *   r6..r10   data words (NOTE(review): with LL64 ldd/std presumably use
 *             register pairs, which would also touch r7, r9, r11 and r5;
 *             confirm against the ISA manual)
 *   lp_count  counter of the zero-overhead loops, plus the status flags
 *
 * Strategy: copies of up to 8 bytes go straight to a byte loop. Otherwise
 * the destination is first advanced to a 32-bit boundary; the main loop is
 * then chosen by the source misalignment (0, 1, 2 or 3), and a byte loop
 * finishes the tail.
 *
 * No PREFETCH/PREFETCHW hints are issued. A PREFETCHW ahead of the store
 * pointer can name a cache line beyond dest + count; taking write ownership
 * of such a line is unsafe when the neighbouring memory is used for DMA.
 * The read-side hints were placed right before the loads they were meant
 * to help, so they are dropped as well.
 */
ENTRY(memcpy)
	mov.f	0, r2		; only sets the flags from the size
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; delay slot: work on a copy, keep r0 as return value

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2	; delay slot: byte loop count for the small case

;;; Bring the destination to a 32bit boundary, nothing to do if it is there
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4	; lp_count = 4 - (dest & 3)
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to loop iterations, unfold x4
	;; The lsr.f below sits in the delay slot of the bnz.d above, so it
	;; also runs when the branch is taken. That is harmless: the unaligned
	;; path recomputes both the flags and lp_count before using them.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; tail bytes: at most 31 with LL64, else 15
.Lsmallchunk:
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = source & 3, which is 1, 2 or 3 here
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1	; delay slot: runs for all three cases

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]	; delay slot: runs for off by 1 and off by 3

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; r5 = carry holding the 3 bytes read so far
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	ldh.ab	r5, [r1, 2]	; r5 = carry holding 2 bytes
	sub	r2, r2, 1	; together with the delay slot sub: 2 bytes taken

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; Big endian keeps the carry in the upper half, but only while the
	;; loop runs. No instruction inside the loop carries .f, so the .nz
	;; after the loop still tests the result of the lsr.f above.
#ifdef __BIG_ENDIAN__
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]	; flush the 2 carried bytes

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1byte for achieve the 32bit alignment
	;; That byte is already in r5 and already subtracted from r2, both
	;; done in the delay slots at .Lsourceunaligned.

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; Same flag reuse as in CASE 2, .ne and .nz test the same condition
#ifdef __BIG_ENDIAN__
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]	; flush the carried byte

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END(memcpy)