/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>

/*
 * Endian-dependent helpers for the unaligned-source paths of memcpy.
 *
 * SHIFT_1 / SHIFT_2
 *	Used in pairs inside the word-copy loops: SHIFT_1 positions the part
 *	of a freshly loaded word that completes the pending output word, and
 *	SHIFT_2 keeps the leftover part as the carry for the next output word.
 *	The two macros swap shift direction between the endiannesses.
 * MERGE_1 / MERGE_2
 *	Used once, before the "off by 1" loop, to combine the initial halfword
 *	and byte into the carry register.  MERGE_2 expands to nothing on
 *	little-endian, i.e. the byte is used where it was loaded.
 * EXTRACT_1 / EXTRACT_2
 *	Used once, after the "off by 1" loop, to pull the halfword and then
 *	the byte back out of the carry register so they can be stored.
 *	Note that the little-endian EXTRACT_1 and the big-endian EXTRACT_2
 *	ignore their IMM argument and use a fixed constant instead.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Load/store primitives for the fully aligned bulk loop.
 *
 * With CONFIG_ARC_HAS_LL64 each LOADX/STOREX moves 8 bytes (ldd/std), so the
 * four-times unrolled loop moves 32 bytes per iteration: the iteration count
 * is len >> ZOLSHFT (5) and the leftover is len & ZOLAND (0x1F).
 * Without it each LOADX/STOREX moves 4 bytes (ld/st), i.e. 16 bytes per
 * iteration, hence ZOLSHFT 4 and ZOLAND 0xF.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * memcpy - copy r2 bytes from the buffer at r1 to the buffer at r0.
 *
 * Register usage (as established by the code below):
 *   r0       destination; never written here, so the caller gets the
 *            original destination pointer back
 *   r1       source cursor, advanced by every load
 *   r2       number of bytes still to be copied
 *   r3       destination cursor (working copy of r0), advanced by every store
 *   r4-r11   scratch (the odd registers are only touched as the upper half
 *            of a register pair when LOADX/STOREX are the 64-bit variants,
 *            or as merge temporaries in the unaligned paths)
 *   lp_count trip count of the lpnz hardware loops
 *
 * Outline:
 *   1. size == 0: return immediately.
 *   2. size <= 8: plain byte loop.
 *   3. Otherwise copy single bytes until the destination is 4-byte aligned,
 *      then dispatch on (source & 3):
 *        0    - bulk loop built from LOADX/STOREX, then a byte tail
 *        1..3 - aligned word loads from the source; each output word is
 *               assembled from the carry held in r5 and the next loaded
 *               word with SHIFT_1/or/SHIFT_2, then a byte tail
 *
 * Instructions with a ".d" suffix are followed by a delay-slot instruction,
 * so the statement order around every branch is significant.  Likewise each
 * lpnz consumes the flags produced by the closest preceding ".f" instruction;
 * nothing that sets flags may be inserted between the two.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2		; test the size, result discarded
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; do not clobber ret val (delay slot)

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2	; delay slot: byte loop count and flags

;;; Align the destination: if (dst & 3) is non-zero, copy 4 - (dst & 3)
;;; single bytes
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to loop iterations, unfold x4
	;; This lsr.f sits in the delay slot of the bnz.d above, so it is
	;; also executed when the unaligned path is taken.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	;; Bytes left over by the bulk loop: at most 31 with 64-bit
	;; load/store, at most 15 otherwise
	and.f	lp_count, r2, ZOLAND
.Lsmallchunk:
	;; Shared byte loop: reached by fall-through from CASE 0 and directly
	;; from the size <= 8 check, both with lp_count and flags already set
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = source & 3, one of 1, 2 or 3
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1	; delay slot: executed for all three cases

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]	; delay slot: executed for cases 1 and 3

;;; CASE 1: The source is unaligned, off by 1
	;; One byte has been read above, which gives 16bit alignment;
	;; read 2 more bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2 (8 bytes per iteration)
	lsr.f	lp_count, r2, 3
	;; Build the 3-byte carry in r5; no flags are changed here, so the
	;; lpnz below still sees the result of the lsr.f
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07 ; remaining bytes, at most 7
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	;; Read 2 bytes to reach 32bit alignment; together with the sub in
	;; the delay slot above, r2 has now been reduced by 2
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only when the loop will run: move the carry into loop position.
	;; The matching lsr.nz after the loop undoes this under the same
	;; flags, which the loop body leaves untouched.
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16
#endif
	;; Write back the 2 carried bytes
	sth.ab	r5, [r3, 2]

	and.f	lp_count, r2, 0x07 ; remaining bytes, at most 7
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence only 1 byte has to be read to reach 32bit alignment; that byte
;;; was already loaded into r5 in the delay slot of the bhi.d above

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Same conditional pre-shift / post-shift pairing as in CASE 2
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24
#endif
	;; Write back the 1 carried byte
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07 ; remaining bytes, at most 7
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)