/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/linkage.h>
#include "sysdep.h"

/*
 * memcpy / __memcpy - copy len bytes from src to dest.
 *
 * In:   r0 = dest, r1 = src, r2 = len (bytes)
 * Out:  r0 = original dest (saved in r12 on entry, restored before rts)
 * Uses: r3, r12, r13, r18, r19 as scratch in every build; additionally
 *       r20-r23 in the generic variant, or vr0 when __CSKY_VDSPV2__ is
 *       defined.  r0, r1 and r2 are modified while copying.
 *
 * Outline:
 *   1. len < 4                -> byte loop only.
 *   2. dest not 4-byte aligned -> copy 4 - (dest & 3) bytes first.
 *   3. copy 16 bytes per iteration, then whole words, then the last
 *      0..3 bytes.
 * Only dest is aligned; src alignment is never tested.
 *
 * NOTE(review): PRE_BNEZAD/BNEZAD and LABLE_ALIGN come from sysdep.h,
 * which is not visible here.  The loops below presumably rely on the
 * pair meaning "decrement the counter, branch while it is non-zero" -
 * confirm against sysdep.h.
 *
 * NOTE(review): cmplti looks like a signed compare; if so, a len with
 * bit 31 set would take the byte path and copy only len & 3 bytes.
 * Confirm whether such lengths can reach this routine.
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	mov	r12, r0			/* remember dest for the return value */

	/* Fewer than 4 bytes: nothing to align, copy byte by byte. */
	cmplti	r2, 4
	bt	.L_copy_by_byte

	andi	r13, r0, 3		/* r13 = dest & 3 (misalignment) */
	movi	r19, 4			/* used below as 4 - misalignment */
	/* Dest not 4-byte aligned: fix that up first. */
	bnez	r13, .L_dest_not_aligned

/* Hardware can handle unaligned access directly (src is not checked). */
.L_dest_aligned:
	/* r18 = len[31:4], i.e. the number of 16-byte chunks. */
	zext	r18, r2, 31, 4

	/* No full 16-byte chunk: go straight to the word loop. */
	bez	r18, .L_len_less_16bytes
	movi	r19, 0			/* third operand of vldx.8/vstx.8 below */

	LABLE_ALIGN
.L_len_larger_16bytes:
	/* Copy 16 bytes per iteration; r18 counts the remaining chunks. */
#if defined(__CSKY_VDSPV2__)
	vldx.8	vr0, (r1), r19
	PRE_BNEZAD (r18)
	addi	r1, 16
	vstx.8	vr0, (r0), r19
	addi	r0, 16
#elif defined(__CK860__)
	/*
	 * NOTE(review): unlike the other two variants there is no
	 * PRE_BNEZAD here - presumably BNEZAD decrements r18 by itself
	 * on this core; confirm in sysdep.h.
	 */
	ldw	r3, (r1, 0)
	stw	r3, (r0, 0)
	ldw	r3, (r1, 4)
	stw	r3, (r0, 4)
	ldw	r3, (r1, 8)
	stw	r3, (r0, 8)
	ldw	r3, (r1, 12)
	addi	r1, 16
	stw	r3, (r0, 12)
	addi	r0, 16
#else
	/* Four loads followed by four stores. */
	ldw	r20, (r1, 0)
	ldw	r21, (r1, 4)
	ldw	r22, (r1, 8)
	ldw	r23, (r1, 12)
	stw	r20, (r0, 0)
	stw	r21, (r0, 4)
	stw	r22, (r0, 8)
	stw	r23, (r0, 12)
	PRE_BNEZAD (r18)
	addi	r1, 16
	addi	r0, 16
#endif
	BNEZAD (r18, .L_len_larger_16bytes)

.L_len_less_16bytes:
	/* r18 = len[3:2], i.e. the number of whole words still to copy. */
	zext	r18, r2, 3, 2
	bez	r18, .L_copy_by_byte
.L_len_less_16bytes_loop:
	ldw	r3, (r1, 0)
	PRE_BNEZAD (r18)
	addi	r1, 4
	stw	r3, (r0, 0)
	addi	r0, 4
	BNEZAD (r18, .L_len_less_16bytes_loop)

/*
 * Tail: r18 = len[1:0], the last 0..3 bytes.  Also the whole copy when
 * fewer than 4 bytes are (or remain) to be copied.
 */
.L_copy_by_byte:
	zext	r18, r2, 1, 0
	bez	r18, .L_return
.L_copy_by_byte_loop:
	ldb	r3, (r1, 0)
	PRE_BNEZAD (r18)
	addi	r1, 1
	stb	r3, (r0, 0)
	addi	r0, 1
	BNEZAD (r18, .L_copy_by_byte_loop)

.L_return:
	mov	r0, r12			/* return the original dest */
	rts

/*
 * Dest is not 4-byte aligned: copy 4 - (dest & 3) leading bytes so that
 * it becomes aligned.  Only reached with len >= 4, so len cannot go
 * negative here.
 */
.L_dest_not_aligned:
	sub	r13, r19, r13		/* r13 = 4 - (dest & 3) = bytes to copy */
	sub	r2, r13			/* len -= bytes copied by this prologue */

/* Byte loop that brings dest up to a 4-byte boundary. */
.L_dest_not_aligned_loop:
	ldb	r3, (r1, 0)
	PRE_BNEZAD (r13)
	addi	r1, 1
	stb	r3, (r0, 0)
	addi	r0, 1
	BNEZAD (r13, .L_dest_not_aligned_loop)

	/* Fewer than 4 bytes left after aligning: finish byte by byte. */
	cmplti	r2, 4
	bt	.L_copy_by_byte

	/*
	 * Dest is aligned now; continue with the block copy.  Src alignment
	 * is not checked - unaligned src accesses are left to the hardware.
	 */
	jbr	.L_dest_aligned
/*
 * Close both entry symbols.  ENDPROC is expected to emit .type/.size for
 * the named symbol (see linux/linkage.h), so memcpy is annotated as well
 * as __memcpy.
 */
ENDPROC(__memcpy)
ENDPROC(memcpy)