xref: /linux/arch/csky/abiv2/memcpy.S (revision c5af58b769113c4045209973052db3e3a543ee43)
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
3*c5af58b7SGuo Ren
#include <linux/linkage.h>
#include "sysdep.h"
6*c5af58b7SGuo Ren
/*
 * void *memcpy(void *dest, const void *src, size_t len)
 * The same code is also entered through the name __memcpy.
 *
 * Register roles, as used by the code below:
 *   r0   dest cursor on entry and while copying; return value on exit
 *   r1   src cursor
 *   r2   len; reduced by the head byte count when dest starts unaligned
 *   r3   data scratch (byte and word copies)
 *   r12  copy of the original dest, moved back into r0 before returning
 *   r13  dest & 3, then the number of head bytes needed to align dest
 *   r18  iteration counter of whichever copy loop is running
 *   r19  constant 4 while computing the head byte count, then 0 as the
 *        third operand of vldx.8/vstx.8
 *   r20-r23, vr0  data scratch for the 16-byte loop variants
 *
 * Strategy:
 *   1. len < 4: copy byte by byte.
 *   2. Otherwise copy single bytes until dest is 4-byte aligned.
 *   3. Copy len[31:4] chunks of 16 bytes, then len[3:2] words,
 *      then len[1:0] trailing bytes.
 * Only dest is aligned; src may stay unaligned and the word loads rely
 * on the hardware handling that.
 *
 * LABLE_ALIGN, PRE_BNEZAD and BNEZAD come from sysdep.h (not visible
 * here).  NOTE(review): PRE_BNEZAD + BNEZAD are presumed to form a
 * "decrement counter, loop while non-zero" pair -- confirm against
 * sysdep.h.
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	/* Keep the original dest for the return value.  */
	mov	r12, r0
	/* Fewer than 4 bytes: go straight to the byte loop.  */
	cmplti	r2, 4
	bt	.L_copy_by_byte

	/* r13 = dest & 3; r19 = 4 is consumed by .L_dest_not_aligned.  */
	andi	r13, r0, 3
	movi	r19, 4
	/* Test if dest is not 4 bytes aligned.  */
	bnez	r13, .L_dest_not_aligned

/* Hardware can handle unaligned access directly.  */
.L_dest_aligned:
	/* r18 = len[31:4], the number of whole 16-byte chunks.  */
	zext	r18, r2, 31, 4

	/* Test if len less than 16 bytes.  */
	bez	r18, .L_len_less_16bytes
	/* r19 = 0 is the third operand of vldx.8/vstx.8 below.  */
	movi	r19, 0

	LABLE_ALIGN
.L_len_larger_16bytes:
#if defined(__CSKY_VDSPV2__)
	/* One vector load/store pair per 16-byte chunk.  */
	vldx.8	vr0, (r1), r19
	PRE_BNEZAD (r18)
	addi	r1, 16
	vstx.8	vr0, (r0), r19
	addi	r0, 16
#elif defined(__CK860__)
	/*
	 * Four interleaved word load/store pairs through r3.
	 * NOTE(review): no PRE_BNEZAD in this variant -- presumably
	 * BNEZAD does the decrement itself on CK860; confirm against
	 * sysdep.h.
	 */
	ldw	r3, (r1, 0)
	stw	r3, (r0, 0)
	ldw	r3, (r1, 4)
	stw	r3, (r0, 4)
	ldw	r3, (r1, 8)
	stw	r3, (r0, 8)
	ldw	r3, (r1, 12)
	addi	r1, 16
	stw	r3, (r0, 12)
	addi	r0, 16
#else
	/* Load four words into r20-r23, then store all four.  */
	ldw	r20, (r1, 0)
	ldw	r21, (r1, 4)
	ldw	r22, (r1, 8)
	ldw	r23, (r1, 12)
	stw	r20, (r0, 0)
	stw	r21, (r0, 4)
	stw	r22, (r0, 8)
	stw	r23, (r0, 12)
	PRE_BNEZAD (r18)
	addi	r1, 16
	addi	r0, 16
#endif
	BNEZAD (r18, .L_len_larger_16bytes)

.L_len_less_16bytes:
	/* r18 = len[3:2], the number of whole words still to copy.  */
	zext	r18, r2, 3, 2
	bez	r18, .L_copy_by_byte
.L_len_less_16bytes_loop:
	ldw	r3, (r1, 0)
	PRE_BNEZAD (r18)
	addi	r1, 4
	stw	r3, (r0, 0)
	addi	r0, 4
	BNEZAD (r18, .L_len_less_16bytes_loop)

/* Copy the trailing len[1:0] bytes (the whole buffer when len < 4).  */
.L_copy_by_byte:
	zext	r18, r2, 1, 0
	bez	r18, .L_return
.L_copy_by_byte_loop:
	ldb	r3, (r1, 0)
	PRE_BNEZAD (r18)
	addi	r1, 1
	stb	r3, (r0, 0)
	addi	r0, 1
	BNEZAD (r18, .L_copy_by_byte_loop)

.L_return:
	/* Return the original dest pointer.  */
	mov	r0, r12
	rts

/*
 * If dest is not aligned, copy 4 - (dest & 3) single bytes first so
 * that dest becomes 4-byte aligned, and take them off len.
 */
.L_dest_not_aligned:
	sub	r13, r19, r13
	sub	r2, r13

/* Makes the dest align.  */
.L_dest_not_aligned_loop:
	ldb	r3, (r1, 0)
	PRE_BNEZAD (r13)
	addi	r1, 1
	stb	r3, (r0, 0)
	addi	r0, 1
	BNEZAD (r13, .L_dest_not_aligned_loop)
	/* Fewer than 4 bytes left after the head: finish byte by byte.  */
	cmplti	r2, 4
	bt	.L_copy_by_byte

	/*
	 * dest is aligned now; rejoin the main path.  src alignment is
	 * not checked -- the word loads accept an unaligned src.
	 */
	jbr	.L_dest_aligned
/* Close both entry points so each symbol gets a type and size.  */
ENDPROC(memcpy)
ENDPROC(__memcpy)
111