xref: /linux/arch/hexagon/mm/copy_user_template.S (revision 58d416351e6df1a41d415958ccdd8eb9c2173fed)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
4 */
5
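6/* Numerology of the four-digit numeric labels used in this file:
7 * WXYZ   (e.g. 8180 = 8-byte wide, store, in the loop, at the access itself)
8 * W: width in bytes
9 * X: Load=0, Store=1
10 * Y: Location 0=preamble,8=loop,9=epilog
11 * Z: Location=0,handler=9
12 */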
13	.text
14	.global FUNCNAME
15	.type FUNCNAME, @function
16	.p2align 5
/*
 * FUNCNAME: copy `bytes` bytes from `src` to `dst`; both exits below
 * return 0 in r0.
 *
 * FUNCNAME, bytes, dst, src, loopcount, src_dst_sav, d_dbuf and w_dbuf are
 * not defined in this file; presumably they are macros supplied by whatever
 * includes this template -- confirm against the including file.
 *
 * Strategy: choose the widest access (8, 4, 2, then 1 byte) for which both
 * pointers are aligned, copy with a software-pipelined hardware loop of that
 * width, then finish any remainder in the byte loop.  If the pointers are
 * misaligned by the same amount modulo 8, .Lalign first copies 1/2/4 bytes
 * to bring both to an 8-byte boundary and restarts from the top.
 *
 * The four-digit numeric labels sit in front of the packets that perform
 * the loads and stores.  Nothing in this file references them; presumably
 * they exist so fault fixups elsewhere can name each access -- TODO confirm.
 */
17FUNCNAME:
	/*
	 * Leave immediately when bytes == 0.  r3 = dst|src has a low bit set if
	 * either pointer is misaligned; r4 = dst^src has a low bit set where the
	 * two pointers' alignments differ.
	 */
18	{
19		p0 = cmp.gtu(bytes,#0)
20		if (!p0.new) jump:nt .Ldone
21		r3 = or(dst,src)
22		r4 = xor(dst,src)
23	}
	/*
	 * p1 = (bytes > 15).  Stay on the 8-byte path only when
	 * (dst|src) & 7 == 0.  src_dst_sav is written here but never read in
	 * this file.
	 */
24	{
25		p1 = cmp.gtu(bytes,#15)
26		p0 = bitsclr(r3,#7)
27		if (!p0.new) jump:nt .Loop_not_aligned_8
28		src_dst_sav = combine(src,dst)
29	}
30
	/* loopcount = bytes / 8; 15 bytes or fewer go to .Lsmall instead. */
31	{
32		loopcount = lsr(bytes,#3)
33		if (!p1) jump .Lsmall
34	}
	/*
	 * Software-pipelined hardware loop: sp1loop0 starts with p3 false and
	 * p3 becomes true after the first iteration, so the first pass only
	 * loads.  Each later pass stores the doubleword loaded on the previous
	 * pass (the store reads d_dbuf before this packet's load overwrites it)
	 * while loading the next one.
	 */
35	p3=sp1loop0(.Loop8,loopcount)
36.Loop8:
378080:
388180:
39	{
40		if (p3) memd(dst++#8) = d_dbuf
41		d_dbuf = memd(src++#8)
42	}:endloop0
	/*
	 * Epilog: store the last doubleword still held in d_dbuf, reduce bytes
	 * by loopcount * 8 and let .Lsmall copy whatever is left.
	 */
438190:
44	{
45		memd(dst++#8) = d_dbuf
46		bytes -= asl(loopcount,#3)
47		jump .Lsmall
48	}
49
50.Loop_not_aligned_8:
	/*
	 * (dst^src) & 7 == 0 means both pointers share the same offset within
	 * an 8-byte unit, so they can be aligned together in .Lalign.
	 */
51	{
52		p0 = bitsclr(r4,#7)
53		if (p0.new) jump:nt .Lalign
54	}
	/*
	 * Otherwise try 4-byte accesses: requires (dst|src) & 3 == 0.
	 * p1 = (bytes > 7).
	 */
55	{
56		p0 = bitsclr(r3,#3)
57		if (!p0.new) jump:nt .Loop_not_aligned_4
58		p1 = cmp.gtu(bytes,#7)
59	}
60
	/* loopcount = bytes / 4; 7 bytes or fewer go to .Lsmall instead. */
61	{
62		if (!p1) jump .Lsmall
63		loopcount = lsr(bytes,#2)
64	}
	/* Same pipelined load/store scheme as .Loop8, one word per pass. */
65	p3=sp1loop0(.Loop4,loopcount)
66.Loop4:
674080:
684180:
69	{
70		if (p3) memw(dst++#4) = w_dbuf
71		w_dbuf = memw(src++#4)
72	}:endloop0
	/* Epilog: store the last word, reduce bytes by loopcount * 4. */
734190:
74	{
75		memw(dst++#4) = w_dbuf
76		bytes -= asl(loopcount,#2)
77		jump .Lsmall
78	}
79
80.Loop_not_aligned_4:
	/*
	 * Try 2-byte accesses: requires (dst|src) & 1 == 0.
	 * p1 = (bytes > 3).
	 */
81	{
82		p0 = bitsclr(r3,#1)
83		if (!p0.new) jump:nt .Loop_not_aligned
84		p1 = cmp.gtu(bytes,#3)
85	}
86
	/* loopcount = bytes / 2; 3 bytes or fewer go to .Lsmall instead. */
87	{
88		if (!p1) jump .Lsmall
89		loopcount = lsr(bytes,#1)
90	}
	/* Same pipelined load/store scheme as .Loop8, one halfword per pass. */
91	p3=sp1loop0(.Loop2,loopcount)
92.Loop2:
932080:
942180:
95	{
96		if (p3) memh(dst++#2) = w_dbuf
97		w_dbuf = memuh(src++#2)
98	}:endloop0
	/* Epilog: store the last halfword, reduce bytes by loopcount * 2. */
992190:
100	{
101		memh(dst++#2) = w_dbuf
102		bytes -= asl(loopcount,#1)
103		jump .Lsmall
104	}
105
106.Loop_not_aligned: /* Works for as small as one byte */
	/*
	 * Byte loop over all remaining bytes; also the tail handler reached
	 * from .Lsmall.  Same pipelining as above: the first pass only loads.
	 */
107	p3=sp1loop0(.Loop1,bytes)
108.Loop1:
1091080:
1101180:
111	{
112		if (p3) memb(dst++#1) = w_dbuf
113		w_dbuf = memub(src++#1)
114	}:endloop0
115	/* Done */
	/* Store the final byte (dst is not advanced) and return 0 in r0. */
1161190:
117	{
118		memb(dst) = w_dbuf
119		jumpr r31
120		r0 = #0
121	}
122
123.Lsmall:
	/* Remainder handling: any bytes left are copied by the byte loop. */
124	{
125		p0 = cmp.gtu(bytes,#0)
126		if (p0.new) jump:nt .Loop_not_aligned
127	}
128.Ldone:
	/* Nothing (left) to copy: return 0 in r0. */
129	{
130		r0 = #0
131		jumpr r31
132	}
133	.falign
134.Lalign:
	/*
	 * Reached when dst and src share the same non-zero offset modulo 8.
	 * Copies a byte if bit 0 of src is set, then a halfword if bit 1 is
	 * set, then a word if bit 2 is set, and finally restarts at FUNCNAME.
	 * p1 still holds (bytes > 15) from the entry sequence and is not
	 * recomputed here; when it is false, the first load packet branches to
	 * .Lsmall.
	 */
1351000:
136	{
137		if (p0.new) w_dbuf = memub(src)
138		p0 = tstbit(src,#0)
139		if (!p1) jump .Lsmall
140	}
	/* If src was odd: store that byte and advance dst, src and bytes by 1. */
1411100:
142	{
143		if (p0) memb(dst++#1) = w_dbuf
144		if (p0) bytes = add(bytes,#-1)
145		if (p0) src = add(src,#1)
146	}
	/* Load a halfword if bit 1 of src is set. */
1472000:
148	{
149		if (p0.new) w_dbuf = memuh(src)
150		p0 = tstbit(src,#1)
151		if (!p1) jump .Lsmall
152	}
	/* If so: store that halfword and advance dst, src and bytes by 2. */
1532100:
154	{
155		if (p0) memh(dst++#2) = w_dbuf
156		if (p0) bytes = add(bytes,#-2)
157		if (p0) src = add(src,#2)
158	}
	/* Load a word if bit 2 of src is set. */
1594000:
160	{
161		if (p0.new) w_dbuf = memw(src)
162		p0 = tstbit(src,#2)
163		if (!p1) jump .Lsmall
164	}
	/*
	 * If so: store that word and advance by 4.  Either way, jump back to
	 * FUNCNAME, which re-tests the size and alignment from scratch.
	 */
1654100:
166	{
167		if (p0) memw(dst++#4) = w_dbuf
168		if (p0) bytes = add(bytes,#-4)
169		if (p0) src = add(src,#4)
170		jump FUNCNAME
171	}
172	.size FUNCNAME,.-FUNCNAME
173