xref: /linux/arch/mips/lib/csum_partial.S (revision 606d099cdd1080bbb50ea50dc52d98252f8f10a1)
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License.  See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Quick'n'dirty IP checksum ...
7 *
8 * Copyright (C) 1998, 1999 Ralf Baechle
9 * Copyright (C) 1999 Silicon Graphics, Inc.
10 */
11#include <asm/asm.h>
12#include <asm/regdef.h>
13
/*
 * Scratch register aliases.  The routines below name their temporaries
 * T0-T4 and T7; which <asm/regdef.h> register each alias stands for is
 * selected by CONFIG_64BIT.
 * NOTE(review): presumably the two sets are chosen so that both
 * configurations land on the same hardware registers -- confirm against
 * <asm/regdef.h>.
 */
#ifndef CONFIG_64BIT
#define T0	t0
#define T1	t1
#define T2	t2
#define T3	t3
#define T4	t4
#define T7	t7
#else /* CONFIG_64BIT */
#define T0	ta0
#define T1	ta1
#define T2	ta2
#define T3	ta3
#define T4	t0
#define T7	t3
#endif /* CONFIG_64BIT */
29
/*
 * ADDC(sum, reg): add reg into sum and then add the carry out of that
 * addition back into sum.
 *
 * After the first addu, "sum < reg" (unsigned) holds exactly when the
 * addition wrapped, so sltu leaves the carry bit in v1.
 *
 * Clobbers v1.  sum and reg must be different registers, otherwise the
 * carry test compares sum with itself and the carry is lost.
 */
#define ADDC(sum,reg)						\
	addu	sum, sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, sum, v1
34
/*
 * CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)
 *
 * Accumulate the eight 32-bit words at src + offset + 0x00 through
 * src + offset + 0x1c into sum using ADDC.  The words are fetched four at
 * a time into _t0.._t3 and then added, in two rounds.
 *
 * src is not advanced.  Clobbers _t0.._t3 and, through ADDC, v1.
 */
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	lw	_t0, (offset + 0x00)(src);			\
	lw	_t1, (offset + 0x04)(src);			\
	lw	_t2, (offset + 0x08)(src);			\
	lw	_t3, (offset + 0x0c)(src);			\
	ADDC(sum, _t0);						\
	ADDC(sum, _t1);						\
	ADDC(sum, _t2);						\
	ADDC(sum, _t3);						\
	lw	_t0, (offset + 0x10)(src);			\
	lw	_t1, (offset + 0x14)(src);			\
	lw	_t2, (offset + 0x18)(src);			\
	lw	_t3, (offset + 0x1c)(src);			\
	ADDC(sum, _t0);						\
	ADDC(sum, _t1);						\
	ADDC(sum, _t2);						\
	ADDC(sum, _t3)
52
53/*
54 * a0: source address
55 * a1: length of the area to checksum
56 * a2: partial checksum
57 */
58
/* Symbolic names for the two registers that stay live in both routines. */
#define src	a0		/* read pointer, advanced as data is consumed */
#define sum	v0		/* running checksum, returned to the caller */

	.text
	/* From here on branch delay slots are written out by hand. */
	.set	noreorder
64
/*
 * small_csumcpy: sum the last 0..7 bytes, fold to 16 bits and return.
 *
 * In this file it is reached by branch/jump from csum_partial, never by
 * a call; it returns straight to csum_partial's caller through ra.
 *
 *   src  next byte to sum, any alignment
 *   T2   bytes left; only bits 2, 1 and 0 are looked at
 *   sum  32-bit running sum so far
 *   T7   non-zero if the folded result has to be byte-swapped
 *   a2   partial checksum passed in by the caller
 *
 * Result in v0 (sum).  Clobbers a0, a1, T0, T1, T2 and v1.
 *
 * An instruction indented by one extra space sits in the delay slot of
 * the branch above it and runs whether or not the branch is taken.
 */
small_csumcpy:
	move	a1, T2

	andi	T0, a1, 4
	beqz	T0, .Lsmall_halfword
	 andi	T0, a1, 2		/* T0 = "halfword left?" for below */

	/* A whole word is still pending. */
	ulw	T1, (src)
	PTR_ADDIU	src, 4
	ADDC(sum, T1)

.Lsmall_halfword:
	move	T1, zero		/* T1 collects the last 0..3 bytes */
	beqz	T0, .Lsmall_byte
	 andi	T0, a1, 1		/* T0 = "single byte left?" */

	/* A halfword is still pending. */
	ulhu	T1, (src)
	PTR_ADDIU	src, 2

.Lsmall_byte:
	beqz	T0, .Lsmall_accumulate
	 sll	T1, T1, 16		/* halfword (or 0) into the top half */

	lbu	T2, (src)
	 nop

	/*
	 * The final byte is the first byte of a halfword whose second byte
	 * is taken as zero: the high byte on big endian, the low byte on
	 * little endian.
	 */
#ifdef __MIPSEB__
	sll	T2, T2, 8
#endif
	or	T1, T1, T2

.Lsmall_accumulate:
	ADDC(sum, T1)

	/*
	 * Fold 32 bits to 16.  sum + (sum << 16) leaves lo + hi in the upper
	 * half and its carry in v1; shift it down and add the carry back.
	 */
	sll	v1, sum, 16
	addu	sum, sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, sum, v1

	/* Odd start address: exchange the two bytes of the folded sum. */
	beqz	T7, .Lsmall_return
	 nop
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, sum, v1
	andi	sum, sum, 0xffff

.Lsmall_return:
	/* Reorder mode for the exit: the jr delay slot is not hand-filled. */
	.set	reorder
	/* Finally bring in the partial checksum supplied by the caller. */
	ADDC(sum, a2)
	jr	ra
	.set	noreorder
119
120/* ------------------------------------------------------------------------- */
121
122	.align	5
/*
 * csum_partial: IP checksum of a buffer.
 *
 *   a0 (src)  source address, any alignment
 *   a1        length of the area to checksum, in bytes
 *   a2        partial checksum, added in by small_csumcpy
 *   v0 (sum)  result
 *
 * Fewer than 8 bytes are handed to small_csumcpy at once.  Longer buffers
 * are first brought up to 2- and then 4-byte alignment; with at least 56
 * bytes left they are further aligned to 32 bytes and summed in 128-, 64-
 * and 32-byte chunks.  Remaining whole words are summed one at a time.
 * The last 0..3 bytes, the fold to 16 bits, the byte swap for odd
 * buffers and the return are all done by small_csumcpy, which this
 * routine always ends by jumping to.
 *
 * T7 records whether src was odd on entry; t8 and T2 are scratch holding
 * loop counts and length bits; T0, T1, T3, T4 and v1 hold data.
 *
 * Assembled under .set noreorder: an instruction indented by one extra
 * space sits in the delay slot of the branch above it and runs whether
 * or not that branch is taken.
 */
LEAF(csum_partial)
	move	sum, zero
	move	T7, zero			/* short path: no byte swap */

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy		/* < 8 bytes to sum */
	 move	T2, a1				/* small_csumcpy wants count in T2 */

	/*
	 * a1 >= 8 from here on, so no zero-length test is needed.
	 */
	andi	T7, src, 0x1			/* odd buffer? */

hword_align:
	beqz	T7, word_align
	 andi	t8, src, 0x2

	/*
	 * Odd start: take one byte so that src becomes even.  The sum is
	 * built as if the data began at the even address, and the folded
	 * result is byte-swapped in small_csumcpy, so this byte goes into
	 * the lane it will have after the swap.
	 */
	lbu	T0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	T0, T0, 8
#endif
	ADDC(sum, T0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2			/* src changed: re-test bit 1 */

word_align:
	beqz	t8, dword_align
	 sltiu	t8, a1, 56			/* t8 = "too short for bulk loops" */

	/* Take one halfword so that src becomes 4-byte aligned. */
	lhu	T0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, T0)
	sltiu	t8, a1, 56			/* a1 changed: redo the test */
	PTR_ADDU	src, src, 0x2

dword_align:
	/* Under 56 bytes: skip the bulk loops and sum a1 / 4 plain words. */
	bnez	t8, do_end_words
	 move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, qword_align
	 andi	t8, src, 0x8

	/* One word to reach 8-byte alignment. */
	lw	T0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, T0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	 andi	t8, src, 0x10

	/* Two words to reach 16-byte alignment. */
	lw	T0, 0x00(src)
	lw	T1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, T0)
	ADDC(sum, T1)
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	 LONG_SRL	t8, a1, 0x7		/* t8 = number of 128-byte chunks */

	/* Four words to reach 32-byte alignment. */
	lw	T3, 0x08(src)
	lw	T4, 0x0c(src)
	lw	T0, 0x00(src)
	lw	T1, 0x04(src)
	ADDC(sum, T3)
	ADDC(sum, T4)
	ADDC(sum, T0)
	ADDC(sum, T1)
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7		/* a1 changed: recount the chunks */

begin_movement:
	/*
	 * a1 is left untouched from here on; the code below works from its
	 * individual bits: >> 7 for the 128-byte loop, then 0x40, 0x20,
	 * 0x1c (whole words) and 0x3 (trailing bytes).
	 */
	beqz	t8, 1f
	 andi	T2, a1, 0x40

move_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
	CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
	CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4)
	CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4)
	LONG_SUBU	t8, t8, 0x01
	bnez	t8, move_128bytes
	 PTR_ADDU	src, src, 0x80

1:
	beqz	T2, 1f
	 andi	T2, a1, 0x20

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
	CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	T2, do_end_words
	 andi	t8, a1, 0x1c

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

do_end_words:
	/* t8 holds a byte count here; turn it into a count of whole words. */
	beqz	t8, maybe_end_cruft
	 LONG_SRL	t8, t8, 0x2

end_words:
	lw	T0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, T0)
	bnez	t8, end_words
	 PTR_ADDU	src, src, 0x4

maybe_end_cruft:
	andi	T2, a1, 0x3			/* 0..3 trailing bytes */

small_memcpy:
	/* Trailing bytes, fold, swap and return all live in small_csumcpy. */
	j	small_csumcpy
	 move	a1, T2
	END(csum_partial)
259