checksum.S - OpenGrok cross reference for /linux/arch/xtensa/lib/checksum.S

Lines Matching +full:3 +full:- +full:byte
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
29  * This function assumes 2- or 4-byte alignment.  Other alignments will fail!
32 /* ONES_ADD converts twos-complement math to ones-complement. */
44 	 * is aligned on either a 2-byte or 4-byte boundary.
48 	bnez	a5, 8f		/* branch if 2-byte aligned */
49 	/* Fall-through on common case, 4-byte alignment */
51 	srli	a5, a3, 5	/* 32-byte chunks */
57 	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
81 	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */
83 	loopgtz	a5, 3f
85 	beqz	a5, 3f
87 	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
96 3:
97 	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */
102 	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */
105 	slli	a6, a6, 8	/* load byte into bits 8..15 */
112 	/* uncommon case, buf is 2-byte aligned */
118 	bnez	a5, 8f		/* branch if 1-byte aligned */
123 	addi	a3, a3, -2	/* adjust len */
124 	j	1b		/* now buf is 4-byte aligned */
126 	/* case: odd-byte aligned, len > 1
131 	srli	a5, a3, 2	/* 4-byte chunks */
137 	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
142 	l8ui	a8, a2, 3	/* bits  0.. 7 */
157 	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk, still odd addr */
168 3:
169 	j	5b		/* branch to handle the remaining byte */
188     This function is optimized for 4-byte aligned addresses.  Other
195 	movi	a5, -1
198 	/* We optimize the following alignment tests for the 4-byte
200 	(commented out below).  However, both labels 5: and 3: are out
206 	beqz	a9, 1f		/* branch if both are 4-byte aligned */
208 	j	3f		/* one address is 2-byte aligned */
211 /*	_bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */
214 	/* src and dst are both 4-byte aligned */
215 	srli	a10, a4, 5	/* 32-byte chunks */
221 	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
254 	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
255 	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */
257 	loopgtz	a10, 3f
259 	beqz	a10, 3f
261 	add	a10, a10, a2	/* a10 = end of last 4-byte src chunk */
272 3:
275 	to here from the 4-byte alignment case to process, at most,
276 	one 2-byte chunk.  (2) It branches to here from above if
277 	either src or dst is 2-byte aligned, and we process all bytes
278 	here, except for perhaps a trailing odd byte.  It's
279 	inefficient, so align your addresses to 4-byte boundaries.
286 	srli	a10, a4, 1	/* 2-byte chunks */
292 	add	a10, a10, a2	/* a10 = end of last 2-byte src chunk */
304 	/* This section processes a possible trailing odd byte. */
305 	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
309 	slli	a9, a9, 8	/* shift byte to bits 8..15 */
318 	process all bytes using 8-bit accesses.  Grossly inefficient,
321 	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */
327 	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */
335 	slli	a9, a9, 8	/* combine into a single 16-bit value */
347 	j	4b		/* process the possible trailing odd byte */