xref: /linux/arch/arm/lib/csumpartialcopygeneric.S (revision cbecf716ca618fd44feda6bd9a64a8179d031fc5)
1d2912cb1SThomas Gleixner/* SPDX-License-Identifier: GPL-2.0-only */
21da177e4SLinus Torvalds/*
31da177e4SLinus Torvalds *  linux/arch/arm/lib/csumpartialcopygeneric.S
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds *  Copyright (C) 1995-2001 Russell King
61da177e4SLinus Torvalds */
76ebbf2ceSRussell King#include <asm/assembler.h>
81da177e4SLinus Torvalds
91da177e4SLinus Torvalds/*
101da177e4SLinus Torvalds * unsigned int
111da177e4SLinus Torvalds * csum_partial_copy_xxx(const char *src, char *dst, int len)
121da177e4SLinus Torvalds *  r0 = src, r1 = dst, r2 = len; r3 (sum) is set to -1 on entry
131da177e4SLinus Torvalds *  Returns : r0 = checksum
141da177e4SLinus Torvalds *
151da177e4SLinus Torvalds * Note that 'tst' and 'teq' preserve the carry flag.
161da177e4SLinus Torvalds */
171da177e4SLinus Torvalds
/*
 * Symbolic names for the working registers.  'sum' (r3) is the running
 * checksum accumulator; it is also reused as a scratch register at
 * .Ldone, where the original dst value is reloaded from the stack.
 */
181da177e4SLinus Torvaldssrc	.req	r0
191da177e4SLinus Torvaldsdst	.req	r1
201da177e4SLinus Torvaldslen	.req	r2
211da177e4SLinus Torvaldssum	.req	r3
221da177e4SLinus Torvalds
		/*
		 * Zero-length exit, reached from .Lless8 when len == 0:
		 * hand the untouched accumulator back in r0.  load_regs is
		 * a macro defined outside this file; presumably it restores
		 * what save_regs stacked and returns to the caller - confirm.
		 */
238adbb371SNicolas Pitre.Lzero:		mov	r0, sum
2490303b10SCatalin Marinas		load_regs
251da177e4SLinus Torvalds
261da177e4SLinus Torvalds		/*
271da177e4SLinus Torvalds		 * Align an unaligned destination pointer.  We know that
281da177e4SLinus Torvalds		 * we have >= 8 bytes here, so we don't need to check
291da177e4SLinus Torvalds		 * the length.  Note that the source pointer hasn't been
301da177e4SLinus Torvalds		 * aligned yet.
311da177e4SLinus Torvalds		 */
		/*
		 * Called with 'blne' from the entry path and returns through
		 * lr.  Depending on dst & 3 it copies:
		 *   1 -> one byte, then two more via .Ldst_16bit (3 bytes)
		 *   2 -> two bytes via .Ldst_16bit
		 *   3 -> one byte, then returns (reteq)
		 * Carry is threaded through the adcs instructions; the plain
		 * 'sub' and the 'tst' in between do not disturb it.  This
		 * also relies on the load1b/load2b macros (defined outside
		 * this file) leaving C alone - confirm against their
		 * definitions.  put_byte_0/put_byte_1 are operand-shift
		 * macros not defined here; they appear to select the byte
		 * lane a value is added into - confirm.
		 * Scratch registers: ip, r8.
		 */
328adbb371SNicolas Pitre.Ldst_unaligned:
338adbb371SNicolas Pitre		tst	dst, #1
348adbb371SNicolas Pitre		beq	.Ldst_16bit
351da177e4SLinus Torvalds
		/* dst is odd: move a single byte to make it 16-bit aligned */
361da177e4SLinus Torvalds		load1b	ip
371da177e4SLinus Torvalds		sub	len, len, #1
381da177e4SLinus Torvalds		adcs	sum, sum, ip, put_byte_1	@ update checksum
391da177e4SLinus Torvalds		strb	ip, [dst], #1
401da177e4SLinus Torvalds		tst	dst, #2
416ebbf2ceSRussell King		reteq	lr			@ dst is now 32bit aligned
421da177e4SLinus Torvalds
		/* dst is 16-bit but not 32-bit aligned: move two bytes */
438adbb371SNicolas Pitre.Ldst_16bit:	load2b	r8, ip
441da177e4SLinus Torvalds		sub	len, len, #2
451da177e4SLinus Torvalds		adcs	sum, sum, r8, put_byte_0
461da177e4SLinus Torvalds		strb	r8, [dst], #1
471da177e4SLinus Torvalds		adcs	sum, sum, ip, put_byte_1
481da177e4SLinus Torvalds		strb	ip, [dst], #1
496ebbf2ceSRussell King		ret	lr			@ dst is now 32bit aligned
501da177e4SLinus Torvalds
511da177e4SLinus Torvalds		/*
521da177e4SLinus Torvalds		 * Handle 0 to 7 bytes, with any alignment of source and
531da177e4SLinus Torvalds		 * destination pointers.  Note that when we get here, C = 0
541da177e4SLinus Torvalds		 */
		/*
		 * Reached by 'blo' after 'cmp len, #8' in the entry path,
		 * which is why C is clear on arrival.  Everything is moved
		 * with byte stores:
		 *   - len == 0: leave through .Lzero
		 *   - dst odd:  one leading byte
		 *   - byte pairs while (len & 6) != 0
		 *   - one trailing byte if (len & 1)
		 * and then control joins the common exit at .Ldone.
		 * Scratch registers: ip, r8.
		 */
558adbb371SNicolas Pitre.Lless8:	teq	len, #0			@ check for zero count
568adbb371SNicolas Pitre		beq	.Lzero
571da177e4SLinus Torvalds
581da177e4SLinus Torvalds		/* we must have at least one byte. */
591da177e4SLinus Torvalds		tst	dst, #1			@ dst 16-bit aligned
608adbb371SNicolas Pitre		beq	.Lless8_aligned
611da177e4SLinus Torvalds
621da177e4SLinus Torvalds		/* Align dst */
631da177e4SLinus Torvalds		load1b	ip
641da177e4SLinus Torvalds		sub	len, len, #1
651da177e4SLinus Torvalds		adcs	sum, sum, ip, put_byte_1	@ update checksum
661da177e4SLinus Torvalds		strb	ip, [dst], #1
671da177e4SLinus Torvalds		tst	len, #6
688adbb371SNicolas Pitre		beq	.Lless8_byteonly
691da177e4SLinus Torvalds
		/* two bytes per pass; the loop test is at .Lless8_aligned */
701da177e4SLinus Torvalds1:		load2b	r8, ip
711da177e4SLinus Torvalds		sub	len, len, #2
721da177e4SLinus Torvalds		adcs	sum, sum, r8, put_byte_0
731da177e4SLinus Torvalds		strb	r8, [dst], #1
741da177e4SLinus Torvalds		adcs	sum, sum, ip, put_byte_1
751da177e4SLinus Torvalds		strb	ip, [dst], #1
768adbb371SNicolas Pitre.Lless8_aligned:
778adbb371SNicolas Pitre		tst	len, #6
781da177e4SLinus Torvalds		bne	1b
		/* at most one byte is left at this point */
798adbb371SNicolas Pitre.Lless8_byteonly:
801da177e4SLinus Torvalds		tst	len, #1
818adbb371SNicolas Pitre		beq	.Ldone
821da177e4SLinus Torvalds		load1b	r8
831da177e4SLinus Torvalds		adcs	sum, sum, r8, put_byte_0	@ update checksum
841da177e4SLinus Torvalds		strb	r8, [dst], #1
858adbb371SNicolas Pitre		b	.Ldone
861da177e4SLinus Torvalds
		/*
		 * Main entry.  FN_ENTRY, save_regs, load_regs and the
		 * load1l/load2l/load4l macros are supplied from outside this
		 * file; from their use here they appear to load 1/2/4 words
		 * from src - confirm against the including file.
		 *
		 * Flow:
		 *   sum = -1 (any incoming r3 value is discarded)
		 *   len < 8             -> .Lless8
		 *   dst not word aligned -> call .Ldst_unaligned, resume here
		 *   src not word aligned -> .Lsrc_not_aligned
		 *   otherwise            -> word copy below
		 *
		 * Register roles in the word copy: ip = byte count for the
		 * current phase, r4-r7 = data words, r5 = pending tail byte
		 * on arrival at .Lexit.
		 *
		 * The carry from each adcs feeds the next one.  The 'sub',
		 * 'teq' and 'tst' in between leave C alone, and the
		 * flag-setting 'ands'/'bics' with these small immediates are
		 * relied upon to do the same.
		 */
871da177e4SLinus TorvaldsFN_ENTRY
881da177e4SLinus Torvalds		save_regs
89*1d60be3cSAl Viro		mov	sum, #-1
901da177e4SLinus Torvalds
911da177e4SLinus Torvalds		cmp	len, #8			@ Ensure that we have at least
928adbb371SNicolas Pitre		blo	.Lless8			@ 8 bytes to copy.
931da177e4SLinus Torvalds
941da177e4SLinus Torvalds		adds	sum, sum, #0		@ C = 0
951da177e4SLinus Torvalds		tst	dst, #3			@ Test destination alignment
968adbb371SNicolas Pitre		blne	.Ldst_unaligned		@ align destination, return here
971da177e4SLinus Torvalds
981da177e4SLinus Torvalds		/*
991da177e4SLinus Torvalds		 * Ok, the dst pointer is now 32bit aligned, and we know
1001da177e4SLinus Torvalds		 * that we must have more than 4 bytes to copy.  Note
1011da177e4SLinus Torvalds		 * that C contains the carry from the dst alignment above.
1021da177e4SLinus Torvalds		 */
1031da177e4SLinus Torvalds
1041da177e4SLinus Torvalds		tst	src, #3			@ Test source alignment
1058adbb371SNicolas Pitre		bne	.Lsrc_not_aligned
1061da177e4SLinus Torvalds
1071da177e4SLinus Torvalds		/* Routine for src & dst aligned */
1081da177e4SLinus Torvalds
		/* ip = len rounded down to a multiple of 16; skip loop if 0 */
1091da177e4SLinus Torvalds		bics	ip, len, #15
1101da177e4SLinus Torvalds		beq	2f
1111da177e4SLinus Torvalds
		/* 16 bytes per pass */
1121da177e4SLinus Torvalds1:		load4l	r4, r5, r6, r7
1131da177e4SLinus Torvalds		stmia	dst!, {r4, r5, r6, r7}
1141da177e4SLinus Torvalds		adcs	sum, sum, r4
1151da177e4SLinus Torvalds		adcs	sum, sum, r5
1161da177e4SLinus Torvalds		adcs	sum, sum, r6
1171da177e4SLinus Torvalds		adcs	sum, sum, r7
1181da177e4SLinus Torvalds		sub	ip, ip, #16
1191da177e4SLinus Torvalds		teq	ip, #0
1201da177e4SLinus Torvalds		bne	1b
1211da177e4SLinus Torvalds
		/* bits 3 and 2 of len: an 8-byte and/or a 4-byte remainder */
1221da177e4SLinus Torvalds2:		ands	ip, len, #12
1231da177e4SLinus Torvalds		beq	4f
1241da177e4SLinus Torvalds		tst	ip, #8
1251da177e4SLinus Torvalds		beq	3f
1261da177e4SLinus Torvalds		load2l	r4, r5
1271da177e4SLinus Torvalds		stmia	dst!, {r4, r5}
1281da177e4SLinus Torvalds		adcs	sum, sum, r4
1291da177e4SLinus Torvalds		adcs	sum, sum, r5
1301da177e4SLinus Torvalds		tst	ip, #4
1311da177e4SLinus Torvalds		beq	4f
1321da177e4SLinus Torvalds
1331da177e4SLinus Torvalds3:		load1l	r4
1341da177e4SLinus Torvalds		str	r4, [dst], #4
1351da177e4SLinus Torvalds		adcs	sum, sum, r4
1361da177e4SLinus Torvalds
		/*
		 * 1-3 trailing bytes: fetch one more word into r4 and peel
		 * the needed bytes off it with byte stores.
		 * NOTE(review): this looks like a whole-word load although
		 * fewer than four source bytes remain; src is word aligned
		 * on this path - confirm the over-read is acceptable.
		 */
1371da177e4SLinus Torvalds4:		ands	len, len, #3
1388adbb371SNicolas Pitre		beq	.Ldone
1391da177e4SLinus Torvalds		load1l	r4
1401da177e4SLinus Torvalds		tst	len, #2
1411da177e4SLinus Torvalds		mov	r5, r4, get_byte_0
1428adbb371SNicolas Pitre		beq	.Lexit
143d98b90eaSVictor Kamensky		adcs	sum, sum, r4, lspush #16
1441da177e4SLinus Torvalds		strb	r5, [dst], #1
1451da177e4SLinus Torvalds		mov	r5, r4, get_byte_1
1461da177e4SLinus Torvalds		strb	r5, [dst], #1
1471da177e4SLinus Torvalds		mov	r5, r4, get_byte_2
		/*
		 * Shared tail, also branched to from the unaligned-source
		 * paths: if len is odd, r5 carries the final byte, which is
		 * stored and added to the sum.
		 */
1488adbb371SNicolas Pitre.Lexit:		tst	len, #1
149e44fc388SStefan Agner		strbne	r5, [dst], #1
1501da177e4SLinus Torvalds		andne	r5, r5, #255
151e44fc388SStefan Agner		adcsne	sum, sum, r5, put_byte_0
1521da177e4SLinus Torvalds
1531da177e4SLinus Torvalds		/*
1541da177e4SLinus Torvalds		 * If the dst pointer was not 16-bit aligned, we
1551da177e4SLinus Torvalds		 * need to rotate the checksum here to get around
1561da177e4SLinus Torvalds		 * the inefficient byte manipulations in the
1571da177e4SLinus Torvalds		 * architecture independent code.
1581da177e4SLinus Torvalds		 */
		/*
		 * Common exit: fold the last carry into the result, then
		 * reuse 'sum' as scratch to reload the original dst from
		 * the top of the stack and test its bit 0.
		 */
1598adbb371SNicolas Pitre.Ldone:		adc	r0, sum, #0
1601da177e4SLinus Torvalds		ldr	sum, [sp, #0]		@ dst
1611da177e4SLinus Torvalds		tst	sum, #1
1621da177e4SLinus Torvalds		movne	r0, r0, ror #8
16390303b10SCatalin Marinas		load_regs
1641da177e4SLinus Torvalds
1658adbb371SNicolas Pitre.Lsrc_not_aligned:
		/*
		 * dst is word aligned but src is not.  The pending carry is
		 * folded into sum, src is rounded down to a word boundary
		 * and the first word is fetched into r5.  ip = src & 3 then
		 * selects the variant: 2 -> .Lsrc2_aligned,
		 * 3 -> .Lsrc3_aligned, 1 -> fall through.
		 *
		 * In every variant r4 carries the not-yet-stored bytes of
		 * the previous source word, and each output word is built
		 * as r4 | (next word shifted).  lspull/lspush are shift
		 * macros not defined in this file (presumably the
		 * endian-aware pair from <asm/assembler.h> - confirm).
		 *
		 * On the fall-through path 'cmp ip, #2' with ip == 1 has
		 * left C clear, which is what the "C = 0" remark below
		 * refers to.
		 */
1661da177e4SLinus Torvalds		adc	sum, sum, #0		@ include C from dst alignment
1671da177e4SLinus Torvalds		and	ip, src, #3
1681da177e4SLinus Torvalds		bic	src, src, #3
1691da177e4SLinus Torvalds		load1l	r5
1701da177e4SLinus Torvalds		cmp	ip, #2
1718adbb371SNicolas Pitre		beq	.Lsrc2_aligned
1728adbb371SNicolas Pitre		bhi	.Lsrc3_aligned
		/* src & 3 == 1: three bytes of each loaded word carry over */
173d98b90eaSVictor Kamensky		mov	r4, r5, lspull #8		@ C = 0
1741da177e4SLinus Torvalds		bics	ip, len, #15
1751da177e4SLinus Torvalds		beq	2f
		/* 16 bytes per pass */
1761da177e4SLinus Torvalds1:		load4l	r5, r6, r7, r8
177d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #24
178d98b90eaSVictor Kamensky		mov	r5, r5, lspull #8
179d98b90eaSVictor Kamensky		orr	r5, r5, r6, lspush #24
180d98b90eaSVictor Kamensky		mov	r6, r6, lspull #8
181d98b90eaSVictor Kamensky		orr	r6, r6, r7, lspush #24
182d98b90eaSVictor Kamensky		mov	r7, r7, lspull #8
183d98b90eaSVictor Kamensky		orr	r7, r7, r8, lspush #24
1841da177e4SLinus Torvalds		stmia	dst!, {r4, r5, r6, r7}
1851da177e4SLinus Torvalds		adcs	sum, sum, r4
1861da177e4SLinus Torvalds		adcs	sum, sum, r5
1871da177e4SLinus Torvalds		adcs	sum, sum, r6
1881da177e4SLinus Torvalds		adcs	sum, sum, r7
189d98b90eaSVictor Kamensky		mov	r4, r8, lspull #8
1901da177e4SLinus Torvalds		sub	ip, ip, #16
1911da177e4SLinus Torvalds		teq	ip, #0
1921da177e4SLinus Torvalds		bne	1b
		/* bits 3 and 2 of len: an 8-byte and/or a 4-byte remainder */
1931da177e4SLinus Torvalds2:		ands	ip, len, #12
1941da177e4SLinus Torvalds		beq	4f
1951da177e4SLinus Torvalds		tst	ip, #8
1961da177e4SLinus Torvalds		beq	3f
1971da177e4SLinus Torvalds		load2l	r5, r6
198d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #24
199d98b90eaSVictor Kamensky		mov	r5, r5, lspull #8
200d98b90eaSVictor Kamensky		orr	r5, r5, r6, lspush #24
2011da177e4SLinus Torvalds		stmia	dst!, {r4, r5}
2021da177e4SLinus Torvalds		adcs	sum, sum, r4
2031da177e4SLinus Torvalds		adcs	sum, sum, r5
204d98b90eaSVictor Kamensky		mov	r4, r6, lspull #8
2051da177e4SLinus Torvalds		tst	ip, #4
2061da177e4SLinus Torvalds		beq	4f
2071da177e4SLinus Torvalds3:		load1l	r5
208d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #24
2091da177e4SLinus Torvalds		str	r4, [dst], #4
2101da177e4SLinus Torvalds		adcs	sum, sum, r4
211d98b90eaSVictor Kamensky		mov	r4, r5, lspull #8
		/*
		 * 1-3 trailing bytes: r4 already holds three carried-over
		 * bytes, so no further load is issued on this path.
		 */
2121da177e4SLinus Torvalds4:		ands	len, len, #3
2138adbb371SNicolas Pitre		beq	.Ldone
2141da177e4SLinus Torvalds		mov	r5, r4, get_byte_0
2151da177e4SLinus Torvalds		tst	len, #2
2168adbb371SNicolas Pitre		beq	.Lexit
217d98b90eaSVictor Kamensky		adcs	sum, sum, r4, lspush #16
2181da177e4SLinus Torvalds		strb	r5, [dst], #1
2191da177e4SLinus Torvalds		mov	r5, r4, get_byte_1
2201da177e4SLinus Torvalds		strb	r5, [dst], #1
2211da177e4SLinus Torvalds		mov	r5, r4, get_byte_2
2228adbb371SNicolas Pitre		b	.Lexit
2231da177e4SLinus Torvalds
		/*
		 * Variant for src & 3 == 2, entered from .Lsrc_not_aligned
		 * with the first aligned source word in r5.  Two bytes of
		 * each loaded word carry over in r4 to the next output word.
		 * The 'cmp ip, #2' that dispatched here left C set, so
		 * 'adds sum, sum, #0' is used to clear it before the adcs
		 * chain starts.
		 */
224d98b90eaSVictor Kamensky.Lsrc2_aligned:	mov	r4, r5, lspull #16
2251da177e4SLinus Torvalds		adds	sum, sum, #0
2261da177e4SLinus Torvalds		bics	ip, len, #15
2271da177e4SLinus Torvalds		beq	2f
		/* 16 bytes per pass */
2281da177e4SLinus Torvalds1:		load4l	r5, r6, r7, r8
229d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #16
230d98b90eaSVictor Kamensky		mov	r5, r5, lspull #16
231d98b90eaSVictor Kamensky		orr	r5, r5, r6, lspush #16
232d98b90eaSVictor Kamensky		mov	r6, r6, lspull #16
233d98b90eaSVictor Kamensky		orr	r6, r6, r7, lspush #16
234d98b90eaSVictor Kamensky		mov	r7, r7, lspull #16
235d98b90eaSVictor Kamensky		orr	r7, r7, r8, lspush #16
2361da177e4SLinus Torvalds		stmia	dst!, {r4, r5, r6, r7}
2371da177e4SLinus Torvalds		adcs	sum, sum, r4
2381da177e4SLinus Torvalds		adcs	sum, sum, r5
2391da177e4SLinus Torvalds		adcs	sum, sum, r6
2401da177e4SLinus Torvalds		adcs	sum, sum, r7
241d98b90eaSVictor Kamensky		mov	r4, r8, lspull #16
2421da177e4SLinus Torvalds		sub	ip, ip, #16
2431da177e4SLinus Torvalds		teq	ip, #0
2441da177e4SLinus Torvalds		bne	1b
		/* bits 3 and 2 of len: an 8-byte and/or a 4-byte remainder */
2451da177e4SLinus Torvalds2:		ands	ip, len, #12
2461da177e4SLinus Torvalds		beq	4f
2471da177e4SLinus Torvalds		tst	ip, #8
2481da177e4SLinus Torvalds		beq	3f
2491da177e4SLinus Torvalds		load2l	r5, r6
250d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #16
251d98b90eaSVictor Kamensky		mov	r5, r5, lspull #16
252d98b90eaSVictor Kamensky		orr	r5, r5, r6, lspush #16
2531da177e4SLinus Torvalds		stmia	dst!, {r4, r5}
2541da177e4SLinus Torvalds		adcs	sum, sum, r4
2551da177e4SLinus Torvalds		adcs	sum, sum, r5
256d98b90eaSVictor Kamensky		mov	r4, r6, lspull #16
2571da177e4SLinus Torvalds		tst	ip, #4
2581da177e4SLinus Torvalds		beq	4f
2591da177e4SLinus Torvalds3:		load1l	r5
260d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #16
2611da177e4SLinus Torvalds		str	r4, [dst], #4
2621da177e4SLinus Torvalds		adcs	sum, sum, r4
263d98b90eaSVictor Kamensky		mov	r4, r5, lspull #16
		/*
		 * 1-3 trailing bytes: r4 holds two carried-over bytes.  If a
		 * third byte is needed it is fetched with load1b and handed
		 * to the shared tail at .Lexit in r5.
		 */
2641da177e4SLinus Torvalds4:		ands	len, len, #3
2658adbb371SNicolas Pitre		beq	.Ldone
2661da177e4SLinus Torvalds		mov	r5, r4, get_byte_0
2671da177e4SLinus Torvalds		tst	len, #2
2688adbb371SNicolas Pitre		beq	.Lexit
2691da177e4SLinus Torvalds		adcs	sum, sum, r4
2701da177e4SLinus Torvalds		strb	r5, [dst], #1
2711da177e4SLinus Torvalds		mov	r5, r4, get_byte_1
2721da177e4SLinus Torvalds		strb	r5, [dst], #1
2731da177e4SLinus Torvalds		tst	len, #1
2748adbb371SNicolas Pitre		beq	.Ldone
2751da177e4SLinus Torvalds		load1b	r5
2768adbb371SNicolas Pitre		b	.Lexit
2771da177e4SLinus Torvalds
		/*
		 * Variant for src & 3 == 3, entered from .Lsrc_not_aligned
		 * with the first aligned source word in r5.  One byte of
		 * each loaded word carries over in r4 to the next output
		 * word.  As in the src & 3 == 2 variant, C arrives set from
		 * the dispatching 'cmp' and is cleared with
		 * 'adds sum, sum, #0'.
		 */
278d98b90eaSVictor Kamensky.Lsrc3_aligned:	mov	r4, r5, lspull #24
2791da177e4SLinus Torvalds		adds	sum, sum, #0
2801da177e4SLinus Torvalds		bics	ip, len, #15
2811da177e4SLinus Torvalds		beq	2f
		/* 16 bytes per pass */
2821da177e4SLinus Torvalds1:		load4l	r5, r6, r7, r8
283d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #8
284d98b90eaSVictor Kamensky		mov	r5, r5, lspull #24
285d98b90eaSVictor Kamensky		orr	r5, r5, r6, lspush #8
286d98b90eaSVictor Kamensky		mov	r6, r6, lspull #24
287d98b90eaSVictor Kamensky		orr	r6, r6, r7, lspush #8
288d98b90eaSVictor Kamensky		mov	r7, r7, lspull #24
289d98b90eaSVictor Kamensky		orr	r7, r7, r8, lspush #8
2901da177e4SLinus Torvalds		stmia	dst!, {r4, r5, r6, r7}
2911da177e4SLinus Torvalds		adcs	sum, sum, r4
2921da177e4SLinus Torvalds		adcs	sum, sum, r5
2931da177e4SLinus Torvalds		adcs	sum, sum, r6
2941da177e4SLinus Torvalds		adcs	sum, sum, r7
295d98b90eaSVictor Kamensky		mov	r4, r8, lspull #24
2961da177e4SLinus Torvalds		sub	ip, ip, #16
2971da177e4SLinus Torvalds		teq	ip, #0
2981da177e4SLinus Torvalds		bne	1b
		/* bits 3 and 2 of len: an 8-byte and/or a 4-byte remainder */
2991da177e4SLinus Torvalds2:		ands	ip, len, #12
3001da177e4SLinus Torvalds		beq	4f
3011da177e4SLinus Torvalds		tst	ip, #8
3021da177e4SLinus Torvalds		beq	3f
3031da177e4SLinus Torvalds		load2l	r5, r6
304d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #8
305d98b90eaSVictor Kamensky		mov	r5, r5, lspull #24
306d98b90eaSVictor Kamensky		orr	r5, r5, r6, lspush #8
3071da177e4SLinus Torvalds		stmia	dst!, {r4, r5}
3081da177e4SLinus Torvalds		adcs	sum, sum, r4
3091da177e4SLinus Torvalds		adcs	sum, sum, r5
310d98b90eaSVictor Kamensky		mov	r4, r6, lspull #24
3111da177e4SLinus Torvalds		tst	ip, #4
3121da177e4SLinus Torvalds		beq	4f
3131da177e4SLinus Torvalds3:		load1l	r5
314d98b90eaSVictor Kamensky		orr	r4, r4, r5, lspush #8
3151da177e4SLinus Torvalds		str	r4, [dst], #4
3161da177e4SLinus Torvalds		adcs	sum, sum, r4
317d98b90eaSVictor Kamensky		mov	r4, r5, lspull #24
		/*
		 * 1-3 trailing bytes: r4 holds a single carried-over byte.
		 * When two or three bytes remain, another word is loaded to
		 * supply the rest; a possible third byte is handed to the
		 * shared tail at .Lexit in r5.
		 */
3181da177e4SLinus Torvalds4:		ands	len, len, #3
3198adbb371SNicolas Pitre		beq	.Ldone
3201da177e4SLinus Torvalds		mov	r5, r4, get_byte_0
3211da177e4SLinus Torvalds		tst	len, #2
3228adbb371SNicolas Pitre		beq	.Lexit
3231da177e4SLinus Torvalds		strb	r5, [dst], #1
3241da177e4SLinus Torvalds		adcs	sum, sum, r4
3251da177e4SLinus Torvalds		load1l	r4
3261da177e4SLinus Torvalds		mov	r5, r4, get_byte_0
3271da177e4SLinus Torvalds		strb	r5, [dst], #1
328d98b90eaSVictor Kamensky		adcs	sum, sum, r4, lspush #24
3291da177e4SLinus Torvalds		mov	r5, r4, get_byte_1
3308adbb371SNicolas Pitre		b	.Lexit
33193ed3970SCatalin MarinasFN_EXIT
332