xref: /linux/arch/arm/lib/csumpartial.S (revision af36bef0c5bb82f361ebb2f106f11d0f63dac887)
11da177e4SLinus Torvalds/*
21da177e4SLinus Torvalds *  linux/arch/arm/lib/csumpartial.S
31da177e4SLinus Torvalds *
41da177e4SLinus Torvalds *  Copyright (C) 1995-1998 Russell King
51da177e4SLinus Torvalds *
61da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or modify
71da177e4SLinus Torvalds * it under the terms of the GNU General Public License version 2 as
81da177e4SLinus Torvalds * published by the Free Software Foundation.
91da177e4SLinus Torvalds */
101da177e4SLinus Torvalds#include <linux/linkage.h>
111da177e4SLinus Torvalds#include <asm/assembler.h>
121da177e4SLinus Torvalds
131da177e4SLinus Torvalds		.text
141da177e4SLinus Torvalds
151da177e4SLinus Torvalds/*
161da177e4SLinus Torvalds * Function: __u32 csum_partial(const char *src, int len, __u32 sum)
171da177e4SLinus Torvalds * Params  : r0 = buffer, r1 = len, r2 = checksum
181da177e4SLinus Torvalds * Returns : r0 = new checksum
191da177e4SLinus Torvalds */
201da177e4SLinus Torvalds
/*
 * Register aliases used throughout this file.  buf, len and sum are the
 * three incoming arguments (see "Params" above); td0-td3 are scratch
 * registers for data loaded from the buffer.
 */
211da177e4SLinus Torvaldsbuf	.req	r0	@ read pointer, post-incremented by every load
221da177e4SLinus Torvaldslen	.req	r1	@ byte count (not updated by the 32-byte loop, which counts in ip)
231da177e4SLinus Torvaldssum	.req	r2	@ running 32-bit sum; carries are chained through the C flag
241da177e4SLinus Torvaldstd0	.req	r3
251da177e4SLinus Torvaldstd1	.req	r4	@ save before use
261da177e4SLinus Torvaldstd2	.req	r5	@ save before use
271da177e4SLinus Torvaldstd3	.req	lr	@ aliases lr: only usable while the return address is saved on the stack
281da177e4SLinus Torvalds
/*
 * .zero: exit taken from .less8 when len == 0.  Nothing has been added,
 * so the incoming sum is returned unchanged.  The stack still holds the
 * {buf, lr} pair pushed on entry to csum_partial.
 */
291da177e4SLinus Torvalds.zero:		mov	r0, sum		@ return value = incoming sum
301da177e4SLinus Torvalds		add	sp, sp, #4	@ discard the saved buf word
311da177e4SLinus Torvalds		ldr	pc, [sp], #4	@ pop the saved lr into pc: return
321da177e4SLinus Torvalds
331da177e4SLinus Torvalds		/*
341da177e4SLinus Torvalds		 * Handle 0 to 7 bytes, with any alignment of the buffer
351da177e4SLinus Torvalds		 * pointer.  Note that when we get here, C = 0
		 * (csum_partial branches here with "blo" directly after
		 * "cmp len, #8", and "lower" means carry clear).
		 *
		 * From .less4 onwards this code is also the tail of the
		 * long path in csum_partial, which arrives with buf word
		 * aligned, a live carry in C, and only the low bits of len
		 * still meaningful.  Every instruction between the adcs
		 * steps must therefore leave C alone.
361da177e4SLinus Torvalds		 */
371da177e4SLinus Torvalds.less8:		teq	len, #0			@ check for zero count
381da177e4SLinus Torvalds		beq	.zero
391da177e4SLinus Torvalds
401da177e4SLinus Torvalds		/* we must have at least one byte. */
		/*
		 * Odd start address: pre-rotate the incoming sum by 8 bits.
		 * .done rotates the result by another 8 bits for an odd
		 * buffer, so the incoming sum ends up rotated by 16 overall.
		 * NOTE(review): put_byte_0/put_byte_1 are not defined in
		 * this file (presumably asm/assembler.h); they are assumed
		 * to be shifter operands that select the byte lane for the
		 * current endianness - confirm.
		 */
411da177e4SLinus Torvalds		tst	buf, #1			@ odd address?
42*af36bef0SRussell King		movne	sum, sum, ror #8
431da177e4SLinus Torvalds		ldrneb	td0, [buf], #1
441da177e4SLinus Torvalds		subne	len, len, #1
451da177e4SLinus Torvalds		adcnes	sum, sum, td0, put_byte_1
461da177e4SLinus Torvalds
		/* bits 1-2 of len give the number of half-words still to add */
471da177e4SLinus Torvalds.less4:		tst	len, #6
481da177e4SLinus Torvalds		beq	.less8_byte
491da177e4SLinus Torvalds
501da177e4SLinus Torvalds		/* we are now half-word aligned */
511da177e4SLinus Torvalds
		/*
		 * Add one half-word per iteration.  Without ldrh (before
		 * architecture v4) the half-word is assembled from two byte
		 * loads in memory order for the configured endianness; td3
		 * (lr) is free as scratch because lr was saved on entry.
		 */
521da177e4SLinus Torvalds.less8_wordlp:
531da177e4SLinus Torvalds#if __LINUX_ARM_ARCH__ >= 4
541da177e4SLinus Torvalds		ldrh	td0, [buf], #2
551da177e4SLinus Torvalds		sub	len, len, #2
561da177e4SLinus Torvalds#else
571da177e4SLinus Torvalds		ldrb	td0, [buf], #1
581da177e4SLinus Torvalds		ldrb	td3, [buf], #1
591da177e4SLinus Torvalds		sub	len, len, #2
601da177e4SLinus Torvalds#ifndef __ARMEB__
611da177e4SLinus Torvalds		orr	td0, td0, td3, lsl #8
621da177e4SLinus Torvalds#else
631da177e4SLinus Torvalds		orr	td0, td3, td0, lsl #8
641da177e4SLinus Torvalds#endif
651da177e4SLinus Torvalds#endif
661da177e4SLinus Torvalds		adcs	sum, sum, td0
671da177e4SLinus Torvalds		tst	len, #6			@ more half-words left?
681da177e4SLinus Torvalds		bne	.less8_wordlp
691da177e4SLinus Torvalds
701da177e4SLinus Torvalds.less8_byte:	tst	len, #1			@ odd number of bytes
711da177e4SLinus Torvalds		ldrneb	td0, [buf], #1		@ include last byte
721da177e4SLinus Torvalds		adcnes	sum, sum, td0, put_byte_0	@ update checksum
731da177e4SLinus Torvalds
		/*
		 * Common exit.  Pops the {buf, lr} pair pushed on entry: the
		 * saved buf is the original start address, whose bit 0
		 * decides whether the result is rotated by 8 bits.
		 */
741da177e4SLinus Torvalds.done:		adc	r0, sum, #0		@ collect up the last carry
751da177e4SLinus Torvalds		ldr	td0, [sp], #4		@ td0 = start address saved on entry
761da177e4SLinus Torvalds		tst	td0, #1			@ check buffer alignment
771da177e4SLinus Torvalds		movne	r0, r0, ror #8		@ rotate checksum by 8 bits
781da177e4SLinus Torvalds		ldr	pc, [sp], #4		@ return
791da177e4SLinus Torvalds
/*
 * .not_aligned: local subroutine, called with "blne" from csum_partial
 * when buf is not a multiple of 4.  Consumes up to one byte and then up
 * to one half-word so that buf is word aligned on return, decreasing len
 * by the number of bytes consumed and adding them into sum.
 *
 * In : buf, len, sum, C = running carry (cleared by the caller)
 * Out: buf, len, sum and C updated; td0 clobbered (and ip before v4)
 *
 * Returns with "mov pc, lr", so lr (= td3) holds the return address
 * here and must not be used as scratch; the pre-v4 path uses ip instead.
 */
801da177e4SLinus Torvalds.not_aligned:	tst	buf, #1			@ odd address
811da177e4SLinus Torvalds		ldrneb	td0, [buf], #1		@ make even
821da177e4SLinus Torvalds		subne	len, len, #1
831da177e4SLinus Torvalds		adcnes	sum, sum, td0, put_byte_1	@ update checksum
841da177e4SLinus Torvalds
851da177e4SLinus Torvalds		tst	buf, #2			@ 32-bit aligned?
861da177e4SLinus Torvalds#if __LINUX_ARM_ARCH__ >= 4
871da177e4SLinus Torvalds		ldrneh	td0, [buf], #2		@ make 32-bit aligned
881da177e4SLinus Torvalds		subne	len, len, #2
891da177e4SLinus Torvalds#else
901da177e4SLinus Torvalds		ldrneb	td0, [buf], #1		@ no ldrh: build the half-word
911da177e4SLinus Torvalds		ldrneb	ip, [buf], #1		@ from two byte loads
921da177e4SLinus Torvalds		subne	len, len, #2
931da177e4SLinus Torvalds#ifndef __ARMEB__
941da177e4SLinus Torvalds		orrne	td0, td0, ip, lsl #8
951da177e4SLinus Torvalds#else
961da177e4SLinus Torvalds		orrne	td0, ip, td0, lsl #8
971da177e4SLinus Torvalds#endif
981da177e4SLinus Torvalds#endif
991da177e4SLinus Torvalds		adcnes	sum, sum, td0		@ update checksum
1001da177e4SLinus Torvalds		mov	pc, lr			@ back to the caller, C still live
1011da177e4SLinus Torvalds
/*
 * csum_partial entry point.
 *
 * Saves the original buf and lr on the stack; every exit (.zero, .done)
 * pops that pair.  Buffers shorter than 8 bytes go to .less8.  Otherwise
 * buf is brought to word alignment, whole 32-byte chunks are added eight
 * words at a time, then the remaining whole words, and the last 0-3
 * bytes are finished at .less4, which falls through to .done.
 *
 * The carry flag links every adcs from "adds sum, sum, #0" to the final
 * adc at .done, so the bookkeeping in between only uses instructions
 * that leave C unchanged.
 */
1021da177e4SLinus TorvaldsENTRY(csum_partial)
1031da177e4SLinus Torvalds		stmfd	sp!, {buf, lr}		@ [sp] = start address, [sp, #4] = lr
1041da177e4SLinus Torvalds		cmp	len, #8			@ Ensure that we have at least
1051da177e4SLinus Torvalds		blo	.less8			@ 8 bytes to sum.
1061da177e4SLinus Torvalds
		/*
		 * Odd start address: pre-rotate the incoming sum by 8 bits;
		 * .done applies a further 8-bit rotate for an odd buffer.
		 */
107*af36bef0SRussell King		tst	buf, #1
108*af36bef0SRussell King		movne	sum, sum, ror #8
109*af36bef0SRussell King
1101da177e4SLinus Torvalds		adds	sum, sum, #0		@ C = 0
1111da177e4SLinus Torvalds		tst	buf, #3			@ Test buffer alignment
1121da177e4SLinus Torvalds		blne	.not_aligned		@ align buffer, return here
1131da177e4SLinus Torvalds
1141da177e4SLinus Torvalds1:		bics	ip, len, #31		@ ip = bytes in whole 32-byte chunks
1151da177e4SLinus Torvalds		beq	3f			@ none: skip the unrolled loop
1161da177e4SLinus Torvalds
		/*
		 * td1/td2 are r4/r5, which are preserved for the caller
		 * around the loop.  td3 is lr, which the loop overwrites;
		 * the return address was pushed on entry.
		 */
1171da177e4SLinus Torvalds		stmfd	sp!, {r4 - r5}
1181da177e4SLinus Torvalds2:		ldmia	buf!, {td0, td1, td2, td3}
1191da177e4SLinus Torvalds		adcs	sum, sum, td0
1201da177e4SLinus Torvalds		adcs	sum, sum, td1
1211da177e4SLinus Torvalds		adcs	sum, sum, td2
1221da177e4SLinus Torvalds		adcs	sum, sum, td3
1231da177e4SLinus Torvalds		ldmia	buf!, {td0, td1, td2, td3}
1241da177e4SLinus Torvalds		adcs	sum, sum, td0
1251da177e4SLinus Torvalds		adcs	sum, sum, td1
1261da177e4SLinus Torvalds		adcs	sum, sum, td2
1271da177e4SLinus Torvalds		adcs	sum, sum, td3
1281da177e4SLinus Torvalds		sub	ip, ip, #32		@ no "s" suffix: flags untouched
1291da177e4SLinus Torvalds		teq	ip, #0			@ sets Z for the branch, should not change C
1301da177e4SLinus Torvalds		bne	2b
1311da177e4SLinus Torvalds		ldmfd	sp!, {r4 - r5}
1321da177e4SLinus Torvalds
		/* len was not reduced by the loop above; bits 2-4 count the words left */
1331da177e4SLinus Torvalds3:		tst	len, #0x1c		@ should not change C
1341da177e4SLinus Torvalds		beq	.less4
1351da177e4SLinus Torvalds
1361da177e4SLinus Torvalds4:		ldr	td0, [buf], #4		@ one word per iteration
1371da177e4SLinus Torvalds		sub	len, len, #4
1381da177e4SLinus Torvalds		adcs	sum, sum, td0
1391da177e4SLinus Torvalds		tst	len, #0x1c
1401da177e4SLinus Torvalds		bne	4b
1411da177e4SLinus Torvalds		b	.less4			@ finish the last 0-3 bytes, then .done
142