xref: /linux/arch/alpha/lib/csum_ipv6_magic.S (revision 905e46acd3272d04566fec49afbd7ad9e2ed9ae3)
/*
 * arch/alpha/lib/csum_ipv6_magic.S
 * Contributed by Richard Henderson <rth@tamu.edu>
 *
 * unsigned short csum_ipv6_magic(struct in6_addr *saddr,
 *                                struct in6_addr *daddr,
 *                                __u32 len,
 *                                unsigned short proto,
 *                                unsigned int csum);
 *
 * Misalignment handling (which costs 16 instructions / 8 cycles)
 * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
 */
14
#include <asm/export.h>

/*
 * csum_ipv6_magic: 16-bit ones-complement checksum, folded and
 * complemented, over two 16-byte addresses, a 32-bit length, a 16-bit
 * protocol number and an incoming 32-bit partial sum.
 *
 * In:   $16 = saddr  (16 bytes, any alignment)
 *       $17 = daddr  (16 bytes, any alignment)
 *       $18 = len    (low 32 bits used; byte-swapped before summing)
 *       $19 = proto  (low 16 bits used; byte-swapped before summing)
 *       $20 = csum   (low 32 bits used)
 * Out:  $0  = complemented checksum in bits 0..15, upper bits zero
 * Uses: $1-$7 and $16-$24 as scratch; no stack frame, no memory writes.
 *
 * Method: the four address quadwords, the swapped len, the swapped proto
 * and csum are added into a 64-bit accumulator ($20).  Every add is
 * followed by a cmpult that records its carry-out (0 or 1).  The carries
 * are then added back in (end-around carry) and the 64-bit value is
 * folded down to 16 bits.
 *
 * Unaligned loads: each quadword is fetched with a pair of ldq_u at
 * offsets +0/+7 (or +8/+15); extql/extqh, using the pointer as the byte
 * offset, select the two pieces, which are ORed together.  For an
 * aligned pointer both loads return the same quadword.
 *
 * Byte swapping: len and proto are moved to their swapped positions with
 * byte extracts and shifts.  The final "sra ...,32" steps sign-extend
 * into bits 32..63; an all-ones upper half only adds multiples of 0xffff
 * and therefore does not change the folded result.
 *
 * The "e0" / ".. e1" tags are the original author's issue-slot notes.
 * The carry-merge tail has been re-sequenced for correctness and is left
 * untagged because its scheduling has not been re-measured.
 */
	.globl csum_ipv6_magic
	.align 4
	.ent csum_ipv6_magic
	.frame $30,0,$26,0
csum_ipv6_magic:
	.prologue 0

	ldq_u	$0,0($16)	# e0    : saddr word 0, low piece
	zapnot	$20,15,$20	# .. e1 : keep only the low 32 bits of csum
	extqh	$18,1,$4	# e0    : len byte 0 -> byte 7 (swap in progress)
	ldq_u	$21,7($16)	# .. e1 : saddr word 0, high piece

	extbl	$18,1,$5	# e0    : len byte 1
	ldq_u	$1,8($16)	# .. e1 : saddr word 1, low piece
	extbl	$18,2,$6	# e0    : len byte 2
	ldq_u	$22,15($16)	# .. e1 : saddr word 1, high piece

	extbl	$18,3,$18	# e0    : len byte 3 -> byte 0
	ldq_u	$2,0($17)	# .. e1 : daddr word 0, low piece
	sra	$4,32,$4	# e0    : len byte 0 -> byte 3 (sign-extended)
	ldq_u	$23,7($17)	# .. e1 : daddr word 0, high piece

	extql	$0,$16,$0	# e0    :
	ldq_u	$3,8($17)	# .. e1 : daddr word 1, low piece
	extqh	$21,$16,$21	# e0    :
	ldq_u	$24,15($17)	# .. e1 : daddr word 1, high piece

	sll	$5,16,$5	# e0    : len byte 1 -> byte 2
	or	$0,$21,$0	# .. e1 : saddr word 0 complete
	extql	$1,$16,$1	# e0    :
	addq	$20,$0,$20	# .. e1 : acc = csum + saddr word 0

	extqh	$22,$16,$22	# e0    :
	cmpult	$20,$0,$0	# .. e1 : $0 = carry out of that add
	sll	$6,8,$6		# e0    : len byte 2 -> byte 1
	or	$1,$22,$1	# .. e1 : saddr word 1 complete

	extql	$2,$17,$2	# e0    :
	or	$4,$18,$18	# .. e1 : len bytes 3 and 0 in place
	extqh	$23,$17,$23	# e0    :
	or	$5,$6,$5	# .. e1 : len bytes 2 and 1 in place

	extql	$3,$17,$3	# e0    :
	or	$2,$23,$2	# .. e1 : daddr word 0 complete
	extqh	$24,$17,$24	# e0    :
	or	$18,$5,$18	# .. e1 : $18 = byte-swapped len

	extwh	$19,7,$7	# e0    : proto byte 0 -> byte 1
	or	$3,$24,$3	# .. e1 : daddr word 1 complete
	extbl	$19,1,$19	# e0    : proto byte 1 -> byte 0
	addq	$20,$1,$20	# .. e1 : acc += saddr word 1

	or	$19,$7,$19	# e0    : 16-bit swapped proto
	cmpult	$20,$1,$1	# .. e1 : $1 = carry
	sll	$19,48,$19	# e0    : park it in bits 48..63 ...
	nop			# .. e0 :

	sra	$19,32,$19	# e0    : ... then down to bits 16..31 (sign-extended)
	addq	$20,$2,$20	# .. e1 : acc += daddr word 0
	cmpult	$20,$2,$2	# e0    : $2 = carry
	addq	$20,$3,$20	# .. e1 : acc += daddr word 1

	cmpult	$20,$3,$3	# e0    : $3 = carry
	addq	$20,$18,$20	# .. e1 : acc += swapped len
	cmpult	$20,$18,$18	# e0    : $18 = carry
	addq	$20,$19,$20	# .. e1 : acc += swapped proto

	cmpult	$20,$19,$19	# e0    : $19 = carry
	addq	$0,$1,$0	# .. e1 : start totalling the six carry flags
	addq	$2,$3,$2	# e0    :
	addq	$18,$19,$18	# .. e1 :

	/*
	 * End-around carry.  Adding the carry total to the accumulator can
	 * itself wrap past 2^64 when the accumulator is within a few counts
	 * of the top; that carry must be added back as well or the checksum
	 * comes out one too small.  After a wrap the sum is at most 5, so
	 * the second add cannot wrap again.
	 */
	addq	$0,$2,$0	# carries from the four address words
	addq	$0,$18,$0	# plus len/proto carries: total is at most 6
	addq	$20,$0,$20	# acc += carry total
	cmpult	$20,$0,$0	# $0 = 1 if that add wrapped, else 0

	addq	$20,$0,$0	# $0 = final 64-bit ones-complement sum
	unop			# pad: next instruction stays on an 8-byte boundary

	extwl	$0,2,$2		# e0    : begin folding: 16-bit word 1
	zapnot	$0,3,$3		# .. e1 : word 0
	extwl	$0,4,$1		# e0    : word 2
	addq	$2,$3,$3	# .. e1 :

	extwl	$0,6,$0		# e0    : word 3
	addq	$3,$1,$3	# .. e1 :
	addq	$0,$3,$0	# e0    : sum of four words, at most 18 bits
	unop			#       :

	extwl	$0,2,$1		# e0    : fold 18-bit value
	zapnot	$0,3,$0		# .. e1 :
	addq	$0,$1,$0	# e0    :
	unop			#       :

	extwl	$0,2,$1		# e0    : fold 17-bit value
	zapnot	$0,3,$0		# .. e1 :
	addq	$0,$1,$0	# e0    :
	not	$0,$0		# .. e1 : and complement.

	zapnot	$0,3,$0		# e0    : return only the low 16 bits
	ret			# .. e1 :

	.end csum_ipv6_magic
	EXPORT_SYMBOL(csum_ipv6_magic)
118