xref: /linux/arch/x86/crypto/des3_ede-asm_64.S (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1/*
2 * des3_ede-asm_64.S  -  x86-64 assembly implementation of 3DES cipher
3 *
4 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 */
16
17#include <linux/linkage.h>
18
19.file "des3_ede-asm_64.S"
20.text
21
/*
 * Base addresses of the eight lookup tables.  The tables are stored back to
 * back starting at .L_s1 and each one spans 64 quadwords, so table k
 * (counting from 1) begins (k - 1) * 64 * 8 bytes past .L_s1.
 */
#define s1 .L_s1
#define s2 (s1 + 1 * (64 * 8))
#define s3 (s1 + 2 * (64 * 8))
#define s4 (s1 + 3 * (64 * 8))
#define s5 (s1 + 4 * (64 * 8))
#define s6 (s1 + 5 * (64 * 8))
#define s7 (s1 + 6 * (64 * 8))
#define s8 (s1 + 7 * (64 * 8))
30
/*
 * Register aliases.
 *
 * Suffix "d" selects the 32-bit view of a register; "bl" / "bh" select
 * bits 0-7 / bits 8-15 of a work register.
 */

/* round-key array, first function argument */
#define CTX %rdi

/* left halves of blocks 0, 1 and 2 (RL1 doubles as scratch in round1) */
#define RL0  %r8
#define RL0d %r8d
#define RL1  %r9
#define RL1d %r9d
#define RL2  %r10
#define RL2d %r10d

/* right halves of blocks 0, 1 and 2 */
#define RR0  %r11
#define RR0d %r11d
#define RR1  %r12
#define RR1d %r12d
#define RR2  %r13
#define RR2d %r13d

/*
 * Work registers, one per block.  They are mapped to %rax/%rbx/%rcx because
 * the round macros read both the low byte and the second byte (%ah/%bh/%ch).
 */
#define RW0   %rax
#define RW0d  %eax
#define RW0bl %al
#define RW0bh %ah

#define RW1   %rbx
#define RW1d  %ebx
#define RW1bl %bl
#define RW1bh %bh

#define RW2   %rcx
#define RW2d  %ecx
#define RW2bl %cl
#define RW2bh %ch

/*
 * Temporaries.  RT1 and RT3 are the destinations of "movzbl RWnbh, ..." in
 * the round macros; %ah/%bh/%ch cannot be encoded in an instruction that
 * carries a REX prefix, so these two must not be r8-r15.
 *
 * NOTE(review): RT1 is %rbp, so the frame pointer does not hold a valid
 * frame while the round macros execute - confirm this is acceptable for
 * stack unwinding in this configuration.
 */
#define RT0  %r15
#define RT0d %r15d
#define RT1  %rbp
#define RT1d %ebp
#define RT2  %r14
#define RT2d %r14d
#define RT3  %rdx
#define RT3d %edx
75
76/***********************************************************************
77 * 1-way 3DES
78 ***********************************************************************/
/*
 * do_permutation(x, y, shift, bits)
 *
 * Exchange the bits of y selected by `bits` with the bits of x selected by
 * `bits << shift`:
 *
 *	t  = ((x >> shift) ^ y) & bits;
 *	y ^= t;
 *	x ^= t << shift;
 *
 * x and y are 32-bit registers, shift and bits are constants.
 * Clobbers RT0.
 */
#define do_permutation(x, y, shift, bits) \
	movl x, RT0d;			/* t = x */ \
	shrl $(shift), RT0d;		/* t >>= shift */ \
	xorl y, RT0d;			/* t ^= y */ \
	andl $(bits), RT0d;		/* t &= bits */ \
	xorl RT0d, y;			/* y ^= t */ \
	shll $(shift), RT0d;		/* t <<= shift */ \
	xorl RT0d, x;			/* x ^= t */
87
/*
 * expand_to_64bits(val, mask)
 *
 * val:  64-bit register alias; its 32-bit view (val##d) holds the input.
 * mask: 64-bit register or immediate ANDed into the result.
 *
 * Computes
 *	val = (val | ((u64)ror32((u32)val, 4) << 32)) & mask
 *
 * The upper 32 bits of val are ORed in as they are, so they have to be zero
 * on entry; the callers in this file write val##d with 32-bit instructions
 * immediately beforehand, which clears them.
 *
 * Clobbers RT0.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;
94
/*
 * compress_to_64bits(val)
 *
 * Folds the two halves of a 64-bit register back into 32 bits:
 *	(u32)val |= rol32((u32)(val >> 32), 4)
 *
 * The result is left in val##d; the 32-bit OR clears the upper half of val.
 * Clobbers RT0.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;
100
/*
 * initial_permutation(lhs, rhs)
 *
 * Input permutation for one block.  lhs/rhs are 64-bit register aliases
 * whose 32-bit views hold the two byte-swapped halves of the block.
 *
 *   1. four do_permutation bit exchanges between the halves
 *   2. rhs = rol32(rhs, 1); exchange the 0xaaaaaaaa bits of lhs and rhs;
 *      lhs = rol32(lhs, 1)
 *   3. widen each half with expand_to_64bits under the mask
 *      0x3f3f3f3f3f3f3f3f (kept in RT3)
 *
 * Clobbers RT0, RT3 and RW0.
 */
#define initial_permutation(lhs, rhs) \
	do_permutation(lhs##d, rhs##d,  4, 0x0f0f0f0f); \
	do_permutation(lhs##d, rhs##d, 16, 0x0000ffff); \
	do_permutation(rhs##d, lhs##d,  2, 0x33333333); \
	do_permutation(rhs##d, lhs##d,  8, 0x00ff00ff); \
	movabs $0x3f3f3f3f3f3f3f3f, RT3;	/* mask for expand_to_64bits */ \
	movl lhs##d, RW0d; \
	roll $1, rhs##d; \
	xorl rhs##d, RW0d; \
	andl $0xaaaaaaaa, RW0d;			/* bits to exchange */ \
	xorl RW0d, lhs##d; \
	xorl RW0d, rhs##d; \
	roll $1, lhs##d; \
	expand_to_64bits(rhs, RT3); \
	expand_to_64bits(lhs, RT3);
116
/*
 * final_permutation(lhs, rhs)
 *
 * Output permutation for one block; performs the steps of
 * initial_permutation in reverse:
 *
 *   1. fold each 64-bit half back to 32 bits with compress_to_64bits
 *   2. lhs = ror32(lhs, 1); exchange the 0xaaaaaaaa bits of lhs and rhs;
 *      rhs = ror32(rhs, 1)
 *   3. the four do_permutation bit exchanges, in the opposite order
 *
 * The results are left in lhs##d / rhs##d.
 * Clobbers RT0 and RW0.
 */
#define final_permutation(lhs, rhs) \
	compress_to_64bits(rhs); \
	compress_to_64bits(lhs); \
	movl rhs##d, RW0d; \
	rorl $1, lhs##d; \
	xorl lhs##d, RW0d; \
	andl $0xaaaaaaaa, RW0d;			/* bits to exchange */ \
	xorl RW0d, rhs##d; \
	xorl RW0d, lhs##d; \
	rorl $1, rhs##d; \
	do_permutation(rhs##d, lhs##d,  8, 0x00ff00ff); \
	do_permutation(rhs##d, lhs##d,  2, 0x33333333); \
	do_permutation(lhs##d, rhs##d, 16, 0x0000ffff); \
	do_permutation(lhs##d, rhs##d,  4, 0x0f0f0f0f);
131
/*
 * round1(idx, src, dst, next_key)
 *
 * One round for a single block.
 *
 * idx:      number of the round key currently held in RW0
 * src:      64-bit half that is mixed with the key
 * dst:      64-bit half that receives the eight table lookups (XORed in)
 * next_key: macro invoked as next_key(idx, RW0) once RW0 is no longer
 *           needed; pass load_next_key to fetch the key of the following
 *           round, or dummy2 to skip the load
 *
 * RW0 ^= src, then each of the eight bytes of RW0 indexes one table:
 * byte 0 -> s8, byte 1 -> s6, byte 2 -> s4, byte 3 -> s2,
 * byte 4 -> s7, byte 5 -> s5, byte 6 -> s3, byte 7 -> s1.
 * Half of the lookups are collected in RT0 and merged into dst at the end.
 *
 * Clobbers RW0, RT0, RT1, RT2, RT3 and RL1 (used as an extra index).
 */
#define round1(idx, src, dst, next_key) \
	xorq src, RW0; \
	\
	movzbl RW0bl, RT0d;		/* byte 0 */ \
	movzbl RW0bh, RT1d;		/* byte 1 */ \
	shrq $16, RW0; \
	movzbl RW0bl, RT2d;		/* byte 2 */ \
	movzbl RW0bh, RT3d;		/* byte 3 */ \
	shrq $16, RW0; \
	movq s8(, RT0, 8), RT0; \
	xorq s6(, RT1, 8), dst; \
	movzbl RW0bl, RL1d;		/* byte 4 */ \
	movzbl RW0bh, RT1d;		/* byte 5 */ \
	shrl $16, RW0d; \
	xorq s4(, RT2, 8), RT0; \
	xorq s2(, RT3, 8), dst; \
	movzbl RW0bl, RT2d;		/* byte 6 */ \
	movzbl RW0bh, RT3d;		/* byte 7 */ \
	xorq s7(, RL1, 8), RT0; \
	xorq s5(, RT1, 8), dst; \
	xorq s3(, RT2, 8), RT0; \
	next_key(idx, RW0);		/* RW0 is free from here on */ \
	xorq RT0, dst; \
	xorq s1(, RT3, 8), dst;
156
/*
 * load_next_key(idx, reg)
 *
 * Load round key idx + 1 into reg.  Round keys are 8 bytes each and are
 * read from the array CTX points to.
 */
#define load_next_key(idx, reg) \
	movq (((idx) + 1) * 8)(CTX), reg;
159
/*
 * Two-argument macro that expands to nothing.  Passed to round1/round3 in
 * place of load_next_key/__movq when that step has to be left out.
 */
#define dummy2(unused_a, unused_b) /* nothing */
161
/*
 * read_block(ptr, lhs, rhs)
 *
 * Load one 8-byte block from memory at ptr: bytes 0-3 go to lhs##d and
 * bytes 4-7 to rhs##d, each byte-swapped after the load.
 */
#define read_block(ptr, lhs, rhs) \
	movl    (ptr), lhs##d; \
	movl   4(ptr), rhs##d; \
	bswapl lhs##d; \
	bswapl rhs##d;
167
/*
 * write_block(ptr, lhs, rhs)
 *
 * Byte-swap lhs##d and rhs##d in place, then store them to bytes 0-3 and
 * bytes 4-7 of the 8-byte block at ptr.
 */
#define write_block(ptr, lhs, rhs) \
	bswapl lhs##d; \
	bswapl rhs##d; \
	movl   lhs##d,   (ptr); \
	movl   rhs##d, 4(ptr);
173
ENTRY(des3_ede_x86_64_crypt_blk)
	/*
	 * Process a single 8-byte block.
	 *
	 * input:
	 *	%rdi: round keys (CTX), 48 quadwords read at 0..47 * 8
	 *	%rsi: dst, 8 bytes written
	 *	%rdx: src, 8 bytes read
	 *
	 * %rbx, %rbp and %r12-%r15 are saved on the stack and restored
	 * before returning; the macros below use them as RW1, RT1, RR1,
	 * RR2, RT2 and RT0.
	 */
	pushq %rbp
	pushq %rbx
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15

	/* src is read before initial_permutation overwrites RT3 (%rdx) */
	read_block(%rdx, RL0, RR0)
	initial_permutation(RL0, RR0)

	/* round key 0; every round preloads the key for the next one */
	movq (CTX), RW0

	/* rounds 0..15, the first one mixes RR0 into RL0 */
	round1(0, RR0, RL0, load_next_key)
	round1(1, RL0, RR0, load_next_key)
	round1(2, RR0, RL0, load_next_key)
	round1(3, RL0, RR0, load_next_key)
	round1(4, RR0, RL0, load_next_key)
	round1(5, RL0, RR0, load_next_key)
	round1(6, RR0, RL0, load_next_key)
	round1(7, RL0, RR0, load_next_key)
	round1(8, RR0, RL0, load_next_key)
	round1(9, RL0, RR0, load_next_key)
	round1(10, RR0, RL0, load_next_key)
	round1(11, RL0, RR0, load_next_key)
	round1(12, RR0, RL0, load_next_key)
	round1(13, RL0, RR0, load_next_key)
	round1(14, RR0, RL0, load_next_key)
	round1(15, RL0, RR0, load_next_key)

	/* rounds 16..31, the two halves trade roles: RL0 into RR0 first */
	round1(16+0, RL0, RR0, load_next_key)
	round1(16+1, RR0, RL0, load_next_key)
	round1(16+2, RL0, RR0, load_next_key)
	round1(16+3, RR0, RL0, load_next_key)
	round1(16+4, RL0, RR0, load_next_key)
	round1(16+5, RR0, RL0, load_next_key)
	round1(16+6, RL0, RR0, load_next_key)
	round1(16+7, RR0, RL0, load_next_key)
	round1(16+8, RL0, RR0, load_next_key)
	round1(16+9, RR0, RL0, load_next_key)
	round1(16+10, RL0, RR0, load_next_key)
	round1(16+11, RR0, RL0, load_next_key)
	round1(16+12, RL0, RR0, load_next_key)
	round1(16+13, RR0, RL0, load_next_key)
	round1(16+14, RL0, RR0, load_next_key)
	round1(16+15, RR0, RL0, load_next_key)

	/* rounds 32..47, back to RR0 into RL0 first; no key after the last */
	round1(32+0, RR0, RL0, load_next_key)
	round1(32+1, RL0, RR0, load_next_key)
	round1(32+2, RR0, RL0, load_next_key)
	round1(32+3, RL0, RR0, load_next_key)
	round1(32+4, RR0, RL0, load_next_key)
	round1(32+5, RL0, RR0, load_next_key)
	round1(32+6, RR0, RL0, load_next_key)
	round1(32+7, RL0, RR0, load_next_key)
	round1(32+8, RR0, RL0, load_next_key)
	round1(32+9, RL0, RR0, load_next_key)
	round1(32+10, RR0, RL0, load_next_key)
	round1(32+11, RL0, RR0, load_next_key)
	round1(32+12, RR0, RL0, load_next_key)
	round1(32+13, RL0, RR0, load_next_key)
	round1(32+14, RR0, RL0, load_next_key)
	round1(32+15, RL0, RR0, dummy2)

	/* the output takes the halves in swapped order: RR0 first, then RL0 */
	final_permutation(RR0, RL0)
	write_block(%rsi, RR0, RL0)

	popq %r15
	popq %r14
	popq %r13
	popq %r12
	popq %rbx
	popq %rbp

	ret
ENDPROC(des3_ede_x86_64_crypt_blk)
255
256/***********************************************************************
257 * 3-way 3DES
258 ***********************************************************************/
/*
 * expand_to_64bits() and compress_to_64bits() are defined once, in the
 * 1-way section earlier in this file.  The 3-way permutation macros below
 * use those same definitions; they are intentionally not repeated here so
 * that there is a single copy to maintain.
 */
271
/*
 * initial_permutation3(lhs, rhs)
 *
 * Input permutation for three blocks at once.  lhs and rhs are register
 * name prefixes: block n uses lhs##n / rhs##n (64-bit) and lhs##n##d /
 * rhs##n##d (32-bit).  Each block goes through the same operations as in
 * initial_permutation; the indentation level marks the block number.
 *
 * Blocks 0, 1 and 2 use RW0, RW1 and RW2 respectively as scratch.
 * Clobbers RT0, RT3, RW0, RW1 and RW2.
 */
#define initial_permutation3(lhs, rhs) \
	do_permutation(lhs##0d, rhs##0d,  4, 0x0f0f0f0f); \
	do_permutation(lhs##0d, rhs##0d, 16, 0x0000ffff); \
	  do_permutation(lhs##1d, rhs##1d,  4, 0x0f0f0f0f); \
	  do_permutation(lhs##1d, rhs##1d, 16, 0x0000ffff); \
	    do_permutation(lhs##2d, rhs##2d,  4, 0x0f0f0f0f); \
	    do_permutation(lhs##2d, rhs##2d, 16, 0x0000ffff); \
	    \
	do_permutation(rhs##0d, lhs##0d,  2, 0x33333333); \
	do_permutation(rhs##0d, lhs##0d,  8, 0x00ff00ff); \
	  do_permutation(rhs##1d, lhs##1d,  2, 0x33333333); \
	  do_permutation(rhs##1d, lhs##1d,  8, 0x00ff00ff); \
	    do_permutation(rhs##2d, lhs##2d,  2, 0x33333333); \
	    do_permutation(rhs##2d, lhs##2d,  8, 0x00ff00ff); \
	    \
	movabs $0x3f3f3f3f3f3f3f3f, RT3;	/* mask for expand_to_64bits */ \
	    \
	/* block 0 */ \
	movl lhs##0d, RW0d; \
	roll $1, rhs##0d; \
	xorl rhs##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, lhs##0d; \
	xorl RW0d, rhs##0d; \
	roll $1, lhs##0d; \
	expand_to_64bits(rhs##0, RT3); \
	expand_to_64bits(lhs##0, RT3); \
	  /* block 1 */ \
	  movl lhs##1d, RW1d; \
	  roll $1, rhs##1d; \
	  xorl rhs##1d, RW1d; \
	  andl $0xaaaaaaaa, RW1d; \
	  xorl RW1d, lhs##1d; \
	  xorl RW1d, rhs##1d; \
	  roll $1, lhs##1d; \
	  expand_to_64bits(rhs##1, RT3); \
	  expand_to_64bits(lhs##1, RT3); \
	    /* block 2 */ \
	    movl lhs##2d, RW2d; \
	    roll $1, rhs##2d; \
	    xorl rhs##2d, RW2d; \
	    andl $0xaaaaaaaa, RW2d; \
	    xorl RW2d, lhs##2d; \
	    xorl RW2d, rhs##2d; \
	    roll $1, lhs##2d; \
	    expand_to_64bits(rhs##2, RT3); \
	    expand_to_64bits(lhs##2, RT3);
316
/*
 * final_permutation3(lhs, rhs)
 *
 * Output permutation for three blocks at once.  lhs and rhs are register
 * name prefixes, used as in initial_permutation3.  Each block goes through
 * the same operations as in final_permutation; the indentation level marks
 * the block number.
 *
 * Results are left in the 32-bit views lhs##n##d / rhs##n##d.
 * Clobbers RT0, RW0, RW1 and RW2.
 */
#define final_permutation3(lhs, rhs) \
	/* block 0 */ \
	compress_to_64bits(rhs##0); \
	compress_to_64bits(lhs##0); \
	movl rhs##0d, RW0d; \
	rorl $1, lhs##0d; \
	xorl lhs##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, rhs##0d; \
	xorl RW0d, lhs##0d; \
	rorl $1, rhs##0d; \
	  /* block 1 */ \
	  compress_to_64bits(rhs##1); \
	  compress_to_64bits(lhs##1); \
	  movl rhs##1d, RW1d; \
	  rorl $1, lhs##1d; \
	  xorl lhs##1d, RW1d; \
	  andl $0xaaaaaaaa, RW1d; \
	  xorl RW1d, rhs##1d; \
	  xorl RW1d, lhs##1d; \
	  rorl $1, rhs##1d; \
	    /* block 2 */ \
	    compress_to_64bits(rhs##2); \
	    compress_to_64bits(lhs##2); \
	    movl rhs##2d, RW2d; \
	    rorl $1, lhs##2d; \
	    xorl lhs##2d, RW2d; \
	    andl $0xaaaaaaaa, RW2d; \
	    xorl RW2d, rhs##2d; \
	    xorl RW2d, lhs##2d; \
	    rorl $1, rhs##2d; \
	    \
	do_permutation(rhs##0d, lhs##0d,  8, 0x00ff00ff); \
	do_permutation(rhs##0d, lhs##0d,  2, 0x33333333); \
	  do_permutation(rhs##1d, lhs##1d,  8, 0x00ff00ff); \
	  do_permutation(rhs##1d, lhs##1d,  2, 0x33333333); \
	    do_permutation(rhs##2d, lhs##2d,  8, 0x00ff00ff); \
	    do_permutation(rhs##2d, lhs##2d,  2, 0x33333333); \
	    \
	do_permutation(lhs##0d, rhs##0d, 16, 0x0000ffff); \
	do_permutation(lhs##0d, rhs##0d,  4, 0x0f0f0f0f); \
	  do_permutation(lhs##1d, rhs##1d, 16, 0x0000ffff); \
	  do_permutation(lhs##1d, rhs##1d,  4, 0x0f0f0f0f); \
	    do_permutation(lhs##2d, rhs##2d, 16, 0x0000ffff); \
	    do_permutation(lhs##2d, rhs##2d,  4, 0x0f0f0f0f);
359
/*
 * round3(idx, src, dst, next_key, copy_key)
 *
 * One round for three blocks, processed one after the other.
 *
 * idx:      number of the round key held in RW0, RW1 and RW2 on entry
 * src, dst: register name prefixes; block n mixes src##n with the key and
 *           XORs the eight table lookups into dst##n
 * next_key: invoked as next_key(idx, RW0) once block 0 no longer needs
 *           RW0 (load_next_key, or dummy2 to skip)
 * copy_key: invoked as copy_key(RW0, RW1) and copy_key(RW0, RW2) once
 *           blocks 1 and 2 no longer need their work register (__movq to
 *           propagate the new key, or dummy2 to skip)
 *
 * Per block the bytes of (key ^ src) index the tables in the order
 * s8, s6, s4, s2, s7, s5, s3, s1 (byte 0 first).
 *
 * Clobbers RW0, RW1, RW2, RT1 and RT3.
 */
#define round3(idx, src, dst, next_key, copy_key) \
	/* block 0 */ \
	xorq src##0, RW0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s8(, RT3, 8), dst##0; \
	xorq s6(, RT1, 8), dst##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s4(, RT3, 8), dst##0; \
	xorq s2(, RT1, 8), dst##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s7(, RT3, 8), dst##0; \
	xorq s5(, RT1, 8), dst##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	next_key(idx, RW0); \
	xorq s3(, RT3, 8), dst##0; \
	xorq s1(, RT1, 8), dst##0; \
		/* block 1 */ \
		xorq src##1, RW1; \
		movzbl RW1bl, RT3d; \
		movzbl RW1bh, RT1d; \
		shrq $16, RW1; \
		xorq s8(, RT3, 8), dst##1; \
		xorq s6(, RT1, 8), dst##1; \
		movzbl RW1bl, RT3d; \
		movzbl RW1bh, RT1d; \
		shrq $16, RW1; \
		xorq s4(, RT3, 8), dst##1; \
		xorq s2(, RT1, 8), dst##1; \
		movzbl RW1bl, RT3d; \
		movzbl RW1bh, RT1d; \
		shrl $16, RW1d; \
		xorq s7(, RT3, 8), dst##1; \
		xorq s5(, RT1, 8), dst##1; \
		movzbl RW1bl, RT3d; \
		movzbl RW1bh, RT1d; \
		copy_key(RW0, RW1); \
		xorq s3(, RT3, 8), dst##1; \
		xorq s1(, RT1, 8), dst##1; \
			/* block 2 */ \
			xorq src##2, RW2; \
			movzbl RW2bl, RT3d; \
			movzbl RW2bh, RT1d; \
			shrq $16, RW2; \
			xorq s8(, RT3, 8), dst##2; \
			xorq s6(, RT1, 8), dst##2; \
			movzbl RW2bl, RT3d; \
			movzbl RW2bh, RT1d; \
			shrq $16, RW2; \
			xorq s4(, RT3, 8), dst##2; \
			xorq s2(, RT1, 8), dst##2; \
			movzbl RW2bl, RT3d; \
			movzbl RW2bh, RT1d; \
			shrl $16, RW2d; \
			xorq s7(, RT3, 8), dst##2; \
			xorq s5(, RT1, 8), dst##2; \
			movzbl RW2bl, RT3d; \
			movzbl RW2bh, RT1d; \
			copy_key(RW0, RW2); \
			xorq s3(, RT3, 8), dst##2; \
			xorq s1(, RT1, 8), dst##2;
424
/*
 * __movq(from_reg, to_reg)
 *
 * Plain 64-bit register copy, wrapped in a macro so that it can be handed
 * to round3 as its copy_key argument.
 */
#define __movq(from_reg, to_reg) \
	movq from_reg, to_reg;
427
ENTRY(des3_ede_x86_64_crypt_blk_3way)
	/*
	 * Process three consecutive 8-byte blocks.
	 *
	 * input:
	 *	%rdi: round keys (CTX), 48 quadwords read at 0..47 * 8
	 *	%rsi: dst, 3 blocks (24 bytes) written
	 *	%rdx: src, 3 blocks (24 bytes) read
	 *
	 * %rbx, %rbp and %r12-%r15 are saved on the stack and restored
	 * before returning; the macros below use them as RW1, RT1, RR1,
	 * RR2, RT2 and RT0.
	 */
	pushq %rbp
	pushq %rbx
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15

	/*
	 * Fetch and byte-swap the six input words.  This has to finish
	 * before initial_permutation3 overwrites RT3 (%rdx).
	 */
	movl 0 * 4(%rdx), RL0d
	bswapl RL0d
	movl 1 * 4(%rdx), RR0d
	bswapl RR0d
	movl 2 * 4(%rdx), RL1d
	bswapl RL1d
	movl 3 * 4(%rdx), RR1d
	bswapl RR1d
	movl 4 * 4(%rdx), RL2d
	bswapl RL2d
	movl 5 * 4(%rdx), RR2d
	bswapl RR2d

	initial_permutation3(RL, RR)

	/* all three work registers start out holding round key 0 */
	movq 0(CTX), RW0
	movq RW0, RW1
	movq RW0, RW2

	/* rounds 0..15, the first one mixes RRn into RLn */
	round3(0, RR, RL, load_next_key, __movq)
	round3(1, RL, RR, load_next_key, __movq)
	round3(2, RR, RL, load_next_key, __movq)
	round3(3, RL, RR, load_next_key, __movq)
	round3(4, RR, RL, load_next_key, __movq)
	round3(5, RL, RR, load_next_key, __movq)
	round3(6, RR, RL, load_next_key, __movq)
	round3(7, RL, RR, load_next_key, __movq)
	round3(8, RR, RL, load_next_key, __movq)
	round3(9, RL, RR, load_next_key, __movq)
	round3(10, RR, RL, load_next_key, __movq)
	round3(11, RL, RR, load_next_key, __movq)
	round3(12, RR, RL, load_next_key, __movq)
	round3(13, RL, RR, load_next_key, __movq)
	round3(14, RR, RL, load_next_key, __movq)
	round3(15, RL, RR, load_next_key, __movq)

	/* rounds 16..31, the two halves trade roles: RLn into RRn first */
	round3(16+0, RL, RR, load_next_key, __movq)
	round3(16+1, RR, RL, load_next_key, __movq)
	round3(16+2, RL, RR, load_next_key, __movq)
	round3(16+3, RR, RL, load_next_key, __movq)
	round3(16+4, RL, RR, load_next_key, __movq)
	round3(16+5, RR, RL, load_next_key, __movq)
	round3(16+6, RL, RR, load_next_key, __movq)
	round3(16+7, RR, RL, load_next_key, __movq)
	round3(16+8, RL, RR, load_next_key, __movq)
	round3(16+9, RR, RL, load_next_key, __movq)
	round3(16+10, RL, RR, load_next_key, __movq)
	round3(16+11, RR, RL, load_next_key, __movq)
	round3(16+12, RL, RR, load_next_key, __movq)
	round3(16+13, RR, RL, load_next_key, __movq)
	round3(16+14, RL, RR, load_next_key, __movq)
	round3(16+15, RR, RL, load_next_key, __movq)

	/* rounds 32..47, back to RRn into RLn first; no key after the last */
	round3(32+0, RR, RL, load_next_key, __movq)
	round3(32+1, RL, RR, load_next_key, __movq)
	round3(32+2, RR, RL, load_next_key, __movq)
	round3(32+3, RL, RR, load_next_key, __movq)
	round3(32+4, RR, RL, load_next_key, __movq)
	round3(32+5, RL, RR, load_next_key, __movq)
	round3(32+6, RR, RL, load_next_key, __movq)
	round3(32+7, RL, RR, load_next_key, __movq)
	round3(32+8, RR, RL, load_next_key, __movq)
	round3(32+9, RL, RR, load_next_key, __movq)
	round3(32+10, RR, RL, load_next_key, __movq)
	round3(32+11, RL, RR, load_next_key, __movq)
	round3(32+12, RR, RL, load_next_key, __movq)
	round3(32+13, RL, RR, load_next_key, __movq)
	round3(32+14, RR, RL, load_next_key, __movq)
	round3(32+15, RL, RR, dummy2, dummy2)

	/* the output takes the halves in swapped order: RRn first, then RLn */
	final_permutation3(RR, RL)

	/* byte-swap and store the six output words */
	bswapl RR0d
	movl RR0d, 0 * 4(%rsi)
	bswapl RL0d
	movl RL0d, 1 * 4(%rsi)
	bswapl RR1d
	movl RR1d, 2 * 4(%rsi)
	bswapl RL1d
	movl RL1d, 3 * 4(%rsi)
	bswapl RR2d
	movl RR2d, 4 * 4(%rsi)
	bswapl RL2d
	movl RL2d, 5 * 4(%rsi)

	popq %r15
	popq %r14
	popq %r13
	popq %r12
	popq %rbx
	popq %rbp

	ret
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
539
/*
 * The lookup tables that follow are never written: the code in this file
 * only uses them as source memory operands (movq/xorq in round1 and
 * round3).  Keep them in read-only data instead of the writable .data
 * section.  The 16-byte alignment of the first table is preserved.
 */
.section	.rodata, "a", @progbits
.align 16
/*
 * Table s1: 64 quadword entries, read by the round macros as
 * s1(, index, 8) with a zero-extended byte as index.
 *
 * The s2..s8 macros locate the seven tables that follow by adding fixed
 * multiples of 64 * 8 bytes to this label, so all eight tables must stay
 * contiguous, in this order, and exactly 64 entries long.
 */
.L_s1:
	.quad 0x0010100001010400, 0x0000000000000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0010100001010004, 0x0000100000010404
	.quad 0x0000000000000004, 0x0000100000010000
	.quad 0x0000000000000400, 0x0010100001010400
	.quad 0x0010100001010404, 0x0000000000000400
	.quad 0x0010000001000404, 0x0010100001010004
	.quad 0x0010000001000000, 0x0000000000000004
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000100000010400
	.quad 0x0000100000010400, 0x0010100001010000
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0000100000010004, 0x0010000001000004
	.quad 0x0010000001000004, 0x0000100000010004
	.quad 0x0000000000000000, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010000001000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0000000000000004, 0x0010100001010000
	.quad 0x0010100001010400, 0x0010000001000000
	.quad 0x0010000001000000, 0x0000000000000400
	.quad 0x0010100001010004, 0x0000100000010000
	.quad 0x0000100000010400, 0x0010000001000004
	.quad 0x0000000000000400, 0x0000000000000004
	.quad 0x0010000001000404, 0x0000100000010404
	.quad 0x0010100001010404, 0x0000100000010004
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0010000001000004, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010100001010400
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000000000000000
	.quad 0x0000100000010004, 0x0000100000010400
	.quad 0x0000000000000000, 0x0010100001010004
/*
 * Table s2: 64 quadword entries.  The code shown in this file reaches it
 * through the s2 macro (.L_s1 + 1 * 64 * 8), not through this label, so it
 * must directly follow table s1.
 */
.L_s2:
	.quad 0x0801080200100020, 0x0800080000000000
	.quad 0x0000080000000000, 0x0001080200100020
	.quad 0x0001000000100000, 0x0000000200000020
	.quad 0x0801000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0801080200100020
	.quad 0x0801080000100000, 0x0800000000000000
	.quad 0x0800080000000000, 0x0001000000100000
	.quad 0x0000000200000020, 0x0801000200100020
	.quad 0x0001080000100000, 0x0001000200100020
	.quad 0x0800080200000020, 0x0000000000000000
	.quad 0x0800000000000000, 0x0000080000000000
	.quad 0x0001080200100020, 0x0801000000100000
	.quad 0x0001000200100020, 0x0800000200000020
	.quad 0x0000000000000000, 0x0001080000100000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0801000000100000, 0x0000080200000020
	.quad 0x0000000000000000, 0x0001080200100020
	.quad 0x0801000200100020, 0x0001000000100000
	.quad 0x0800080200000020, 0x0801000000100000
	.quad 0x0801080000100000, 0x0000080000000000
	.quad 0x0801000000100000, 0x0800080000000000
	.quad 0x0000000200000020, 0x0801080200100020
	.quad 0x0001080200100020, 0x0000000200000020
	.quad 0x0000080000000000, 0x0800000000000000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0001000000100000, 0x0800000200000020
	.quad 0x0001000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0001000200100020
	.quad 0x0001080000100000, 0x0000000000000000
	.quad 0x0800080000000000, 0x0000080200000020
	.quad 0x0800000000000000, 0x0801000200100020
	.quad 0x0801080200100020, 0x0001080000100000
/*
 * Table s3: 64 quadword entries.  The code shown in this file reaches it
 * through the s3 macro (.L_s1 + 2 * 64 * 8), not through this label, so it
 * must directly follow table s2.
 */
.L_s3:
	.quad 0x0000002000000208, 0x0000202008020200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000202000020208, 0x0000002008000200
	.quad 0x0000200000020008, 0x0000000008000008
	.quad 0x0000000008000008, 0x0000200000020000
	.quad 0x0000202008020208, 0x0000200000020008
	.quad 0x0000200008020000, 0x0000002000000208
	.quad 0x0000000008000000, 0x0000000000000008
	.quad 0x0000202008020200, 0x0000002000000200
	.quad 0x0000202000020200, 0x0000200008020000
	.quad 0x0000200008020008, 0x0000202000020208
	.quad 0x0000002008000208, 0x0000202000020200
	.quad 0x0000200000020000, 0x0000002008000208
	.quad 0x0000000000000008, 0x0000202008020208
	.quad 0x0000002000000200, 0x0000000008000000
	.quad 0x0000202008020200, 0x0000000008000000
	.quad 0x0000200000020008, 0x0000002000000208
	.quad 0x0000200000020000, 0x0000202008020200
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000002000000200, 0x0000200000020008
	.quad 0x0000202008020208, 0x0000002008000200
	.quad 0x0000000008000008, 0x0000002000000200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000208, 0x0000200000020000
	.quad 0x0000000008000000, 0x0000202008020208
	.quad 0x0000000000000008, 0x0000202000020208
	.quad 0x0000202000020200, 0x0000000008000008
	.quad 0x0000200008020000, 0x0000002008000208
	.quad 0x0000002000000208, 0x0000200008020000
	.quad 0x0000202000020208, 0x0000000000000008
	.quad 0x0000200008020008, 0x0000202000020200
/*
 * Table s4: 64 quadword entries.  The code shown in this file reaches it
 * through the s4 macro (.L_s1 + 3 * 64 * 8), not through this label, so it
 * must directly follow table s3.
 */
.L_s4:
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x0000020000002000, 0x0008020800002000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x0000020000002000, 0x0008020800002000
/*
 * Table s5: 64 quadword entries.  The code shown in this file reaches it
 * through the s5 macro (.L_s1 + 4 * 64 * 8), not through this label, so it
 * must directly follow table s4.
 */
.L_s5:
	.quad 0x0000001000000100, 0x0020001002080100
	.quad 0x0020000002080000, 0x0420001002000100
	.quad 0x0000000000080000, 0x0000001000000100
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0400001000080100, 0x0000000000080000
	.quad 0x0020001002000100, 0x0400001000080100
	.quad 0x0420001002000100, 0x0420000002080000
	.quad 0x0000001000080100, 0x0400000000000000
	.quad 0x0020000002000000, 0x0400000000080000
	.quad 0x0400000000080000, 0x0000000000000000
	.quad 0x0400001000000100, 0x0420001002080100
	.quad 0x0420001002080100, 0x0020001002000100
	.quad 0x0420000002080000, 0x0400001000000100
	.quad 0x0000000000000000, 0x0420000002000000
	.quad 0x0020001002080100, 0x0020000002000000
	.quad 0x0420000002000000, 0x0000001000080100
	.quad 0x0000000000080000, 0x0420001002000100
	.quad 0x0000001000000100, 0x0020000002000000
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0420001002000100, 0x0400001000080100
	.quad 0x0020001002000100, 0x0400000000000000
	.quad 0x0420000002080000, 0x0020001002080100
	.quad 0x0400001000080100, 0x0000001000000100
	.quad 0x0020000002000000, 0x0420000002080000
	.quad 0x0420001002080100, 0x0000001000080100
	.quad 0x0420000002000000, 0x0420001002080100
	.quad 0x0020000002080000, 0x0000000000000000
	.quad 0x0400000000080000, 0x0420000002000000
	.quad 0x0000001000080100, 0x0020001002000100
	.quad 0x0400001000000100, 0x0000000000080000
	.quad 0x0000000000000000, 0x0400000000080000
	.quad 0x0020001002080100, 0x0400001000000100
/*
 * Table s6: 64 quadword entries.  The code shown in this file reaches it
 * through the s6 macro (.L_s1 + 5 * 64 * 8), not through this label, so it
 * must directly follow table s5.
 */
.L_s6:
	.quad 0x0200000120000010, 0x0204000020000000
	.quad 0x0000040000000000, 0x0204040120000010
	.quad 0x0204000020000000, 0x0000000100000010
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0200040020000000, 0x0004040100000010
	.quad 0x0004000000000000, 0x0200000120000010
	.quad 0x0004000100000010, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0000000000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000040000000000
	.quad 0x0004040000000000, 0x0200040120000010
	.quad 0x0000000100000010, 0x0204000120000010
	.quad 0x0204000120000010, 0x0000000000000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000040100000010, 0x0004040000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0200040020000000, 0x0000000100000010
	.quad 0x0204000120000010, 0x0004040000000000
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0000040100000010, 0x0200000120000010
	.quad 0x0004000000000000, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0200000120000010, 0x0204040120000010
	.quad 0x0004040000000000, 0x0204000020000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000000000000000, 0x0204000120000010
	.quad 0x0000000100000010, 0x0000040000000000
	.quad 0x0204000020000000, 0x0004040100000010
	.quad 0x0000040000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000000000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0004000100000010, 0x0200040120000010
/*
 * Table s7: 64 quadword entries.  The code shown in this file reaches it
 * through the s7 macro (.L_s1 + 6 * 64 * 8), not through this label, so it
 * must directly follow table s6.
 */
.L_s7:
	.quad 0x0002000000200000, 0x2002000004200002
	.quad 0x2000000004000802, 0x0000000000000000
	.quad 0x0000000000000800, 0x2000000004000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2002000004200802, 0x0002000000200000
	.quad 0x0000000000000000, 0x2000000004000002
	.quad 0x2000000000000002, 0x0000000004000000
	.quad 0x2002000004200002, 0x2000000000000802
	.quad 0x0000000004000800, 0x2002000000200802
	.quad 0x2002000000200002, 0x0000000004000800
	.quad 0x2000000004000002, 0x0002000004200000
	.quad 0x0002000004200800, 0x2002000000200002
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000000000802, 0x2002000004200802
	.quad 0x0002000000200800, 0x2000000000000002
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0002000000200000, 0x2000000004000802
	.quad 0x2000000004000802, 0x2002000004200002
	.quad 0x2002000004200002, 0x2000000000000002
	.quad 0x2002000000200002, 0x0000000004000000
	.quad 0x0000000004000800, 0x0002000000200000
	.quad 0x0002000004200800, 0x2000000000000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2000000000000802, 0x2000000004000002
	.quad 0x2002000004200802, 0x0002000004200000
	.quad 0x0002000000200800, 0x0000000000000000
	.quad 0x2000000000000002, 0x2002000004200802
	.quad 0x0000000000000000, 0x2002000000200802
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000004000002, 0x0000000004000800
	.quad 0x0000000000000800, 0x2002000000200002
/*
 * Table s8: 64 quadword entries.  The code shown in this file reaches it
 * through the s8 macro (.L_s1 + 7 * 64 * 8), not through this label, so it
 * must directly follow table s7.
 */
.L_s8:
	.quad 0x0100010410001000, 0x0000010000001000
	.quad 0x0000000000040000, 0x0100010410041000
	.quad 0x0100000010000000, 0x0100010410001000
	.quad 0x0000000400000000, 0x0100000010000000
	.quad 0x0000000400040000, 0x0100000010040000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0100010010041000, 0x0000010400041000
	.quad 0x0000010000001000, 0x0000000400000000
	.quad 0x0100000010040000, 0x0100000410000000
	.quad 0x0100010010001000, 0x0000010400001000
	.quad 0x0000010000041000, 0x0000000400040000
	.quad 0x0100000410040000, 0x0100010010041000
	.quad 0x0000010400001000, 0x0000000000000000
	.quad 0x0000000000000000, 0x0100000410040000
	.quad 0x0100000410000000, 0x0100010010001000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0100010010041000, 0x0000010000001000
	.quad 0x0000000400000000, 0x0100000410040000
	.quad 0x0000010000001000, 0x0000010400041000
	.quad 0x0100010010001000, 0x0000000400000000
	.quad 0x0100000410000000, 0x0100000010040000
	.quad 0x0100000410040000, 0x0100000010000000
	.quad 0x0000000000040000, 0x0100010410001000
	.quad 0x0000000000000000, 0x0100010410041000
	.quad 0x0000000400040000, 0x0100000410000000
	.quad 0x0100000010040000, 0x0100010010001000
	.quad 0x0100010410001000, 0x0000000000000000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0000010000041000, 0x0000010400001000
	.quad 0x0000010400001000, 0x0000000400040000
	.quad 0x0100000010000000, 0x0100010010041000
806