/* xref: /linux/arch/powerpc/crypto/curve25519-ppc64le_asm.S (revision a1ff5a7d78a036d6c2178ee5acd6ba4946243800) */
/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://github.com/dot-asm/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================

#
# ====================================================================
# Written and Modified by Danny Tsen <dtsen@us.ibm.com>
# - Added x25519_fe51_sqr_times, x25519_fe51_frombytes, x25519_fe51_tobytes
#   and x25519_cswap
#
# Copyright 2024- IBM Corp.
#
# X25519 lower-level primitives for PPC64.
#

#include <linux/linkage.h>

.text

#
# x25519_fe51_mul: h = f * g in GF(2^255 - 19), radix-2^51 limbs.
#
# Arguments:
#   r3 = h, destination; five 64-bit limbs are stored at offsets 0..32
#   r4 = f, source; five 64-bit limbs are loaded from offsets 0..32
#   r5 = g, source; five 64-bit limbs are loaded from offsets 0..32
# Presumably called from C as (u64 h[5], const u64 f[5], const u64 g[5]);
# confirm against the caller.
#
# Every load from f and g happens before the first store to h, so h may
# overlap either input.
#
# Each output limb is accumulated as a 128-bit sum in a register pair
# (high:low):
#   h0 = r23:r22  h1 = r25:r24  h2 = r27:r26  h3 = r29:r28  h4 = r31:r30
# A product f_i * g_j with i + j >= 5 wraps around to limb i + j - 5 with
# a factor of 19 (2^255 = 19 mod p).  To get that factor, f4, f3, f2, f1
# are replaced in place by 19*f4 .. 19*f1 right after their last unscaled
# use.
#
# Register use: r7..r11 = f0..f4 (later 19*f_i), r6 and r4 alternate as
# the current g_j, r12/r21 = scratch product (low/high), r0 = limb mask.
# Clobbers r0, r4, r6-r12 and CA.  r21-r31 are saved in a 144-byte stack
# frame and restored before returning.
#
# The tail starting at .Lfe51_reduce is shared: x25519_fe51_sqr and
# x25519_fe51_mul121666 branch into it with the same frame and the same
# accumulator registers.
#
.align	5
SYM_FUNC_START(x25519_fe51_mul)

	# Allocate the frame and save the non-volatile registers used below.
	stdu	1,-144(1)
	std	21,56(1)
	std	22,64(1)
	std	23,72(1)
	std	24,80(1)
	std	25,88(1)
	std	26,96(1)
	std	27,104(1)
	std	28,112(1)
	std	29,120(1)
	std	30,128(1)
	std	31,136(1)

	ld	6,0(5)			# g0
	ld	7,0(4)			# f0
	ld	8,8(4)			# f1
	ld	9,16(4)			# f2
	ld	10,24(4)		# f3
	ld	11,32(4)		# f4

	# Round g0: initialise all five accumulators.
	mulld	22,7,6			# h0  = f0 * g0
	mulhdu	23,7,6

	mulld	24,8,6			# h1  = f1 * g0
	mulhdu	25,8,6

	mulld	30,11,6			# h4  = f4 * g0
	mulhdu	31,11,6
	ld	4,8(5)			# g1 (the f pointer in r4 is dead now)
	mulli	11,11,19		# r11 = 19 * f4

	mulld	26,9,6			# h2  = f2 * g0
	mulhdu	27,9,6

	mulld	28,10,6			# h3  = f3 * g0
	mulhdu	29,10,6

	# Round g1.
	mulld	12,11,4			# h0 += 19*f4 * g1
	mulhdu	21,11,4
	addc	22,22,12
	adde	23,23,21

	mulld	12,7,4			# h1 += f0 * g1
	mulhdu	21,7,4
	addc	24,24,12
	adde	25,25,21

	mulld	12,10,4			# h4 += f3 * g1
	mulhdu	21,10,4
	ld	6,16(5)			# g2
	mulli	10,10,19		# r10 = 19 * f3
	addc	30,30,12
	adde	31,31,21

	mulld	12,8,4			# h2 += f1 * g1
	mulhdu	21,8,4
	addc	26,26,12
	adde	27,27,21

	mulld	12,9,4			# h3 += f2 * g1
	mulhdu	21,9,4
	addc	28,28,12
	adde	29,29,21

	# Round g2.
	mulld	12,10,6			# h0 += 19*f3 * g2
	mulhdu	21,10,6
	addc	22,22,12
	adde	23,23,21

	mulld	12,11,6			# h1 += 19*f4 * g2
	mulhdu	21,11,6
	addc	24,24,12
	adde	25,25,21

	mulld	12,9,6			# h4 += f2 * g2
	mulhdu	21,9,6
	ld	4,24(5)			# g3
	mulli	9,9,19			# r9 = 19 * f2
	addc	30,30,12
	adde	31,31,21

	mulld	12,7,6			# h2 += f0 * g2
	mulhdu	21,7,6
	addc	26,26,12
	adde	27,27,21

	mulld	12,8,6			# h3 += f1 * g2
	mulhdu	21,8,6
	addc	28,28,12
	adde	29,29,21

	# Round g3.
	mulld	12,9,4			# h0 += 19*f2 * g3
	mulhdu	21,9,4
	addc	22,22,12
	adde	23,23,21

	mulld	12,10,4			# h1 += 19*f3 * g3
	mulhdu	21,10,4
	addc	24,24,12
	adde	25,25,21

	mulld	12,8,4			# h4 += f1 * g3
	mulhdu	21,8,4
	ld	6,32(5)			# g4
	mulli	8,8,19			# r8 = 19 * f1
	addc	30,30,12
	adde	31,31,21

	mulld	12,11,4			# h2 += 19*f4 * g3
	mulhdu	21,11,4
	addc	26,26,12
	adde	27,27,21

	mulld	12,7,4			# h3 += f0 * g3
	mulhdu	21,7,4
	addc	28,28,12
	adde	29,29,21

	# Round g4.
	mulld	12,8,6			# h0 += 19*f1 * g4
	mulhdu	21,8,6
	addc	22,22,12
	adde	23,23,21

	mulld	12,9,6			# h1 += 19*f2 * g4
	mulhdu	21,9,6
	addc	24,24,12
	adde	25,25,21

	mulld	12,10,6			# h2 += 19*f3 * g4
	mulhdu	21,10,6
	addc	26,26,12
	adde	27,27,21

	mulld	12,11,6			# h3 += 19*f4 * g4
	mulhdu	21,11,6
	addc	28,28,12
	adde	29,29,21

	mulld	12,7,6			# h4 += f0 * g4
	mulhdu	21,7,6
	addc	30,30,12
	adde	31,31,21

	# Shared tail: carry-reduce the five 128-bit sums to 64-bit limbs,
	# store them to r3, restore r21-r31 and return.  Two carry chains
	# (h0 -> h1 -> h2 and h2 -> h3 -> h4 -> h0) are interleaved.
.Lfe51_reduce:
	li	0,-1
	srdi	0,0,13			# r0 = 2^51 - 1, the limb mask

	srdi	12,26,51		# r12 = (r27:r26) >> 51, carry out of h2
	and	9,26,0			# r9  = low 51 bits of h2
	insrdi	12,27,51,0
	srdi	21,22,51		# r21 = (r23:r22) >> 51, carry out of h0
	and	7,22,0			# r7  = low 51 bits of h0
	insrdi	21,23,51,0
	addc	28,28,12		# h3 += carry out of h2
	addze	29,29
	addc	24,24,21		# h1 += carry out of h0
	addze	25,25

	srdi	12,28,51		# r12 = (r29:r28) >> 51, carry out of h3
	and	10,28,0			# r10 = low 51 bits of h3
	insrdi	12,29,51,0
	srdi	21,24,51		# r21 = (r25:r24) >> 51, carry out of h1
	and	8,24,0			# r8  = low 51 bits of h1
	insrdi	21,25,51,0
	addc	30,30,12		# h4 += carry out of h3
	addze	31,31
	add	9,9,21			# limb 2 += carry out of h1

	srdi	12,30,51		# r12 = (r31:r30) >> 51, carry out of h4
	and	11,30,0			# r11 = low 51 bits of h4
	insrdi	12,31,51,0
	mulli	12,12,19		# the top carry wraps to limb 0 times 19

	add	7,7,12

	srdi	21,9,51			# second, short carry: limb 2 -> limb 3
	and	9,9,0
	add	10,10,21

	srdi	12,7,51			# second, short carry: limb 0 -> limb 1
	and	7,7,0
	add	8,8,12

	# Limbs 1 and 3 take that last carry without being masked again,
	# so they may end up slightly above 51 bits.
	std	9,16(3)
	std	10,24(3)
	std	11,32(3)
	std	7,0(3)
	std	8,8(3)

	ld	21,56(1)
	ld	22,64(1)
	ld	23,72(1)
	ld	24,80(1)
	ld	25,88(1)
	ld	26,96(1)
	ld	27,104(1)
	ld	28,112(1)
	ld	29,120(1)
	ld	30,128(1)
	ld	31,136(1)
	addi	1,1,144
	blr
SYM_FUNC_END(x25519_fe51_mul)

#
# x25519_fe51_sqr: h = f * f in GF(2^255 - 19), radix-2^51 limbs.
#
# Arguments:
#   r3 = h, destination; five 64-bit limbs (stored by the shared tail)
#   r4 = f, source; five 64-bit limbs are loaded from offsets 0..32
# Presumably called from C as (u64 h[5], const u64 f[5]); confirm
# against the caller.
#
# Uses the same 128-bit accumulator pairs as x25519_fe51_mul
# (h0 = r23:r22, h1 = r25:r24, h2 = r27:r26, h3 = r29:r28, h4 = r31:r30).
# Cross terms are formed once against a doubled operand kept in r6;
# terms that wrap past limb 4 use 19*f4 (r21) and 19*f3 (r5).
#
# There is no epilogue here: the function ends by branching to
# .Lfe51_reduce inside x25519_fe51_mul, which reduces, stores to r3,
# restores r21-r31, pops the 144-byte frame and returns.  The frame
# layout therefore has to stay identical to the one set up there.
#
# Clobbers r0, r5-r12 and CA.
#
.align	5
SYM_FUNC_START(x25519_fe51_sqr)

	stdu	1,-144(1)
	std	21,56(1)
	std	22,64(1)
	std	23,72(1)
	std	24,80(1)
	std	25,88(1)
	std	26,96(1)
	std	27,104(1)
	std	28,112(1)
	std	29,120(1)
	std	30,128(1)
	std	31,136(1)

	ld	7,0(4)			# f0
	ld	8,8(4)			# f1
	ld	9,16(4)			# f2
	ld	10,24(4)		# f3
	ld	11,32(4)		# f4

	add	6,7,7			# r6  = 2 * f0
	mulli	21,11,19		# r21 = 19 * f4

	mulld	22,7,7			# h0  = f0 * f0
	mulhdu	23,7,7
	mulld	24,8,6			# h1  = f1 * 2*f0
	mulhdu	25,8,6
	mulld	26,9,6			# h2  = f2 * 2*f0
	mulhdu	27,9,6
	mulld	28,10,6			# h3  = f3 * 2*f0
	mulhdu	29,10,6
	mulld	30,11,6			# h4  = f4 * 2*f0
	mulhdu	31,11,6
	add	6,8,8			# r6  = 2 * f1
	mulld	12,11,21		# h3 += f4 * 19*f4
	mulhdu	11,11,21		# (r11 becomes scratch, f4 is dead)
	addc	28,28,12
	adde	29,29,11

	mulli	5,10,19			# r5  = 19 * f3

	mulld	12,8,8			# h2 += f1 * f1
	mulhdu	11,8,8
	addc	26,26,12
	adde	27,27,11
	mulld	12,9,6			# h3 += f2 * 2*f1
	mulhdu	11,9,6
	addc	28,28,12
	adde	29,29,11
	mulld	12,10,6			# h4 += f3 * 2*f1
	mulhdu	11,10,6
	addc	30,30,12
	adde	31,31,11
	mulld	12,21,6			# h0 += 19*f4 * 2*f1
	mulhdu	11,21,6
	add	6,10,10			# r6  = 2 * f3
	addc	22,22,12
	adde	23,23,11
	mulld	12,10,5			# h1 += f3 * 19*f3
	mulhdu	10,10,5			# (r10 becomes scratch, f3 is dead)
	addc	24,24,12
	adde	25,25,10
	mulld	12,6,21			# h2 += 2*f3 * 19*f4
	mulhdu	10,6,21
	add	6,9,9			# r6  = 2 * f2
	addc	26,26,12
	adde	27,27,10

	mulld	12,9,9			# h4 += f2 * f2
	mulhdu	10,9,9
	addc	30,30,12
	adde	31,31,10
	mulld	12,5,6			# h0 += 19*f3 * 2*f2
	mulhdu	10,5,6
	addc	22,22,12
	adde	23,23,10
	mulld	12,21,6			# h1 += 19*f4 * 2*f2
	mulhdu	10,21,6
	addc	24,24,12
	adde	25,25,10

	b	.Lfe51_reduce		# shared reduce, store and return
SYM_FUNC_END(x25519_fe51_sqr)

#
# x25519_fe51_mul121666: h = f * 121666 in GF(2^255 - 19), radix-2^51
# limbs.
#
# Arguments:
#   r3 = h, destination; five 64-bit limbs (stored by the shared tail)
#   r4 = f, source; five 64-bit limbs are loaded from offsets 0..32
# Presumably called from C as (u64 h[5], const u64 f[5]); confirm
# against the caller.
#
# Each limb is multiplied by the constant into the same 128-bit
# accumulator pairs x25519_fe51_mul uses (h0 = r23:r22 .. h4 = r31:r30).
# There is no epilogue here: the function ends by branching to
# .Lfe51_reduce inside x25519_fe51_mul, which reduces, stores to r3,
# restores r21-r31, pops the 144-byte frame and returns.
#
# Clobbers r0, r6-r12 and CA.
#
.align	5
SYM_FUNC_START(x25519_fe51_mul121666)

	stdu	1,-144(1)
	std	21,56(1)
	std	22,64(1)
	std	23,72(1)
	std	24,80(1)
	std	25,88(1)
	std	26,96(1)
	std	27,104(1)
	std	28,112(1)
	std	29,120(1)
	std	30,128(1)
	std	31,136(1)

	lis	6,1			# r6 = 1 << 16
	ori	6,6,56130		# r6 = 65536 + 56130 = 121666
	ld	7,0(4)			# f0
	ld	8,8(4)			# f1
	ld	9,16(4)			# f2
	ld	10,24(4)		# f3
	ld	11,32(4)		# f4

	mulld	22,7,6			# h0 = f0 * 121666
	mulhdu	23,7,6
	mulld	24,8,6			# h1 = f1 * 121666
	mulhdu	25,8,6
	mulld	26,9,6			# h2 = f2 * 121666
	mulhdu	27,9,6
	mulld	28,10,6			# h3 = f3 * 121666
	mulhdu	29,10,6
	mulld	30,11,6			# h4 = f4 * 121666
	mulhdu	31,11,6

	b	.Lfe51_reduce		# shared reduce, store and return
SYM_FUNC_END(x25519_fe51_mul121666)

#
# x25519_fe51_sqr_times: h = f^(2^n) in GF(2^255 - 19), i.e. f squared
# n times in a row, radix-2^51 limbs.
#
# Arguments:
#   r3 = h, destination; five 64-bit limbs are stored at offsets 0..32
#   r4 = f, source; five 64-bit limbs are loaded from offsets 0..32
#   r5 = n, number of squarings
# Presumably called from C as (u64 h[5], const u64 f[5], int n); confirm
# against the caller.
#
# n is moved to CTR and the loop is bottom-tested with bdnz, which
# decrements before it tests.  n = 0 is therefore NOT handled: the body
# would run once and CTR would wrap instead of ending the loop.  Callers
# must pass n >= 1.
#
# The loop body is the x25519_fe51_sqr schedule followed by an inline
# copy of the .Lfe51_reduce carry chain.  The reduced limbs stay in
# r7..r11 between iterations and are written to memory only once, after
# the last one.  r5 is reused as scratch inside the loop, which is safe
# because the count lives in CTR.
#
# Accumulator pairs (high:low): h0 = r23:r22, h1 = r25:r24,
# h2 = r27:r26, h3 = r29:r28, h4 = r31:r30.
# Clobbers r0, r5-r12, CTR and CA.  r21-r31 are saved in a 144-byte
# stack frame and restored before returning.
#
.align	5
SYM_FUNC_START(x25519_fe51_sqr_times)

	stdu	1,-144(1)
	std	21,56(1)
	std	22,64(1)
	std	23,72(1)
	std	24,80(1)
	std	25,88(1)
	std	26,96(1)
	std	27,104(1)
	std	28,112(1)
	std	29,120(1)
	std	30,128(1)
	std	31,136(1)

	ld	7,0(4)			# f0
	ld	8,8(4)			# f1
	ld	9,16(4)			# f2
	ld	10,24(4)		# f3
	ld	11,32(4)		# f4

	mtctr	5			# CTR = n, the iteration count

.Lsqr_times_loop:
	add	6,7,7			# r6  = 2 * f0
	mulli	21,11,19		# r21 = 19 * f4

	mulld	22,7,7			# h0  = f0 * f0
	mulhdu	23,7,7
	mulld	24,8,6			# h1  = f1 * 2*f0
	mulhdu	25,8,6
	mulld	26,9,6			# h2  = f2 * 2*f0
	mulhdu	27,9,6
	mulld	28,10,6			# h3  = f3 * 2*f0
	mulhdu	29,10,6
	mulld	30,11,6			# h4  = f4 * 2*f0
	mulhdu	31,11,6
	add	6,8,8			# r6  = 2 * f1
	mulld	12,11,21		# h3 += f4 * 19*f4
	mulhdu	11,11,21		# (r11 becomes scratch, f4 is dead)
	addc	28,28,12
	adde	29,29,11

	mulli	5,10,19			# r5  = 19 * f3

	mulld	12,8,8			# h2 += f1 * f1
	mulhdu	11,8,8
	addc	26,26,12
	adde	27,27,11
	mulld	12,9,6			# h3 += f2 * 2*f1
	mulhdu	11,9,6
	addc	28,28,12
	adde	29,29,11
	mulld	12,10,6			# h4 += f3 * 2*f1
	mulhdu	11,10,6
	addc	30,30,12
	adde	31,31,11
	mulld	12,21,6			# h0 += 19*f4 * 2*f1
	mulhdu	11,21,6
	add	6,10,10			# r6  = 2 * f3
	addc	22,22,12
	adde	23,23,11
	mulld	12,10,5			# h1 += f3 * 19*f3
	mulhdu	10,10,5			# (r10 becomes scratch, f3 is dead)
	addc	24,24,12
	adde	25,25,10
	mulld	12,6,21			# h2 += 2*f3 * 19*f4
	mulhdu	10,6,21
	add	6,9,9			# r6  = 2 * f2
	addc	26,26,12
	adde	27,27,10

	mulld	12,9,9			# h4 += f2 * f2
	mulhdu	10,9,9
	addc	30,30,12
	adde	31,31,10
	mulld	12,5,6			# h0 += 19*f3 * 2*f2
	mulhdu	10,5,6
	addc	22,22,12
	adde	23,23,10
	mulld	12,21,6			# h1 += 19*f4 * 2*f2
	mulhdu	10,21,6
	addc	24,24,12
	adde	25,25,10

	# fe51_reduce, inlined: same carry chain as .Lfe51_reduce, but the
	# limbs are left in r7..r11 as input for the next iteration.
	li	0,-1
	srdi	0,0,13			# r0 = 2^51 - 1, the limb mask

	srdi	12,26,51		# r12 = (r27:r26) >> 51, carry out of h2
	and	9,26,0			# r9  = low 51 bits of h2
	insrdi	12,27,51,0
	srdi	21,22,51		# r21 = (r23:r22) >> 51, carry out of h0
	and	7,22,0			# r7  = low 51 bits of h0
	insrdi	21,23,51,0
	addc	28,28,12		# h3 += carry out of h2
	addze	29,29
	addc	24,24,21		# h1 += carry out of h0
	addze	25,25

	srdi	12,28,51		# r12 = (r29:r28) >> 51, carry out of h3
	and	10,28,0			# r10 = low 51 bits of h3
	insrdi	12,29,51,0
	srdi	21,24,51		# r21 = (r25:r24) >> 51, carry out of h1
	and	8,24,0			# r8  = low 51 bits of h1
	insrdi	21,25,51,0
	addc	30,30,12		# h4 += carry out of h3
	addze	31,31
	add	9,9,21			# limb 2 += carry out of h1

	srdi	12,30,51		# r12 = (r31:r30) >> 51, carry out of h4
	and	11,30,0			# r11 = low 51 bits of h4
	insrdi	12,31,51,0
	mulli	12,12,19		# the top carry wraps to limb 0 times 19

	add	7,7,12

	srdi	21,9,51			# second, short carry: limb 2 -> limb 3
	and	9,9,0
	add	10,10,21

	srdi	12,7,51			# second, short carry: limb 0 -> limb 1
	and	7,7,0
	add	8,8,12

	bdnz	.Lsqr_times_loop	# decrement CTR, loop while nonzero

	std	9,16(3)
	std	10,24(3)
	std	11,32(3)
	std	7,0(3)
	std	8,8(3)

	ld	21,56(1)
	ld	22,64(1)
	ld	23,72(1)
	ld	24,80(1)
	ld	25,88(1)
	ld	26,96(1)
	ld	27,104(1)
	ld	28,112(1)
	ld	29,120(1)
	ld	30,128(1)
	ld	31,136(1)
	addi	1,1,144
	blr
SYM_FUNC_END(x25519_fe51_sqr_times)

#
# x25519_fe51_frombytes: unpack a 256-bit value into five 51-bit limbs.
#
# Arguments:
#   r3 = h, destination; five 64-bit limbs are stored at offsets 0..32
#   r4 = s, source; four 64-bit words are loaded from offsets 0..24
# Presumably called from C as (u64 h[5], const u8 s[32]); confirm
# against the caller.
#
# With the four words w0..w3 read as one 256-bit integer (w0 least
# significant):
#   h0 = bits   0.. 50     h1 = bits  51..101     h2 = bits 102..152
#   h3 = bits 153..203     h4 = bits 204..254
# Bit 255 is discarded by the final mask.
#
# The words are fetched with ld, so they are interpreted in CPU byte
# order; this matches a little-endian byte string only on a
# little-endian CPU, which the ppc64le file name suggests is the only
# target (confirm).
#
# Leaf routine: no stack frame, uses only r5-r12.
#
.align	5
SYM_FUNC_START(x25519_fe51_frombytes)

	li	12, -1
	srdi	12, 12, 13	# 0x7ffffffffffff

	ld	5, 0(4)		# w0
	ld	6, 8(4)		# w1
	ld	7, 16(4)	# w2
	ld	8, 24(4)	# w3

	srdi	10, 5, 51	# top 13 bits of w0
	and	5, 5, 12	# h0

	sldi	11, 6, 13
	or	11, 10, 11	# h1t
	srdi	10, 6, 38	# top 26 bits of w1
	and	6, 11, 12	# h1

	sldi	11, 7, 26
	or	10, 10, 11	# h2t

	srdi	11, 7, 25	# top 39 bits of w2
	and	7, 10, 12	# h2
	sldi	10, 8, 39
	or	11, 11, 10	# h3t

	srdi	9, 8, 12	# top 52 bits of w3
	and	8, 11, 12	# h3
	and	9, 9, 12	# h4

	std	5, 0(3)
	std	6, 8(3)
	std	7, 16(3)
	std	8, 24(3)
	std	9, 32(3)

	blr
SYM_FUNC_END(x25519_fe51_frombytes)

#
# x25519_fe51_tobytes: fully reduce a five-limb field element modulo
# p = 2^255 - 19 and pack it into four 64-bit words.
#
# Arguments:
#   r3 = s, destination; four 64-bit words are stored at offsets 0..24
#   r4 = h, source; five 64-bit limbs are loaded from offsets 0..32
# Presumably called from C as (u8 s[32], const u64 h[5]); confirm
# against the caller.
#
# Method:
#   pass 1: q = carry out of the top limb when 19 is added to h and the
#           carries are rippled through all five limbs
#   pass 2: h += 19 * q, ripple the carries again, then mask every limb
#           to 51 bits.  Dropping bit 255 removes q * 2^255, so the
#           net effect is h - q * p.
# This yields the canonical value when the input is only slightly
# unreduced (q is 0 or 1), as the outputs of the routines above appear
# to be; confirm the input bound against the caller.
#
# The words are written with std, so they are in CPU byte order; see
# the matching remark on x25519_fe51_frombytes.
#
# Leaf routine: no stack frame, uses only r5-r12.
#
.align	5
SYM_FUNC_START(x25519_fe51_tobytes)

	ld	5, 0(4)		# h0
	ld	6, 8(4)		# h1
	ld	7, 16(4)	# h2
	ld	8, 24(4)	# h3
	ld	9, 32(4)	# h4

	li	12, -1
	srdi	12, 12, 13	# 0x7ffffffffffff

	# Full reduction, pass 1: q = carry out of limb 4 of (h + 19)
	addi	10, 5, 19
	srdi	10, 10, 51
	add	10, 10, 6
	srdi	10, 10, 51
	add	10, 10, 7
	srdi	10, 10, 51
	add	10, 10, 8
	srdi	10, 10, 51
	add	10, 10, 9
	srdi	10, 10, 51	# q

	# Full reduction, pass 2: h += 19 * q with carry propagation
	mulli	10, 10, 19
	add	5, 5, 10
	srdi	11, 5, 51
	add	6, 6, 11
	srdi	11, 6, 51
	add	7, 7, 11
	srdi	11, 7, 51
	add	8, 8, 11
	srdi	11, 8, 51
	add	9, 9, 11

	and	5, 5, 12
	and	6, 6, 12
	and	7, 7, 12
	and	8, 8, 12
	and	9, 9, 12	# drops bit 255

	# Pack 5 x 51 bits into 4 x 64 bits
	sldi	10, 6, 51
	or	5, 5, 10	# s0

	srdi	11, 6, 13
	sldi	10, 7, 38
	or	6, 11, 10	# s1

	srdi	11, 7, 26
	sldi	10, 8, 25
	or	7, 11, 10	# s2

	srdi	11, 8, 39
	sldi	10, 9, 12
	or	8, 11, 10	# s3

	std	5, 0(3)
	std	6, 8(3)
	std	7, 16(3)
	std	8, 24(3)

	blr
SYM_FUNC_END(x25519_fe51_tobytes)

#
# x25519_cswap: branch-free conditional swap of two five-limb values.
#
# Arguments:
#   r3 = first operand, five 64-bit words, updated in place
#   r4 = second operand, five 64-bit words, updated in place
#   r5 = swap selector
# Presumably called from C as (u64 p[5], u64 q[5], unsigned int bit);
# confirm against the caller.
#
# The selector is negated to form a mask: 0 gives 0 (no swap), 1 gives
# all ones (swap).  Any other selector value gives a partial mask and
# mixes bits of the two operands, so callers must pass exactly 0 or 1.
# Each word pair is always loaded and stored; only the mask decides
# whether the stored values are exchanged.
#
# Leaf routine: no stack frame.  Clobbers r6-r12 and CTR, and advances
# r3 and r4 by 40 bytes.
#
.align	5
SYM_FUNC_START(x25519_cswap)

	li	7, 5		# five words per operand
	neg	6, 5		# mask = -selector
	mtctr	7

.Lswap_loop:
	ld	8, 0(3)		# a
	ld	9, 0(4)		# b
	xor	10, 8, 9
	and	10, 10, 6	# t = (a ^ b) & mask
	xor	11, 8, 10	# a ^ t: b when swapping, a otherwise
	xor	12, 9, 10	# b ^ t: a when swapping, b otherwise
	std	11, 0(3)
	addi	3, 3, 8
	std	12, 0(4)
	addi	4, 4, 8
	bdnz	.Lswap_loop

	blr
SYM_FUNC_END(x25519_cswap)
