Lines Matching +full:- +full:b
1 /* SPDX-License-Identifier: GPL-2.0-only */
10 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
14 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
192 in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
193 \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
194 inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
195 \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
196 \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
197 \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
198 out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
199 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
204 inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
205 \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
206 inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
207 \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
208 \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
209 \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
210 inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
211 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
216 ldp q18, q19, [bskey, #-96]
217 ldp q20, q21, [bskey, #-64]
218 ldp q22, q23, [bskey, #-32]
222 ldp q16, q17, [bskey, #-128]!
229 eor \x0\().16b, \x0\().16b, v16.16b
230 eor \x1\().16b, \x1\().16b, v17.16b
231 eor \x2\().16b, \x2\().16b, v18.16b
232 eor \x3\().16b, \x3\().16b, v19.16b
233 eor \x4\().16b, \x4\().16b, v20.16b
234 eor \x5\().16b, \x5\().16b, v21.16b
235 eor \x6\().16b, \x6\().16b, v22.16b
236 eor \x7\().16b, \x7\().16b, v23.16b
240 tbl \x0\().16b, {\x0\().16b}, \mask\().16b
241 tbl \x1\().16b, {\x1\().16b}, \mask\().16b
242 tbl \x2\().16b, {\x2\().16b}, \mask\().16b
243 tbl \x3\().16b, {\x3\().16b}, \mask\().16b
244 tbl \x4\().16b, {\x4\().16b}, \mask\().16b
245 tbl \x5\().16b, {\x5\().16b}, \mask\().16b
246 tbl \x6\().16b, {\x6\().16b}, \mask\().16b
247 tbl \x7\().16b, {\x7\().16b}, \mask\().16b
252 ext \t0\().16b, \x0\().16b, \x0\().16b, #12
253 ext \t1\().16b, \x1\().16b, \x1\().16b, #12
254 eor \x0\().16b, \x0\().16b, \t0\().16b
255 ext \t2\().16b, \x2\().16b, \x2\().16b, #12
256 eor \x1\().16b, \x1\().16b, \t1\().16b
257 ext \t3\().16b, \x3\().16b, \x3\().16b, #12
258 eor \x2\().16b, \x2\().16b, \t2\().16b
259 ext \t4\().16b, \x4\().16b, \x4\().16b, #12
260 eor \x3\().16b, \x3\().16b, \t3\().16b
261 ext \t5\().16b, \x5\().16b, \x5\().16b, #12
262 eor \x4\().16b, \x4\().16b, \t4\().16b
263 ext \t6\().16b, \x6\().16b, \x6\().16b, #12
264 eor \x5\().16b, \x5\().16b, \t5\().16b
265 ext \t7\().16b, \x7\().16b, \x7\().16b, #12
266 eor \x6\().16b, \x6\().16b, \t6\().16b
267 eor \t1\().16b, \t1\().16b, \x0\().16b
268 eor \x7\().16b, \x7\().16b, \t7\().16b
269 ext \x0\().16b, \x0\().16b, \x0\().16b, #8
270 eor \t2\().16b, \t2\().16b, \x1\().16b
271 eor \t0\().16b, \t0\().16b, \x7\().16b
272 eor \t1\().16b, \t1\().16b, \x7\().16b
273 ext \x1\().16b, \x1\().16b, \x1\().16b, #8
274 eor \t5\().16b, \t5\().16b, \x4\().16b
275 eor \x0\().16b, \x0\().16b, \t0\().16b
276 eor \t6\().16b, \t6\().16b, \x5\().16b
277 eor \x1\().16b, \x1\().16b, \t1\().16b
278 ext \t0\().16b, \x4\().16b, \x4\().16b, #8
279 eor \t4\().16b, \t4\().16b, \x3\().16b
280 ext \t1\().16b, \x5\().16b, \x5\().16b, #8
281 eor \t7\().16b, \t7\().16b, \x6\().16b
282 ext \x4\().16b, \x3\().16b, \x3\().16b, #8
283 eor \t3\().16b, \t3\().16b, \x2\().16b
284 ext \x5\().16b, \x7\().16b, \x7\().16b, #8
285 eor \t4\().16b, \t4\().16b, \x7\().16b
286 ext \x3\().16b, \x6\().16b, \x6\().16b, #8
287 eor \t3\().16b, \t3\().16b, \x7\().16b
288 ext \x6\().16b, \x2\().16b, \x2\().16b, #8
289 eor \x7\().16b, \t1\().16b, \t5\().16b
291 eor \x2\().16b, \t0\().16b, \t4\().16b
292 eor \x4\().16b, \x4\().16b, \t3\().16b
293 eor \x5\().16b, \x5\().16b, \t7\().16b
294 eor \x3\().16b, \x3\().16b, \t6\().16b
295 eor \x6\().16b, \x6\().16b, \t2\().16b
297 eor \t3\().16b, \t3\().16b, \x4\().16b
298 eor \x5\().16b, \x5\().16b, \t7\().16b
299 eor \x2\().16b, \x3\().16b, \t6\().16b
300 eor \x3\().16b, \t0\().16b, \t4\().16b
301 eor \x4\().16b, \x6\().16b, \t2\().16b
302 mov \x6\().16b, \t3\().16b
308 ext \t0\().16b, \x0\().16b, \x0\().16b, #8
309 ext \t6\().16b, \x6\().16b, \x6\().16b, #8
310 ext \t7\().16b, \x7\().16b, \x7\().16b, #8
311 eor \t0\().16b, \t0\().16b, \x0\().16b
312 ext \t1\().16b, \x1\().16b, \x1\().16b, #8
313 eor \t6\().16b, \t6\().16b, \x6\().16b
314 ext \t2\().16b, \x2\().16b, \x2\().16b, #8
315 eor \t7\().16b, \t7\().16b, \x7\().16b
316 ext \t3\().16b, \x3\().16b, \x3\().16b, #8
317 eor \t1\().16b, \t1\().16b, \x1\().16b
318 ext \t4\().16b, \x4\().16b, \x4\().16b, #8
319 eor \t2\().16b, \t2\().16b, \x2\().16b
320 ext \t5\().16b, \x5\().16b, \x5\().16b, #8
321 eor \t3\().16b, \t3\().16b, \x3\().16b
322 eor \t4\().16b, \t4\().16b, \x4\().16b
323 eor \t5\().16b, \t5\().16b, \x5\().16b
324 eor \x0\().16b, \x0\().16b, \t6\().16b
325 eor \x1\().16b, \x1\().16b, \t6\().16b
326 eor \x2\().16b, \x2\().16b, \t0\().16b
327 eor \x4\().16b, \x4\().16b, \t2\().16b
328 eor \x3\().16b, \x3\().16b, \t1\().16b
329 eor \x1\().16b, \x1\().16b, \t7\().16b
330 eor \x2\().16b, \x2\().16b, \t7\().16b
331 eor \x4\().16b, \x4\().16b, \t6\().16b
332 eor \x5\().16b, \x5\().16b, \t3\().16b
333 eor \x3\().16b, \x3\().16b, \t6\().16b
334 eor \x6\().16b, \x6\().16b, \t4\().16b
335 eor \x4\().16b, \x4\().16b, \t7\().16b
336 eor \x5\().16b, \x5\().16b, \t7\().16b
337 eor \x7\().16b, \x7\().16b, \t5\().16b
345 eor \t0\().16b, \t0\().16b, \a0\().16b
346 eor \t1\().16b, \t1\().16b, \a1\().16b
347 and \t0\().16b, \t0\().16b, \mask\().16b
348 and \t1\().16b, \t1\().16b, \mask\().16b
349 eor \a0\().16b, \a0\().16b, \t0\().16b
351 eor \a1\().16b, \a1\().16b, \t1\().16b
353 eor \b0\().16b, \b0\().16b, \t0\().16b
354 eor \b1\().16b, \b1\().16b, \t1\().16b
358 movi \t0\().16b, #0x55
359 movi \t1\().16b, #0x33
362 movi \t0\().16b, #0x0f
388 movi v8.16b, #0x01 // bit masks
389 movi v9.16b, #0x02
390 movi v10.16b, #0x04
391 movi v11.16b, #0x08
392 movi v12.16b, #0x10
393 movi v13.16b, #0x20
394 movi v14.16b, #0x40
395 movi v15.16b, #0x80
402 tbl v7.16b ,{v17.16b}, v16.16b
405 cmtst v0.16b, v7.16b, v8.16b
406 cmtst v1.16b, v7.16b, v9.16b
407 cmtst v2.16b, v7.16b, v10.16b
408 cmtst v3.16b, v7.16b, v11.16b
409 cmtst v4.16b, v7.16b, v12.16b
410 cmtst v5.16b, v7.16b, v13.16b
411 cmtst v6.16b, v7.16b, v14.16b
412 cmtst v7.16b, v7.16b, v15.16b
413 not v0.16b, v0.16b
414 not v1.16b, v1.16b
415 not v5.16b, v5.16b
416 not v6.16b, v6.16b
420 stp q2, q3, [x0, #-96]
421 stp q4, q5, [x0, #-64]
422 stp q6, q7, [x0, #-32]
423 b.ne .Lkey_loop
425 movi v7.16b, #0x63 // compose .L63
426 eor v17.16b, v17.16b, v7.16b
437 eor v10.16b, v0.16b, v9.16b // xor with round0 key
438 eor v11.16b, v1.16b, v9.16b
439 tbl v0.16b, {v10.16b}, v8.16b
440 eor v12.16b, v2.16b, v9.16b
441 tbl v1.16b, {v11.16b}, v8.16b
442 eor v13.16b, v3.16b, v9.16b
443 tbl v2.16b, {v12.16b}, v8.16b
444 eor v14.16b, v4.16b, v9.16b
445 tbl v3.16b, {v13.16b}, v8.16b
446 eor v15.16b, v5.16b, v9.16b
447 tbl v4.16b, {v14.16b}, v8.16b
448 eor v10.16b, v6.16b, v9.16b
449 tbl v5.16b, {v15.16b}, v8.16b
450 eor v11.16b, v7.16b, v9.16b
451 tbl v6.16b, {v10.16b}, v8.16b
452 tbl v7.16b, {v11.16b}, v8.16b
457 b .Lenc_sbox
465 b.cc .Lenc_done
474 b.ne .Lenc_loop
476 b .Lenc_loop
483 eor v0.16b, v0.16b, v12.16b
484 eor v1.16b, v1.16b, v12.16b
485 eor v4.16b, v4.16b, v12.16b
486 eor v6.16b, v6.16b, v12.16b
487 eor v3.16b, v3.16b, v12.16b
488 eor v7.16b, v7.16b, v12.16b
489 eor v2.16b, v2.16b, v12.16b
490 eor v5.16b, v5.16b, v12.16b
499 ldr q9, [bskey, #-112]! // round 0 key
503 eor v10.16b, v0.16b, v9.16b // xor with round0 key
504 eor v11.16b, v1.16b, v9.16b
505 tbl v0.16b, {v10.16b}, v8.16b
506 eor v12.16b, v2.16b, v9.16b
507 tbl v1.16b, {v11.16b}, v8.16b
508 eor v13.16b, v3.16b, v9.16b
509 tbl v2.16b, {v12.16b}, v8.16b
510 eor v14.16b, v4.16b, v9.16b
511 tbl v3.16b, {v13.16b}, v8.16b
512 eor v15.16b, v5.16b, v9.16b
513 tbl v4.16b, {v14.16b}, v8.16b
514 eor v10.16b, v6.16b, v9.16b
515 tbl v5.16b, {v15.16b}, v8.16b
516 eor v11.16b, v7.16b, v9.16b
517 tbl v6.16b, {v10.16b}, v8.16b
518 tbl v7.16b, {v11.16b}, v8.16b
523 b .Ldec_sbox
531 b.cc .Ldec_done
540 b.ne .Ldec_loop
542 b .Ldec_loop
544 ldr q12, [bskey, #-16] // last round key
548 eor v0.16b, v0.16b, v12.16b
549 eor v1.16b, v1.16b, v12.16b
550 eor v6.16b, v6.16b, v12.16b
551 eor v4.16b, v4.16b, v12.16b
552 eor v2.16b, v2.16b, v12.16b
553 eor v7.16b, v7.16b, v12.16b
554 eor v3.16b, v3.16b, v12.16b
555 eor v5.16b, v5.16b, v12.16b
580 ld1 {v0.16b}, [x20], #16
582 ld1 {v1.16b}, [x20], #16
584 ld1 {v2.16b}, [x20], #16
586 ld1 {v3.16b}, [x20], #16
588 ld1 {v4.16b}, [x20], #16
590 ld1 {v5.16b}, [x20], #16
592 ld1 {v6.16b}, [x20], #16
594 ld1 {v7.16b}, [x20], #16
600 st1 {\o0\().16b}, [x19], #16
602 st1 {\o1\().16b}, [x19], #16
604 st1 {\o2\().16b}, [x19], #16
606 st1 {\o3\().16b}, [x19], #16
608 st1 {\o4\().16b}, [x19], #16
610 st1 {\o5\().16b}, [x19], #16
612 st1 {\o6\().16b}, [x19], #16
614 st1 {\o7\().16b}, [x19], #16
617 b 99b
654 ld1 {v0.16b}, [x20], #16
655 mov v25.16b, v0.16b
657 ld1 {v1.16b}, [x20], #16
658 mov v26.16b, v1.16b
660 ld1 {v2.16b}, [x20], #16
661 mov v27.16b, v2.16b
663 ld1 {v3.16b}, [x20], #16
664 mov v28.16b, v3.16b
666 ld1 {v4.16b}, [x20], #16
667 mov v29.16b, v4.16b
669 ld1 {v5.16b}, [x20], #16
670 mov v30.16b, v5.16b
672 ld1 {v6.16b}, [x20], #16
673 mov v31.16b, v6.16b
675 ld1 {v7.16b}, [x20]
681 ld1 {v24.16b}, [x24] // load IV
683 eor v1.16b, v1.16b, v25.16b
684 eor v6.16b, v6.16b, v26.16b
685 eor v4.16b, v4.16b, v27.16b
686 eor v2.16b, v2.16b, v28.16b
687 eor v7.16b, v7.16b, v29.16b
688 eor v0.16b, v0.16b, v24.16b
689 eor v3.16b, v3.16b, v30.16b
690 eor v5.16b, v5.16b, v31.16b
692 st1 {v0.16b}, [x19], #16
693 mov v24.16b, v25.16b
695 st1 {v1.16b}, [x19], #16
696 mov v24.16b, v26.16b
698 st1 {v6.16b}, [x19], #16
699 mov v24.16b, v27.16b
701 st1 {v4.16b}, [x19], #16
702 mov v24.16b, v28.16b
704 st1 {v2.16b}, [x19], #16
705 mov v24.16b, v29.16b
707 st1 {v7.16b}, [x19], #16
708 mov v24.16b, v30.16b
710 st1 {v3.16b}, [x19], #16
711 mov v24.16b, v31.16b
713 ld1 {v24.16b}, [x20], #16
714 st1 {v5.16b}, [x19], #16
715 1: st1 {v24.16b}, [x24] // store IV
718 b 99b
726 and \tmp\().16b, \tmp\().16b, \const\().16b
728 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
729 eor \out\().16b, \out\().16b, \tmp\().16b
743 ld1 {v0.16b-v3.16b}, [x1], #64
744 ld1 {v4.16b-v7.16b}, [x1], #64
755 eor v0.16b, v0.16b, v25.16b
756 eor v1.16b, v1.16b, v26.16b
757 eor v2.16b, v2.16b, v27.16b
758 eor v3.16b, v3.16b, v28.16b
759 eor v4.16b, v4.16b, v29.16b
760 eor v5.16b, v5.16b, v30.16b
761 eor v6.16b, v6.16b, v31.16b
762 eor v7.16b, v7.16b, v16.16b
775 ld1 {v25.16b}, [x5]
780 eor v16.16b, \o0\().16b, v25.16b
781 eor v17.16b, \o1\().16b, v26.16b
782 eor v18.16b, \o2\().16b, v27.16b
783 eor v19.16b, \o3\().16b, v28.16b
787 eor v20.16b, \o4\().16b, v29.16b
788 eor v21.16b, \o5\().16b, v30.16b
789 eor v22.16b, \o6\().16b, v31.16b
790 eor v23.16b, \o7\().16b, v24.16b
792 st1 {v16.16b-v19.16b}, [x0], #64
793 st1 {v20.16b-v23.16b}, [x0], #64
796 b.gt 0b
798 st1 {v25.16b}, [x5]
816 rev64 \v\().16b, \v\().16b
826 ld1 {v0.16b}, [x5]
844 ld1 { v8.16b-v11.16b}, [x1], #64
845 ld1 {v12.16b-v15.16b}, [x1], #64
847 eor v8.16b, v0.16b, v8.16b
848 eor v9.16b, v1.16b, v9.16b
849 eor v10.16b, v4.16b, v10.16b
850 eor v11.16b, v6.16b, v11.16b
851 eor v12.16b, v3.16b, v12.16b
852 eor v13.16b, v7.16b, v13.16b
853 eor v14.16b, v2.16b, v14.16b
854 eor v15.16b, v5.16b, v15.16b
856 st1 { v8.16b-v11.16b}, [x0], #64
857 st1 {v12.16b-v15.16b}, [x0], #64
861 b.gt 0b
863 st1 {v0.16b}, [x5]