Lines Matching +full:2 +full:d

47 	sub	x21,x5,#16		// j=num-2
115 sub x21,x5,#16 // j=num-2
259 st1 {v6.2d,v7.2d},[x7],#32
261 st1 {v8.2d,v9.2d},[x7],#32
262 st1 {v10.2d,v11.2d},[x7],#32
263 st1 {v12.2d,v13.2d},[x7],#32
280 umlal v6.2d,v28.2s,v0.s[0]
281 umlal v7.2d,v28.2s,v0.s[1]
282 umlal v8.2d,v28.2s,v0.s[2]
283 shl v29.2d,v6.2d,#16
285 umlal v9.2d,v28.2s,v0.s[3]
286 add v29.2d,v29.2d,v6.2d
287 umlal v10.2d,v28.2s,v1.s[0]
288 mul v29.2s,v29.2s,v30.2s
289 umlal v11.2d,v28.2s,v1.s[1]
290 st1 {v28.2s},[sp] // put aside smashed b[8*i+0]
291 umlal v12.2d,v28.2s,v1.s[2]
293 umlal v13.2d,v28.2s,v1.s[3]
295 umlal v6.2d,v29.2s,v2.s[0]
296 umlal v7.2d,v29.2s,v2.s[1]
298 umlal v8.2d,v29.2s,v2.s[2]
299 ushr v15.2d,v6.2d,#16
300 umlal v9.2d,v29.2s,v2.s[3]
301 umlal v10.2d,v29.2s,v3.s[0]
303 add v6.2d,v6.2d,v15.2d
304 umlal v11.2d,v29.2s,v3.s[1]
305 ushr v6.2d,v6.2d,#16
306 umlal v12.2d,v29.2s,v3.s[2]
307 umlal v13.2d,v29.2s,v3.s[3]
308 add v16.2d,v7.2d,v6.2d
309 ins v7.d[0],v16.d[0]
310 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+0]
311 umlal v7.2d,v28.2s,v0.s[0]
312 ld1 {v6.2d},[x6],#16
313 umlal v8.2d,v28.2s,v0.s[1]
314 umlal v9.2d,v28.2s,v0.s[2]
315 shl v29.2d,v7.2d,#16
317 umlal v10.2d,v28.2s,v0.s[3]
318 add v29.2d,v29.2d,v7.2d
319 umlal v11.2d,v28.2s,v1.s[0]
320 mul v29.2s,v29.2s,v30.2s
321 umlal v12.2d,v28.2s,v1.s[1]
322 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+1]
323 umlal v13.2d,v28.2s,v1.s[2]
325 umlal v6.2d,v28.2s,v1.s[3]
327 umlal v7.2d,v29.2s,v2.s[0]
328 umlal v8.2d,v29.2s,v2.s[1]
330 umlal v9.2d,v29.2s,v2.s[2]
331 ushr v15.2d,v7.2d,#16
332 umlal v10.2d,v29.2s,v2.s[3]
333 umlal v11.2d,v29.2s,v3.s[0]
335 add v7.2d,v7.2d,v15.2d
336 umlal v12.2d,v29.2s,v3.s[1]
337 ushr v7.2d,v7.2d,#16
338 umlal v13.2d,v29.2s,v3.s[2]
339 umlal v6.2d,v29.2s,v3.s[3]
340 add v16.2d,v8.2d,v7.2d
341 ins v8.d[0],v16.d[0]
342 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+1]
343 umlal v8.2d,v28.2s,v0.s[0]
344 ld1 {v7.2d},[x6],#16
345 umlal v9.2d,v28.2s,v0.s[1]
346 umlal v10.2d,v28.2s,v0.s[2]
347 shl v29.2d,v8.2d,#16
349 umlal v11.2d,v28.2s,v0.s[3]
350 add v29.2d,v29.2d,v8.2d
351 umlal v12.2d,v28.2s,v1.s[0]
352 mul v29.2s,v29.2s,v30.2s
353 umlal v13.2d,v28.2s,v1.s[1]
354 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+2]
355 umlal v6.2d,v28.2s,v1.s[2]
357 umlal v7.2d,v28.2s,v1.s[3]
359 umlal v8.2d,v29.2s,v2.s[0]
360 umlal v9.2d,v29.2s,v2.s[1]
362 umlal v10.2d,v29.2s,v2.s[2]
363 ushr v15.2d,v8.2d,#16
364 umlal v11.2d,v29.2s,v2.s[3]
365 umlal v12.2d,v29.2s,v3.s[0]
367 add v8.2d,v8.2d,v15.2d
368 umlal v13.2d,v29.2s,v3.s[1]
369 ushr v8.2d,v8.2d,#16
370 umlal v6.2d,v29.2s,v3.s[2]
371 umlal v7.2d,v29.2s,v3.s[3]
372 add v16.2d,v9.2d,v8.2d
373 ins v9.d[0],v16.d[0]
374 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+2]
375 umlal v9.2d,v28.2s,v0.s[0]
376 ld1 {v8.2d},[x6],#16
377 umlal v10.2d,v28.2s,v0.s[1]
378 umlal v11.2d,v28.2s,v0.s[2]
379 shl v29.2d,v9.2d,#16
381 umlal v12.2d,v28.2s,v0.s[3]
382 add v29.2d,v29.2d,v9.2d
383 umlal v13.2d,v28.2s,v1.s[0]
384 mul v29.2s,v29.2s,v30.2s
385 umlal v6.2d,v28.2s,v1.s[1]
386 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+3]
387 umlal v7.2d,v28.2s,v1.s[2]
389 umlal v8.2d,v28.2s,v1.s[3]
391 umlal v9.2d,v29.2s,v2.s[0]
392 umlal v10.2d,v29.2s,v2.s[1]
394 umlal v11.2d,v29.2s,v2.s[2]
395 ushr v15.2d,v9.2d,#16
396 umlal v12.2d,v29.2s,v2.s[3]
397 umlal v13.2d,v29.2s,v3.s[0]
399 add v9.2d,v9.2d,v15.2d
400 umlal v6.2d,v29.2s,v3.s[1]
401 ushr v9.2d,v9.2d,#16
402 umlal v7.2d,v29.2s,v3.s[2]
403 umlal v8.2d,v29.2s,v3.s[3]
404 add v16.2d,v10.2d,v9.2d
405 ins v10.d[0],v16.d[0]
406 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+3]
407 umlal v10.2d,v28.2s,v0.s[0]
408 ld1 {v9.2d},[x6],#16
409 umlal v11.2d,v28.2s,v0.s[1]
410 umlal v12.2d,v28.2s,v0.s[2]
411 shl v29.2d,v10.2d,#16
413 umlal v13.2d,v28.2s,v0.s[3]
414 add v29.2d,v29.2d,v10.2d
415 umlal v6.2d,v28.2s,v1.s[0]
416 mul v29.2s,v29.2s,v30.2s
417 umlal v7.2d,v28.2s,v1.s[1]
418 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+4]
419 umlal v8.2d,v28.2s,v1.s[2]
421 umlal v9.2d,v28.2s,v1.s[3]
423 umlal v10.2d,v29.2s,v2.s[0]
424 umlal v11.2d,v29.2s,v2.s[1]
426 umlal v12.2d,v29.2s,v2.s[2]
427 ushr v15.2d,v10.2d,#16
428 umlal v13.2d,v29.2s,v2.s[3]
429 umlal v6.2d,v29.2s,v3.s[0]
431 add v10.2d,v10.2d,v15.2d
432 umlal v7.2d,v29.2s,v3.s[1]
433 ushr v10.2d,v10.2d,#16
434 umlal v8.2d,v29.2s,v3.s[2]
435 umlal v9.2d,v29.2s,v3.s[3]
436 add v16.2d,v11.2d,v10.2d
437 ins v11.d[0],v16.d[0]
438 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+4]
439 umlal v11.2d,v28.2s,v0.s[0]
440 ld1 {v10.2d},[x6],#16
441 umlal v12.2d,v28.2s,v0.s[1]
442 umlal v13.2d,v28.2s,v0.s[2]
443 shl v29.2d,v11.2d,#16
445 umlal v6.2d,v28.2s,v0.s[3]
446 add v29.2d,v29.2d,v11.2d
447 umlal v7.2d,v28.2s,v1.s[0]
448 mul v29.2s,v29.2s,v30.2s
449 umlal v8.2d,v28.2s,v1.s[1]
450 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+5]
451 umlal v9.2d,v28.2s,v1.s[2]
453 umlal v10.2d,v28.2s,v1.s[3]
455 umlal v11.2d,v29.2s,v2.s[0]
456 umlal v12.2d,v29.2s,v2.s[1]
458 umlal v13.2d,v29.2s,v2.s[2]
459 ushr v15.2d,v11.2d,#16
460 umlal v6.2d,v29.2s,v2.s[3]
461 umlal v7.2d,v29.2s,v3.s[0]
463 add v11.2d,v11.2d,v15.2d
464 umlal v8.2d,v29.2s,v3.s[1]
465 ushr v11.2d,v11.2d,#16
466 umlal v9.2d,v29.2s,v3.s[2]
467 umlal v10.2d,v29.2s,v3.s[3]
468 add v16.2d,v12.2d,v11.2d
469 ins v12.d[0],v16.d[0]
470 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+5]
471 umlal v12.2d,v28.2s,v0.s[0]
472 ld1 {v11.2d},[x6],#16
473 umlal v13.2d,v28.2s,v0.s[1]
474 umlal v6.2d,v28.2s,v0.s[2]
475 shl v29.2d,v12.2d,#16
477 umlal v7.2d,v28.2s,v0.s[3]
478 add v29.2d,v29.2d,v12.2d
479 umlal v8.2d,v28.2s,v1.s[0]
480 mul v29.2s,v29.2s,v30.2s
481 umlal v9.2d,v28.2s,v1.s[1]
482 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+6]
483 umlal v10.2d,v28.2s,v1.s[2]
485 umlal v11.2d,v28.2s,v1.s[3]
487 umlal v12.2d,v29.2s,v2.s[0]
488 umlal v13.2d,v29.2s,v2.s[1]
490 umlal v6.2d,v29.2s,v2.s[2]
491 ushr v15.2d,v12.2d,#16
492 umlal v7.2d,v29.2s,v2.s[3]
493 umlal v8.2d,v29.2s,v3.s[0]
495 add v12.2d,v12.2d,v15.2d
496 umlal v9.2d,v29.2s,v3.s[1]
497 ushr v12.2d,v12.2d,#16
498 umlal v10.2d,v29.2s,v3.s[2]
499 umlal v11.2d,v29.2s,v3.s[3]
500 add v16.2d,v13.2d,v12.2d
501 ins v13.d[0],v16.d[0]
502 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+6]
503 umlal v13.2d,v28.2s,v0.s[0]
504 ld1 {v12.2d},[x6],#16
505 umlal v6.2d,v28.2s,v0.s[1]
506 umlal v7.2d,v28.2s,v0.s[2]
507 shl v29.2d,v13.2d,#16
509 umlal v8.2d,v28.2s,v0.s[3]
510 add v29.2d,v29.2d,v13.2d
511 umlal v9.2d,v28.2s,v1.s[0]
512 mul v29.2s,v29.2s,v30.2s
513 umlal v10.2d,v28.2s,v1.s[1]
514 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+7]
515 umlal v11.2d,v28.2s,v1.s[2]
517 umlal v12.2d,v28.2s,v1.s[3]
518 ld1 {v28.2s},[sp] // pull smashed b[8*i+0]
519 umlal v13.2d,v29.2s,v2.s[0]
521 umlal v6.2d,v29.2s,v2.s[1]
522 umlal v7.2d,v29.2s,v2.s[2]
524 ushr v5.2d,v5.2d,#16
526 umlal v8.2d,v29.2s,v2.s[3]
527 umlal v9.2d,v29.2s,v3.s[0]
528 add v13.2d,v13.2d,v5.2d
529 umlal v10.2d,v29.2s,v3.s[1]
530 ushr v13.2d,v13.2d,#16
532 ins v13.d[1],v15.d[0]
533 umlal v11.2d,v29.2s,v3.s[2]
534 umlal v12.2d,v29.2s,v3.s[3]
535 add v6.2d,v6.2d,v13.2d
536 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+7]
544 umlal v6.2d,v28.2s,v0.s[0]
545 ld1 {v13.2d},[x6]
546 umlal v7.2d,v28.2s,v0.s[1]
547 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+0]
548 umlal v8.2d,v28.2s,v0.s[2]
550 umlal v9.2d,v28.2s,v0.s[3]
554 umlal v10.2d,v28.2s,v1.s[0]
555 umlal v11.2d,v28.2s,v1.s[1]
556 umlal v12.2d,v28.2s,v1.s[2]
557 umlal v13.2d,v28.2s,v1.s[3]
558 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+1]
559 umlal v6.2d,v29.2s,v2.s[0]
560 umlal v7.2d,v29.2s,v2.s[1]
561 umlal v8.2d,v29.2s,v2.s[2]
562 umlal v9.2d,v29.2s,v2.s[3]
563 umlal v10.2d,v29.2s,v3.s[0]
564 umlal v11.2d,v29.2s,v3.s[1]
565 umlal v12.2d,v29.2s,v3.s[2]
566 umlal v13.2d,v29.2s,v3.s[3]
567 st1 {v6.2d},[x7],#16
568 umlal v7.2d,v28.2s,v0.s[0]
569 ld1 {v6.2d},[x6]
570 umlal v8.2d,v28.2s,v0.s[1]
571 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+1]
572 umlal v9.2d,v28.2s,v0.s[2]
576 umlal v10.2d,v28.2s,v0.s[3]
577 umlal v11.2d,v28.2s,v1.s[0]
578 umlal v12.2d,v28.2s,v1.s[1]
579 umlal v13.2d,v28.2s,v1.s[2]
580 umlal v6.2d,v28.2s,v1.s[3]
581 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+2]
582 umlal v7.2d,v29.2s,v2.s[0]
583 umlal v8.2d,v29.2s,v2.s[1]
584 umlal v9.2d,v29.2s,v2.s[2]
585 umlal v10.2d,v29.2s,v2.s[3]
586 umlal v11.2d,v29.2s,v3.s[0]
587 umlal v12.2d,v29.2s,v3.s[1]
588 umlal v13.2d,v29.2s,v3.s[2]
589 umlal v6.2d,v29.2s,v3.s[3]
590 st1 {v7.2d},[x7],#16
591 umlal v8.2d,v28.2s,v0.s[0]
592 ld1 {v7.2d},[x6]
593 umlal v9.2d,v28.2s,v0.s[1]
594 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+2]
595 umlal v10.2d,v28.2s,v0.s[2]
599 umlal v11.2d,v28.2s,v0.s[3]
600 umlal v12.2d,v28.2s,v1.s[0]
601 umlal v13.2d,v28.2s,v1.s[1]
602 umlal v6.2d,v28.2s,v1.s[2]
603 umlal v7.2d,v28.2s,v1.s[3]
604 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+3]
605 umlal v8.2d,v29.2s,v2.s[0]
606 umlal v9.2d,v29.2s,v2.s[1]
607 umlal v10.2d,v29.2s,v2.s[2]
608 umlal v11.2d,v29.2s,v2.s[3]
609 umlal v12.2d,v29.2s,v3.s[0]
610 umlal v13.2d,v29.2s,v3.s[1]
611 umlal v6.2d,v29.2s,v3.s[2]
612 umlal v7.2d,v29.2s,v3.s[3]
613 st1 {v8.2d},[x7],#16
614 umlal v9.2d,v28.2s,v0.s[0]
615 ld1 {v8.2d},[x6]
616 umlal v10.2d,v28.2s,v0.s[1]
617 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+3]
618 umlal v11.2d,v28.2s,v0.s[2]
622 umlal v12.2d,v28.2s,v0.s[3]
623 umlal v13.2d,v28.2s,v1.s[0]
624 umlal v6.2d,v28.2s,v1.s[1]
625 umlal v7.2d,v28.2s,v1.s[2]
626 umlal v8.2d,v28.2s,v1.s[3]
627 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+4]
628 umlal v9.2d,v29.2s,v2.s[0]
629 umlal v10.2d,v29.2s,v2.s[1]
630 umlal v11.2d,v29.2s,v2.s[2]
631 umlal v12.2d,v29.2s,v2.s[3]
632 umlal v13.2d,v29.2s,v3.s[0]
633 umlal v6.2d,v29.2s,v3.s[1]
634 umlal v7.2d,v29.2s,v3.s[2]
635 umlal v8.2d,v29.2s,v3.s[3]
636 st1 {v9.2d},[x7],#16
637 umlal v10.2d,v28.2s,v0.s[0]
638 ld1 {v9.2d},[x6]
639 umlal v11.2d,v28.2s,v0.s[1]
640 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+4]
641 umlal v12.2d,v28.2s,v0.s[2]
645 umlal v13.2d,v28.2s,v0.s[3]
646 umlal v6.2d,v28.2s,v1.s[0]
647 umlal v7.2d,v28.2s,v1.s[1]
648 umlal v8.2d,v28.2s,v1.s[2]
649 umlal v9.2d,v28.2s,v1.s[3]
650 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+5]
651 umlal v10.2d,v29.2s,v2.s[0]
652 umlal v11.2d,v29.2s,v2.s[1]
653 umlal v12.2d,v29.2s,v2.s[2]
654 umlal v13.2d,v29.2s,v2.s[3]
655 umlal v6.2d,v29.2s,v3.s[0]
656 umlal v7.2d,v29.2s,v3.s[1]
657 umlal v8.2d,v29.2s,v3.s[2]
658 umlal v9.2d,v29.2s,v3.s[3]
659 st1 {v10.2d},[x7],#16
660 umlal v11.2d,v28.2s,v0.s[0]
661 ld1 {v10.2d},[x6]
662 umlal v12.2d,v28.2s,v0.s[1]
663 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+5]
664 umlal v13.2d,v28.2s,v0.s[2]
668 umlal v6.2d,v28.2s,v0.s[3]
669 umlal v7.2d,v28.2s,v1.s[0]
670 umlal v8.2d,v28.2s,v1.s[1]
671 umlal v9.2d,v28.2s,v1.s[2]
672 umlal v10.2d,v28.2s,v1.s[3]
673 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+6]
674 umlal v11.2d,v29.2s,v2.s[0]
675 umlal v12.2d,v29.2s,v2.s[1]
676 umlal v13.2d,v29.2s,v2.s[2]
677 umlal v6.2d,v29.2s,v2.s[3]
678 umlal v7.2d,v29.2s,v3.s[0]
679 umlal v8.2d,v29.2s,v3.s[1]
680 umlal v9.2d,v29.2s,v3.s[2]
681 umlal v10.2d,v29.2s,v3.s[3]
682 st1 {v11.2d},[x7],#16
683 umlal v12.2d,v28.2s,v0.s[0]
684 ld1 {v11.2d},[x6]
685 umlal v13.2d,v28.2s,v0.s[1]
686 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+6]
687 umlal v6.2d,v28.2s,v0.s[2]
691 umlal v7.2d,v28.2s,v0.s[3]
692 umlal v8.2d,v28.2s,v1.s[0]
693 umlal v9.2d,v28.2s,v1.s[1]
694 umlal v10.2d,v28.2s,v1.s[2]
695 umlal v11.2d,v28.2s,v1.s[3]
696 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+7]
697 umlal v12.2d,v29.2s,v2.s[0]
698 umlal v13.2d,v29.2s,v2.s[1]
699 umlal v6.2d,v29.2s,v2.s[2]
700 umlal v7.2d,v29.2s,v2.s[3]
701 umlal v8.2d,v29.2s,v3.s[0]
702 umlal v9.2d,v29.2s,v3.s[1]
703 umlal v10.2d,v29.2s,v3.s[2]
704 umlal v11.2d,v29.2s,v3.s[3]
705 st1 {v12.2d},[x7],#16
706 umlal v13.2d,v28.2s,v0.s[0]
707 ld1 {v12.2d},[x6]
708 umlal v6.2d,v28.2s,v0.s[1]
709 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+7]
710 umlal v7.2d,v28.2s,v0.s[2]
714 umlal v8.2d,v28.2s,v0.s[3]
715 umlal v9.2d,v28.2s,v1.s[0]
716 umlal v10.2d,v28.2s,v1.s[1]
717 umlal v11.2d,v28.2s,v1.s[2]
718 umlal v12.2d,v28.2s,v1.s[3]
720 sub x1,x1,x5,lsl#2 // rewind
722 umlal v13.2d,v29.2s,v2.s[0]
723 ld1 {v28.2s},[sp] // pull smashed b[8*i+0]
724 umlal v6.2d,v29.2s,v2.s[1]
726 umlal v7.2d,v29.2s,v2.s[2]
728 umlal v8.2d,v29.2s,v2.s[3]
729 umlal v9.2d,v29.2s,v3.s[0]
730 umlal v10.2d,v29.2s,v3.s[1]
731 umlal v11.2d,v29.2s,v3.s[2]
732 st1 {v13.2d},[x7],#16
733 umlal v12.2d,v29.2s,v3.s[3]
737 st1 {v6.2d,v7.2d},[x7],#32
739 st1 {v8.2d,v9.2d},[x7],#32
741 st1 {v10.2d,v11.2d},[x7],#32
742 st1 {v12.2d},[x7]
745 ld1 {v6.2d,v7.2d},[x6],#32
746 ld1 {v8.2d,v9.2d},[x6],#32
747 ld1 {v10.2d,v11.2d},[x6],#32
748 ld1 {v12.2d,v13.2d},[x6],#32
751 sub x3,x3,x5,lsl#2 // rewind
756 st1 {v2.2d,v3.2d}, [sp],#32 // start wiping stack frame
758 ushr v15.2d,v6.2d,#16
760 st1 {v2.2d,v3.2d}, [sp],#32
761 add v6.2d,v6.2d,v15.2d
762 st1 {v2.2d,v3.2d}, [sp],#32
763 ushr v15.2d,v6.2d,#16
764 st1 {v2.2d,v3.2d}, [sp],#32
766 ins v15.d[1],v14.d[0]
773 add v6.2d,v6.2d,v15.2d
775 ushr v15.2d,v6.2d,#16
777 ld1 {v8.2d,v9.2d}, [x6],#32
778 add v6.2d,v6.2d,v15.2d
779 ld1 {v10.2d,v11.2d}, [x6],#32
780 ushr v15.2d,v6.2d,#16
781 ld1 {v12.2d,v13.2d}, [x6],#32
783 ins v15.d[1],v14.d[0]
786 add v7.2d,v7.2d,v15.2d
788 ushr v15.2d,v7.2d,#16
791 add v7.2d,v7.2d,v15.2d
792 ushr v15.2d,v7.2d,#16
794 ins v15.d[1],v14.d[0]
795 add v8.2d,v8.2d,v15.2d
797 ushr v15.2d,v8.2d,#16
800 add v8.2d,v8.2d,v15.2d
801 ushr v15.2d,v8.2d,#16
803 ins v15.d[1],v14.d[0]
804 add v9.2d,v9.2d,v15.2d
806 ushr v15.2d,v9.2d,#16
809 add v9.2d,v9.2d,v15.2d
810 ushr v15.2d,v9.2d,#16
812 ins v15.d[1],v14.d[0]
813 add v10.2d,v10.2d,v15.2d
815 ushr v15.2d,v10.2d,#16
818 add v10.2d,v10.2d,v15.2d
819 ushr v15.2d,v10.2d,#16
821 ins v15.d[1],v14.d[0]
822 add v11.2d,v11.2d,v15.2d
824 ushr v15.2d,v11.2d,#16
827 add v11.2d,v11.2d,v15.2d
828 ushr v15.2d,v11.2d,#16
830 ins v15.d[1],v14.d[0]
831 add v12.2d,v12.2d,v15.2d
833 ushr v15.2d,v12.2d,#16
836 add v12.2d,v12.2d,v15.2d
837 ushr v15.2d,v12.2d,#16
839 ins v15.d[1],v14.d[0]
840 add v13.2d,v13.2d,v15.2d
842 ushr v15.2d,v13.2d,#16
845 add v13.2d,v13.2d,v15.2d
846 ushr v15.2d,v13.2d,#16
848 ins v15.d[1],v14.d[0]
849 ld1 {v6.2d,v7.2d}, [x6],#32
855 sub x3,x3,x5,lsl#2 // rewind x3
857 add x2,sp,x5,lsl#2
895 st1 {v0.2d,v1.2d}, [x3],#32 // wipe
896 st1 {v0.2d,v1.2d}, [x3],#32 // wipe
911 st1 {v0.2d,v1.2d}, [x1],#32 // wipe
912 st1 {v0.2d,v1.2d}, [x3],#32 // wipe
946 ldp x8,x9,[x1,#8*2]
960 stp xzr,xzr,[x2,#8*2]
988 // a[2]a[0]
994 // a[2]a[1] (ii)
1000 // a[3]a[2] (iii)
1001 // a[4]a[2]
1002 // a[5]a[2]
1003 // a[6]a[2]
1004 // a[7]a[2]
1034 stp x19,x20,[x2],#8*2 // t[0..1]
1036 adds x21,x21,x17 // t[2]+lo(a[1]*a[0])
1043 mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii)
1056 umulh x14,x8,x7 // hi(a[2..7]*a[1])
1063 stp x21,x22,[x2],#8*2 // t[2..3]
1070 mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii)
1081 umulh x17,x9,x8 // hi(a[3..7]*a[2])
1088 stp x23,x24,[x2],#8*2 // t[4..5]
1109 stp x25,x26,[x2],#8*2 // t[6..7]
1152 ldp x8,x9,[x2,#8*2]
1160 ldp x8,x9,[x1,#8*2]
1177 // a[d]a[0]
1182 // a[8]a[2]
1183 // a[f]a[2]........................
1240 ldp x8,x9,[x2,#8*2]
1249 ldp x8,x9,[x1,#8*2]
1265 ldp x8,x9,[x0,#8*2]
1274 stp x21,x22,[x2,#8*2]
1275 ldp x21,x22,[x15,#8*2]
1285 // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
1288 ldp x11,x13,[x14,#8*2]
1294 stp x21,x22,[x2,#8*2]
1312 ldp x7,x9,[x1],#8*2
1320 stp x21,x22,[x2,#8*2]
1329 ldp x11,x13,[x1],#8*2
1354 stp x21,x22,[x2,#8*2]
1364 ldp x8,x9,[x1,#8*2]
1372 ldp x21,x22,[sp,#8*2]
1423 ldp x16,x17,[x2,#8*2]
1441 ldp x8,x9,[x1,#8*2]
1492 ldp x8,x9,[x2,#8*2]
1503 ldp x8,x9,[x1,#8*2]
1527 ldp x8,x9,[x16,#8*2]
1538 stp x21,x22,[x2,#8*2]
1539 ldp x21,x22,[x0,#8*2]
1566 ldp x8,x9,[x1,#8*2]
1568 stp x16,x17,[x0,#8*2]
1576 ldp x21,x22,[x2,#8*2]
1594 ldp x8,x9,[x3,#8*2]
1596 stp x16,x17,[x0,#8*2]
1600 ldp x21,x22,[x1,#8*2]
1615 stp xzr,xzr,[x2,#8*2]
1622 stp x16,x17,[x3,#8*2]
1625 stp xzr,xzr,[x1,#8*2]
1631 stp xzr,xzr,[x2,#8*2]
1635 stp x16,x17,[x3,#8*2]
1649 stp xzr,xzr,[sp,#8*2]
1671 stp x8,x9,[x1,#8*2]
1714 ldp x8,x9,[x1,#8*2]
1721 ldp x16,x17,[x3,#8*2]
1777 ldp x8,x9,[x1,#8*2]
1781 ldp x16,x17,[x3,#8*2]
1834 ldp x8,x9,[x1,#8*2]
1837 ldp x16,x17,[x3,#8*2]
1847 ldp x8,x9,[x11,#8*2]
1852 stp x21,x22,[x26,#8*2] // result!!!
1857 ldp x16,x17,[x3,#8*2]
1912 ldp x8,x9,[x1,#8*2]
1922 ldp x16,x17,[x3,#8*2]
1979 ldp x8,x9,[x1,#8*2]
1987 ldp x16,x17,[x3,#8*2]
2003 stp x21,x22,[x26,#8*2] // result!!!
2007 ldp x16,x17,[x11,#8*2]
2013 ldp x8,x9,[x1,#8*2]
2038 ldp x16,x17,[x3,#8*2]
2040 ldp x21,x22,[x26,#8*2]
2044 stp x12,x13,[x0,#8*2]
2055 ldp x8,x9,[x27,#8*2]
2056 stp x12,x13,[x0,#8*2]
2058 ldp x21,x22,[x1,#8*2]
2071 stp xzr,xzr,[x26,#8*2]
2078 stp x12,x13,[x27,#8*2]
2085 stp xzr,xzr,[x26,#8*2]
2091 stp x12,x13,[x27,#8*2]
2105 stp xzr,xzr,[sp,#8*2]
2117 stp x8,x9,[x1,#8*2]
2133 .align 2