Lines Matching +full:2 +full:d

49 	sub	x21,x5,#16		// j=num-2
117 sub x21,x5,#16 // j=num-2
261 st1 {v6.2d,v7.2d},[x7],#32
263 st1 {v8.2d,v9.2d},[x7],#32
264 st1 {v10.2d,v11.2d},[x7],#32
265 st1 {v12.2d,v13.2d},[x7],#32
282 umlal v6.2d,v28.2s,v0.s[0]
283 umlal v7.2d,v28.2s,v0.s[1]
284 umlal v8.2d,v28.2s,v0.s[2]
285 shl v29.2d,v6.2d,#16
287 umlal v9.2d,v28.2s,v0.s[3]
288 add v29.2d,v29.2d,v6.2d
289 umlal v10.2d,v28.2s,v1.s[0]
290 mul v29.2s,v29.2s,v30.2s
291 umlal v11.2d,v28.2s,v1.s[1]
292 st1 {v28.2s},[sp] // put aside smashed b[8*i+0]
293 umlal v12.2d,v28.2s,v1.s[2]
295 umlal v13.2d,v28.2s,v1.s[3]
297 umlal v6.2d,v29.2s,v2.s[0]
298 umlal v7.2d,v29.2s,v2.s[1]
300 umlal v8.2d,v29.2s,v2.s[2]
301 ushr v15.2d,v6.2d,#16
302 umlal v9.2d,v29.2s,v2.s[3]
303 umlal v10.2d,v29.2s,v3.s[0]
305 add v6.2d,v6.2d,v15.2d
306 umlal v11.2d,v29.2s,v3.s[1]
307 ushr v6.2d,v6.2d,#16
308 umlal v12.2d,v29.2s,v3.s[2]
309 umlal v13.2d,v29.2s,v3.s[3]
310 add v16.2d,v7.2d,v6.2d
311 ins v7.d[0],v16.d[0]
312 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+0]
313 umlal v7.2d,v28.2s,v0.s[0]
314 ld1 {v6.2d},[x6],#16
315 umlal v8.2d,v28.2s,v0.s[1]
316 umlal v9.2d,v28.2s,v0.s[2]
317 shl v29.2d,v7.2d,#16
319 umlal v10.2d,v28.2s,v0.s[3]
320 add v29.2d,v29.2d,v7.2d
321 umlal v11.2d,v28.2s,v1.s[0]
322 mul v29.2s,v29.2s,v30.2s
323 umlal v12.2d,v28.2s,v1.s[1]
324 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+1]
325 umlal v13.2d,v28.2s,v1.s[2]
327 umlal v6.2d,v28.2s,v1.s[3]
329 umlal v7.2d,v29.2s,v2.s[0]
330 umlal v8.2d,v29.2s,v2.s[1]
332 umlal v9.2d,v29.2s,v2.s[2]
333 ushr v15.2d,v7.2d,#16
334 umlal v10.2d,v29.2s,v2.s[3]
335 umlal v11.2d,v29.2s,v3.s[0]
337 add v7.2d,v7.2d,v15.2d
338 umlal v12.2d,v29.2s,v3.s[1]
339 ushr v7.2d,v7.2d,#16
340 umlal v13.2d,v29.2s,v3.s[2]
341 umlal v6.2d,v29.2s,v3.s[3]
342 add v16.2d,v8.2d,v7.2d
343 ins v8.d[0],v16.d[0]
344 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+1]
345 umlal v8.2d,v28.2s,v0.s[0]
346 ld1 {v7.2d},[x6],#16
347 umlal v9.2d,v28.2s,v0.s[1]
348 umlal v10.2d,v28.2s,v0.s[2]
349 shl v29.2d,v8.2d,#16
351 umlal v11.2d,v28.2s,v0.s[3]
352 add v29.2d,v29.2d,v8.2d
353 umlal v12.2d,v28.2s,v1.s[0]
354 mul v29.2s,v29.2s,v30.2s
355 umlal v13.2d,v28.2s,v1.s[1]
356 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+2]
357 umlal v6.2d,v28.2s,v1.s[2]
359 umlal v7.2d,v28.2s,v1.s[3]
361 umlal v8.2d,v29.2s,v2.s[0]
362 umlal v9.2d,v29.2s,v2.s[1]
364 umlal v10.2d,v29.2s,v2.s[2]
365 ushr v15.2d,v8.2d,#16
366 umlal v11.2d,v29.2s,v2.s[3]
367 umlal v12.2d,v29.2s,v3.s[0]
369 add v8.2d,v8.2d,v15.2d
370 umlal v13.2d,v29.2s,v3.s[1]
371 ushr v8.2d,v8.2d,#16
372 umlal v6.2d,v29.2s,v3.s[2]
373 umlal v7.2d,v29.2s,v3.s[3]
374 add v16.2d,v9.2d,v8.2d
375 ins v9.d[0],v16.d[0]
376 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+2]
377 umlal v9.2d,v28.2s,v0.s[0]
378 ld1 {v8.2d},[x6],#16
379 umlal v10.2d,v28.2s,v0.s[1]
380 umlal v11.2d,v28.2s,v0.s[2]
381 shl v29.2d,v9.2d,#16
383 umlal v12.2d,v28.2s,v0.s[3]
384 add v29.2d,v29.2d,v9.2d
385 umlal v13.2d,v28.2s,v1.s[0]
386 mul v29.2s,v29.2s,v30.2s
387 umlal v6.2d,v28.2s,v1.s[1]
388 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+3]
389 umlal v7.2d,v28.2s,v1.s[2]
391 umlal v8.2d,v28.2s,v1.s[3]
393 umlal v9.2d,v29.2s,v2.s[0]
394 umlal v10.2d,v29.2s,v2.s[1]
396 umlal v11.2d,v29.2s,v2.s[2]
397 ushr v15.2d,v9.2d,#16
398 umlal v12.2d,v29.2s,v2.s[3]
399 umlal v13.2d,v29.2s,v3.s[0]
401 add v9.2d,v9.2d,v15.2d
402 umlal v6.2d,v29.2s,v3.s[1]
403 ushr v9.2d,v9.2d,#16
404 umlal v7.2d,v29.2s,v3.s[2]
405 umlal v8.2d,v29.2s,v3.s[3]
406 add v16.2d,v10.2d,v9.2d
407 ins v10.d[0],v16.d[0]
408 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+3]
409 umlal v10.2d,v28.2s,v0.s[0]
410 ld1 {v9.2d},[x6],#16
411 umlal v11.2d,v28.2s,v0.s[1]
412 umlal v12.2d,v28.2s,v0.s[2]
413 shl v29.2d,v10.2d,#16
415 umlal v13.2d,v28.2s,v0.s[3]
416 add v29.2d,v29.2d,v10.2d
417 umlal v6.2d,v28.2s,v1.s[0]
418 mul v29.2s,v29.2s,v30.2s
419 umlal v7.2d,v28.2s,v1.s[1]
420 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+4]
421 umlal v8.2d,v28.2s,v1.s[2]
423 umlal v9.2d,v28.2s,v1.s[3]
425 umlal v10.2d,v29.2s,v2.s[0]
426 umlal v11.2d,v29.2s,v2.s[1]
428 umlal v12.2d,v29.2s,v2.s[2]
429 ushr v15.2d,v10.2d,#16
430 umlal v13.2d,v29.2s,v2.s[3]
431 umlal v6.2d,v29.2s,v3.s[0]
433 add v10.2d,v10.2d,v15.2d
434 umlal v7.2d,v29.2s,v3.s[1]
435 ushr v10.2d,v10.2d,#16
436 umlal v8.2d,v29.2s,v3.s[2]
437 umlal v9.2d,v29.2s,v3.s[3]
438 add v16.2d,v11.2d,v10.2d
439 ins v11.d[0],v16.d[0]
440 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+4]
441 umlal v11.2d,v28.2s,v0.s[0]
442 ld1 {v10.2d},[x6],#16
443 umlal v12.2d,v28.2s,v0.s[1]
444 umlal v13.2d,v28.2s,v0.s[2]
445 shl v29.2d,v11.2d,#16
447 umlal v6.2d,v28.2s,v0.s[3]
448 add v29.2d,v29.2d,v11.2d
449 umlal v7.2d,v28.2s,v1.s[0]
450 mul v29.2s,v29.2s,v30.2s
451 umlal v8.2d,v28.2s,v1.s[1]
452 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+5]
453 umlal v9.2d,v28.2s,v1.s[2]
455 umlal v10.2d,v28.2s,v1.s[3]
457 umlal v11.2d,v29.2s,v2.s[0]
458 umlal v12.2d,v29.2s,v2.s[1]
460 umlal v13.2d,v29.2s,v2.s[2]
461 ushr v15.2d,v11.2d,#16
462 umlal v6.2d,v29.2s,v2.s[3]
463 umlal v7.2d,v29.2s,v3.s[0]
465 add v11.2d,v11.2d,v15.2d
466 umlal v8.2d,v29.2s,v3.s[1]
467 ushr v11.2d,v11.2d,#16
468 umlal v9.2d,v29.2s,v3.s[2]
469 umlal v10.2d,v29.2s,v3.s[3]
470 add v16.2d,v12.2d,v11.2d
471 ins v12.d[0],v16.d[0]
472 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+5]
473 umlal v12.2d,v28.2s,v0.s[0]
474 ld1 {v11.2d},[x6],#16
475 umlal v13.2d,v28.2s,v0.s[1]
476 umlal v6.2d,v28.2s,v0.s[2]
477 shl v29.2d,v12.2d,#16
479 umlal v7.2d,v28.2s,v0.s[3]
480 add v29.2d,v29.2d,v12.2d
481 umlal v8.2d,v28.2s,v1.s[0]
482 mul v29.2s,v29.2s,v30.2s
483 umlal v9.2d,v28.2s,v1.s[1]
484 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+6]
485 umlal v10.2d,v28.2s,v1.s[2]
487 umlal v11.2d,v28.2s,v1.s[3]
489 umlal v12.2d,v29.2s,v2.s[0]
490 umlal v13.2d,v29.2s,v2.s[1]
492 umlal v6.2d,v29.2s,v2.s[2]
493 ushr v15.2d,v12.2d,#16
494 umlal v7.2d,v29.2s,v2.s[3]
495 umlal v8.2d,v29.2s,v3.s[0]
497 add v12.2d,v12.2d,v15.2d
498 umlal v9.2d,v29.2s,v3.s[1]
499 ushr v12.2d,v12.2d,#16
500 umlal v10.2d,v29.2s,v3.s[2]
501 umlal v11.2d,v29.2s,v3.s[3]
502 add v16.2d,v13.2d,v12.2d
503 ins v13.d[0],v16.d[0]
504 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+6]
505 umlal v13.2d,v28.2s,v0.s[0]
506 ld1 {v12.2d},[x6],#16
507 umlal v6.2d,v28.2s,v0.s[1]
508 umlal v7.2d,v28.2s,v0.s[2]
509 shl v29.2d,v13.2d,#16
511 umlal v8.2d,v28.2s,v0.s[3]
512 add v29.2d,v29.2d,v13.2d
513 umlal v9.2d,v28.2s,v1.s[0]
514 mul v29.2s,v29.2s,v30.2s
515 umlal v10.2d,v28.2s,v1.s[1]
516 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+7]
517 umlal v11.2d,v28.2s,v1.s[2]
519 umlal v12.2d,v28.2s,v1.s[3]
520 ld1 {v28.2s},[sp] // pull smashed b[8*i+0]
521 umlal v13.2d,v29.2s,v2.s[0]
523 umlal v6.2d,v29.2s,v2.s[1]
524 umlal v7.2d,v29.2s,v2.s[2]
526 ushr v5.2d,v5.2d,#16
528 umlal v8.2d,v29.2s,v2.s[3]
529 umlal v9.2d,v29.2s,v3.s[0]
530 add v13.2d,v13.2d,v5.2d
531 umlal v10.2d,v29.2s,v3.s[1]
532 ushr v13.2d,v13.2d,#16
534 ins v13.d[1],v15.d[0]
535 umlal v11.2d,v29.2s,v3.s[2]
536 umlal v12.2d,v29.2s,v3.s[3]
537 add v6.2d,v6.2d,v13.2d
538 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+7]
546 umlal v6.2d,v28.2s,v0.s[0]
547 ld1 {v13.2d},[x6]
548 umlal v7.2d,v28.2s,v0.s[1]
549 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+0]
550 umlal v8.2d,v28.2s,v0.s[2]
552 umlal v9.2d,v28.2s,v0.s[3]
556 umlal v10.2d,v28.2s,v1.s[0]
557 umlal v11.2d,v28.2s,v1.s[1]
558 umlal v12.2d,v28.2s,v1.s[2]
559 umlal v13.2d,v28.2s,v1.s[3]
560 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+1]
561 umlal v6.2d,v29.2s,v2.s[0]
562 umlal v7.2d,v29.2s,v2.s[1]
563 umlal v8.2d,v29.2s,v2.s[2]
564 umlal v9.2d,v29.2s,v2.s[3]
565 umlal v10.2d,v29.2s,v3.s[0]
566 umlal v11.2d,v29.2s,v3.s[1]
567 umlal v12.2d,v29.2s,v3.s[2]
568 umlal v13.2d,v29.2s,v3.s[3]
569 st1 {v6.2d},[x7],#16
570 umlal v7.2d,v28.2s,v0.s[0]
571 ld1 {v6.2d},[x6]
572 umlal v8.2d,v28.2s,v0.s[1]
573 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+1]
574 umlal v9.2d,v28.2s,v0.s[2]
578 umlal v10.2d,v28.2s,v0.s[3]
579 umlal v11.2d,v28.2s,v1.s[0]
580 umlal v12.2d,v28.2s,v1.s[1]
581 umlal v13.2d,v28.2s,v1.s[2]
582 umlal v6.2d,v28.2s,v1.s[3]
583 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+2]
584 umlal v7.2d,v29.2s,v2.s[0]
585 umlal v8.2d,v29.2s,v2.s[1]
586 umlal v9.2d,v29.2s,v2.s[2]
587 umlal v10.2d,v29.2s,v2.s[3]
588 umlal v11.2d,v29.2s,v3.s[0]
589 umlal v12.2d,v29.2s,v3.s[1]
590 umlal v13.2d,v29.2s,v3.s[2]
591 umlal v6.2d,v29.2s,v3.s[3]
592 st1 {v7.2d},[x7],#16
593 umlal v8.2d,v28.2s,v0.s[0]
594 ld1 {v7.2d},[x6]
595 umlal v9.2d,v28.2s,v0.s[1]
596 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+2]
597 umlal v10.2d,v28.2s,v0.s[2]
601 umlal v11.2d,v28.2s,v0.s[3]
602 umlal v12.2d,v28.2s,v1.s[0]
603 umlal v13.2d,v28.2s,v1.s[1]
604 umlal v6.2d,v28.2s,v1.s[2]
605 umlal v7.2d,v28.2s,v1.s[3]
606 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+3]
607 umlal v8.2d,v29.2s,v2.s[0]
608 umlal v9.2d,v29.2s,v2.s[1]
609 umlal v10.2d,v29.2s,v2.s[2]
610 umlal v11.2d,v29.2s,v2.s[3]
611 umlal v12.2d,v29.2s,v3.s[0]
612 umlal v13.2d,v29.2s,v3.s[1]
613 umlal v6.2d,v29.2s,v3.s[2]
614 umlal v7.2d,v29.2s,v3.s[3]
615 st1 {v8.2d},[x7],#16
616 umlal v9.2d,v28.2s,v0.s[0]
617 ld1 {v8.2d},[x6]
618 umlal v10.2d,v28.2s,v0.s[1]
619 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+3]
620 umlal v11.2d,v28.2s,v0.s[2]
624 umlal v12.2d,v28.2s,v0.s[3]
625 umlal v13.2d,v28.2s,v1.s[0]
626 umlal v6.2d,v28.2s,v1.s[1]
627 umlal v7.2d,v28.2s,v1.s[2]
628 umlal v8.2d,v28.2s,v1.s[3]
629 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+4]
630 umlal v9.2d,v29.2s,v2.s[0]
631 umlal v10.2d,v29.2s,v2.s[1]
632 umlal v11.2d,v29.2s,v2.s[2]
633 umlal v12.2d,v29.2s,v2.s[3]
634 umlal v13.2d,v29.2s,v3.s[0]
635 umlal v6.2d,v29.2s,v3.s[1]
636 umlal v7.2d,v29.2s,v3.s[2]
637 umlal v8.2d,v29.2s,v3.s[3]
638 st1 {v9.2d},[x7],#16
639 umlal v10.2d,v28.2s,v0.s[0]
640 ld1 {v9.2d},[x6]
641 umlal v11.2d,v28.2s,v0.s[1]
642 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+4]
643 umlal v12.2d,v28.2s,v0.s[2]
647 umlal v13.2d,v28.2s,v0.s[3]
648 umlal v6.2d,v28.2s,v1.s[0]
649 umlal v7.2d,v28.2s,v1.s[1]
650 umlal v8.2d,v28.2s,v1.s[2]
651 umlal v9.2d,v28.2s,v1.s[3]
652 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+5]
653 umlal v10.2d,v29.2s,v2.s[0]
654 umlal v11.2d,v29.2s,v2.s[1]
655 umlal v12.2d,v29.2s,v2.s[2]
656 umlal v13.2d,v29.2s,v2.s[3]
657 umlal v6.2d,v29.2s,v3.s[0]
658 umlal v7.2d,v29.2s,v3.s[1]
659 umlal v8.2d,v29.2s,v3.s[2]
660 umlal v9.2d,v29.2s,v3.s[3]
661 st1 {v10.2d},[x7],#16
662 umlal v11.2d,v28.2s,v0.s[0]
663 ld1 {v10.2d},[x6]
664 umlal v12.2d,v28.2s,v0.s[1]
665 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+5]
666 umlal v13.2d,v28.2s,v0.s[2]
670 umlal v6.2d,v28.2s,v0.s[3]
671 umlal v7.2d,v28.2s,v1.s[0]
672 umlal v8.2d,v28.2s,v1.s[1]
673 umlal v9.2d,v28.2s,v1.s[2]
674 umlal v10.2d,v28.2s,v1.s[3]
675 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+6]
676 umlal v11.2d,v29.2s,v2.s[0]
677 umlal v12.2d,v29.2s,v2.s[1]
678 umlal v13.2d,v29.2s,v2.s[2]
679 umlal v6.2d,v29.2s,v2.s[3]
680 umlal v7.2d,v29.2s,v3.s[0]
681 umlal v8.2d,v29.2s,v3.s[1]
682 umlal v9.2d,v29.2s,v3.s[2]
683 umlal v10.2d,v29.2s,v3.s[3]
684 st1 {v11.2d},[x7],#16
685 umlal v12.2d,v28.2s,v0.s[0]
686 ld1 {v11.2d},[x6]
687 umlal v13.2d,v28.2s,v0.s[1]
688 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+6]
689 umlal v6.2d,v28.2s,v0.s[2]
693 umlal v7.2d,v28.2s,v0.s[3]
694 umlal v8.2d,v28.2s,v1.s[0]
695 umlal v9.2d,v28.2s,v1.s[1]
696 umlal v10.2d,v28.2s,v1.s[2]
697 umlal v11.2d,v28.2s,v1.s[3]
698 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+7]
699 umlal v12.2d,v29.2s,v2.s[0]
700 umlal v13.2d,v29.2s,v2.s[1]
701 umlal v6.2d,v29.2s,v2.s[2]
702 umlal v7.2d,v29.2s,v2.s[3]
703 umlal v8.2d,v29.2s,v3.s[0]
704 umlal v9.2d,v29.2s,v3.s[1]
705 umlal v10.2d,v29.2s,v3.s[2]
706 umlal v11.2d,v29.2s,v3.s[3]
707 st1 {v12.2d},[x7],#16
708 umlal v13.2d,v28.2s,v0.s[0]
709 ld1 {v12.2d},[x6]
710 umlal v6.2d,v28.2s,v0.s[1]
711 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+7]
712 umlal v7.2d,v28.2s,v0.s[2]
716 umlal v8.2d,v28.2s,v0.s[3]
717 umlal v9.2d,v28.2s,v1.s[0]
718 umlal v10.2d,v28.2s,v1.s[1]
719 umlal v11.2d,v28.2s,v1.s[2]
720 umlal v12.2d,v28.2s,v1.s[3]
722 sub x1,x1,x5,lsl#2 // rewind
724 umlal v13.2d,v29.2s,v2.s[0]
725 ld1 {v28.2s},[sp] // pull smashed b[8*i+0]
726 umlal v6.2d,v29.2s,v2.s[1]
728 umlal v7.2d,v29.2s,v2.s[2]
730 umlal v8.2d,v29.2s,v2.s[3]
731 umlal v9.2d,v29.2s,v3.s[0]
732 umlal v10.2d,v29.2s,v3.s[1]
733 umlal v11.2d,v29.2s,v3.s[2]
734 st1 {v13.2d},[x7],#16
735 umlal v12.2d,v29.2s,v3.s[3]
739 st1 {v6.2d,v7.2d},[x7],#32
741 st1 {v8.2d,v9.2d},[x7],#32
743 st1 {v10.2d,v11.2d},[x7],#32
744 st1 {v12.2d},[x7]
747 ld1 {v6.2d,v7.2d},[x6],#32
748 ld1 {v8.2d,v9.2d},[x6],#32
749 ld1 {v10.2d,v11.2d},[x6],#32
750 ld1 {v12.2d,v13.2d},[x6],#32
753 sub x3,x3,x5,lsl#2 // rewind
758 st1 {v2.2d,v3.2d}, [sp],#32 // start wiping stack frame
760 ushr v15.2d,v6.2d,#16
762 st1 {v2.2d,v3.2d}, [sp],#32
763 add v6.2d,v6.2d,v15.2d
764 st1 {v2.2d,v3.2d}, [sp],#32
765 ushr v15.2d,v6.2d,#16
766 st1 {v2.2d,v3.2d}, [sp],#32
768 ins v15.d[1],v14.d[0]
775 add v6.2d,v6.2d,v15.2d
777 ushr v15.2d,v6.2d,#16
779 ld1 {v8.2d,v9.2d}, [x6],#32
780 add v6.2d,v6.2d,v15.2d
781 ld1 {v10.2d,v11.2d}, [x6],#32
782 ushr v15.2d,v6.2d,#16
783 ld1 {v12.2d,v13.2d}, [x6],#32
785 ins v15.d[1],v14.d[0]
788 add v7.2d,v7.2d,v15.2d
790 ushr v15.2d,v7.2d,#16
793 add v7.2d,v7.2d,v15.2d
794 ushr v15.2d,v7.2d,#16
796 ins v15.d[1],v14.d[0]
797 add v8.2d,v8.2d,v15.2d
799 ushr v15.2d,v8.2d,#16
802 add v8.2d,v8.2d,v15.2d
803 ushr v15.2d,v8.2d,#16
805 ins v15.d[1],v14.d[0]
806 add v9.2d,v9.2d,v15.2d
808 ushr v15.2d,v9.2d,#16
811 add v9.2d,v9.2d,v15.2d
812 ushr v15.2d,v9.2d,#16
814 ins v15.d[1],v14.d[0]
815 add v10.2d,v10.2d,v15.2d
817 ushr v15.2d,v10.2d,#16
820 add v10.2d,v10.2d,v15.2d
821 ushr v15.2d,v10.2d,#16
823 ins v15.d[1],v14.d[0]
824 add v11.2d,v11.2d,v15.2d
826 ushr v15.2d,v11.2d,#16
829 add v11.2d,v11.2d,v15.2d
830 ushr v15.2d,v11.2d,#16
832 ins v15.d[1],v14.d[0]
833 add v12.2d,v12.2d,v15.2d
835 ushr v15.2d,v12.2d,#16
838 add v12.2d,v12.2d,v15.2d
839 ushr v15.2d,v12.2d,#16
841 ins v15.d[1],v14.d[0]
842 add v13.2d,v13.2d,v15.2d
844 ushr v15.2d,v13.2d,#16
847 add v13.2d,v13.2d,v15.2d
848 ushr v15.2d,v13.2d,#16
850 ins v15.d[1],v14.d[0]
851 ld1 {v6.2d,v7.2d}, [x6],#32
857 sub x3,x3,x5,lsl#2 // rewind x3
859 add x2,sp,x5,lsl#2
897 st1 {v0.2d,v1.2d}, [x3],#32 // wipe
898 st1 {v0.2d,v1.2d}, [x3],#32 // wipe
913 st1 {v0.2d,v1.2d}, [x1],#32 // wipe
914 st1 {v0.2d,v1.2d}, [x3],#32 // wipe
948 ldp x8,x9,[x1,#8*2]
962 stp xzr,xzr,[x2,#8*2]
990 // a[2]a[0]
996 // a[2]a[1] (ii)
1002 // a[3]a[2] (iii)
1003 // a[4]a[2]
1004 // a[5]a[2]
1005 // a[6]a[2]
1006 // a[7]a[2]
1036 stp x19,x20,[x2],#8*2 // t[0..1]
1038 adds x21,x21,x17 // t[2]+lo(a[1]*a[0])
1045 mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii)
1058 umulh x14,x8,x7 // hi(a[2..7]*a[1])
1065 stp x21,x22,[x2],#8*2 // t[2..3]
1072 mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii)
1083 umulh x17,x9,x8 // hi(a[3..7]*a[2])
1090 stp x23,x24,[x2],#8*2 // t[4..5]
1111 stp x25,x26,[x2],#8*2 // t[6..7]
1154 ldp x8,x9,[x2,#8*2]
1162 ldp x8,x9,[x1,#8*2]
1179 // a[d]a[0]
1184 // a[8]a[2]
1185 // a[f]a[2]........................
1242 ldp x8,x9,[x2,#8*2]
1251 ldp x8,x9,[x1,#8*2]
1267 ldp x8,x9,[x0,#8*2]
1276 stp x21,x22,[x2,#8*2]
1277 ldp x21,x22,[x15,#8*2]
1287 // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
1290 ldp x11,x13,[x14,#8*2]
1296 stp x21,x22,[x2,#8*2]
1314 ldp x7,x9,[x1],#8*2
1322 stp x21,x22,[x2,#8*2]
1331 ldp x11,x13,[x1],#8*2
1356 stp x21,x22,[x2,#8*2]
1366 ldp x8,x9,[x1,#8*2]
1374 ldp x21,x22,[sp,#8*2]
1425 ldp x16,x17,[x2,#8*2]
1443 ldp x8,x9,[x1,#8*2]
1494 ldp x8,x9,[x2,#8*2]
1505 ldp x8,x9,[x1,#8*2]
1529 ldp x8,x9,[x16,#8*2]
1540 stp x21,x22,[x2,#8*2]
1541 ldp x21,x22,[x0,#8*2]
1568 ldp x8,x9,[x1,#8*2]
1570 stp x16,x17,[x0,#8*2]
1578 ldp x21,x22,[x2,#8*2]
1596 ldp x8,x9,[x3,#8*2]
1598 stp x16,x17,[x0,#8*2]
1602 ldp x21,x22,[x1,#8*2]
1617 stp xzr,xzr,[x2,#8*2]
1624 stp x16,x17,[x3,#8*2]
1627 stp xzr,xzr,[x1,#8*2]
1633 stp xzr,xzr,[x2,#8*2]
1637 stp x16,x17,[x3,#8*2]
1651 stp xzr,xzr,[sp,#8*2]
1673 stp x8,x9,[x1,#8*2]
1716 ldp x8,x9,[x1,#8*2]
1723 ldp x16,x17,[x3,#8*2]
1779 ldp x8,x9,[x1,#8*2]
1783 ldp x16,x17,[x3,#8*2]
1836 ldp x8,x9,[x1,#8*2]
1839 ldp x16,x17,[x3,#8*2]
1849 ldp x8,x9,[x11,#8*2]
1854 stp x21,x22,[x26,#8*2] // result!!!
1859 ldp x16,x17,[x3,#8*2]
1914 ldp x8,x9,[x1,#8*2]
1924 ldp x16,x17,[x3,#8*2]
1981 ldp x8,x9,[x1,#8*2]
1989 ldp x16,x17,[x3,#8*2]
2005 stp x21,x22,[x26,#8*2] // result!!!
2009 ldp x16,x17,[x11,#8*2]
2015 ldp x8,x9,[x1,#8*2]
2040 ldp x16,x17,[x3,#8*2]
2042 ldp x21,x22,[x26,#8*2]
2046 stp x12,x13,[x0,#8*2]
2057 ldp x8,x9,[x27,#8*2]
2058 stp x12,x13,[x0,#8*2]
2060 ldp x21,x22,[x1,#8*2]
2073 stp xzr,xzr,[x26,#8*2]
2080 stp x12,x13,[x27,#8*2]
2087 stp xzr,xzr,[x26,#8*2]
2093 stp x12,x13,[x27,#8*2]
2107 stp xzr,xzr,[sp,#8*2]
2119 stp x8,x9,[x1,#8*2]
2135 .align 2