/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" .section ".text",#alloc,#execinstr .file "mont_mulf_asm_v8plus.s" /* * This file is a result of compiling the mont_mulf.c file to generate an * assembly output and then hand-editing that output to replace the * compiler-generated loop for the 512-bit case (nlen == 16) in the * mont_mulf_noconv routine with a hand-crafted version. * To compile this: * * cc -c -xarch=v8plus -KPIC mont_mulf_asm.s * * Note, this file does not support sparcv9 (64-bit). */ /* Read-only constant pool. Each entry is an 8-byte double written as two 32-bit words, most significant word first, and every low word is 0. The decoded values agree with the static const doubles shown in the interleaved mont_mulf.c listing. */ .section ".rodata",#alloc .align 8 ! ! CONSTANT POOL ! TwoTo16: /* high word 0x40F00000, so the double is 65536.0 (2^16) */ .word 1089470464 .word 0 .type TwoTo16,#object .size TwoTo16,8 ! ! CONSTANT POOL ! TwoToMinus16: /* high word 0x3EF00000, so the double is 1.0/65536.0 (2^-16) */ .word 1055916032 .word 0 .type TwoToMinus16,#object .size TwoToMinus16,8 ! ! CONSTANT POOL ! Zero: /* all bits clear, the double 0.0 */ .word 0 .word 0 .type Zero,#object .size Zero,8 ! ! CONSTANT POOL ! TwoTo32: /* high word 0x41F00000, so the double is 2^32 */ .word 1106247680 .word 0 .type TwoTo32,#object .size TwoTo32,8 ! ! CONSTANT POOL ! TwoToMinus32: /* high word 0x3DF00000, so the double is 2^-32 */ .word 1039138816 .word 0 .type TwoToMinus32,#object .size TwoToMinus32,8 .section ".text",#alloc,#execinstr /* 000000 0 */ .align 4 ! ! 
SUBROUTINE conv_d16_to_i32 ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* conv_d16_to_i32: per the interleaved C listing the arguments are (unsigned int *i32, double *d16, long long *tmp, int ilen). Entries of d16 are converted to 64-bit integers and combined with carry propagation into the 32-bit words of i32. The 128-byte frame supplies the scratch slots at sp+96, sp+104 and sp+112 that the code uses to move 64-bit values between floating-point and integer registers. NOTE(review): this copy of the file lacks the instructions at offsets 0x000c to 0x006c and listing lines 74 to 78, so the loop setup cannot be checked from here. */ .global conv_d16_to_i32 conv_d16_to_i32: /* 000000 */ save %sp,-128,%sp ! FILE mont_mulf.c ! 1 !#define RF_INLINE_MACROS ! 3 !static const double TwoTo16=65536.0; ! 4 !static const double TwoToMinus16=1.0/65536.0; ! 5 !static const double Zero=0.0; ! 6 !static const double TwoTo32=65536.0*65536.0; ! 7 !static const double TwoToMinus32=1.0/(65536.0*65536.0); ! 9 !#ifdef RF_INLINE_MACROS ! 11 !double upper32(double); ! 12 !double lower32(double, double); ! 13 !double mod(double, double, double); ! 15 !#else ! 17 !static double upper32(double x) ! 18 !{ ! 19 ! return floor(x*TwoToMinus32); ! 20 !} ! 22 !static double lower32(double x, double y) ! 23 !{ ! 24 ! return x-TwoTo32*floor(x*TwoToMinus32); ! 25 !} ! 27 !static double mod(double x, double oneoverm, double m) ! 28 !{ ! 29 ! return x-m*floor(x*oneoverm); ! 30 !} ! 32 !#endif ! 35 !static void cleanup(double *dt, int from, int tlen) ! 36 !{ ! 37 ! int i; ! 38 ! double tmp,tmp1,x,x1; ! 40 ! tmp=tmp1=Zero; ! 41 ! /* original code ** ! 42 ! for(i=2*from;i<2*tlen-2;i++) ! 43 ! { ! 44 ! x=dt[i]; ! 45 ! dt[i]=lower32(x,Zero)+tmp1; ! 46 ! tmp1=tmp; ! 47 ! tmp=upper32(x); ! 48 ! } ! 49 ! dt[tlen-2]+=tmp1; ! 50 ! dt[tlen-1]+=tmp; ! 51 ! **end original code ***/ ! 52 ! /* new code ***/ ! 53 ! for(i=2*from;i<2*tlen;i+=2) ! 54 ! { ! 55 ! x=dt[i]; ! 56 ! x1=dt[i+1]; ! 57 ! dt[i]=lower32(x,Zero)+tmp; ! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1; ! 59 ! tmp=upper32(x); ! 60 ! tmp1=upper32(x1); ! 61 ! } ! 62 ! /** end new code **/ ! 63 !} ! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) ! 67 !{ ! 68 !int i; ! 69 !long long t, t1, a, b, c, d; ! 71 ! t1=0; ! 72 ! a=(long long)d16[0]; /* 0x0004 72 */ ldd [%i1],%f0 /* 0x0008 67 */ or %g0,%i1,%o0 ! 73 ! b=(long long)d16[1]; ! 74 ! for(i=0; i>32); /* NOTE(review): the listing and the code are truncated at this point in this copy, the next visible instruction is at offset 0x0070 */ ! 79 ! d=(long long)d16[2*i+3]; ! 80 ! t1+=(b&0xffff)<<16; ! 81 ! t+=(b>>16)+(t1>>32); ! 82 ! i32[i]=t1&0xffffffff; ! 83 ! t1=t; ! 84 ! a=c; ! 
85 ! b=d; /* Straight-line code ahead of the loop at .L900000112: it stores one result word (st to o5-4) and sets the counter o7 to 1. fdtox converts a double to a 64-bit integer inside the FP register file, and each std followed by ldx on the same stack slot moves that value into an integer register. o1 and o3 act as the 0xffff and 0xffffffff masks (see the instructions tagged with listing lines 80 and 82) and are set up in the part missing from this copy. */ /* 0x0070 85 */ add %o0,16,%g2 /* 0x0074 80 */ and %g1,%o1,%o0 /* 0x0078 */ sllx %o0,16,%g3 /* 0x007c 77 */ and %g4,%o3,%o0 /* 0x0080 74 */ add %o0,%g3,%o4 /* 0x0084 76 */ fdtox %f0,%f0 /* 0x0088 */ std %f0,[%sp+104] /* 0x008c 82 */ and %o4,%o3,%g5 /* 0x0090 79 */ ldd [%g2+8],%f2 /* 0x0094 85 */ add %o5,4,%o5 /* 0x0098 81 */ srax %o4,32,%o4 /* 0x009c */ stx %o4,[%sp+112] /* 0x00a0 79 */ fdtox %f2,%f0 /* 0x00a4 */ std %f0,[%sp+96] /* 0x00a8 81 */ srax %g1,16,%o0 /* 0x00ac */ ldx [%sp+112],%o7 /* 0x00b0 78 */ srax %g4,32,%o4 /* 0x00b4 81 */ add %o0,%o7,%g4 /* 0x00b8 85 */ or %g0,1,%o7 /* 0x00bc 76 */ ldx [%sp+104],%g3 /* 0x00c0 81 */ add %o4,%g4,%o4 /* 0x00c4 79 */ ldx [%sp+96],%g1 /* 0x00c8 82 */ st %g5,[%o5-4] /* 0x00cc 84 */ or %g0,%g3,%g4 /* Main loop: loads and converts two doubles per pass and stores one 32-bit result word from the branch delay slot, which is not annulled and so runs on every pass. */ .L900000112: /* 0x00d0 76 */ ldd [%g2+16],%f0 /* 0x00d4 85 */ add %o7,1,%o7 /* 0x00d8 */ add %o5,4,%o5 /* 0x00dc */ cmp %o7,%o2 /* 0x00e0 */ add %g2,16,%g2 /* 0x00e4 76 */ fdtox %f0,%f0 /* 0x00e8 */ std %f0,[%sp+104] /* 0x00ec 79 */ ldd [%g2+8],%f0 /* 0x00f0 */ fdtox %f0,%f0 /* 0x00f4 */ std %f0,[%sp+96] /* 0x00f8 80 */ and %g1,%o1,%g3 /* 0x00fc */ sllx %g3,16,%g5 /* 0x0100 77 */ and %g4,%o3,%g3 /* 0x0104 74 */ add %g3,%g5,%g3 /* 0x0108 81 */ srax %g1,16,%g1 /* 0x010c 74 */ add %g3,%o4,%g3 /* 0x0110 81 */ srax %g3,32,%o4 /* 0x0114 */ stx %o4,[%sp+112] /* 0x0118 76 */ ldx [%sp+104],%g5 /* 0x011c 78 */ srax %g4,32,%o4 /* 0x0120 81 */ ldx [%sp+112],%g4 /* 0x0124 */ add %g1,%g4,%g4 /* 0x0128 79 */ ldx [%sp+96],%g1 /* 0x012c 81 */ add %o4,%g4,%o4 /* 0x0130 82 */ and %g3,%o3,%g3 /* 0x0134 84 */ or %g0,%g5,%g4 /* 0x0138 85 */ ble,pt %icc,.L900000112 /* 0x013c */ st %g3,[%o5-4] /* Loop exit: jump to the shared tail at .L900000117. The sethi in the delay slot starts building the 0xffff mask that the tail completes. */ .L900000115: /* 0x0140 85 */ ba .L900000117 /* 0x0144 */ sethi %hi(0xfc00),%g2 /* Second loop form over the same computation, presumably entered from the part missing from this copy (no visible branch targets .L77000134). Its closing ble,a is annulled, so the ldd in that delay slot only runs when the loop repeats. */ .L77000134: /* 0x0148 76 */ ldd [%g2+16],%f0 .L900000116: /* 0x014c 77 */ and %g4,%o3,%o0 /* 0x0150 80 */ and %g1,%o1,%g3 /* 0x0154 76 */ fdtox %f0,%f0 /* 0x0158 77 */ add %o4,%o0,%o0 /* 0x015c 76 */ std %f0,[%sp+104] /* 0x0160 85 */ add %o7,1,%o7 /* 0x0164 80 */ sllx 
%g3,16,%o4 /* 0x0168 79 */ ldd [%g2+24],%f2 /* 0x016c 85 */ add %g2,16,%g2 /* 0x0170 80 */ add %o0,%o4,%o0 /* 0x0174 85 */ cmp %o7,%o2 /* 0x0178 82 */ and %o0,%o3,%g3 /* 0x017c 79 */ fdtox %f2,%f0 /* 0x0180 */ std %f0,[%sp+96] /* 0x0184 81 */ srax %o0,32,%o0 /* 0x0188 */ stx %o0,[%sp+112] /* 0x018c 78 */ srax %g4,32,%o4 /* 0x0190 79 */ ldx [%sp+96],%o0 /* 0x0194 81 */ srax %g1,16,%g5 /* 0x0198 */ ldx [%sp+112],%g4 /* 0x019c 76 */ ldx [%sp+104],%g1 /* 0x01a0 82 */ st %g3,[%o5] /* 0x01a4 81 */ add %g5,%g4,%g4 /* 0x01a8 85 */ add %o5,4,%o5 /* 0x01ac 81 */ add %o4,%g4,%o4 /* 0x01b0 84 */ or %g0,%g1,%g4 /* 0x01b4 85 */ or %g0,%o0,%g1 /* 0x01b8 */ ble,a,pt %icc,.L900000116 /* 0x01bc */ ldd [%g2+16],%f0 .L77000127: ! 86 ! } ! 87 ! t1+=a&0xffffffff; ! 88 ! t=(a>>32); ! 89 ! t1+=(b&0xffff)<<16; ! 90 ! i32[i]=t1&0xffffffff; /* 0x01c0 90 */ sethi %hi(0xfc00),%g2 /* Tail for the last word: g2 becomes 0xfc00 + 1023 = 0xffff, g3 becomes 0xffffffff (or with -1, then srl by 0 clears the upper 32 bits), and the masked sum is stored at i0 + 4*o7. */ .L900000117: /* 0x01c4 90 */ or %g0,-1,%g3 /* 0x01c8 */ add %g2,1023,%g2 /* 0x01cc */ srl %g3,0,%g3 /* 0x01d0 */ and %g1,%g2,%g2 /* 0x01d4 */ and %g4,%g3,%g4 /* 0x01d8 */ sllx %g2,16,%g2 /* 0x01dc */ add %o4,%g4,%g4 /* 0x01e0 */ add %g4,%g2,%g2 /* 0x01e4 */ sll %o7,2,%g4 /* 0x01e8 */ and %g2,%g3,%g2 /* 0x01ec */ st %g2,[%i0+%g4] /* 0x01f0 */ ret ! Result = /* 0x01f4 */ restore %g0,%g0,%g0 /* 0x01f8 0 */ .type conv_d16_to_i32,2 /* 0x01f8 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 8 ! ! CONSTANT POOL ! /* 1127219200 is 0x43300000, so this word pair is the double 2^52. NOTE(review): the code that reads it is in the part of conv_i32_to_d32 missing from this copy, presumably the same integer-to-double bias subtraction that is visible in the later conversion routines. */ .L_const_seg_900000201: /* 000000 0 */ .word 1127219200,0 /* 0x0008 0 */ .align 4 /* 0x0008 */ .skip 16 ! ! SUBROUTINE conv_i32_to_d32 ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* conv_i32_to_d32: no save is issued in the visible entry, so o7 is copied to g2 before call .+8 overwrites it with the address of the call. The call and sethi pair begins a position-independent GOT address computation. NOTE(review): the rest of this routine is missing from this copy. */ .global conv_i32_to_d32 conv_i32_to_d32: /* 000000 */ or %g0,%o7,%g2 /* 0x0004 */ or %g0,%o1,%g4 .L900000210: /* 0x0008 */ call .+8 /* 0x000c */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g3 ! 92 !} ! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) ! 95 !{ ! 96 !int i; ! 98 !#pragma pipeloop(0) ! 99 ! 
for(i=0;i>16); /* NOTE(review): this copy is missing everything between listing line 99 and this point, which includes the body of conv_i32_to_d32 and the entry of conv_i32_to_d16. What follows is conv_i32_to_d16 from offset 0x001c on (see the .type and .size directives that close it). g3 walks the output starting at i0 in 16-byte steps and o7 walks the 32-bit input starting at i1 in 4-byte steps. */ /* 0x001c 113 */ sethi %hi(.L_const_seg_900000301),%g2 /* 0x0020 109 */ sub %o0,1,%o5 /* 0x0024 113 */ add %g2,%lo(.L_const_seg_900000301),%o1 /* 0x0028 */ ld [%o2+%o1],%o3 /* 0x002c 109 */ sethi %hi(0xfc00),%o0 /* 0x0030 */ add %o5,1,%g2 /* 0x0034 */ or %g0,0,%g1 /* 0x0038 */ cmp %g2,3 /* 0x003c 112 */ ldd [%o3],%f0 /* 0x0040 */ or %g0,%i1,%o7 /* 0x0044 */ add %o0,1023,%o4 /* o4 = 0xfc00 + 1023 = 0xffff, the low 16-bit mask */ /* 0x0048 */ or %g0,%i0,%g3 /* 0x004c 109 */ bl,pn %icc,.L77000154 /* 0x0050 */ add %o7,4,%o0 /* 0x0054 111 */ ld [%o0-4],%o1 /* 0x0058 0 */ or %g0,%o0,%o7 /* 0x005c 113 */ or %g0,1,%g1 /* 0x0060 112 */ and %o1,%o4,%o0 /* Pipelined loop. Each 32-bit input is split into its low 16 bits (and with o4) and its high 16 bits (srl by 16). Each half is written to a stack word and reloaded into f3, fmovs copies the high word of the constant in f0 into f2, and fsubd subtracts the constant from the f2:f3 pair before the result is stored. NOTE(review): this is the usual bias trick if the constant at .L_const_seg_900000301 is 2^52 like the other .L_const_seg words, but its definition is not present in this copy. */ .L900000306: /* 0x0064 112 */ st %o0,[%sp+96] /* 0x0068 113 */ add %g1,1,%g1 /* 0x006c */ add %g3,16,%g3 /* 0x0070 */ cmp %g1,%o5 /* 0x0074 */ add %o7,4,%o7 /* 0x0078 112 */ ld [%sp+96],%f3 /* 0x007c */ fmovs %f0,%f2 /* 0x0080 */ fsubd %f2,%f0,%f2 /* 0x0084 113 */ srl %o1,16,%o0 /* 0x0088 112 */ std %f2,[%g3-16] /* 0x008c 113 */ st %o0,[%sp+92] /* 0x0090 */ ld [%sp+92],%f3 /* 0x0094 111 */ ld [%o7-4],%o1 /* 0x0098 113 */ fmovs %f0,%f2 /* 0x009c */ fsubd %f2,%f0,%f2 /* 0x00a0 112 */ and %o1,%o4,%o0 /* 0x00a4 113 */ ble,pt %icc,.L900000306 /* 0x00a8 */ std %f2,[%g3-8] /* Drain: convert and store both halves of the last word that the loop loaded, then return. */ .L900000309: /* 0x00ac 112 */ st %o0,[%sp+96] /* 0x00b0 */ fmovs %f0,%f2 /* 0x00b4 113 */ add %g3,16,%g3 /* 0x00b8 */ srl %o1,16,%o0 /* 0x00bc 112 */ ld [%sp+96],%f3 /* 0x00c0 */ fsubd %f2,%f0,%f2 /* 0x00c4 */ std %f2,[%g3-16] /* 0x00c8 113 */ st %o0,[%sp+92] /* 0x00cc */ fmovs %f0,%f2 /* 0x00d0 */ ld [%sp+92],%f3 /* 0x00d4 */ fsubd %f2,%f0,%f0 /* 0x00d8 */ std %f0,[%g3-8] /* 0x00dc */ ret ! 
Result = /* 0x00e0 */ restore %g0,%g0,%g0 /* Simple loop, taken when the bl,pn at offset 0x004c fires (g2 below 3). It reloads f0 from o3 on every pass because the second fsubd of the pass writes its result into f0. The closing ble,a is annulled, so the load in its delay slot only runs when the loop repeats. */ .L77000154: /* 0x00e4 111 */ ld [%o7],%o0 .L900000311: /* 0x00e8 112 */ and %o0,%o4,%o1 /* 0x00ec */ st %o1,[%sp+96] /* 0x00f0 113 */ add %g1,1,%g1 /* 0x00f4 112 */ ldd [%o3],%f0 /* 0x00f8 113 */ srl %o0,16,%o0 /* 0x00fc */ add %o7,4,%o7 /* 0x0100 */ cmp %g1,%o5 /* 0x0104 112 */ fmovs %f0,%f2 /* 0x0108 */ ld [%sp+96],%f3 /* 0x010c */ fsubd %f2,%f0,%f2 /* 0x0110 */ std %f2,[%g3] /* 0x0114 113 */ st %o0,[%sp+92] /* 0x0118 */ fmovs %f0,%f2 /* 0x011c */ ld [%sp+92],%f3 /* 0x0120 */ fsubd %f2,%f0,%f0 /* 0x0124 */ std %f0,[%g3+8] /* 0x0128 */ add %g3,16,%g3 /* 0x012c */ ble,a,pt %icc,.L900000311 /* 0x0130 */ ld [%o7],%o0 .L77000150: /* 0x0134 */ ret ! Result = /* 0x0138 */ restore %g0,%g0,%g0 /* 0x013c 0 */ .type conv_i32_to_d16,2 /* 0x013c */ .size conv_i32_to_d16,(.-conv_i32_to_d16) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 8 ! ! CONSTANT POOL ! /* 1127219200 is 0x43300000, so this word pair is the double 2^52 */ .L_const_seg_900000401: /* 000000 0 */ .word 1127219200,0 /* 0x0008 0 */ .align 4 /* 0x0008 */ .skip 16 ! ! SUBROUTINE conv_i32_to_d32_and_d16 ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* conv_i32_to_d32_and_d16: per the listing the arguments are (double *d32, double *d16, unsigned int *i32, int len). The save allocates a 104-byte frame, and the loops of this routine use sp+92 and sp+96 as scratch words. The call and sethi pair begins a position-independent GOT address computation. */ .global conv_i32_to_d32_and_d16 conv_i32_to_d32_and_d16: /* 000000 */ save %sp,-104,%sp .L900000413: /* 0x0004 */ call .+8 /* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000413-.)),%g4 ! 114 ! } ! 115 !} ! 118 !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/, ! 119 ! const double * /* 2^16*/, const double * /* 0 */, ! 120 ! double * /*result16*/, double * /* result32 */, ! 121 ! float * /*source - should be */ ! 122 ! unsigned int* converted to float* */); ! 126 !void conv_i32_to_d32_and_d16(double *d32, double *d16, ! 127 ! unsigned int *i32, int len) ! 128 !{ ! 129 !int i; ! 130 !unsigned int a; ! 132 !#pragma pipeloop(0) ! 133 ! 
for(i=0;i>16); /* NOTE(review): offsets 0x000c to 0x0134 of conv_i32_to_d32_and_d16 and listing lines 133 to 143 are missing from this copy, so the values of g1, g5, i4 and o2 on arrival here cannot be checked. Visible setup: g1 is the running element index, o0 = g5 + 4*g1 steps through the 32-bit inputs, o3 = i4 + 8*g1 steps through 8-byte results, o5 = i1 + 16*g1 steps through 16-byte result pairs, o4 = 0xfc00 + 1023 = 0xffff, and f0 holds the conversion constant loaded through the slot for .L_const_seg_900000401. */ /* 0x0138 143 */ sethi %hi(.L_const_seg_900000401),%g2 /* 0x013c */ add %g2,%lo(.L_const_seg_900000401),%o1 /* 0x0140 138 */ sethi %hi(0xfc00),%o0 /* 0x0144 */ ld [%o2+%o1],%o2 /* 0x0148 */ sll %g1,2,%o3 /* 0x014c */ sub %i3,%g1,%g3 /* 0x0150 */ sll %g1,3,%g2 /* 0x0154 */ add %o0,1023,%o4 /* 0x0158 141 */ ldd [%o2],%f0 /* 0x015c */ add %g5,%o3,%o0 /* 0x0160 138 */ cmp %g3,3 /* 0x0164 */ add %i4,%g2,%o3 /* 0x0168 */ sub %i3,1,%o1 /* 0x016c */ sll %g1,4,%g4 /* 0x0170 */ bl,pn %icc,.L77000161 /* 0x0174 */ add %i1,%g4,%o5 /* 0x0178 141 */ ld [%o0],%f3 /* 0x017c 143 */ add %o3,8,%o3 /* 0x0180 140 */ ld [%o0],%o7 /* 0x0184 143 */ add %o5,16,%o5 /* 0x0188 */ add %g1,1,%g1 /* 0x018c 141 */ fmovs %f0,%f2 /* 0x0190 143 */ add %o0,4,%o0 /* 0x0194 142 */ and %o7,%o4,%g2 /* 0x0198 141 */ fsubd %f2,%f0,%f2 /* 0x019c */ std %f2,[%o3-8] /* 0x01a0 143 */ srl %o7,16,%o7 /* 0x01a4 142 */ st %g2,[%sp+96] /* 0x01a8 */ fmovs %f0,%f2 /* 0x01ac */ ld [%sp+96],%f3 /* 0x01b0 */ fsubd %f2,%f0,%f2 /* 0x01b4 */ std %f2,[%o5-16] /* 0x01b8 143 */ st %o7,[%sp+92] /* 0x01bc */ fmovs %f0,%f2 /* 0x01c0 */ ld [%sp+92],%f3 /* 0x01c4 */ fsubd %f2,%f0,%f2 /* 0x01c8 */ std %f2,[%o5-8] /* Loop unrolled by two (g1 advances by 2 per pass). Each input word is loaded twice: into f3 to be converted whole and stored through o3, and into o7 to be split into low (and with o4) and high (srl by 16) halves that are converted through the stack words at sp+96 and sp+92 and stored through o5. */ .L900000409: /* 0x01cc 141 */ ld [%o0],%f3 /* 0x01d0 143 */ add %g1,2,%g1 /* 0x01d4 */ add %o5,32,%o5 /* 0x01d8 140 */ ld [%o0],%o7 /* 0x01dc 143 */ cmp %g1,%o1 /* 0x01e0 */ add %o3,16,%o3 /* 0x01e4 141 */ fmovs %f0,%f2 /* 0x01e8 */ fsubd %f2,%f0,%f2 /* 0x01ec */ std %f2,[%o3-16] /* 0x01f0 142 */ and %o7,%o4,%g2 /* 0x01f4 */ st %g2,[%sp+96] /* 0x01f8 */ ld [%sp+96],%f3 /* 0x01fc */ fmovs %f0,%f2 /* 0x0200 */ fsubd %f2,%f0,%f2 /* 0x0204 143 */ srl %o7,16,%o7 /* 0x0208 142 */ std %f2,[%o5-32] /* 0x020c 143 */ st %o7,[%sp+92] /* 0x0210 */ ld [%sp+92],%f3 /* 0x0214 */ fmovs %f0,%f2 /* 0x0218 */ fsubd %f2,%f0,%f2 /* 0x021c */ std %f2,[%o5-24] /* 0x0220 */ add %o0,4,%o0 /* 0x0224 141 */ ld [%o0],%f3 /* 0x0228 140 */ ld [%o0],%o7 /* 0x022c 141 */ fmovs %f0,%f2 /* 0x0230 */ fsubd %f2,%f0,%f2 /* 0x0234 */ std 
%f2,[%o3-8] /* 0x0238 142 */ and %o7,%o4,%g2 /* 0x023c */ st %g2,[%sp+96] /* 0x0240 */ ld [%sp+96],%f3 /* 0x0244 */ fmovs %f0,%f2 /* 0x0248 */ fsubd %f2,%f0,%f2 /* 0x024c 143 */ srl %o7,16,%o7 /* 0x0250 142 */ std %f2,[%o5-16] /* 0x0254 143 */ st %o7,[%sp+92] /* 0x0258 */ ld [%sp+92],%f3 /* 0x025c */ fmovs %f0,%f2 /* 0x0260 */ fsubd %f2,%f0,%f2 /* 0x0264 */ std %f2,[%o5-8] /* 0x0268 */ bl,pt %icc,.L900000409 /* 0x026c */ add %o0,4,%o0 /* After the unrolled loop: return if g1 has reached i3, otherwise fall into the one-element loop below. */ .L900000412: /* 0x0270 143 */ cmp %g1,%i3 /* 0x0274 */ bge,pn %icc,.L77000164 /* 0x0278 */ nop /* One element per pass. f0 is reloaded from o2 each time because the last fsubd of the pass writes its result into f0. The closing bl,a is annulled, so the load in its delay slot only runs when the loop repeats. */ .L77000161: /* 0x027c 141 */ ld [%o0],%f3 .L900000414: /* 0x0280 141 */ ldd [%o2],%f0 /* 0x0284 143 */ add %g1,1,%g1 /* 0x0288 140 */ ld [%o0],%o1 /* 0x028c 143 */ add %o0,4,%o0 /* 0x0290 */ cmp %g1,%i3 /* 0x0294 141 */ fmovs %f0,%f2 /* 0x0298 142 */ and %o1,%o4,%o7 /* 0x029c 141 */ fsubd %f2,%f0,%f2 /* 0x02a0 */ std %f2,[%o3] /* 0x02a4 143 */ srl %o1,16,%o1 /* 0x02a8 142 */ st %o7,[%sp+96] /* 0x02ac 143 */ add %o3,8,%o3 /* 0x02b0 142 */ fmovs %f0,%f2 /* 0x02b4 */ ld [%sp+96],%f3 /* 0x02b8 */ fsubd %f2,%f0,%f2 /* 0x02bc */ std %f2,[%o5] /* 0x02c0 143 */ st %o1,[%sp+92] /* 0x02c4 */ fmovs %f0,%f2 /* 0x02c8 */ ld [%sp+92],%f3 /* 0x02cc */ fsubd %f2,%f0,%f0 /* 0x02d0 */ std %f0,[%o5+8] /* 0x02d4 */ add %o5,16,%o5 /* 0x02d8 */ bl,a,pt %icc,.L900000414 /* 0x02dc */ ld [%o0],%f3 .L77000164: /* 0x02e0 */ ret ! Result = /* 0x02e4 */ restore %g0,%g0,%g0 /* 0x02e8 0 */ .type conv_i32_to_d32_and_d16,2 /* 0x02e8 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 4 ! ! SUBROUTINE adjust_montf_result ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* adjust_montf_result: per the listing the arguments are (unsigned int *i32, unsigned int *nint, int len). No save is issued at this entry. */ .global adjust_montf_result adjust_montf_result: ! 144 ! } ! 145 !} ! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) ! 149 !{ ! 150 !long long acc; ! 151 !int i; ! 153 ! 
if(i32[len]>0) i=-1; /* Body of adjust_montf_result, which works on the incoming o registers (o0 = i32, o1 = nint, o2 = len) and returns with retl. First g1 = i32[len]. If it is nonzero, control goes to the test at .L900000511 with g3 = -1, which makes the check for i below 0 succeed. The or in the bleu delay slot is not annulled, so o3 = o1 on both paths. */ /* 000000 153 */ sll %o2,2,%g1 /* 0x0004 */ or %g0,-1,%g3 /* 0x0008 */ ld [%o0+%g1],%g1 /* 0x000c */ cmp %g1,0 /* 0x0010 */ bleu,pn %icc,.L77000175 /* 0x0014 */ or %g0,%o1,%o3 /* 0x0018 */ ba .L900000511 /* 0x001c */ cmp %g3,0 .L77000175: ! 154 ! else ! 155 ! { ! 156 ! for(i=len-1; i>=0; i--) /* 0x0020 156 */ subcc %o2,1,%g3 /* 0x0024 */ bneg,pt %icc,.L900000511 /* 0x0028 */ cmp %g3,0 /* 0x002c */ sll %g3,2,%g1 /* 0x0030 */ add %o0,%g1,%g2 /* 0x0034 */ add %o1,%g1,%g1 ! 157 ! { ! 158 ! if(i32[i]!=nint[i]) break; /* 0x0038 158 */ ld [%g1],%g5 /* Scan from index len-1 downward (both pointers step back 4 bytes per pass) until the two words differ or the index g3 goes negative. */ .L900000510: /* 0x003c 158 */ ld [%g2],%o5 /* 0x0040 */ sub %g1,4,%g1 /* 0x0044 */ sub %g2,4,%g2 /* 0x0048 */ cmp %o5,%g5 /* 0x004c */ bne,pn %icc,.L77000182 /* 0x0050 */ nop /* 0x0054 */ subcc %g3,1,%g3 /* 0x0058 */ bpos,a,pt %icc,.L900000510 /* 0x005c */ ld [%g1],%g5 .L77000182: ! 159 ! } ! 160 ! } ! 161 ! if((i<0)||(i32[i]>nint[i])) /* 0x0060 161 */ cmp %g3,0 /* Decision point: go on to the subtraction at .L77000198 when i is negative, otherwise return through .L77000191 unless i32[i] is above nint[i] (unsigned compare, bleu). */ .L900000511: /* 0x0064 161 */ bl,pn %icc,.L77000198 /* 0x0068 */ sll %g3,2,%g2 /* 0x006c */ ld [%o1+%g2],%g1 /* 0x0070 */ ld [%o0+%g2],%g2 /* 0x0074 */ cmp %g2,%g1 /* 0x0078 */ bleu,pt %icc,.L77000191 /* 0x007c */ nop .L77000198: ! 162 ! { ! 163 ! acc=0; ! 164 ! for(i=0;i>32; /* NOTE(review): offsets 0x0080 to 0x00c0 and listing lines 164 to 168 are truncated in this copy, so the setup of g3, g4, o4 and o5 and the branch to the simple loop at .L77000199 are not visible. In the code below g3 is used as the 32-bit store mask and g4 as the loop bound. */ /* 0x00c4 168 */ or %g0,2,%o5 /* 0x00c8 166 */ ld [%o0+4],%g1 /* 0x00cc 164 */ sub %o2,%o1,%o2 /* 0x00d0 */ or %g0,%o2,%g5 /* 0x00d4 167 */ and %o2,%g3,%o2 /* 0x00d8 */ st %o2,[%o0] /* 0x00dc 168 */ srax %g5,32,%g5 /* Pipelined subtract loop: g5 carries the running borrow (the arithmetic shift by 32 keeps its sign) and each pass stores one masked 32-bit result word. The srax in the delay slot is not annulled and runs on every pass. */ .L900000505: /* 0x00e0 166 */ ld [%o3],%o2 /* 0x00e4 168 */ add %o5,1,%o5 /* 0x00e8 */ add %o3,4,%o3 /* 0x00ec */ cmp %o5,%g4 /* 0x00f0 */ add %o4,4,%o4 /* 0x00f4 164 */ sub %g1,%o2,%g1 /* 0x00f8 */ add %g1,%g5,%g5 /* 0x00fc 167 */ and %g5,%g3,%o2 /* 0x0100 166 */ ld [%o4-4],%g1 /* 0x0104 167 */ st %o2,[%o4-8] /* 0x0108 168 */ ble,pt %icc,.L900000505 /* 0x010c */ srax %g5,32,%g5 .L900000508: /* 0x0110 166 */ ld [%o3],%g2 /* 0x0114 164 */ sub %g1,%g2,%g1 /* 0x0118 */ add %g1,%g5,%g1 /* 0x011c 167 */ and %g1,%g3,%g2 /* 0x0120 */ retl ! 
Result = /* 0x0124 */ st %g2,[%o4-4] /* Simple subtract loop: g1 = g5 + the word at o4 - the word at o3 (o3 was copied from o1, the nint argument, at offset 0x0014), the low 32 bits (and with g3) are stored back at o4, and g5 = g1 shifted right arithmetically by 32 becomes the carry for the next word. The closing ble,a is annulled, so the load in its delay slot only runs when the loop repeats. */ .L77000199: /* 0x0128 166 */ ld [%o4],%g1 .L900000509: /* 0x012c 166 */ ld [%o3],%g2 /* 0x0130 */ add %g5,%g1,%g1 /* 0x0134 168 */ add %o5,1,%o5 /* 0x0138 */ add %o3,4,%o3 /* 0x013c */ cmp %o5,%g4 /* 0x0140 166 */ sub %g1,%g2,%g1 /* 0x0144 167 */ and %g1,%g3,%g2 /* 0x0148 */ st %g2,[%o4] /* 0x014c 168 */ add %o4,4,%o4 /* 0x0150 */ srax %g1,32,%g5 /* 0x0154 */ ble,a,pt %icc,.L900000509 /* 0x0158 */ ld [%o4],%g1 .L77000191: /* 0x015c */ retl ! Result = /* 0x0160 */ nop /* 0x0164 0 */ .type adjust_montf_result,2 /* 0x0164 */ .size adjust_montf_result,(.-adjust_montf_result) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 4 /* 000000 */ .skip 16 ! ! SUBROUTINE mont_mulf_noconv ! ! OFFSET SOURCE LINE LABEL INSTRUCTION .global mont_mulf_noconv mont_mulf_noconv: /* 000000 */ save %sp,-144,%sp .L900000644: /* 0x0004 */ call .+8 /* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000644-.)),%g4 ! 169 ! } ! 170 ! } ! 171 !} ! 175 !void cleanup(double *dt, int from, int tlen); ! 177 !/* ! 178 !** the lengths of the input arrays should be at least the following: ! 179 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] ! 180 !** all of them should be different from one another ! 181 !** ! 182 !*/ ! 183 !void mont_mulf_noconv(unsigned int *result, ! 184 ! double *dm1, double *dm2, double *dt, ! 185 ! double *dn, unsigned int *nint, ! 186 ! int nlen, double dn0) ! 187 !{ ! 188 ! int i, j, jj; ! 189 ! int tmp; ! 190 ! double digit, m2j, nextm2j, a, b; ! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; ! 193 ! pdm1=&(dm1[0]); ! 194 ! pdm2=&(dm2[0]); ! 195 ! pdn=&(dn[0]); ! 196 ! 
pdm2[2*nlen]=Zero; /* 0x000c 196 */ sethi %hi(Zero),%g2 /* 0x0010 */ ld [%fp+92],%o0 /* 0x0014 187 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000644-.)),%g4 /* 0x0018 196 */ add %g2,%lo(Zero),%g2 /* 0x001c 187 */ ldd [%fp+96],%f2 /* 0x0020 */ add %g4,%o7,%o3 /* 0x0024 */ st %i0,[%fp+68] /* 0x0028 */ or %g0,%i3,%o1 /* 0x002c 196 */ ld [%o3+%g2],%g3 /* 0x0030 */ sll %o0,4,%g2 /* 0x0034 187 */ or %g0,%i1,%g4 /* 0x0038 */ fmovd %f2,%f16 /* 0x003c */ st %i5,[%fp+88] /* 0x0040 */ or %g0,%o1,%g5 /* 0x0044 */ or %g0,%i2,%o2 /* 0x0048 196 */ ldd [%g3],%f0 /* 0x004c */ or %g0,%o0,%g1 ! 198 ! if (nlen!=16) /* 0x0050 198 */ cmp %o0,16 /* 0x0054 */ be,pn %icc,.L77000289 /* 0x0058 */ std %f0,[%o2+%g2] ! 199 ! { ! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; /* 0x005c 200 */ sll %o0,2,%g2 /* 0x0060 187 */ or %g0,%i4,%i0 /* 0x0064 196 */ sll %o0,1,%o7 /* 0x0068 200 */ add %g2,2,%o2 /* 0x006c */ cmp %o2,0 /* 0x0070 196 */ or %g0,%i2,%i1 /* 0x0074 200 */ ble,a,pt %icc,.L900000658 /* 0x0078 */ ldd [%g4],%f0 ! 202 ! a=dt[0]=pdm1[0]*pdm2[0]; ! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); ! 205 ! pdtj=&(dt[0]); ! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) ! 207 ! { ! 208 ! m2j=pdm2[j]; ! 209 ! a=pdtj[0]+pdn[0]*digit; ! 210 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; ! 211 ! pdtj[1]=b; ! 213 !#pragma pipeloop(0) ! 214 ! for(i=1;i