1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* 27 * This file is mostly a result of compiling the mont_mulf.c file to generate an 28 * assembly output and then hand-editing that output to replace the 29 * compiler-generated loop for the 512-bit case (nlen == 16) in the 30 * mont_mulf_noconv routine with a hand-crafted version. This file also 31 * has big_savefp() and big_restorefp() routines added by hand. 
32 */
33
34#include <sys/asm_linkage.h>
35#include <sys/trap.h>
36#include <sys/stack.h>
37#include <sys/privregs.h>
38#include <sys/regset.h>
39#include <sys/vis.h>
40#include <sys/machthread.h>
41#include <sys/machtrap.h>
42#include <sys/machsig.h>
43
44#if defined(lint) || defined(__lint)
45#include <sys/types.h>
46
/*
 * Lint-only C stubs. When lint or __lint is defined, these empty bodies
 * stand in for the assembly routines so that lint sees their C
 * prototypes. The assembly itself is in the #else branch below.
 */
47/* ARGSUSED */
48uint64_t
49double2uint64_t(double* d)
50{
51 return (0ULL);
52}
53
54/* ARGSUSED */
55void
56conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
57{
58}
59
60/* ARGSUSED */
61void
62conv_i32_to_d32(double *d32, uint32_t *i32, int len)
63{
64}
65
66/* ARGSUSED */
67void
68conv_i32_to_d16(double *d16, uint32_t *i32, int len)
69{
70}
71
72/* ARGSUSED */
73void
74mont_mulf_noconv(uint32_t *result, double *dm1, double *dm2, double *dt,
75 double *dn, uint32_t *nint, int nlen, double dn0)
76{
77}
78
79#else /* lint || __lint */
80
81 .section ".text",#alloc,#execinstr
82 .file "mont_mulf.c"
83
84 .section ".bss",#alloc,#write
85Bbss.bss:
86
87 .section ".data",#alloc,#write
88Ddata.data:
89
/*
 * Read-only double constants, each emitted as two 32-bit words. They
 * correspond to the "static const double" definitions in the C listing
 * embedded further down. Each symbol is declared .global twice; the
 * repetition is left exactly as the compiler emitted it.
 */
90 .section ".rodata",#alloc
91!
92! CONSTANT POOL
93!
94Drodata.rodata:
95 .global TwoTo16
96 .align 8
97!
98! CONSTANT POOL
99!
100 .global TwoTo16
/* 65536.0 (2^16): first word 1089470464 == 0x40f00000, second word 0 */
101TwoTo16:
102 .word 1089470464
103 .word 0
104 .type TwoTo16,#object
105 .size TwoTo16,8
106 .global TwoToMinus16
107!
108! CONSTANT POOL
109!
110 .global TwoToMinus16
/* 1.0/65536.0 (2^-16): first word 1055916032 == 0x3ef00000, second word 0 */
111TwoToMinus16:
112 .word 1055916032
113 .word 0
114 .type TwoToMinus16,#object
115 .size TwoToMinus16,8
116 .global Zero
117!
118! CONSTANT POOL
119!
120 .global Zero
/* 0.0: both words zero */
121Zero:
122 .word 0
123 .word 0
124 .type Zero,#object
125 .size Zero,8
126 .global TwoTo32
127!
128! CONSTANT POOL
129!
130 .global TwoTo32
/* 65536.0 * 65536.0 (2^32): first word 1106247680 == 0x41f00000, second word 0 */
131TwoTo32:
132 .word 1106247680
133 .word 0
134 .type TwoTo32,#object
135 .size TwoTo32,8
136 .global TwoToMinus32
137!
138! CONSTANT POOL
139!
140 .global TwoToMinus32
/* 1.0 / 2^32 (2^-32): first word 1039138816 == 0x3df00000, second word 0 */
141TwoToMinus32:
142 .word 1039138816
143 .word 0
144 .type TwoToMinus32,#object
145 .size TwoToMinus32,8
146
147 .section ".text",#alloc,#execinstr
148/* 000000 0 */ .register %g3,#scratch
149/* 000000 */ .register %g2,#scratch
150/* 000000 0 */ .align 32
151! FILE mont_mulf.c
152
153! 1 !/*
154! 2 ! * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
155! 3 ! * Use is subject to license terms.
156! 4 ! */
157! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI"
158! 9 !/*
159! 10 ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time
160! 11 ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time
161! 12 ! * (i.e. cc <compiler_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
162! 13 ! */
163! 15 !#include <sys/types.h>
164! 16 !#include <math.h>
165! 18 !static const double TwoTo16 = 65536.0;
166! 19 !static const double TwoToMinus16 = 1.0/65536.0;
167! 20 !static const double Zero = 0.0;
168! 21 !static const double TwoTo32 = 65536.0 * 65536.0;
169! 22 !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
170! 24 !#ifdef RF_INLINE_MACROS
171! 26 !double upper32(double);
172! 27 !double lower32(double, double);
173! 28 !double mod(double, double, double);
174! 30 !#else
175! 32 !static double
176! 33 !upper32(double x)
177! 34 !{
178! 35 ! return (floor(x * TwoToMinus32));
179! 36 !}
180! 39 !/* ARGSUSED */
181! 40 !static double
182! 41 !lower32(double x, double y)
183! 42 !{
184! 43 ! return (x - TwoTo32 * floor(x * TwoToMinus32));
185! 44 !}
186! 46 !static double
187! 47 !mod(double x, double oneoverm, double m)
188! 48 !{
189! 49 ! return (x - m * floor(x * oneoverm));
190! 50 !}
191! 52 !#endif
192! 55 !static void
193! 56 !cleanup(double *dt, int from, int tlen)
194! 57 !{
195
196!
197! SUBROUTINE cleanup
198!
199! OFFSET SOURCE LINE LABEL INSTRUCTION
200
/*
 * cleanup(dt, from, tlen) -- file-local leaf routine: there is no .global
 * for it, no save/restore, and it returns with retl.
 *   %o0 = dt, %o1 = from, %o2 = tlen
 * The index starts at 2 * from (%g3) and the loop is skipped entirely
 * when that is not below 2 * tlen (%g5). Each pass handles dt[i] and
 * dt[i + 1] as in the interleaved C listing: the stored value is the
 * low 32 bits of the element plus the upper part carried over from the
 * previous pass (%f18 / %f16, both started at Zero).
 * lower32() is realized as fdtox, then fmovs %f8 (the upper word of
 * Zero) over the upper word of the 64-bit integer, then fxtod.
 * upper32() is realized as fitod on the upper word saved before that
 * overwrite.
 * NOTE(review): the upper32 path for x1 feeds fitod from %f6 (a second
 * fdtox of %f12) -- left byte-for-byte as generated.
 */
201 cleanup:
202/* 000000 57 */ sra %o1,0,%o4
203/* 0x0004 */ sra %o2,0,%o5
204
205! 58 ! int i;
206! 59 ! double tmp, tmp1, x, x1;
207! 61 ! tmp = tmp1 = Zero;
208
209/* 0x0008 61 */ sll %o5,1,%g5
210
211! 63 ! for (i = 2 * from; i < 2 * tlen; i += 2) {
212
213/* 0x000c 63 */ sll %o4,1,%g3
214/* 0x0010 */ cmp %g3,%g5
215/* 0x0014 */ bge,pn %icc,.L77000188
216/* 0x0018 0 */ sethi %hi(Zero),%o3
217 .L77000197:
218/* 0x001c 63 */ ldd [%o3+%lo(Zero)],%f8
219/* 0x0020 */ sra %g3,0,%o1
220/* 0x0024 */ sub %g5,1,%g2
221/* 0x0028 */ sllx %o1,3,%g4
222
223! 64 ! x = dt[i];
224
225/* 0x002c 64 */ ldd [%g4+%o0],%f10
226/* 0x0030 63 */ add %g4,%o0,%g1
227/* 0x0034 */ fmovd %f8,%f18
228/* 0x0038 */ fmovd %f8,%f16
229
230! 65 ! x1 = dt[i + 1];
231! 66 ! dt[i] = lower32(x, Zero) + tmp;
232
233 .L900000110:
234/* 0x003c 66 */ fdtox %f10,%f0
235/* 0x0040 65 */ ldd [%g1+8],%f12
236
237! 67 ! dt[i + 1] = lower32(x1, Zero) + tmp1;
238! 68 ! tmp = upper32(x);
239! 69 ! tmp1 = upper32(x1);
240
241/* 0x0044 69 */ add %g3,2,%g3
242/* 0x0048 */ cmp %g3,%g2
243/* 0x004c 67 */ fdtox %f12,%f2
244/* 0x0050 68 */ fmovd %f0,%f4
245/* 0x0054 66 */ fmovs %f8,%f0
246/* 0x0058 67 */ fmovs %f8,%f2
247/* 0x005c 66 */ fxtod %f0,%f0
248/* 0x0060 67 */ fxtod %f2,%f2
249/* 0x0064 69 */ fdtox %f12,%f6
250/* 0x0068 66 */ faddd %f0,%f18,%f10
251/* 0x006c */ std %f10,[%g1]
252/* 0x0070 67 */ faddd %f2,%f16,%f14
253/* 0x0074 */ std %f14,[%g1+8]
254/* 0x0078 68 */ fitod %f4,%f18
255/* 0x007c 69 */ add %g1,16,%g1
256/* 0x0080 */ fitod %f6,%f16
/* annulled delay slot: the next dt[i] is loaded only when the branch is taken */
257/* 0x0084 */ ble,a,pt %icc,.L900000110
258/* 0x0088 64 */ ldd [%g1],%f10
259 .L77000188:
260/* 0x008c 69 */ retl ! Result =
261/* 0x0090 */ nop
262/* 0x0094 0 */ .type cleanup,2
263/* 0x0094 0 */ .size cleanup,(.-cleanup)
264
265 .section ".text",#alloc,#execinstr
266/* 000000 0 */ .align 8
267/* 000000 */ .skip 24
268/* 0x0018 */ .align 32
269
270! 70 ! }
271! 71 !}
272! 75 !#ifdef _KERNEL
273! 76 !/*
274! 77 ! * This only works if 0 <= d < 2^53
275! 78 ! */
276! 79 !uint64_t
277! 80 !double2uint64_t(double* d)
278! 81 !{
279! 82 ! uint64_t x;
280! 83 ! uint64_t exp;
281!
84 ! uint64_t man;
282! 86 ! x = *((uint64_t *)d);
283
284!
285! SUBROUTINE double2uint64_t
286!
287! OFFSET SOURCE LINE LABEL INSTRUCTION
288
/*
 * double2uint64_t(d) -- leaf routine; %o0 = d on entry, result in %o0.
 * The code follows the _KERNEL variant of the C listing: it uses integer
 * instructions only. It loads the raw 64-bit image of *d and returns 0
 * when that image is all zero. Otherwise:
 *   mantissa = (image & ~0xfff0000000000000) | (1 << 52)
 *   shift    = 52 + (1023 - (image >> 52))
 *   result   = mantissa >> shift
 * The listing states this is only valid for 0 <= d < 2^53.
 */
289 .global double2uint64_t
290 double2uint64_t:
291/* 000000 86 */ ldx [%o0],%o2
292
293! 87 ! if (x == 0) {
294
295/* 0x0004 87 */ cmp %o2,0
296/* 0x0008 */ bne,pn %xcc,.L900000206
297/* 0x000c 94 */ sethi %hi(0xfff00000),%o5
298 .L77000202:
299/* 0x0010 94 */ retl ! Result = %o0
300
301! 88 ! return (0ULL);
302
303/* 0x0014 88 */ or %g0,0,%o0
304
305! 89 ! }
306! 90 ! exp = (x >> 52) - 1023;
307! 91 ! man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL;
308! 92 ! x = man >> (52 - exp);
309! 94 ! return (x);
310
311 .L900000206:
312/* 0x0018 94 */ sllx %o5,32,%o4
313/* 0x001c */ srlx %o2,52,%o0
314/* 0x0020 */ sethi %hi(0x40000000),%o1
315/* 0x0024 */ or %g0,1023,%g5
316/* 0x0028 */ sllx %o1,22,%g4
317/* 0x002c */ xor %o4,-1,%o3
318/* 0x0030 */ sub %g5,%o0,%g3
319/* 0x0034 */ and %o2,%o3,%g2
320/* 0x0038 */ or %g2,%g4,%o5
321/* 0x003c */ add %g3,52,%g1
322/* 0x0040 */ retl ! Result = %o0
323/* 0x0044 */ srlx %o5,%g1,%o0
324/* 0x0048 0 */ .type double2uint64_t,2
325/* 0x0048 0 */ .size double2uint64_t,(.-double2uint64_t)
326
327 .section ".text",#alloc,#execinstr
328/* 000000 0 */ .align 8
329/* 000000 */ .skip 24
330/* 0x0018 */ .align 32
331
332! 95 !}
333! 96 !#else
334! 97 !/*
335! 98 ! * This only works if 0 <= d < 2^63
336! 99 ! */
337! 100 !uint64_t
338! 101 !double2uint64_t(double* d)
339! 102 !{
340! 103 ! return ((int64_t)(*d));
341! 104 !}
342! 105 !#endif
343! 107 !/* ARGSUSED */
344! 108 !void
345! 109 !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
346! 110 !{
347
348!
349! SUBROUTINE conv_d16_to_i32
350!
351! OFFSET SOURCE LINE LABEL INSTRUCTION
352
/*
 * conv_d16_to_i32(i32, d16, tmp, ilen)
 *   %i0 = i32, %i1 = d16, %i2 = tmp, %i3 = ilen
 * tmp is never read: %i2 is overwritten straight away with the raw image
 * of d16[1] and from then on carries 'b'. %i3 is likewise reused inside
 * the loop, after ilen - 1 and ilen - 2 have been copied to %l3 / %l5.
 * There is no call to double2uint64_t; the same zero test and
 * mantissa/shift sequence is repeated inline for a, b, c and d of the
 * C listing. The loop at .L77000208 stores one 32-bit word per pass and
 * runs while the counter %l4 is <= ilen - 2. The code at .L77000210
 * then stores the final word i32[i].
 */
353 .global conv_d16_to_i32
354 conv_d16_to_i32:
355/* 000000 110 */ save %sp,-176,%sp
356
357! 111 ! int i;
358! 112 ! int64_t t, t1, /* using int64_t and not uint64_t */
359! 113 ! a, b, c, d; /* because more efficient code is */
360! 114 ! /* generated this way, and there */
361! 115 ! /* is no overflow */
362! 116 ! t1 = 0;
363! 117 ! a = double2uint64_t(&(d16[0]));
364
365/* 0x0004 117 */ ldx [%i1],%o0
366/* 0x0008 118 */ ldx [%i1+8],%i2
367/* 0x000c 117 */ cmp %o0,0
368/* 0x0010 */ bne,pn %xcc,.L77000216
369/* 0x0014 */ or %g0,0,%i4
370 .L77000215:
371/* 0x0018 117 */ ba .L900000316
372/* 0x001c 118 */ cmp %i2,0
373 .L77000216:
374/* 0x0020 117 */ srlx %o0,52,%o5
375/* 0x0024 */ sethi %hi(0xfff00000),%i4
376/* 0x0028 */ sllx %i4,32,%o2
377/* 0x002c */ sethi %hi(0x40000000),%o7
378/* 0x0030 */ sllx %o7,22,%o3
379/* 0x0034 */ or %g0,1023,%o4
380/* 0x0038 */ xor %o2,-1,%g5
381/* 0x003c */ sub %o4,%o5,%l0
382/* 0x0040 */ and %o0,%g5,%o1
383/* 0x0044 */ add %l0,52,%l1
384/* 0x0048 */ or %o1,%o3,%g4
385
386! 118 ! b = double2uint64_t(&(d16[1]));
387
388/* 0x004c 118 */ cmp %i2,0
389/* 0x0050 117 */ srlx %g4,%l1,%i4
390 .L900000316:
391/* 0x0054 118 */ bne,pn %xcc,.L77000222
392/* 0x0058 134 */ sub %i3,1,%l3
393 .L77000221:
394/* 0x005c 118 */ or %g0,0,%i2
395/* 0x0060 */ ba .L900000315
396/* 0x0064 116 */ or %g0,0,%o3
397 .L77000222:
398/* 0x0068 118 */ srlx %i2,52,%l6
399/* 0x006c */ sethi %hi(0xfff00000),%g4
400/* 0x0070 */ sllx %g4,32,%i5
401/* 0x0074 */ sethi %hi(0x40000000),%l5
402/* 0x0078 */ xor %i5,-1,%l4
403/* 0x007c */ or %g0,1023,%l2
404/* 0x0080 */ and %i2,%l4,%l7
405/* 0x0084 */ sllx %l5,22,%i2
406/* 0x0088 */ sub %l2,%l6,%g1
407/* 0x008c */ or %l7,%i2,%g3
408/* 0x0090 */ add %g1,52,%g2
409/* 0x0094 116 */ or %g0,0,%o3
410/* 0x0098 118 */ srlx %g3,%g2,%i2
411
412! 119 ! for (i = 0; i < ilen - 1; i++) {
413
414 .L900000315:
415/* 0x009c 119 */ cmp %l3,0
416/* 0x00a0 */ ble,pn %icc,.L77000210
417/* 0x00a4 */ or %g0,0,%l4
/*
 * Loop-invariant setup: %l7 = mantissa mask, %l3 = 1 << 52,
 * %l6 = 0xffffffff, %l2 = 0xffff (0xfc00 + 1023), %l5 = ilen - 2,
 * %g2 = running d16 index (starts at 2), %g1 = output pointer.
 */
418 .L77000245:
419/* 0x00a8 118 */ sethi %hi(0xfff00000),%l7
420/* 0x00ac */ or %g0,-1,%l6
421/* 0x00b0 */ sllx %l7,32,%l3
422/* 0x00b4 */ srl %l6,0,%l6
423/* 0x00b8 */ sethi %hi(0x40000000),%l1
424/* 0x00bc */ sethi %hi(0xfc00),%l2
425/* 0x00c0 */ xor %l3,-1,%l7
426/* 0x00c4 */ sllx %l1,22,%l3
427/* 0x00c8 */ sub %i3,2,%l5
428/* 0x00cc */ add %l2,1023,%l2
429/* 0x00d0 */ or %g0,2,%g2
430/* 0x00d4 */ or %g0,%i0,%g1
431
432! 120 ! c = double2uint64_t(&(d16[2 * i + 2]));
433
434 .L77000208:
435/* 0x00d8 120 */ sra %g2,0,%g3
436/* 0x00dc 123 */ add %g2,1,%o2
437/* 0x00e0 120 */ sllx %g3,3,%i3
438
439! 121 ! t1 += a & 0xffffffff;
440! 122 ! t = (a >> 32);
441! 123 ! d = double2uint64_t(&(d16[2 * i + 3]));
442
443/* 0x00e4 123 */ sra %o2,0,%g5
444/* 0x00e8 120 */ ldx [%i1+%i3],%o5
445/* 0x00ec 123 */ sllx %g5,3,%o0
446/* 0x00f0 121 */ and %i4,%l6,%g4
447/* 0x00f4 123 */ ldx [%i1+%o0],%i3
448/* 0x00f8 120 */ cmp %o5,0
449/* 0x00fc */ bne,pn %xcc,.L77000228
450/* 0x0100 124 */ and %i2,%l2,%i5
451 .L77000227:
452/* 0x0104 120 */ or %g0,0,%l1
453/* 0x0108 */ ba .L900000314
454/* 0x010c 121 */ add %o3,%g4,%o0
455 .L77000228:
456/* 0x0110 120 */ srlx %o5,52,%o7
457/* 0x0114 */ and %o5,%l7,%o5
458/* 0x0118 */ or %g0,52,%l0
459/* 0x011c */ sub %o7,1023,%o4
460/* 0x0120 */ or %o5,%l3,%l1
461/* 0x0124 */ sub %l0,%o4,%o1
462/* 0x0128 */ srlx %l1,%o1,%l1
463/* 0x012c 121 */ add %o3,%g4,%o0
464 .L900000314:
465/* 0x0130 122 */ srax %i4,32,%g3
466/* 0x0134 123 */ cmp %i3,0
467/* 0x0138 */ bne,pn %xcc,.L77000234
468/* 0x013c 124 */ sllx %i5,16,%g5
469 .L77000233:
470/* 0x0140 123 */ or %g0,0,%o2
471/* 0x0144 */ ba .L900000313
472/* 0x0148 124 */ add %o0,%g5,%o7
473 .L77000234:
474/* 0x014c 123 */ srlx %i3,52,%o2
475/* 0x0150 */ and %i3,%l7,%i4
476/* 0x0154 */ sub %o2,1023,%o1
477/* 0x0158 */ or %g0,52,%g4
478/* 0x015c */ sub %g4,%o1,%i5
479/* 0x0160 */ or %i4,%l3,%i3
480/* 0x0164 */ srlx %i3,%i5,%o2
481
482! 124 ! t1 += (b & 0xffff) << 16;
483
484/* 0x0168 124 */ add %o0,%g5,%o7
485
486! 125 ! t += (b >> 16) + (t1 >> 32);
487
488 .L900000313:
489/* 0x016c 125 */ srax %i2,16,%l0
490/* 0x0170 */ srax %o7,32,%o4
491/* 0x0174 */ add %l0,%o4,%o3
492
493! 126 ! i32[i] = t1 & 0xffffffff;
494! 127 ! t1 = t;
495! 128 ! a = c;
496! 129 ! b = d;
497
498/* 0x0178 129 */ add %l4,1,%l4
499/* 0x017c 126 */ and %o7,%l6,%o5
500/* 0x0180 125 */ add %g3,%o3,%o3
501/* 0x0184 126 */ st %o5,[%g1]
502/* 0x0188 128 */ or %g0,%l1,%i4
503/* 0x018c 129 */ or %g0,%o2,%i2
504/* 0x0190 */ add %g2,2,%g2
505/* 0x0194 */ cmp %l4,%l5
506/* 0x0198 */ ble,pt %icc,.L77000208
507/* 0x019c */ add %g1,4,%g1
508
509! 130 ! }
510! 131 ! t1 += a & 0xffffffff;
511! 132 ! t = (a >> 32);
512! 133 ! t1 += (b & 0xffff) << 16;
513! 134 ! i32[i] = t1 & 0xffffffff;
514
/*
 * Final word. 'a' is added here without the 0xffffffff mask, and the
 * result is written with a 32-bit st.
 */
515 .L77000210:
516/* 0x01a0 134 */ sra %l4,0,%l4
517/* 0x01a4 */ sethi %hi(0xfc00),%i1
518/* 0x01a8 */ add %o3,%i4,%l2
519/* 0x01ac */ add %i1,1023,%i5
520/* 0x01b0 */ and %i2,%i5,%l5
521/* 0x01b4 */ sllx %l4,2,%i2
522/* 0x01b8 */ sllx %l5,16,%l6
523/* 0x01bc */ add %l2,%l6,%l7
524/* 0x01c0 */ st %l7,[%i0+%i2]
525/* 0x01c4 129 */ ret ! Result =
526/* 0x01c8 */ restore %g0,%g0,%g0
527/* 0x01cc 0 */ .type conv_d16_to_i32,2
528/* 0x01cc 0 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
529
530 .section ".text",#alloc,#execinstr
531/* 000000 0 */ .align 8
532!
533! CONSTANT POOL
534!
/*
 * Conversion constant referenced by conv_i32_to_d32: the double 2^52
 * (first word 1127219200 == 0x43300000, second word 0), followed by a
 * separate copy of that first word at offset 8.
 */
535 ___const_seg_900000401:
536/* 000000 0 */ .word 1127219200,0
537/* 0x0008 */ .word 1127219200
538/* 0x000c 0 */ .type ___const_seg_900000401,1
539/* 0x000c 0 */ .size ___const_seg_900000401,(.-___const_seg_900000401)
540/* 0x000c 0 */ .align 8
541/* 0x0010 */ .skip 24
542/* 0x0028 */ .align 32
543
544! 135 !}
545! 138 !void
546! 139 !conv_i32_to_d32(double *d32, uint32_t *i32, int len)
547! 140 !{
548
549!
550! SUBROUTINE conv_i32_to_d32
551!
552!
OFFSET SOURCE LINE LABEL INSTRUCTION
553
/*
 * conv_i32_to_d32(d32, i32, len) -- leaf routine (retl, no save).
 *   %o0 = d32, %o1 = i32, %o2 = len
 * No integer-to-double instruction is used. Each 32-bit input is loaded
 * into the odd half of an FP register pair whose even half was loaded
 * with 0x43300000 from ___const_seg_900000401+8. The double 2^52 (%f16,
 * loaded from the same constant) is then subtracted from the pair.
 * NOTE(review): this relies on the pair reading as 2^52 + n before the
 * subtraction -- confirm against the SPARC V9 register-pair layout.
 * Control flow:
 *   len <= 0  returns immediately;
 *   len < 10  uses only the one-element loop at .L900000409;
 *   otherwise the loop at .L900000405 stores eight doubles per pass,
 *             .L900000408 stores two more, and the one-element loop
 *             finishes whatever is left.
 */
554 .global conv_i32_to_d32
555 conv_i32_to_d32:
556/* 000000 140 */ orcc %g0,%o2,%o2
557
558! 141 ! int i;
559! 143 !#pragma pipeloop(0)
560! 144 ! for (i = 0; i < len; i++)
561
562/* 0x0004 144 */ ble,pn %icc,.L77000254
563/* 0x0008 */ sub %o2,1,%o3
564 .L77000263:
565/* 0x000c 140 */ or %g0,%o0,%o2
566
567! 145 ! d32[i] = (double)(i32[i]);
568
569/* 0x0010 145 */ add %o3,1,%o5
570/* 0x0014 144 */ or %g0,0,%g5
571/* 0x0018 145 */ cmp %o5,10
572/* 0x001c */ bl,pn %icc,.L77000261
573/* 0x0020 */ sethi %hi(___const_seg_900000401),%g4
/* Prologue of the unrolled path: prefetches plus the first two input loads */
574 .L900000407:
575/* 0x0024 145 */ prefetch [%o1],0
576/* 0x0028 */ prefetch [%o0],22
577/* 0x002c */ sethi %hi(___const_seg_900000401+8),%o4
578/* 0x0030 */ or %g0,%o0,%o2
579/* 0x0034 */ prefetch [%o1+64],0
580/* 0x0038 */ add %o1,8,%o0
581/* 0x003c */ sub %o3,7,%o5
582/* 0x0040 */ prefetch [%o2+64],22
583/* 0x0044 */ or %g0,2,%g5
584/* 0x0048 */ prefetch [%o2+128],22
585/* 0x004c */ prefetch [%o2+192],22
586/* 0x0050 */ prefetch [%o1+128],0
587/* 0x0054 */ ld [%o4+%lo(___const_seg_900000401+8)],%f2
588/* 0x0058 */ ldd [%g4+%lo(___const_seg_900000401)],%f16
589/* 0x005c */ fmovs %f2,%f0
590/* 0x0060 */ prefetch [%o2+256],22
591/* 0x0064 */ prefetch [%o2+320],22
592/* 0x0068 */ ld [%o1],%f3
593/* 0x006c */ prefetch [%o1+192],0
594/* 0x0070 */ ld [%o1+4],%f1
/* Unrolled body: eight std per pass, %g5 advances by 8, input pointer %o0 by 32 bytes */
595 .L900000405:
596/* 0x0074 145 */ prefetch [%o0+188],0
597/* 0x0078 */ fsubd %f2,%f16,%f22
598/* 0x007c */ add %g5,8,%g5
599/* 0x0080 */ add %o0,32,%o0
600/* 0x0084 */ ld [%o4+%lo(___const_seg_900000401+8)],%f4
601/* 0x0088 */ std %f22,[%o2]
602/* 0x008c */ cmp %g5,%o5
603/* 0x0090 */ ld [%o0-32],%f5
604/* 0x0094 */ fsubd %f0,%f16,%f24
605/* 0x0098 */ add %o2,64,%o2
606/* 0x009c */ fmovs %f4,%f0
607/* 0x00a0 */ std %f24,[%o2-56]
608/* 0x00a4 */ ld [%o0-28],%f1
609/* 0x00a8 */ fsubd %f4,%f16,%f26
610/* 0x00ac */ fmovs %f0,%f6
611/* 0x00b0 */ prefetch [%o2+312],22
612/* 0x00b4 */ std %f26,[%o2-48]
613/* 0x00b8 */ ld [%o0-24],%f7
614/* 0x00bc */ fsubd %f0,%f16,%f28
615/* 0x00c0 */ fmovs %f6,%f8
616/* 0x00c4 */ std %f28,[%o2-40]
617/* 0x00c8 */ ld [%o0-20],%f9
618/* 0x00cc */ fsubd %f6,%f16,%f30
619/* 0x00d0 */ fmovs %f8,%f10
620/* 0x00d4 */ std %f30,[%o2-32]
621/* 0x00d8 */ ld [%o0-16],%f11
622/* 0x00dc */ prefetch [%o2+344],22
623/* 0x00e0 */ fsubd %f8,%f16,%f48
624/* 0x00e4 */ fmovs %f10,%f12
625/* 0x00e8 */ std %f48,[%o2-24]
626/* 0x00ec */ ld [%o0-12],%f13
627/* 0x00f0 */ fsubd %f10,%f16,%f50
628/* 0x00f4 */ fmovs %f12,%f2
629/* 0x00f8 */ std %f50,[%o2-16]
630/* 0x00fc */ ld [%o0-8],%f3
631/* 0x0100 */ fsubd %f12,%f16,%f52
632/* 0x0104 */ fmovs %f2,%f0
633/* 0x0108 */ std %f52,[%o2-8]
634/* 0x010c */ ble,pt %icc,.L900000405
635/* 0x0110 */ ld [%o0-4],%f1
/* Epilogue: store the two conversions still in flight, then test for leftovers */
636 .L900000408:
637/* 0x0114 145 */ fsubd %f2,%f16,%f18
638/* 0x0118 */ add %o2,16,%o2
639/* 0x011c */ cmp %g5,%o3
640/* 0x0120 */ std %f18,[%o2-16]
641/* 0x0124 */ fsubd %f0,%f16,%f20
642/* 0x0128 */ or %g0,%o0,%o1
643/* 0x012c */ bg,pn %icc,.L77000254
644/* 0x0130 */ std %f20,[%o2-8]
/* One-element loop: runs while the counter %g5 is <= len - 1 (%o3) */
645 .L77000261:
646/* 0x0134 145 */ ld [%o1],%f15
647 .L900000409:
648/* 0x0138 145 */ sethi %hi(___const_seg_900000401+8),%o4
649/* 0x013c */ ldd [%g4+%lo(___const_seg_900000401)],%f16
650/* 0x0140 */ add %g5,1,%g5
651/* 0x0144 */ ld [%o4+%lo(___const_seg_900000401+8)],%f14
652/* 0x0148 */ add %o1,4,%o1
653/* 0x014c */ cmp %g5,%o3
654/* 0x0150 */ fsubd %f14,%f16,%f54
655/* 0x0154 */ std %f54,[%o2]
656/* 0x0158 */ add %o2,8,%o2
657/* 0x015c */ ble,a,pt %icc,.L900000409
658/* 0x0160 */ ld [%o1],%f15
659 .L77000254:
660/* 0x0164 145 */ retl ! Result =
661/* 0x0168 */ nop
662/* 0x016c 0 */ .type conv_i32_to_d32,2
663/* 0x016c 0 */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
664
665 .section ".text",#alloc,#execinstr
666/* 000000 0 */ .align 8
667!
668! CONSTANT POOL
669!
/*
 * Conversion constant referenced by conv_i32_to_d16: the double 2^52
 * (first word 1127219200 == 0x43300000, second word 0), followed by a
 * separate copy of that first word at offset 8.
 */
670 ___const_seg_900000501:
671/* 000000 0 */ .word 1127219200,0
672/* 0x0008 */ .word 1127219200
673/* 0x000c 0 */ .type ___const_seg_900000501,1
674/* 0x000c 0 */ .size ___const_seg_900000501,(.-___const_seg_900000501)
675/* 0x000c 0 */ .align 8
676/* 0x0010 */ .skip 24
677/* 0x0028 */ .align 32
678
679! 146 !}
680! 149 !void
681! 150 !conv_i32_to_d16(double *d16, uint32_t *i32, int len)
682! 151 !{
683
684!
685! SUBROUTINE conv_i32_to_d16
686!
687! OFFSET SOURCE LINE LABEL INSTRUCTION
688
/*
 * conv_i32_to_d16(d16, i32, len)
 *   %i0 = d16, %i1 = i32, %i2 = len
 * Per the C listing, each input word a yields d16[2 * i] = a & 0xffff
 * and d16[2 * i + 1] = a >> 16. %i4 holds the 0xffff mask
 * (0xfc00 + 1023).
 * The incoming registers are recycled early: i32 moves to %l7, d16 to
 * %i2, and %i1 then holds the high part of the constant's address.
 * Each 16-bit half is computed in an integer register, stored to a slot
 * in the frame set up by "save %sp,-368,%sp", and reloaded into the odd
 * half of an FP register pair whose even half was loaded with
 * 0x43300000. The double 2^52 (%f20) is then subtracted from the pair.
 * NOTE(review): this relies on the pair reading as 2^52 + n before the
 * subtraction -- confirm against the SPARC V9 register-pair layout.
 * Control flow:
 *   len <= 0  returns immediately;
 *   len < 4   uses only the one-word loop at .L900000510;
 *   otherwise .L900000506 handles two input words (four doubles) per
 *             pass, .L900000509 stores the four doubles still in
 *             flight, and the one-word loop finishes whatever is left.
 */
689 .global conv_i32_to_d16
690 conv_i32_to_d16:
691/* 000000 151 */ save %sp,-368,%sp
692/* 0x0004 */ orcc %g0,%i2,%i2
693
694! 152 ! int i;
695! 153 ! uint32_t a;
696! 155 !#pragma pipeloop(0)
697! 156 ! for (i = 0; i < len; i++) {
698
699/* 0x0008 156 */ ble,pn %icc,.L77000272
700/* 0x000c */ sub %i2,1,%l6
701 .L77000281:
702/* 0x0010 156 */ sethi %hi(0xfc00),%i3
703
704! 157 ! a = i32[i];
705
706/* 0x0014 157 */ or %g0,%i2,%l1
707/* 0x0018 156 */ add %i3,1023,%i4
708/* 0x001c 157 */ cmp %i2,4
709/* 0x0020 151 */ or %g0,%i1,%l7
710/* 0x0024 */ or %g0,%i0,%i2
711/* 0x0028 156 */ or %g0,0,%i5
712/* 0x002c */ or %g0,0,%i3
713/* 0x0030 157 */ bl,pn %icc,.L77000279
714/* 0x0034 0 */ sethi %hi(___const_seg_900000501),%i1
/* Prologue of the pipelined path: prefetches, first two input words split and spilled */
715 .L900000508:
716/* 0x0038 157 */ prefetch [%i0+8],22
717/* 0x003c */ prefetch [%i0+72],22
718/* 0x0040 */ or %g0,%i0,%l2
719
720! 158 ! d16[2 * i] = (double)(a & 0xffff);
721
722/* 0x0044 158 */ sethi %hi(___const_seg_900000501+8),%l1
723/* 0x0048 157 */ prefetch [%i0+136],22
724/* 0x004c */ sub %l6,1,%i0
725/* 0x0050 */ or %g0,0,%i3
726/* 0x0054 */ prefetch [%i2+200],22
727/* 0x0058 */ or %g0,2,%i5
728/* 0x005c */ prefetch [%i2+264],22
729/* 0x0060 */ prefetch [%i2+328],22
730/* 0x0064 */ prefetch [%i2+392],22
731/* 0x0068 */ ld [%l7],%l3
732/* 0x006c */ ld [%l7+4],%l4
733/* 0x0070 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
734
735! 159 ! d16[2 * i + 1] = (double)(a >> 16);
736
737/* 0x0074 159 */ srl %l3,16,%o1
738/* 0x0078 158 */ and %l3,%i4,%o3
739/* 0x007c */ st %o3,[%sp+2335]
740/* 0x0080 159 */ srl %l4,16,%g4
741/* 0x0084 158 */ and %l4,%i4,%o0
742/* 0x0088 */ st %o0,[%sp+2303]
743/* 0x008c 159 */ add %l7,8,%l7
744/* 0x0090 */ st %o1,[%sp+2271]
745/* 0x0094 */ st %g4,[%sp+2239]
746/* 0x0098 157 */ prefetch [%i2+456],22
747/* 0x009c */ prefetch [%i2+520],22
/* Pipelined body: reloads the halves spilled on the previous pass while spilling the next two words */
748 .L900000506:
749/* 0x00a0 157 */ prefetch [%l2+536],22
750/* 0x00a4 159 */ add %i5,2,%i5
751/* 0x00a8 157 */ add %l2,32,%l2
752/* 0x00ac */ ld [%l7],%g2
753/* 0x00b0 159 */ cmp %i5,%i0
754/* 0x00b4 */ add %l7,8,%l7
755/* 0x00b8 158 */ ld [%sp+2335],%f9
756/* 0x00bc 159 */ add %i3,4,%i3
757/* 0x00c0 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f8
758/* 0x00c4 159 */ ld [%sp+2271],%f11
759/* 0x00c8 158 */ and %g2,%i4,%g3
760/* 0x00cc 159 */ fmovs %f8,%f10
761/* 0x00d0 158 */ st %g3,[%sp+2335]
762/* 0x00d4 */ fsubd %f8,%f20,%f28
763/* 0x00d8 */ std %f28,[%l2-32]
764/* 0x00dc 159 */ srl %g2,16,%g1
765/* 0x00e0 */ st %g1,[%sp+2271]
766/* 0x00e4 */ fsubd %f10,%f20,%f30
767/* 0x00e8 */ std %f30,[%l2-24]
768/* 0x00ec 157 */ ld [%l7-4],%l0
769/* 0x00f0 158 */ ld [%sp+2303],%f13
770/* 0x00f4 */ ld [%l1+%lo(___const_seg_900000501+8)],%f12
771/* 0x00f8 159 */ ld [%sp+2239],%f15
772/* 0x00fc 158 */ and %l0,%i4,%l5
773/* 0x0100 159 */ fmovs %f12,%f14
774/* 0x0104 158 */ st %l5,[%sp+2303]
775/* 0x0108 */ fsubd %f12,%f20,%f44
776/* 0x010c */ std %f44,[%l2-16]
777/* 0x0110 159 */ srl %l0,16,%o5
778/* 0x0114 */ st %o5,[%sp+2239]
779/* 0x0118 */ fsubd %f14,%f20,%f46
780/* 0x011c */ ble,pt %icc,.L900000506
781/* 0x0120 */ std %f46,[%l2-8]
/* Epilogue: convert and store the four halves still sitting in the spill slots */
782 .L900000509:
783/* 0x0124 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f0
784/* 0x0128 159 */ cmp %i5,%l6
785/* 0x012c */ add %i3,4,%i3
786/* 0x0130 158 */ ld [%sp+2335],%f1
787/* 0x0134 */ ld [%sp+2303],%f5
788/* 0x0138 159 */ fmovs %f0,%f2
789/* 0x013c */ ld [%sp+2271],%f3
790/* 0x0140 158 */ fmovs %f0,%f4
791/* 0x0144 159 */ ld [%sp+2239],%f7
792/* 0x0148 */ fmovs %f0,%f6
793/* 0x014c 158 */ fsubd %f0,%f20,%f22
794/* 0x0150 */ std %f22,[%l2]
795/* 0x0154 159 */ fsubd %f2,%f20,%f24
796/* 0x0158 */ std %f24,[%l2+8]
797/* 0x015c 158 */ fsubd %f4,%f20,%f26
798/* 0x0160 */ std %f26,[%l2+16]
799/* 0x0164 159 */ fsubd %f6,%f20,%f20
800/* 0x0168 */ bg,pn %icc,.L77000272
801/* 0x016c */ std %f20,[%l2+24]
/* One-word loop: %i3 indexes d16 (advances by 2), %i5 counts input words up to len - 1 (%l6) */
802 .L77000279:
803/* 0x0170 157 */ ld [%l7],%l2
804 .L900000510:
805/* 0x0174 158 */ and %l2,%i4,%o4
806/* 0x0178 */ st %o4,[%sp+2399]
807/* 0x017c 159 */ srl %l2,16,%o2
808/* 0x0180 */ st %o2,[%sp+2367]
809/* 0x0184 158 */ sethi %hi(___const_seg_900000501+8),%l1
810/* 0x0188 */ sra %i3,0,%i0
811/* 0x018c */ ld [%l1+%lo(___const_seg_900000501+8)],%f16
812/* 0x0190 */ sllx %i0,3,%o1
813/* 0x0194 159 */ add %i3,1,%o3
814/* 0x0198 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
815/* 0x019c 159 */ sra %o3,0,%l3
816/* 0x01a0 */ add %i5,1,%i5
817/* 0x01a4 158 */ ld [%sp+2399],%f17
818/* 0x01a8 159 */ sllx %l3,3,%o0
819/* 0x01ac */ add %l7,4,%l7
820/* 0x01b0 */ fmovs %f16,%f18
821/* 0x01b4 */ cmp %i5,%l6
822/* 0x01b8 */ add %i3,2,%i3
823/* 0x01bc 158 */ fsubd %f16,%f20,%f48
824/* 0x01c0 */ std %f48,[%i2+%o1]
825/* 0x01c4 159 */ ld [%sp+2367],%f19
826/* 0x01c8 */ fsubd %f18,%f20,%f50
827/* 0x01cc */ std %f50,[%i2+%o0]
828/* 0x01d0 */ ble,a,pt %icc,.L900000510
829/* 0x01d4 157 */ ld [%l7],%l2
830 .L77000272:
831/* 0x01d8 159 */ ret ! Result =
832/* 0x01dc */ restore %g0,%g0,%g0
833/* 0x01e0 0 */ .type conv_i32_to_d16,2
834/* 0x01e0 0 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
835
836 .section ".text",#alloc,#execinstr
837/* 000000 0 */ .align 8
838!
839! CONSTANT POOL
840!
/*
 * Same layout again (2^52 as a double, then its first word alone).
 * This copy is the one referenced by conv_i32_to_d32_and_d16 below.
 */
841 ___const_seg_900000601:
842/* 000000 0 */ .word 1127219200,0
843/* 0x0008 */ .word 1127219200
844/* 0x000c 0 */ .type ___const_seg_900000601,1
845/* 0x000c 0 */ .size ___const_seg_900000601,(.-___const_seg_900000601)
846/* 0x000c 0 */ .align 8
847/* 0x0010 */ .skip 24
848/* 0x0028 */ .align 32
849
850! 160 !
} 851! 161 !} 852! 163 !#ifdef RF_INLINE_MACROS 853! 165 !void 854! 166 !i16_to_d16_and_d32x4(const double *, /* 1/(2^16) */ 855! 167 ! const double *, /* 2^16 */ 856! 168 ! const double *, /* 0 */ 857! 169 ! double *, /* result16 */ 858! 170 ! double *, /* result32 */ 859! 171 ! float *); /* source - should be unsigned int* */ 860! 172 ! /* converted to float* */ 861! 174 !#else 862! 177 !/* ARGSUSED */ 863! 178 !static void 864! 179 !i16_to_d16_and_d32x4(const double *dummy1, /* 1/(2^16) */ 865! 180 ! const double *dummy2, /* 2^16 */ 866! 181 ! const double *dummy3, /* 0 */ 867! 182 ! double *result16, 868! 183 ! double *result32, 869! 184 ! float *src) /* source - should be unsigned int* */ 870! 185 ! /* converted to float* */ 871! 186 !{ 872! 187 ! uint32_t *i32; 873! 188 ! uint32_t a, b, c, d; 874! 190 ! i32 = (uint32_t *)src; 875! 191 ! a = i32[0]; 876! 192 ! b = i32[1]; 877! 193 ! c = i32[2]; 878! 194 ! d = i32[3]; 879! 195 ! result16[0] = (double)(a & 0xffff); 880! 196 ! result16[1] = (double)(a >> 16); 881! 197 ! result32[0] = (double)a; 882! 198 ! result16[2] = (double)(b & 0xffff); 883! 199 ! result16[3] = (double)(b >> 16); 884! 200 ! result32[1] = (double)b; 885! 201 ! result16[4] = (double)(c & 0xffff); 886! 202 ! result16[5] = (double)(c >> 16); 887! 203 ! result32[2] = (double)c; 888! 204 ! result16[6] = (double)(d & 0xffff); 889! 205 ! result16[7] = (double)(d >> 16); 890! 206 ! result32[3] = (double)d; 891! 207 !} 892! 209 !#endif 893! 212 !void 894! 213 !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len) 895! 214 !{ 896 897! 898! SUBROUTINE conv_i32_to_d32_and_d16 899! 900! OFFSET SOURCE LINE LABEL INSTRUCTION 901 902 .global conv_i32_to_d32_and_d16 903 conv_i32_to_d32_and_d16: 904/* 000000 214 */ save %sp,-368,%sp 905 906! 215 ! int i; 907! 216 ! uint32_t a; 908! 218 !#pragma pipeloop(0) 909! 219 ! for (i = 0; i < len - 3; i += 4) { 910! 220 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero, 911! 221 ! 
&(d16[2*i]), &(d32[i]), 912! 222 ! (float *)(&(i32[i]))); 913! 223 ! } 914! 224 ! for (; i < len; i++) { 915! 225 ! a = i32[i]; 916! 226 ! d32[i] = (double)(i32[i]); 917! 227 ! d16[2 * i] = (double)(a & 0xffff); 918! 228 ! d16[2 * i + 1] = (double)(a >> 16); 919 920/* 0x0004 228 */ sub %i3,3,%i4 921/* 0x0008 219 */ cmp %i4,0 922/* 0x000c */ ble,pn %icc,.L77000289 923/* 0x0010 */ or %g0,0,%i5 924 .L77000306: 925/* 0x0014 222 */ sethi %hi(Zero),%g3 926/* 0x0018 */ sethi %hi(TwoToMinus16),%g2 927/* 0x001c */ sethi %hi(TwoTo16),%o5 928/* 0x0020 */ ldd [%g3+%lo(Zero)],%f2 929/* 0x0024 219 */ sub %i3,4,%o4 930/* 0x0028 */ or %g0,0,%o3 931/* 0x002c */ or %g0,%i0,%l6 932/* 0x0030 */ or %g0,%i2,%l5 933 .L900000615: 934/* 0x0034 222 */ fmovd %f2,%f26 935/* 0x0038 */ ld [%l5],%f27 936/* 0x003c */ sra %o3,0,%o0 937/* 0x0040 */ add %i5,4,%i5 938/* 0x0044 */ fmovd %f2,%f28 939/* 0x0048 */ ld [%l5+4],%f29 940/* 0x004c */ sllx %o0,3,%g5 941/* 0x0050 */ cmp %i5,%o4 942/* 0x0054 */ fmovd %f2,%f30 943/* 0x0058 */ ld [%l5+8],%f31 944/* 0x005c */ add %i1,%g5,%g4 945/* 0x0060 */ add %o3,8,%o3 946/* 0x0064 */ ld [%l5+12],%f3 947/* 0x0068 */ fxtod %f26,%f26 948/* 0x006c */ ldd [%g2+%lo(TwoToMinus16)],%f32 949/* 0x0070 */ fxtod %f28,%f28 950/* 0x0074 */ add %l5,16,%l5 951/* 0x0078 */ fxtod %f30,%f30 952/* 0x007c */ ldd [%o5+%lo(TwoTo16)],%f34 953/* 0x0080 */ fxtod %f2,%f2 954/* 0x0084 */ std %f2,[%l6+24] 955/* 0x0088 */ fmuld %f32,%f26,%f36 956/* 0x008c */ std %f26,[%l6] 957/* 0x0090 */ fmuld %f32,%f28,%f38 958/* 0x0094 */ std %f28,[%l6+8] 959/* 0x0098 */ fmuld %f32,%f30,%f40 960/* 0x009c */ std %f30,[%l6+16] 961/* 0x00a0 */ fmuld %f32,%f2,%f42 962/* 0x00a4 */ add %l6,32,%l6 963/* 0x00a8 */ fdtox %f36,%f36 964/* 0x00ac */ fdtox %f38,%f38 965/* 0x00b0 */ fdtox %f40,%f40 966/* 0x00b4 */ fdtox %f42,%f42 967/* 0x00b8 */ fxtod %f36,%f36 968/* 0x00bc */ std %f36,[%g4+8] 969/* 0x00c0 */ fxtod %f38,%f38 970/* 0x00c4 */ std %f38,[%g4+24] 971/* 0x00c8 */ fxtod %f40,%f40 972/* 0x00cc */ std 
%f40,[%g4+40] 973/* 0x00d0 */ fxtod %f42,%f42 974/* 0x00d4 */ std %f42,[%g4+56] 975/* 0x00d8 */ fmuld %f36,%f34,%f36 976/* 0x00dc */ fmuld %f38,%f34,%f38 977/* 0x00e0 */ fmuld %f40,%f34,%f40 978/* 0x00e4 */ fmuld %f42,%f34,%f42 979/* 0x00e8 */ fsubd %f26,%f36,%f36 980/* 0x00ec */ std %f36,[%i1+%g5] 981/* 0x00f0 */ fsubd %f28,%f38,%f38 982/* 0x00f4 */ std %f38,[%g4+16] 983/* 0x00f8 */ fsubd %f30,%f40,%f40 984/* 0x00fc */ std %f40,[%g4+32] 985/* 0x0100 */ fsubd %f2,%f42,%f42 986/* 0x0104 */ std %f42,[%g4+48] 987/* 0x0108 */ ble,a,pt %icc,.L900000615 988/* 0x010c */ ldd [%g3+%lo(Zero)],%f2 989 .L77000289: 990/* 0x0110 224 */ cmp %i5,%i3 991/* 0x0114 */ bge,pn %icc,.L77000294 992/* 0x0118 */ sethi %hi(0xfc00),%l0 993 .L77000307: 994/* 0x011c 224 */ sra %i5,0,%l2 995/* 0x0120 */ sll %i5,1,%i4 996/* 0x0124 */ sllx %l2,3,%l1 997/* 0x0128 */ sllx %l2,2,%o1 998/* 0x012c 225 */ sub %i3,%i5,%l3 999/* 0x0130 224 */ add %l0,1023,%l0 1000/* 0x0134 */ add %l1,%i0,%l1 1001/* 0x0138 */ add %o1,%i2,%i2 1002/* 0x013c 225 */ cmp %l3,5 1003/* 0x0140 */ bl,pn %icc,.L77000291 1004/* 0x0144 0 */ sethi %hi(___const_seg_900000601),%l7 1005 .L900000612: 1006/* 0x0148 225 */ prefetch [%l1],22 1007/* 0x014c */ prefetch [%l1+64],22 1008/* 0x0150 */ sra %i4,0,%l6 1009/* 0x0154 226 */ sethi %hi(___const_seg_900000601+8),%l2 1010/* 0x0158 225 */ prefetch [%l1+128],22 1011/* 0x015c */ add %l6,-2,%l5 1012/* 0x0160 */ sub %i3,3,%i0 1013/* 0x0164 */ prefetch [%l1+192],22 1014/* 0x0168 */ sllx %l5,3,%o4 1015/* 0x016c 228 */ add %i5,1,%i5 1016/* 0x0170 225 */ add %i1,%o4,%o3 1017/* 0x0174 */ or %g0,%i3,%g1 1018/* 0x0178 */ ld [%i2],%l4 1019/* 0x017c */ prefetch [%o3+16],22 1020/* 0x0180 */ add %o3,16,%l3 1021/* 0x0184 228 */ add %i2,4,%i2 1022/* 0x0188 225 */ prefetch [%o3+80],22 1023/* 0x018c 228 */ srl %l4,16,%o1 1024/* 0x0190 227 */ and %l4,%l0,%o0 1025/* 0x0194 225 */ prefetch [%o3+144],22 1026/* 0x0198 228 */ st %o1,[%sp+2271] 1027/* 0x019c 227 */ st %o0,[%sp+2239] 1028/* 0x01a0 226 */ ldd 
[%l7+%lo(___const_seg_900000601)],%f32 1029/* 0x01a4 228 */ ld [%l2+%lo(___const_seg_900000601+8)],%f0 1030/* 0x01a8 225 */ prefetch [%o3+208],22 1031/* 0x01ac */ prefetch [%o3+272],22 1032/* 0x01b0 */ prefetch [%o3+336],22 1033 .L900000610: 1034/* 0x01b4 225 */ prefetch [%l1+192],22 1035/* 0x01b8 228 */ add %i5,4,%i5 1036/* 0x01bc 225 */ add %l3,64,%l3 1037/* 0x01c0 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f8 1038/* 0x01c4 228 */ cmp %i5,%i0 1039/* 0x01c8 225 */ ld [%i2],%g5 1040/* 0x01cc 228 */ add %i2,16,%i2 1041/* 0x01d0 */ add %l1,32,%l1 1042/* 0x01d4 */ add %i4,8,%i4 1043/* 0x01d8 226 */ ld [%i2-20],%f7 1044/* 0x01dc 228 */ srl %g5,16,%i3 1045/* 0x01e0 226 */ fmovs %f8,%f6 1046/* 0x01e4 228 */ st %i3,[%sp+2335] 1047/* 0x01e8 227 */ and %g5,%l0,%g4 1048/* 0x01ec */ st %g4,[%sp+2303] 1049/* 0x01f0 226 */ fsubd %f6,%f32,%f40 1050/* 0x01f4 227 */ ld [%sp+2239],%f9 1051/* 0x01f8 228 */ ld [%sp+2271],%f1 1052/* 0x01fc */ fmovs %f8,%f12 1053/* 0x0200 226 */ std %f40,[%l1-32] 1054/* 0x0204 227 */ fsubd %f8,%f32,%f42 1055/* 0x0208 */ std %f42,[%l3-64] 1056/* 0x020c 228 */ fsubd %f0,%f32,%f44 1057/* 0x0210 */ std %f44,[%l3-56] 1058/* 0x0214 227 */ fmovs %f12,%f10 1059/* 0x0218 225 */ ld [%i2-12],%g2 1060/* 0x021c 226 */ ld [%i2-16],%f1 1061/* 0x0220 228 */ srl %g2,16,%g3 1062/* 0x0224 226 */ fmovs %f12,%f0 1063/* 0x0228 225 */ prefetch [%l3+320],22 1064/* 0x022c 228 */ st %g3,[%sp+2271] 1065/* 0x0230 227 */ and %g2,%l0,%l6 1066/* 0x0234 */ st %l6,[%sp+2239] 1067/* 0x0238 226 */ fsubd %f0,%f32,%f46 1068/* 0x023c 227 */ ld [%sp+2303],%f11 1069/* 0x0240 228 */ ld [%sp+2335],%f13 1070/* 0x0244 */ fmovs %f12,%f18 1071/* 0x0248 226 */ std %f46,[%l1-24] 1072/* 0x024c 227 */ fsubd %f10,%f32,%f48 1073/* 0x0250 */ std %f48,[%l3-48] 1074/* 0x0254 228 */ fsubd %f12,%f32,%f50 1075/* 0x0258 */ std %f50,[%l3-40] 1076/* 0x025c 227 */ fmovs %f18,%f16 1077/* 0x0260 225 */ ld [%i2-8],%o5 1078/* 0x0264 226 */ ld [%i2-12],%f15 1079/* 0x0268 228 */ srl %o5,16,%l5 1080/* 0x026c 226 */ 
fmovs %f18,%f14 1081/* 0x0270 228 */ st %l5,[%sp+2335] 1082/* 0x0274 227 */ and %o5,%l0,%o4 1083/* 0x0278 */ st %o4,[%sp+2303] 1084/* 0x027c 226 */ fsubd %f14,%f32,%f52 1085/* 0x0280 227 */ ld [%sp+2239],%f17 1086/* 0x0284 228 */ ld [%sp+2271],%f19 1087/* 0x0288 225 */ prefetch [%l3+352],22 1088/* 0x028c 228 */ fmovs %f18,%f24 1089/* 0x0290 226 */ std %f52,[%l1-16] 1090/* 0x0294 227 */ fsubd %f16,%f32,%f54 1091/* 0x0298 */ std %f54,[%l3-32] 1092/* 0x029c 228 */ fsubd %f18,%f32,%f56 1093/* 0x02a0 */ std %f56,[%l3-24] 1094/* 0x02a4 227 */ fmovs %f24,%f22 1095/* 0x02a8 225 */ ld [%i2-4],%l4 1096/* 0x02ac 226 */ ld [%i2-8],%f21 1097/* 0x02b0 228 */ srl %l4,16,%o3 1098/* 0x02b4 226 */ fmovs %f24,%f20 1099/* 0x02b8 228 */ st %o3,[%sp+2271] 1100/* 0x02bc 227 */ and %l4,%l0,%o2 1101/* 0x02c0 */ st %o2,[%sp+2239] 1102/* 0x02c4 226 */ fsubd %f20,%f32,%f58 1103/* 0x02c8 227 */ ld [%sp+2303],%f23 1104/* 0x02cc 228 */ ld [%sp+2335],%f25 1105/* 0x02d0 */ fmovs %f24,%f0 1106/* 0x02d4 226 */ std %f58,[%l1-8] 1107/* 0x02d8 227 */ fsubd %f22,%f32,%f60 1108/* 0x02dc */ std %f60,[%l3-16] 1109/* 0x02e0 228 */ fsubd %f24,%f32,%f62 1110/* 0x02e4 */ bl,pt %icc,.L900000610 1111/* 0x02e8 */ std %f62,[%l3-8] 1112 .L900000613: 1113/* 0x02ec 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4 1114/* 0x02f0 228 */ add %l1,8,%l1 1115/* 0x02f4 */ cmp %i5,%g1 1116/* 0x02f8 226 */ ld [%i2-4],%f3 1117/* 0x02fc 225 */ or %g0,%g1,%i3 1118/* 0x0300 228 */ add %i4,2,%i4 1119/* 0x0304 227 */ ld [%sp+2239],%f5 1120/* 0x0308 226 */ fmovs %f4,%f2 1121/* 0x030c 228 */ ld [%sp+2271],%f1 1122/* 0x0310 226 */ fsubd %f2,%f32,%f34 1123/* 0x0314 */ std %f34,[%l1-8] 1124/* 0x0318 227 */ fsubd %f4,%f32,%f36 1125/* 0x031c */ std %f36,[%l3] 1126/* 0x0320 228 */ fsubd %f0,%f32,%f38 1127/* 0x0324 */ bge,pn %icc,.L77000294 1128/* 0x0328 */ std %f38,[%l3+8] 1129 .L77000291: 1130/* 0x032c 225 */ ld [%i2],%o2 1131 .L900000614: 1132/* 0x0330 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32 1133/* 0x0334 228 */ srl %o2,16,%l3 
1134/* 0x0338 227 */ sra %i4,0,%i0
1135/* 0x033c 228 */ st %l3,[%sp+2367]
1136/* 0x0340 227 */ and %o2,%l0,%g1
1137/* 0x0344 226 */ sethi %hi(___const_seg_900000601+8),%l2
1138/* 0x0348 227 */ st %g1,[%sp+2399]
1139/* 0x034c */ sllx %i0,3,%o0
1140/* 0x0350 228 */ add %i4,1,%l4
1141/* 0x0354 226 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4
1142/* 0x0358 228 */ sra %l4,0,%o1
1143/* 0x035c */ add %i5,1,%i5
1144/* 0x0360 226 */ ld [%i2],%f5
1145/* 0x0364 228 */ sllx %o1,3,%g5
1146/* 0x0368 */ cmp %i5,%i3
1147/* 0x036c */ ld [%sp+2367],%f9
1148/* 0x0370 */ add %i2,4,%i2
1149/* 0x0374 */ add %i4,2,%i4
1150/* 0x0378 227 */ fmovs %f4,%f6
1151/* 0x037c 226 */ fsubd %f4,%f32,%f44
1152/* 0x0380 */ std %f44,[%l1]
1153/* 0x0384 227 */ ld [%sp+2399],%f7
1154/* 0x0388 228 */ fmovs %f6,%f8
1155/* 0x038c */ add %l1,8,%l1
1156/* 0x0390 */ fsubd %f8,%f32,%f48
1157/* 0x0394 227 */ fsubd %f6,%f32,%f46
1158/* 0x0398 */ std %f46,[%i1+%o0]
1159/* 0x039c 228 */ std %f48,[%i1+%g5]
1160/* 0x03a0 */ bl,a,pt %icc,.L900000614
1161/* 0x03a4 225 */ ld [%i2],%o2
1162 .L77000294:
1163/* 0x03a8 222 */ ret ! Result =
1164/* 0x03ac */ restore %g0,%g0,%g0
1165/* 0x03b0 0 */ .type conv_i32_to_d32_and_d16,2
1166/* 0x03b0 0 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
1167
1168 .section ".text",#alloc,#execinstr
1169/* 000000 0 */ .align 32
1170
1171! 229 ! }
1172! 230 !}
1173! 232 !extern long long c1, c2, c3, c4;
1174! 234 !static void
1175! 235 !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
1176! 236 !{
1177
1178!
1179! SUBROUTINE adjust_montf_result
1180!
1181! OFFSET SOURCE LINE LABEL INSTRUCTION
1182
/*
 * adjust_montf_result(i32, nint, len)
 *
 * Leaf routine: no save/restore, returns with retl.
 * On entry %o0 = i32, %o1 = nint, %o2 = len.
 *
 * Follows the C source echoed in the '!' comments.  If i32[len] is nonzero,
 * or if scanning from index len-1 down to 0 finds every word equal or finds
 * i32[i] > nint[i] at the first differing word, then nint[] is subtracted
 * from i32[0..len-1] in place.  The subtraction runs upward from index 0,
 * keeps the low 32 bits of each 64-bit difference, and carries the
 * arithmetic-shifted upper half (the borrow) into the next word.
 * Otherwise i32[] is left unmodified.
 *
 * Register roles once the prologue has run:
 *   %g2 = len (sign-extended)    %g3 = len - 1
 *   %o4 = i32                    %o5 = nint
 *
 * Note that the instruction after each branch sits in the branch delay
 * slot; several of them carry work that belongs to the branch target.
 */
1183 adjust_montf_result:
1184/* 000000 236 */ sra %o2,0,%g2
1185/* 0x0004 */ or %g0,%o0,%o4
1186
1187! 237 ! int64_t acc;
1188! 238 ! int i;
1189! 240 ! if (i32[len] > 0) {
1190
/* Load i32[len]; branch to the word-by-word compare when it is zero. */
1191/* 0x0008 240 */ sllx %g2,2,%g3
1192/* 0x000c */ ld [%o0+%g3],%o0
1193/* 0x0010 */ cmp %o0,0
1194/* 0x0014 */ bleu,pn %icc,.L77000316
/* delay slot: %o5 = nint (needed on both paths) */
1195/* 0x0018 236 */ or %g0,%o1,%o5
1196
1197! 241 ! i = -1;
1198
/*
 * i32[len] != 0: no compare needed.  Set %g3 = len - 1 and go straight to
 * the subtraction; the delay slot performs the "len <= 0" test consumed at
 * .L900000712.
 */
1199 .L77000315:
1200/* 0x001c 241 */ sub %g2,1,%g3
1201/* 0x0020 */ ba .L900000712
1202/* 0x0024 249 */ cmp %g2,0
1203
1204! 242 ! } else {
1205! 243 ! for (i = len - 1; i >= 0; i--) {
1206
/*
 * Compare loop.  %o3 = i (starts at len - 1), %o2 = &i32[i], %o1 = &nint[i],
 * %g4 = nint[i].  If len - 1 is negative the loop is skipped and the code
 * proceeds to the subtraction test at .L77000340.
 */
1207 .L77000316:
1208/* 0x0028 243 */ subcc %g2,1,%g3
1209/* 0x002c */ bneg,pn %icc,.L77000340
1210/* 0x0030 */ or %g0,%g3,%o3
1211 .L77000348:
1212/* 0x0034 243 */ sra %g3,0,%o1
1213/* 0x0038 */ sllx %o1,2,%g1
1214
1215! 244 ! if (i32[i] != nint[i]) break;
1216
1217/* 0x003c 244 */ ld [%g1+%o5],%g4
1218/* 0x0040 243 */ add %g1,%o4,%o2
1219/* 0x0044 */ add %g1,%o5,%o1
1220 .L900000713:
1221/* 0x0048 244 */ ld [%o2],%o0
1222/* 0x004c */ cmp %o0,%g4
1223/* 0x0050 */ bne,pn %icc,.L77000324
1224/* 0x0054 */ sub %o2,4,%o2
1225 .L77000320:
1226/* 0x0058 244 */ sub %o1,4,%o1
1227/* 0x005c */ subcc %o3,1,%o3
/* annulled branch: the next nint word is loaded only when looping again */
1228/* 0x0060 */ bpos,a,pt %icc,.L900000713
1229/* 0x0064 */ ld [%o1],%g4
/* All len words compared equal (i went below 0): go subtract. */
1230 .L900000706:
1231/* 0x0068 244 */ ba .L900000712
1232/* 0x006c 249 */ cmp %g2,0
/*
 * Words differ at index i (%o3).  Reload both words and return without
 * modifying i32[] when i32[i] <= nint[i] (unsigned compare).
 */
1233 .L77000324:
1234/* 0x0070 244 */ sra %o3,0,%o0
1235/* 0x0074 */ sllx %o0,2,%g1
1236/* 0x0078 */ ld [%o5+%g1],%o3
1237/* 0x007c */ ld [%o4+%g1],%g5
1238/* 0x0080 */ cmp %g5,%o3
1239/* 0x0084 */ bleu,pt %icc,.L77000332
1240/* 0x0088 */ nop
1241
1242! 245 ! }
1243! 246 ! }
1244! 247 ! if ((i < 0) || (i32[i] > nint[i])) {
1245! 248 ! acc = 0;
1246! 249 ! for (i = 0; i < len; i++) {
1247
/*
 * Subtraction.  Nothing to do when len <= 0.  %o0 becomes the loop index
 * and %g2 is reused as the running carry (acc >> 32).  Lengths below 10
 * go directly to the one-word-per-pass loop at .L77000341.
 */
1248 .L77000340:
1249/* 0x008c 249 */ cmp %g2,0
1250 .L900000712:
1251/* 0x0090 249 */ ble,pn %icc,.L77000332
1252/* 0x0094 250 */ or %g0,%g2,%o3
1253 .L77000347:
1254/* 0x0098 249 */ or %g0,0,%o0
1255
1256! 250 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1257
1258/* 0x009c 250 */ cmp %o3,10
1259/* 0x00a0 */ bl,pn %icc,.L77000341
1260/* 0x00a4 249 */ or %g0,0,%g2
/*
 * len >= 10: pipelined path.  Word 0 is subtracted and stored here, and
 * i32[1] is already loaded into %g5 for the first pass of the unrolled
 * loop.  %o2 = i32 + 8 bytes, %o1 = nint + 4 bytes, %o5 = len - 8 (loop
 * bound), %o0 = 2.  %o4 is used as a scratch register from here until
 * .L900000710 restores it as the i32 pointer.
 */
1261 .L900000709:
1262/* 0x00a8 250 */ prefetch [%o4],22
1263/* 0x00ac */ prefetch [%o4+64],22
1264
1265! 251 ! i32[i] = acc & 0xffffffff;
1266! 252 ! acc = acc >> 32;
1267
1268/* 0x00b0 252 */ add %o5,4,%o1
1269/* 0x00b4 */ add %o4,8,%o2
1270/* 0x00b8 250 */ prefetch [%o4+128],22
1271/* 0x00bc */ sub %o3,8,%o5
1272/* 0x00c0 */ or %g0,2,%o0
1273/* 0x00c4 */ prefetch [%o4+192],22
1274/* 0x00c8 */ prefetch [%o4+256],22
1275/* 0x00cc */ prefetch [%o4+320],22
1276/* 0x00d0 */ prefetch [%o4+384],22
1277/* 0x00d4 */ ld [%o2-4],%g5
1278/* 0x00d8 */ prefetch [%o2+440],22
1279/* 0x00dc */ prefetch [%o2+504],22
1280/* 0x00e0 */ ld [%o4],%g4
1281/* 0x00e4 */ ld [%o1-4],%o4
1282/* 0x00e8 */ sub %g4,%o4,%o3
1283/* 0x00ec 251 */ st %o3,[%o2-8]
1284/* 0x00f0 252 */ srax %o3,32,%g4
/*
 * Unrolled loop: eight words stored per pass.  Each step is
 *   diff = i32 word - nint word; sum = diff + carry;
 *   store low 32 bits of sum; carry = sum >> 32 (arithmetic).
 * The i32 word for the following step is loaded before the current store,
 * and the last load of a pass (into %g5) feeds the next pass.  The carry
 * leaving the pass is produced in the branch delay slot (%g4).
 */
1285 .L900000707:
1286/* 0x00f4 252 */ add %o0,8,%o0
1287/* 0x00f8 */ add %o2,32,%o2
1288/* 0x00fc 250 */ ld [%o1],%g1
1289/* 0x0100 */ prefetch [%o2+496],22
1290/* 0x0104 252 */ cmp %o0,%o5
1291/* 0x0108 */ add %o1,32,%o1
1292/* 0x010c 250 */ sub %g5,%g1,%g5
1293/* 0x0110 */ add %g5,%g4,%o4
1294/* 0x0114 */ ld [%o2-32],%g4
1295/* 0x0118 251 */ st %o4,[%o2-36]
1296/* 0x011c 252 */ srax %o4,32,%g1
1297/* 0x0120 250 */ ld [%o1-28],%o3
1298/* 0x0124 */ sub %g4,%o3,%g2
1299/* 0x0128 */ add %g2,%g1,%g5
1300/* 0x012c */ ld [%o2-28],%o3
1301/* 0x0130 251 */ st %g5,[%o2-32]
1302/* 0x0134 252 */ srax %g5,32,%g4
1303/* 0x0138 250 */ ld [%o1-24],%o4
1304/* 0x013c */ sub %o3,%o4,%g1
1305/* 0x0140 */ add %g1,%g4,%g2
1306/* 0x0144 */ ld [%o2-24],%o3
1307/* 0x0148 251 */ st %g2,[%o2-28]
1308/* 0x014c 252 */ srax %g2,32,%g5
1309/* 0x0150 250 */ ld [%o1-20],%o4
1310/* 0x0154 */ sub %o3,%o4,%g4
1311/* 0x0158 */ add %g4,%g5,%g1
1312/* 0x015c */ ld [%o2-20],%o4
1313/* 0x0160 251 */ st %g1,[%o2-24]
1314/* 0x0164 252 */ srax %g1,32,%o3
1315/* 0x0168 250 */ ld [%o1-16],%g2
1316/* 0x016c */ sub %o4,%g2,%g5
1317/* 0x0170 */ add %g5,%o3,%g1
1318/* 0x0174 */ ld [%o2-16],%g4
1319/* 0x0178 251 */ st %g1,[%o2-20]
1320/* 0x017c 252 */ srax %g1,32,%o4
1321/* 0x0180 250 */ ld [%o1-12],%g2
1322/* 0x0184 */ sub %g4,%g2,%o3
1323/* 0x0188 */ add %o3,%o4,%g5
1324/* 0x018c */ ld [%o2-12],%g2
1325/* 0x0190 251 */ st %g5,[%o2-16]
1326/* 0x0194 252 */ srax %g5,32,%g4
1327/* 0x0198 250 */ ld [%o1-8],%g1
1328/* 0x019c */ sub %g2,%g1,%o4
1329/* 0x01a0 */ add %o4,%g4,%o3
1330/* 0x01a4 */ ld [%o2-8],%g2
1331/* 0x01a8 251 */ st %o3,[%o2-12]
1332/* 0x01ac 252 */ srax %o3,32,%g5
1333/* 0x01b0 250 */ ld [%o1-4],%g1
1334/* 0x01b4 */ sub %g2,%g1,%g4
1335/* 0x01b8 */ add %g4,%g5,%o4
1336/* 0x01bc */ ld [%o2-4],%g5
1337/* 0x01c0 251 */ st %o4,[%o2-8]
1338/* 0x01c4 252 */ ble,pt %icc,.L900000707
1339/* 0x01c8 */ srax %o4,32,%g4
/*
 * Drain: finish the word whose i32 value is still held in %g5, then set up
 * %o4 (i32 pointer), %o5 (nint pointer) and %g2 (carry) for the remainder
 * loop.  Return when the index in %o0 is already past len - 1.
 */
1340 .L900000710:
1341/* 0x01cc 250 */ ld [%o1],%o3
1342/* 0x01d0 252 */ add %o1,4,%o5
1343/* 0x01d4 250 */ or %g0,%o2,%o4
1344/* 0x01d8 252 */ cmp %o0,%g3
1345/* 0x01dc 250 */ sub %g5,%o3,%g2
1346/* 0x01e0 */ add %g2,%g4,%g1
1347/* 0x01e4 251 */ st %g1,[%o2-4]
1348/* 0x01e8 252 */ bg,pn %icc,.L77000332
1349/* 0x01ec */ srax %g1,32,%g2
/*
 * Remainder loop, one word per pass, while the incremented index is
 * <= len - 1.  The annulled delay slot preloads the next i32 word only
 * when the loop continues.
 */
1350 .L77000341:
1351/* 0x01f0 250 */ ld [%o4],%g5
1352 .L900000711:
1353/* 0x01f4 250 */ ld [%o5],%o2
1354/* 0x01f8 */ add %g2,%g5,%g4
1355/* 0x01fc 252 */ add %o0,1,%o0
1356/* 0x0200 */ cmp %o0,%g3
1357/* 0x0204 */ add %o5,4,%o5
1358/* 0x0208 250 */ sub %g4,%o2,%o1
1359/* 0x020c 251 */ st %o1,[%o4]
1360/* 0x0210 252 */ srax %o1,32,%g2
1361/* 0x0214 */ add %o4,4,%o4
1362/* 0x0218 */ ble,a,pt %icc,.L900000711
1363/* 0x021c 250 */ ld [%o4],%g5
/* Common exit for every path. */
1364 .L77000332:
1365/* 0x0220 252 */ retl ! Result =
1366/* 0x0224 */ nop
1367/* 0x0228 0 */ .type adjust_montf_result,2
1368/* 0x0228 0 */ .size adjust_montf_result,(.-adjust_montf_result)
1369
1370 .section ".text",#alloc,#execinstr
1371/* 000000 0 */ .align 32
1372
1373! 253 ! }
1374! 254 ! }
1375! 255 !}
1376! 257 !/*************
1377! 258 !static void
1378! 259 !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len)
1379! 260 !{
1380! 261 ! int64_t acc;
1381! 262 ! int i;
1382! 264 ! c4++;
1383! 265 !
1384! 266 ! if (i32[len] > 0) {
1385! 267 ! i = -1;
1386! 268 ! c1++;
1387! 269 ! } else {
1388! 270 ! for (i = len - 1; i >= 0; i++) {
1389! 271 ! 
if (i32[i] != nint[i]) break; 1390! 272 ! c2++; 1391! 273 ! } 1392! 274 ! } 1393! 275 ! if ((i < 0) || (i32[i] > nint[i])) { 1394! 276 ! c3++; 1395! 277 ! acc = 0; 1396! 278 ! for (i = 0; i < len; i++) { 1397! 279 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]); 1398! 280 ! i32[i] = acc & 0xffffffff; 1399! 281 ! acc = acc >> 32; 1400! 282 ! } 1401! 283 ! } 1402! 284 !} 1403! 285 !uint32_t saveresult[1000]; 1404! 286 !void printarray(char *name, uint32_t *arr, int len) 1405! 287 !{ 1406! 288 ! int i, j; 1407! 289 ! uint64_t tmp; 1408! 291 ! printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2); 1409! 292 ! for(i=j=0; i<len; i+=2,j+=2){ 1410! 293 ! if(j == 6){ 1411! 294 ! printf("\n"); 1412! 295 ! j=0; 1413! 296 ! } 1414! 297 ! tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]); 1415! 298 ! printf("0x%016llx",tmp); 1416! 299 ! if((i/2)!=(((len+1)/2)-1))printf(","); 1417! 300 ! if(j!=4)printf(" "); 1418! 301 ! } 1419! 302 ! if(j!=0) printf("\n"); 1420! 303 ! printf("};\n"); 1421! 304 !} 1422! 305 !**************/ 1423! 308 !/* 1424! 309 ! * the lengths of the input arrays should be at least the following: 1425! 310 ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] 1426! 311 ! * all of them should be different from one another 1427! 312 ! */ 1428! 313 !void mont_mulf_noconv(uint32_t *result, 1429! 314 ! double *dm1, double *dm2, double *dt, 1430! 315 ! double *dn, uint32_t *nint, 1431! 316 ! int nlen, double dn0) 1432! 317 !{ 1433 1434! 1435! SUBROUTINE mont_mulf_noconv 1436! 1437! OFFSET SOURCE LINE LABEL INSTRUCTION 1438 1439 .global mont_mulf_noconv 1440 mont_mulf_noconv: 1441/* 000000 317 */ save %sp,-176,%sp 1442/* 0x0004 */ ldx [%fp+2223],%g1 1443/* 0x0008 0 */ sethi %hi(Zero),%l5 1444/* 0x000c 317 */ or %g0,%i2,%l0 1445 1446! 318 ! int i, j, jj; 1447! 319 ! double digit, m2j, a, b; 1448! 320 ! double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; 1449! 322 ! pdm1 = &(dm1[0]); 1450! 323 ! pdm2 = &(dm2[0]); 1451! 324 ! 
pdn = &(dn[0]); 1452! 325 ! pdm2[2 * nlen] = Zero; 1453 1454/* 0x0010 325 */ ldd [%l5+%lo(Zero)],%f0 1455/* 0x0014 317 */ or %g0,%i0,%i2 1456/* 0x0018 325 */ sll %g1,1,%o3 1457 1458! 327 ! if (nlen != 16) { 1459 1460/* 0x001c 327 */ cmp %g1,16 1461/* 0x0020 325 */ sra %o3,0,%i0 1462/* 0x0024 */ sllx %i0,3,%o0 1463/* 0x0028 317 */ or %g0,%i5,%i0 1464/* 0x002c 327 */ bne,pn %icc,.L77000476 1465/* 0x0030 325 */ std %f0,[%l0+%o0] 1466 .L77000488: 1467/* 0x0034 0 */ sethi %hi(TwoToMinus16),%o2 1468/* 0x0038 0 */ sethi %hi(TwoTo16),%l3 1469 1470! 328 ! for (i = 0; i < 4 * nlen + 2; i++) 1471! 329 ! dt[i] = Zero; 1472! 330 ! a = dt[0] = pdm1[0] * pdm2[0]; 1473! 331 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); 1474! 333 ! pdtj = &(dt[0]); 1475! 334 ! for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) { 1476! 335 ! m2j = pdm2[j]; 1477! 336 ! a = pdtj[0] + pdn[0] * digit; 1478! 337 ! b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16; 1479! 338 ! pdtj[1] = b; 1480! 340 !#pragma pipeloop(0) 1481! 341 ! for (i = 1; i < nlen; i++) { 1482! 342 ! pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit; 1483! 343 ! } 1484! 344 ! if (jj == 15) { 1485! 345 ! cleanup(dt, j / 2 + 1, 2 * nlen + 1); 1486! 346 ! jj = 0; 1487! 347 ! } 1488! 349 ! digit = mod(lower32(b, Zero) * dn0, 1489! 350 ! TwoToMinus16, TwoTo16); 1490! 351 ! } 1491! 352 ! } else { 1492! 353 ! a = dt[0] = pdm1[0] * pdm2[0]; 1493 1494/* 0x003c 353 */ ldd [%i1],%f40 1495 1496! 355 ! dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] = 1497! 356 ! dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = 1498! 357 ! dt[54] = dt[53] = dt[52] = dt[51] = dt[50] = 1499! 358 ! dt[49] = dt[48] = dt[47] = dt[46] = dt[45] = 1500! 359 ! dt[44] = dt[43] = dt[42] = dt[41] = dt[40] = 1501! 360 ! dt[39] = dt[38] = dt[37] = dt[36] = dt[35] = 1502! 361 ! dt[34] = dt[33] = dt[32] = dt[31] = dt[30] = 1503! 362 ! dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = 1504! 363 ! dt[24] = dt[23] = dt[22] = dt[21] = dt[20] = 1505! 364 ! 
dt[19] = dt[18] = dt[17] = dt[16] = dt[15] = 1506! 365 ! dt[14] = dt[13] = dt[12] = dt[11] = dt[10] = 1507! 366 ! dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] = 1508! 367 ! dt[3] = dt[2] = dt[1] = Zero; 1509! 369 ! pdn_0 = pdn[0]; 1510! 370 ! pdm1_0 = pdm1[0]; 1511! 372 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); 1512! 373 ! pdtj = &(dt[0]); 1513 1514/* 0x0040 373 */ or %g0,%i3,%o3 1515 1516! 375 ! for (j = 0; j < 32; j++, pdtj++) { 1517 1518/* 0x0044 375 */ or %g0,0,%l1 1519/* 0x0048 353 */ ldd [%l0],%f42 1520/* 0x004c 372 */ ldd [%o2+%lo(TwoToMinus16)],%f44 1521/* 0x0050 */ ldd [%l3+%lo(TwoTo16)],%f46 1522/* 0x0054 367 */ std %f0,[%i3+8] 1523/* 0x0058 353 */ fmuld %f40,%f42,%f38 1524/* 0x005c */ std %f38,[%i3] 1525/* 0x0060 367 */ std %f0,[%i3+16] 1526/* 0x0064 */ std %f0,[%i3+24] 1527/* 0x0068 */ std %f0,[%i3+32] 1528/* 0x006c 372 */ fdtox %f38,%f4 1529/* 0x0070 367 */ std %f0,[%i3+40] 1530/* 0x0074 */ std %f0,[%i3+48] 1531/* 0x0078 */ std %f0,[%i3+56] 1532/* 0x007c 372 */ fmovs %f0,%f4 1533/* 0x0080 367 */ std %f0,[%i3+64] 1534/* 0x0084 */ std %f0,[%i3+72] 1535/* 0x0088 372 */ fxtod %f4,%f52 1536/* 0x008c 367 */ std %f0,[%i3+80] 1537/* 0x0090 */ std %f0,[%i3+88] 1538/* 0x0094 */ std %f0,[%i3+96] 1539/* 0x0098 */ std %f0,[%i3+104] 1540/* 0x009c 372 */ fmuld %f52,%f14,%f60 1541/* 0x00a0 367 */ std %f0,[%i3+112] 1542/* 0x00a4 */ std %f0,[%i3+120] 1543/* 0x00a8 */ std %f0,[%i3+128] 1544/* 0x00ac */ std %f0,[%i3+136] 1545/* 0x00b0 372 */ fmuld %f60,%f44,%f62 1546/* 0x00b4 367 */ std %f0,[%i3+144] 1547/* 0x00b8 */ std %f0,[%i3+152] 1548/* 0x00bc */ std %f0,[%i3+160] 1549/* 0x00c0 */ std %f0,[%i3+168] 1550/* 0x00c4 372 */ fdtox %f62,%f32 1551/* 0x00c8 367 */ std %f0,[%i3+176] 1552/* 0x00cc */ std %f0,[%i3+184] 1553/* 0x00d0 */ std %f0,[%i3+192] 1554/* 0x00d4 */ std %f0,[%i3+200] 1555/* 0x00d8 372 */ fxtod %f32,%f50 1556/* 0x00dc 367 */ std %f0,[%i3+208] 1557/* 0x00e0 */ std %f0,[%i3+216] 1558/* 0x00e4 */ std %f0,[%i3+224] 1559/* 0x00e8 */ std 
%f0,[%i3+232] 1560/* 0x00ec 372 */ fmuld %f50,%f46,%f34 1561/* 0x00f0 367 */ std %f0,[%i3+240] 1562/* 0x00f4 */ std %f0,[%i3+248] 1563/* 0x00f8 */ std %f0,[%i3+256] 1564/* 0x00fc */ std %f0,[%i3+264] 1565/* 0x0100 372 */ fsubd %f60,%f34,%f40 1566/* 0x0104 367 */ std %f0,[%i3+272] 1567/* 0x0108 */ std %f0,[%i3+280] 1568/* 0x010c */ std %f0,[%i3+288] 1569/* 0x0110 */ std %f0,[%i3+296] 1570/* 0x0114 */ std %f0,[%i3+304] 1571/* 0x0118 */ std %f0,[%i3+312] 1572/* 0x011c */ std %f0,[%i3+320] 1573/* 0x0120 */ std %f0,[%i3+328] 1574/* 0x0124 */ std %f0,[%i3+336] 1575/* 0x0128 */ std %f0,[%i3+344] 1576/* 0x012c */ std %f0,[%i3+352] 1577/* 0x0130 */ std %f0,[%i3+360] 1578/* 0x0134 */ std %f0,[%i3+368] 1579/* 0x0138 375 */ sub %g1,1,%l3 1580/* 0x013c */ add %i3,8,%o7 1581/* 0x0140 367 */ std %f0,[%i3+376] 1582/* 0x0144 */ std %f0,[%i3+384] 1583/* 0x0148 */ std %f0,[%i3+392] 1584/* 0x014c */ std %f0,[%i3+400] 1585/* 0x0150 */ std %f0,[%i3+408] 1586/* 0x0154 */ std %f0,[%i3+416] 1587/* 0x0158 */ std %f0,[%i3+424] 1588/* 0x015c */ std %f0,[%i3+432] 1589/* 0x0160 */ std %f0,[%i3+440] 1590/* 0x0164 */ std %f0,[%i3+448] 1591/* 0x0168 */ std %f0,[%i3+456] 1592/* 0x016c */ std %f0,[%i3+464] 1593/* 0x0170 */ std %f0,[%i3+472] 1594/* 0x0174 */ std %f0,[%i3+480] 1595/* 0x0178 */ std %f0,[%i3+488] 1596/* 0x017c */ std %f0,[%i3+496] 1597/* 0x0180 */ std %f0,[%i3+504] 1598/* 0x0184 */ std %f0,[%i3+512] 1599/* 0x0188 */ std %f0,[%i3+520] 1600 1601!BEGIN HAND CODED PART 1602 1603! 
cheetah schedule, no even-odd trick 1604 1605 1606 add %i3,%g0,%o5 1607 1608 fmovd %f40,%f0 1609 fmovd %f14,%f2 1610 fmovd %f44,%f8 1611 sethi %hi(TwoTo32),%l5 1612 fmovd %f46,%f10 1613 sethi %hi(TwoToMinus32),%g5 1614 ldd [%i3],%f6 1615 ldd [%l0],%f4 1616 1617 ldd [%i1],%f40 1618 ldd [%i1+8],%f42 1619 ldd [%i1+16],%f52 1620 ldd [%i1+48],%f54 1621 ldd [%i1+56],%f36 1622 ldd [%i1+64],%f56 1623 ldd [%i1+104],%f48 1624 ldd [%i1+112],%f58 1625 1626 ldd [%i4],%f44 1627 ldd [%i4+8],%f46 1628 ldd [%i4+104],%f50 1629 ldd [%i4+112],%f60 1630 1631 1632 .L99999999: 1633!1 1634 ldd [%i1+24],%f20 1635 fmuld %f0,%f44,%f12 1636!2 1637 ldd [%i4+24],%f22 1638 fmuld %f42,%f4,%f16 1639!3 1640 ldd [%i1+40],%f24 1641 fmuld %f46,%f0,%f18 1642!4 1643 ldd [%i4+40],%f26 1644 fmuld %f20,%f4,%f20 1645!5 1646 ldd [%l0+8],%f38 1647 faddd %f12,%f6,%f12 1648 fmuld %f22,%f0,%f22 1649!6 1650 add %l0,8,%l0 1651 ldd [%i4+56],%f30 1652 fmuld %f24,%f4,%f24 1653!7 1654 ldd [%i1+72],%f32 1655 faddd %f16,%f18,%f16 1656 fmuld %f26,%f0,%f26 1657!8 1658 ldd [%i3+16],%f18 1659 fmuld %f40,%f38,%f14 1660!9 1661 ldd [%i4+72],%f34 1662 faddd %f20,%f22,%f20 1663 fmuld %f8,%f12,%f12 1664!10 1665 ldd [%i3+48],%f22 1666 fmuld %f36,%f4,%f28 1667!11 1668 ldd [%i3+8],%f6 1669 faddd %f16,%f18,%f16 1670 fmuld %f30,%f0,%f30 1671!12 1672 std %f16,[%i3+16] 1673 faddd %f24,%f26,%f24 1674 fmuld %f32,%f4,%f32 1675!13 1676 ldd [%i3+80],%f26 1677 faddd %f12,%f14,%f12 1678 fmuld %f34,%f0,%f34 1679!14 1680 ldd [%i1+88],%f16 1681 faddd %f20,%f22,%f20 1682!15 1683 ldd [%i4+88],%f18 1684 faddd %f28,%f30,%f28 1685!16 1686 ldd [%i3+112],%f30 1687 faddd %f32,%f34,%f32 1688!17 1689 ldd [%i3+144],%f34 1690 faddd %f12,%f6,%f6 1691 fmuld %f16,%f4,%f16 1692!18 1693 std %f20,[%i3+48] 1694 faddd %f24,%f26,%f24 1695 fmuld %f18,%f0,%f18 1696!19 1697 std %f24,[%i3+80] 1698 faddd %f28,%f30,%f28 1699 fmuld %f48,%f4,%f20 1700!20 1701 std %f28,[%i3+112] 1702 faddd %f32,%f34,%f32 1703 fmuld %f50,%f0,%f22 1704!21 1705 ldd [%i1+120],%f24 1706 fdtox 
%f6,%f12 1707!22 1708 std %f32,[%i3+144] 1709 faddd %f16,%f18,%f16 1710!23 1711 ldd [%i4+120],%f26 1712!24 1713 ldd [%i3+176],%f18 1714 faddd %f20,%f22,%f20 1715 fmuld %f24,%f4,%f24 1716!25 1717 ldd [%i4+16],%f30 1718 fmovs %f11,%f12 1719!26 1720 ldd [%i1+32],%f32 1721 fmuld %f26,%f0,%f26 1722!27 1723 ldd [%i4+32],%f34 1724 fmuld %f52,%f4,%f28 1725!28 1726 ldd [%i3+208],%f22 1727 faddd %f16,%f18,%f16 1728 fmuld %f30,%f0,%f30 1729!29 1730 std %f16,[%i3+176] 1731 fxtod %f12,%f12 1732 fmuld %f32,%f4,%f32 1733!30 1734 ldd [%i4+48],%f18 1735 faddd %f24,%f26,%f24 1736 fmuld %f34,%f0,%f34 1737!31 1738 ldd [%i3+240],%f26 1739 faddd %f20,%f22,%f20 1740!32 1741 std %f20,[%i3+208] 1742 faddd %f28,%f30,%f28 1743 fmuld %f54,%f4,%f16 1744!33 1745 ldd [%i3+32],%f30 1746 fmuld %f12,%f2,%f14 1747!34 1748 ldd [%i4+64],%f22 1749 faddd %f32,%f34,%f32 1750 fmuld %f18,%f0,%f18 1751!35 1752 ldd [%i3+64],%f34 1753 faddd %f24,%f26,%f24 1754!36 1755 std %f24,[%i3+240] 1756 faddd %f28,%f30,%f28 1757 fmuld %f56,%f4,%f20 1758!37 1759 std %f28,[%i3+32] 1760 fmuld %f14,%f8,%f12 1761!38 1762 ldd [%i1+80],%f24 1763 faddd %f32,%f34,%f34 ! yes, tmp52! 1764 fmuld %f22,%f0,%f22 1765!39 1766 ldd [%i4+80],%f26 1767 faddd %f16,%f18,%f16 1768!40 1769 ldd [%i1+96],%f28 1770 fmuld %f58,%f4,%f32 1771!41 1772 ldd [%i4+96],%f30 1773 fdtox %f12,%f12 1774 fmuld %f24,%f4,%f24 1775!42 1776 std %f34,[%i3+64] ! yes, tmp52! 
1777 faddd %f20,%f22,%f20 1778 fmuld %f26,%f0,%f26 1779!43 1780 ldd [%i3+96],%f18 1781 fmuld %f28,%f4,%f28 1782!44 1783 ldd [%i3+128],%f22 1784 fmovd %f38,%f4 1785 fmuld %f30,%f0,%f30 1786!45 1787 fxtod %f12,%f12 1788 fmuld %f60,%f0,%f34 1789!46 1790 add %i3,8,%i3 1791 faddd %f24,%f26,%f24 1792!47 1793 ldd [%i3+160-8],%f26 1794 faddd %f16,%f18,%f16 1795!48 1796 std %f16,[%i3+96-8] 1797 faddd %f28,%f30,%f28 1798!49 1799 ldd [%i3+192-8],%f30 1800 faddd %f32,%f34,%f32 1801 fmuld %f12,%f10,%f12 1802!50 1803 ldd [%i3+224-8],%f34 1804 faddd %f20,%f22,%f20 1805!51 1806 std %f20,[%i3+128-8] 1807 faddd %f24,%f26,%f24 1808!52 1809 add %l1,1,%l1 1810 std %f24,[%i3+160-8] 1811 faddd %f28,%f30,%f28 1812!53 1813 cmp %l1,15 1814 std %f28,[%i3+192-8] 1815 fsubd %f14,%f12,%f0 1816!54 1817 faddd %f32,%f34,%f32 1818 ble,pt %icc,.L99999999 1819 std %f32,[%i3+224-8] 1820 1821 1822! 1823 ldd [%g5+%lo(TwoToMinus32)],%f8 1824! 1825 ldd [%i3+8],%f16 1826! 1827 ldd [%i3+16],%f20 1828! 1829 fmuld %f8,%f16,%f18 1830 ldd [%i3+24],%f24 1831! 1832 fmuld %f8,%f20,%f22 1833 ldd [%i3+32],%f28 1834! 1835 fmuld %f8,%f24,%f26 1836 ldd [%l5+%lo(TwoTo32)],%f10 1837! 1838 fmuld %f8,%f28,%f30 1839! 1840 fdtox %f18,%f18 1841! 1842 fdtox %f22,%f22 1843! 1844 fdtox %f26,%f26 1845 ldd [%i3+40],%f32 1846! 1847 fdtox %f30,%f30 1848 ldd [%i3+48],%f56 1849! 1850 fxtod %f18,%f18 1851 fmuld %f8,%f32,%f34 1852 ldd [%i3+56],%f36 1853! 1854 fxtod %f22,%f22 1855 fmuld %f8,%f56,%f58 1856 ldd [%i3+64],%f38 1857! 1858 fxtod %f26,%f26 1859 fmuld %f8,%f36,%f60 1860! 1861 fxtod %f30,%f30 1862 fmuld %f8,%f38,%f62 1863! 1864 fdtox %f34,%f34 1865 fmuld %f10,%f18,%f40 1866! 1867 fdtox %f58,%f58 1868 fmuld %f10,%f22,%f42 1869! 1870 fdtox %f60,%f60 1871 fmuld %f10,%f26,%f44 1872! 1873 fdtox %f62,%f62 1874 fmuld %f10,%f30,%f46 1875! 1876 fxtod %f34,%f34 1877! 1878 fxtod %f58,%f58 1879! 1880 fxtod %f60,%f60 1881! 1882 fxtod %f62,%f62 1883! 1884 fsubd %f16,%f40,%f40 1885 fmuld %f10,%f34,%f48 1886! 
1887 fsubd %f20,%f42,%f42 1888 fmuld %f10,%f58,%f50 1889! 1890 fsubd %f24,%f44,%f44 1891 fmuld %f10,%f60,%f52 1892! 1893 fsubd %f28,%f46,%f46 1894 fmuld %f10,%f62,%f54 1895! 1896 std %f40,[%i3+8] 1897! 1898 std %f42,[%i3+16] 1899! 1900 faddd %f18,%f44,%f44 1901 std %f44,[%i3+24] 1902! 1903 faddd %f22,%f46,%f46 1904 std %f46,[%i3+32] 1905! 1906 1907 1908 1909 fsubd %f32,%f48,%f48 1910 ldd [%i3+64+8],%f16 1911! 1912 fsubd %f56,%f50,%f50 1913 ldd [%i3+64+16],%f20 1914! 1915 fsubd %f36,%f52,%f52 1916 ldd [%i3+64+24],%f24 1917! 1918 fsubd %f38,%f54,%f54 1919 ldd [%i3+64+32],%f28 1920! 1921 faddd %f26,%f48,%f48 1922 fmuld %f8,%f16,%f18 1923 std %f48,[%i3+40] 1924! 1925 faddd %f30,%f50,%f50 1926 fmuld %f8,%f20,%f22 1927 std %f50,[%i3+48] 1928! 1929 faddd %f34,%f52,%f52 1930 fmuld %f8,%f24,%f26 1931 std %f52,[%i3+56] 1932! 1933 faddd %f58,%f54,%f54 1934 fmuld %f8,%f28,%f30 1935 std %f54,[%i3+64] 1936! 1937 1938 1939 fdtox %f18,%f18 1940! 1941 fdtox %f22,%f22 1942! 1943 fdtox %f26,%f26 1944 ldd [%i3+64+40],%f32 1945! 1946 fdtox %f30,%f30 1947 ldd [%i3+64+48],%f56 1948! 1949 fxtod %f18,%f18 1950 fmuld %f8,%f32,%f34 1951 ldd [%i3+64+56],%f36 1952! 1953 fxtod %f22,%f22 1954 fmuld %f8,%f56,%f58 1955 ldd [%i3+64+64],%f38 1956! 1957 fxtod %f26,%f26 1958 fmuld %f8,%f36,%f12 1959! 1960 fxtod %f30,%f30 1961 fmuld %f8,%f38,%f14 1962! 1963 fdtox %f34,%f34 1964 fmuld %f10,%f18,%f40 1965! 1966 fdtox %f58,%f58 1967 fmuld %f10,%f22,%f42 1968! 1969 fdtox %f12,%f12 1970 fmuld %f10,%f26,%f44 1971! 1972 fdtox %f14,%f14 1973 fmuld %f10,%f30,%f46 1974! 1975 fxtod %f34,%f34 1976! 1977 fxtod %f58,%f58 1978! 1979 fxtod %f12,%f12 1980! 1981 fxtod %f14,%f14 1982! 1983 fsubd %f16,%f40,%f40 1984 fmuld %f10,%f34,%f48 1985! 1986 fsubd %f20,%f42,%f42 1987 fmuld %f10,%f58,%f50 1988! 1989 fsubd %f24,%f44,%f44 1990 fmuld %f10,%f12,%f52 1991! 1992 fsubd %f28,%f46,%f46 1993 fmuld %f10,%f14,%f54 1994! 1995 faddd %f60,%f40,%f40 1996 std %f40,[%i3+64+8] 1997! 
1998 faddd %f62,%f42,%f42 1999 std %f42,[%i3+64+16] 2000! 2001 faddd %f18,%f44,%f44 2002 std %f44,[%i3+64+24] 2003! 2004 faddd %f22,%f46,%f46 2005 std %f46,[%i3+64+32] 2006! 2007 2008 2009 2010 fsubd %f32,%f48,%f48 2011 ldd [%i3+64+64+8],%f16 2012! 2013 fsubd %f56,%f50,%f50 2014 ldd [%i3+64+64+16],%f20 2015! 2016 fsubd %f36,%f52,%f52 2017 ldd [%i3+64+64+24],%f24 2018! 2019 fsubd %f38,%f54,%f54 2020 ldd [%i3+64+64+32],%f28 2021! 2022 faddd %f26,%f48,%f48 2023 fmuld %f8,%f16,%f18 2024 std %f48,[%i3+64+40] 2025! 2026 faddd %f30,%f50,%f50 2027 fmuld %f8,%f20,%f22 2028 std %f50,[%i3+64+48] 2029! 2030 faddd %f34,%f52,%f52 2031 fmuld %f8,%f24,%f26 2032 std %f52,[%i3+64+56] 2033! 2034 faddd %f58,%f54,%f54 2035 fmuld %f8,%f28,%f30 2036 std %f54,[%i3+64+64] 2037! 2038 2039 2040 2041 fdtox %f18,%f18 2042! 2043 fdtox %f22,%f22 2044! 2045 fdtox %f26,%f26 2046 ldd [%i3+64+64+40],%f32 2047! 2048 fdtox %f30,%f30 2049 ldd [%i3+64+64+48],%f56 2050! 2051 fxtod %f18,%f18 2052 fmuld %f8,%f32,%f34 2053 ldd [%i3+64+64+56],%f36 2054! 2055 fxtod %f22,%f22 2056 fmuld %f8,%f56,%f58 2057 ldd [%i3+64+64+64],%f38 2058! 2059 fxtod %f26,%f26 2060 fmuld %f8,%f36,%f60 2061! 2062 fxtod %f30,%f30 2063 fmuld %f8,%f38,%f62 2064! 2065 fdtox %f34,%f34 2066 fmuld %f10,%f18,%f40 2067! 2068 fdtox %f58,%f58 2069 fmuld %f10,%f22,%f42 2070! 2071 fdtox %f60,%f60 2072 fmuld %f10,%f26,%f44 2073! 2074 fdtox %f62,%f62 2075 fmuld %f10,%f30,%f46 2076! 2077 fxtod %f34,%f34 2078! 2079 fxtod %f58,%f58 2080! 2081 fxtod %f60,%f60 2082! 2083 fxtod %f62,%f62 2084! 2085 fsubd %f16,%f40,%f40 2086 fmuld %f10,%f34,%f48 2087! 2088 fsubd %f20,%f42,%f42 2089 fmuld %f10,%f58,%f50 2090! 2091 fsubd %f24,%f44,%f44 2092 fmuld %f10,%f60,%f52 2093! 2094 fsubd %f28,%f46,%f46 2095 fmuld %f10,%f62,%f54 2096! 2097 faddd %f12,%f40,%f40 2098 std %f40,[%i3+64+64+8] 2099! 2100 faddd %f14,%f42,%f42 2101 std %f42,[%i3+64+64+16] 2102! 2103 faddd %f18,%f44,%f44 2104 std %f44,[%i3+64+64+24] 2105! 
2106 faddd %f22,%f46,%f46 2107 std %f46,[%i3+64+64+32] 2108! 2109 2110 2111 fsubd %f32,%f48,%f48 2112 ldd [%i3+64+64+64+8],%f16 2113! 2114 fsubd %f56,%f50,%f50 2115 ldd [%i3+64+64+64+16],%f20 2116! 2117 fsubd %f36,%f52,%f52 2118 ldd [%i3+64+64+64+24],%f24 2119! 2120 fsubd %f38,%f54,%f54 2121 ldd [%i3+64+64+64+32],%f28 2122! 2123 faddd %f26,%f48,%f48 2124 fmuld %f8,%f16,%f18 2125 std %f48,[%i3+64+64+40] 2126! 2127 faddd %f30,%f50,%f50 2128 fmuld %f8,%f20,%f22 2129 std %f50,[%i3+64+64+48] 2130! 2131 faddd %f34,%f52,%f52 2132 fmuld %f8,%f24,%f26 2133 std %f52,[%i3+64+64+56] 2134! 2135 faddd %f58,%f54,%f54 2136 fmuld %f8,%f28,%f30 2137 std %f54,[%i3+64+64+64] 2138! 2139 2140 2141 fdtox %f18,%f18 2142! 2143 fdtox %f22,%f22 2144! 2145 fdtox %f26,%f26 2146 ldd [%i3+64+64+64+40],%f32 2147! 2148 fdtox %f30,%f30 2149 ldd [%i3+64+64+64+48],%f56 2150! 2151 fxtod %f18,%f18 2152 fmuld %f8,%f32,%f34 2153 ldd [%i3+64+64+64+56],%f36 2154! 2155 fxtod %f22,%f22 2156 fmuld %f8,%f56,%f58 2157 ldd [%i3+64+64+64+64],%f38 2158! 2159 fxtod %f26,%f26 2160 fmuld %f8,%f36,%f12 2161! 2162 fxtod %f30,%f30 2163 fmuld %f8,%f38,%f14 2164! 2165 fdtox %f34,%f34 2166 fmuld %f10,%f18,%f40 2167! 2168 fdtox %f58,%f58 2169 fmuld %f10,%f22,%f42 2170! 2171 fdtox %f12,%f12 2172 fmuld %f10,%f26,%f44 2173! 2174 fdtox %f14,%f14 2175 fmuld %f10,%f30,%f46 2176! 2177 sethi %hi(TwoToMinus16),%g5 2178 fxtod %f34,%f34 2179! 2180 sethi %hi(TwoTo16),%l5 2181 fxtod %f58,%f58 2182! 2183 fxtod %f12,%f12 2184! 2185 fxtod %f14,%f14 2186! 2187 fsubd %f16,%f40,%f16 2188 fmuld %f10,%f34,%f48 2189 ldd [%g5+%lo(TwoToMinus16)],%f8 2190! 2191 fsubd %f20,%f42,%f20 2192 fmuld %f10,%f58,%f50 2193 ldd [%i1],%f40 ! should be %f40 2194! 2195 fsubd %f24,%f44,%f24 2196 fmuld %f10,%f12,%f52 2197 ldd [%i1+8],%f42 ! should be %f42 2198! 2199 fsubd %f28,%f46,%f28 2200 fmuld %f10,%f14,%f54 2201 ldd [%i4],%f44 ! should be %f44 2202! 2203 faddd %f60,%f16,%f16 2204 std %f16,[%i3+64+64+64+8] 2205! 
2206 faddd %f62,%f20,%f20 2207 std %f20,[%i3+64+64+64+16] 2208! 2209 faddd %f18,%f24,%f24 2210 std %f24,[%i3+64+64+64+24] 2211! 2212 faddd %f22,%f28,%f28 2213 std %f28,[%i3+64+64+64+32] 2214! 2215 fsubd %f32,%f48,%f32 2216 ldd [%i4+8],%f46 ! should be %f46 2217! 2218 fsubd %f56,%f50,%f56 2219 ldd [%i1+104],%f48 ! should be %f48 2220! 2221 fsubd %f36,%f52,%f36 2222 ldd [%i4+104],%f50 ! should be %f50 2223! 2224 fsubd %f38,%f54,%f38 2225 ldd [%i1+16],%f52 ! should be %f52 2226! 2227 faddd %f26,%f32,%f32 2228 std %f32,[%i3+64+64+64+40] 2229! 2230 faddd %f30,%f56,%f56 2231 std %f56,[%i3+64+64+64+48] 2232! 2233 faddd %f34,%f36,%f36 2234 std %f36,[%i3+64+64+64+56] 2235! 2236 faddd %f58,%f38,%f38 2237 std %f38,[%i3+64+64+64+64] 2238! 2239 std %f12,[%i3+64+64+64+64+8] 2240! 2241 std %f14,[%i3+64+64+64+64+16] 2242! 2243 2244 ldd [%l5+%lo(TwoTo16)],%f10 2245 ldd [%i1+48],%f54 2246 ldd [%i1+56],%f36 2247 ldd [%i1+64],%f56 2248 ldd [%i1+112],%f58 2249 2250 ldd [%i4+104],%f50 2251 ldd [%i4+112],%f60 2252 2253 2254 .L99999998: 2255!1 2256 ldd [%i1+24],%f20 2257 fmuld %f0,%f44,%f12 2258!2 2259 ldd [%i4+24],%f22 2260 fmuld %f42,%f4,%f16 2261!3 2262 ldd [%i1+40],%f24 2263 fmuld %f46,%f0,%f18 2264!4 2265 ldd [%i4+40],%f26 2266 fmuld %f20,%f4,%f20 2267!5 2268 ldd [%l0+8],%f38 2269 faddd %f12,%f6,%f12 2270 fmuld %f22,%f0,%f22 2271!6 2272 add %l0,8,%l0 2273 ldd [%i4+56],%f30 2274 fmuld %f24,%f4,%f24 2275!7 2276 ldd [%i1+72],%f32 2277 faddd %f16,%f18,%f16 2278 fmuld %f26,%f0,%f26 2279!8 2280 ldd [%i3+16],%f18 2281 fmuld %f40,%f38,%f14 2282!9 2283 ldd [%i4+72],%f34 2284 faddd %f20,%f22,%f20 2285 fmuld %f8,%f12,%f12 2286!10 2287 ldd [%i3+48],%f22 2288 fmuld %f36,%f4,%f28 2289!11 2290 ldd [%i3+8],%f6 2291 faddd %f16,%f18,%f16 2292 fmuld %f30,%f0,%f30 2293!12 2294 std %f16,[%i3+16] 2295 faddd %f24,%f26,%f24 2296 fmuld %f32,%f4,%f32 2297!13 2298 ldd [%i3+80],%f26 2299 faddd %f12,%f14,%f12 2300 fmuld %f34,%f0,%f34 2301!14 2302 ldd [%i1+88],%f16 2303 faddd %f20,%f22,%f20 2304!15 2305 ldd 
[%i4+88],%f18 2306 faddd %f28,%f30,%f28 2307!16 2308 ldd [%i3+112],%f30 2309 faddd %f32,%f34,%f32 2310!17 2311 ldd [%i3+144],%f34 2312 faddd %f12,%f6,%f6 2313 fmuld %f16,%f4,%f16 2314!18 2315 std %f20,[%i3+48] 2316 faddd %f24,%f26,%f24 2317 fmuld %f18,%f0,%f18 2318!19 2319 std %f24,[%i3+80] 2320 faddd %f28,%f30,%f28 2321 fmuld %f48,%f4,%f20 2322!20 2323 std %f28,[%i3+112] 2324 faddd %f32,%f34,%f32 2325 fmuld %f50,%f0,%f22 2326!21 2327 ldd [%i1+120],%f24 2328 fdtox %f6,%f12 2329!22 2330 std %f32,[%i3+144] 2331 faddd %f16,%f18,%f16 2332!23 2333 ldd [%i4+120],%f26 2334!24 2335 ldd [%i3+176],%f18 2336 faddd %f20,%f22,%f20 2337 fmuld %f24,%f4,%f24 2338!25 2339 ldd [%i4+16],%f30 2340 fmovs %f11,%f12 2341!26 2342 ldd [%i1+32],%f32 2343 fmuld %f26,%f0,%f26 2344!27 2345 ldd [%i4+32],%f34 2346 fmuld %f52,%f4,%f28 2347!28 2348 ldd [%i3+208],%f22 2349 faddd %f16,%f18,%f16 2350 fmuld %f30,%f0,%f30 2351!29 2352 std %f16,[%i3+176] 2353 fxtod %f12,%f12 2354 fmuld %f32,%f4,%f32 2355!30 2356 ldd [%i4+48],%f18 2357 faddd %f24,%f26,%f24 2358 fmuld %f34,%f0,%f34 2359!31 2360 ldd [%i3+240],%f26 2361 faddd %f20,%f22,%f20 2362!32 2363 std %f20,[%i3+208] 2364 faddd %f28,%f30,%f28 2365 fmuld %f54,%f4,%f16 2366!33 2367 ldd [%i3+32],%f30 2368 fmuld %f12,%f2,%f14 2369!34 2370 ldd [%i4+64],%f22 2371 faddd %f32,%f34,%f32 2372 fmuld %f18,%f0,%f18 2373!35 2374 ldd [%i3+64],%f34 2375 faddd %f24,%f26,%f24 2376!36 2377 std %f24,[%i3+240] 2378 faddd %f28,%f30,%f28 2379 fmuld %f56,%f4,%f20 2380!37 2381 std %f28,[%i3+32] 2382 fmuld %f14,%f8,%f12 2383!38 2384 ldd [%i1+80],%f24 2385 faddd %f32,%f34,%f34 ! yes, tmp52! 2386 fmuld %f22,%f0,%f22 2387!39 2388 ldd [%i4+80],%f26 2389 faddd %f16,%f18,%f16 2390!40 2391 ldd [%i1+96],%f28 2392 fmuld %f58,%f4,%f32 2393!41 2394 ldd [%i4+96],%f30 2395 fdtox %f12,%f12 2396 fmuld %f24,%f4,%f24 2397!42 2398 std %f34,[%i3+64] ! yes, tmp52! 
2399 faddd %f20,%f22,%f20 2400 fmuld %f26,%f0,%f26 2401!43 2402 ldd [%i3+96],%f18 2403 fmuld %f28,%f4,%f28 2404!44 2405 ldd [%i3+128],%f22 2406 fmovd %f38,%f4 2407 fmuld %f30,%f0,%f30 2408!45 2409 fxtod %f12,%f12 2410 fmuld %f60,%f0,%f34 2411!46 2412 add %i3,8,%i3 2413 faddd %f24,%f26,%f24 2414!47 2415 ldd [%i3+160-8],%f26 2416 faddd %f16,%f18,%f16 2417!48 2418 std %f16,[%i3+96-8] 2419 faddd %f28,%f30,%f28 2420!49 2421 ldd [%i3+192-8],%f30 2422 faddd %f32,%f34,%f32 2423 fmuld %f12,%f10,%f12 2424!50 2425 ldd [%i3+224-8],%f34 2426 faddd %f20,%f22,%f20 2427!51 2428 std %f20,[%i3+128-8] 2429 faddd %f24,%f26,%f24 2430!52 2431 add %l1,1,%l1 2432 std %f24,[%i3+160-8] 2433 faddd %f28,%f30,%f28 2434!53 2435 cmp %l1,31 2436 std %f28,[%i3+192-8] 2437 fsubd %f14,%f12,%f0 2438!54 2439 faddd %f32,%f34,%f32 2440 ble,pt %icc,.L99999998 2441 std %f32,[%i3+224-8] 2442!55 2443 std %f6,[%i3] 2444 2445 add %o5,%g0,%i3 2446 2447 2448!END HAND CODED PART 2449 .L900000828: 2450/* 0x03e4 405 */ ba .L900000852 2451/* 0x03e8 409 */ ldx [%i3+%o0],%l1 2452 2453! 406 ! } 2454! 407 ! } 2455! 409 ! conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1); 2456! 411 !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/ 2457! 413 ! 
adjust_montf_result(result, nint, nlen); 2458 2459 .L77000476: 2460/* 0x03ec 413 */ sll %g1,2,%l3 2461/* 0x03f0 0 */ sethi %hi(TwoTo16),%g5 2462/* 0x03f4 413 */ add %l3,2,%l2 2463/* 0x03f8 328 */ cmp %l2,0 2464/* 0x03fc */ ble,pn %icc,.L77000482 2465/* 0x0400 0 */ sethi %hi(TwoToMinus16),%o2 2466 .L77000514: 2467/* 0x0404 329 */ add %l3,2,%l2 2468/* 0x0408 328 */ add %l3,1,%o4 2469/* 0x040c */ or %g0,0,%l3 2470/* 0x0410 329 */ cmp %l2,8 2471/* 0x0414 */ bl,pn %icc,.L77000477 2472/* 0x0418 328 */ or %g0,%i3,%l1 2473 .L900000831: 2474/* 0x041c 329 */ prefetch [%i3],22 2475/* 0x0420 */ sub %o4,7,%l4 2476/* 0x0424 */ or %g0,0,%l3 2477/* 0x0428 */ or %g0,%i3,%l1 2478 .L900000829: 2479/* 0x042c 329 */ prefetch [%l1+528],22 2480/* 0x0430 */ std %f0,[%l1] 2481/* 0x0434 */ add %l3,8,%l3 2482/* 0x0438 */ add %l1,64,%l1 2483/* 0x043c */ std %f0,[%l1-56] 2484/* 0x0440 */ cmp %l3,%l4 2485/* 0x0444 */ std %f0,[%l1-48] 2486/* 0x0448 */ std %f0,[%l1-40] 2487/* 0x044c */ prefetch [%l1+496],22 2488/* 0x0450 */ std %f0,[%l1-32] 2489/* 0x0454 */ std %f0,[%l1-24] 2490/* 0x0458 */ std %f0,[%l1-16] 2491/* 0x045c */ ble,pt %icc,.L900000829 2492/* 0x0460 */ std %f0,[%l1-8] 2493 .L900000832: 2494/* 0x0464 329 */ cmp %l3,%o4 2495/* 0x0468 */ bg,pn %icc,.L77000482 2496/* 0x046c */ nop 2497 .L77000477: 2498/* 0x0470 329 */ add %l3,1,%l3 2499 .L900000851: 2500/* 0x0474 329 */ std %f0,[%l1] 2501/* 0x0478 */ cmp %l3,%o4 2502/* 0x047c */ add %l1,8,%l1 2503/* 0x0480 */ ble,pt %icc,.L900000851 2504/* 0x0484 */ add %l3,1,%l3 2505 .L77000482: 2506/* 0x0488 330 */ ldd [%i1],%f40 2507/* 0x048c 334 */ cmp %o3,0 2508/* 0x0490 */ sub %g1,1,%l3 2509/* 0x0494 330 */ ldd [%l0],%f42 2510/* 0x0498 331 */ ldd [%o2+%lo(TwoToMinus16)],%f36 2511/* 0x049c */ ldd [%g5+%lo(TwoTo16)],%f38 2512/* 0x04a0 330 */ fmuld %f40,%f42,%f52 2513/* 0x04a4 331 */ fdtox %f52,%f8 2514/* 0x04a8 */ fmovs %f0,%f8 2515/* 0x04ac */ fxtod %f8,%f62 2516/* 0x04b0 */ fmuld %f62,%f14,%f60 2517/* 0x04b4 */ fmuld %f60,%f36,%f32 2518/* 0x04b8 */ 
fdtox %f32,%f50 2519/* 0x04bc */ fxtod %f50,%f34 2520/* 0x04c0 */ fmuld %f34,%f38,%f46 2521/* 0x04c4 */ fsubd %f60,%f46,%f40 2522/* 0x04c8 334 */ ble,pn %icc,.L77000378 2523/* 0x04cc 330 */ std %f52,[%i3] 2524 .L77000509: 2525/* 0x04d0 345 */ add %o3,1,%g5 2526/* 0x04d4 */ sll %g5,1,%o2 2527/* 0x04d8 */ or %g0,0,%l1 2528/* 0x04dc 337 */ ldd [%i4],%f42 2529/* 0x04e0 345 */ sub %o3,1,%o3 2530/* 0x04e4 */ or %g0,0,%o5 2531/* 0x04e8 */ or %g0,%i3,%l2 2532/* 0x04ec */ add %i4,8,%o1 2533/* 0x04f0 */ add %i1,8,%g5 2534 .L900000848: 2535/* 0x04f4 337 */ fmuld %f40,%f42,%f34 2536/* 0x04f8 */ ldd [%l0+8],%f32 2537/* 0x04fc 341 */ cmp %g1,1 2538/* 0x0500 337 */ ldd [%i1],%f50 2539/* 0x0504 */ ldd [%l2],%f46 2540/* 0x0508 */ ldd [%l2+8],%f44 2541/* 0x050c */ fmuld %f50,%f32,%f60 2542/* 0x0510 335 */ ldd [%l0],%f42 2543/* 0x0514 337 */ faddd %f46,%f34,%f48 2544/* 0x0518 */ faddd %f44,%f60,%f58 2545/* 0x051c */ fmuld %f36,%f48,%f54 2546/* 0x0520 */ faddd %f58,%f54,%f34 2547/* 0x0524 341 */ ble,pn %icc,.L77000368 2548/* 0x0528 338 */ std %f34,[%l2+8] 2549 .L77000507: 2550/* 0x052c 341 */ or %g0,1,%l5 2551/* 0x0530 */ or %g0,2,%l4 2552/* 0x0534 */ or %g0,%g5,%g4 2553/* 0x0538 342 */ cmp %l3,12 2554/* 0x053c */ bl,pn %icc,.L77000481 2555/* 0x0540 341 */ or %g0,%o1,%g3 2556 .L900000839: 2557/* 0x0544 342 */ prefetch [%i1+8],0 2558/* 0x0548 */ prefetch [%i1+72],0 2559/* 0x054c */ add %i4,40,%l6 2560/* 0x0550 */ add %i1,40,%l7 2561/* 0x0554 */ prefetch [%l2+16],0 2562/* 0x0558 */ or %g0,%l2,%o7 2563/* 0x055c */ sub %l3,7,%i5 2564/* 0x0560 */ prefetch [%l2+80],0 2565/* 0x0564 */ add %l2,80,%g2 2566/* 0x0568 */ or %g0,2,%l4 2567/* 0x056c */ prefetch [%i1+136],0 2568/* 0x0570 */ or %g0,5,%l5 2569/* 0x0574 */ prefetch [%i1+200],0 2570/* 0x0578 */ prefetch [%l2+144],0 2571/* 0x057c */ ldd [%i4+8],%f52 2572/* 0x0580 */ ldd [%i4+16],%f44 2573/* 0x0584 */ ldd [%i4+24],%f56 2574/* 0x0588 */ fmuld %f40,%f52,%f48 2575/* 0x058c */ fmuld %f40,%f44,%f46 2576/* 0x0590 */ fmuld %f40,%f56,%f44 2577/* 
0x0594 */ ldd [%l2+48],%f56 2578/* 0x0598 */ prefetch [%l2+208],0 2579/* 0x059c */ prefetch [%l2+272],0 2580/* 0x05a0 */ prefetch [%l2+336],0 2581/* 0x05a4 */ prefetch [%l2+400],0 2582/* 0x05a8 */ ldd [%i1+8],%f32 2583/* 0x05ac */ ldd [%i1+16],%f60 2584/* 0x05b0 */ ldd [%i1+24],%f50 2585/* 0x05b4 */ fmuld %f42,%f32,%f62 2586/* 0x05b8 */ ldd [%i1+32],%f32 2587/* 0x05bc */ fmuld %f42,%f60,%f58 2588/* 0x05c0 */ ldd [%l2+16],%f52 2589/* 0x05c4 */ ldd [%l2+32],%f54 2590/* 0x05c8 */ faddd %f62,%f48,%f60 2591/* 0x05cc */ fmuld %f42,%f50,%f48 2592/* 0x05d0 */ faddd %f58,%f46,%f62 2593/* 0x05d4 */ ldd [%i4+32],%f46 2594/* 0x05d8 */ ldd [%l2+64],%f58 2595 .L900000837: 2596/* 0x05dc 342 */ prefetch [%l7+192],0 2597/* 0x05e0 */ fmuld %f40,%f46,%f46 2598/* 0x05e4 */ faddd %f60,%f52,%f60 2599/* 0x05e8 */ ldd [%l6],%f52 2600/* 0x05ec */ std %f60,[%g2-64] 2601/* 0x05f0 */ fmuld %f42,%f32,%f50 2602/* 0x05f4 */ add %l5,8,%l5 2603/* 0x05f8 */ ldd [%l7],%f60 2604/* 0x05fc */ faddd %f48,%f44,%f48 2605/* 0x0600 */ cmp %l5,%i5 2606/* 0x0604 */ ldd [%g2],%f32 2607/* 0x0608 */ add %g2,128,%g2 2608/* 0x060c */ prefetch [%g2+256],0 2609/* 0x0610 */ fmuld %f40,%f52,%f52 2610/* 0x0614 */ faddd %f62,%f54,%f44 2611/* 0x0618 */ ldd [%l6+8],%f54 2612/* 0x061c */ std %f44,[%g2-176] 2613/* 0x0620 */ fmuld %f42,%f60,%f44 2614/* 0x0624 */ add %l6,64,%l6 2615/* 0x0628 */ ldd [%l7+8],%f60 2616/* 0x062c */ faddd %f50,%f46,%f50 2617/* 0x0630 */ add %l7,64,%l7 2618/* 0x0634 */ add %l4,16,%l4 2619/* 0x0638 */ ldd [%g2-112],%f46 2620/* 0x063c */ fmuld %f40,%f54,%f54 2621/* 0x0640 */ faddd %f48,%f56,%f62 2622/* 0x0644 */ ldd [%l6-48],%f56 2623/* 0x0648 */ std %f62,[%g2-160] 2624/* 0x064c */ fmuld %f42,%f60,%f48 2625/* 0x0650 */ ldd [%l7-48],%f60 2626/* 0x0654 */ faddd %f44,%f52,%f52 2627/* 0x0658 */ ldd [%g2-96],%f30 2628/* 0x065c */ prefetch [%g2+288],0 2629/* 0x0660 */ fmuld %f40,%f56,%f56 2630/* 0x0664 */ faddd %f50,%f58,%f62 2631/* 0x0668 */ ldd [%l6-40],%f58 2632/* 0x066c */ std %f62,[%g2-144] 2633/* 
0x0670 */ fmuld %f42,%f60,%f50 2634/* 0x0674 */ ldd [%l7-40],%f62 2635/* 0x0678 */ faddd %f48,%f54,%f54 2636/* 0x067c */ ldd [%g2-80],%f28 2637/* 0x0680 */ prefetch [%l7+160],0 2638/* 0x0684 */ fmuld %f40,%f58,%f48 2639/* 0x0688 */ faddd %f52,%f32,%f44 2640/* 0x068c */ ldd [%l6-32],%f58 2641/* 0x0690 */ std %f44,[%g2-128] 2642/* 0x0694 */ fmuld %f42,%f62,%f44 2643/* 0x0698 */ ldd [%l7-32],%f60 2644/* 0x069c */ faddd %f50,%f56,%f56 2645/* 0x06a0 */ ldd [%g2-64],%f52 2646/* 0x06a4 */ prefetch [%g2+320],0 2647/* 0x06a8 */ fmuld %f40,%f58,%f50 2648/* 0x06ac */ faddd %f54,%f46,%f32 2649/* 0x06b0 */ ldd [%l6-24],%f62 2650/* 0x06b4 */ std %f32,[%g2-112] 2651/* 0x06b8 */ fmuld %f42,%f60,%f46 2652/* 0x06bc */ ldd [%l7-24],%f60 2653/* 0x06c0 */ faddd %f44,%f48,%f48 2654/* 0x06c4 */ ldd [%g2-48],%f54 2655/* 0x06c8 */ fmuld %f40,%f62,%f26 2656/* 0x06cc */ faddd %f56,%f30,%f32 2657/* 0x06d0 */ ldd [%l6-16],%f58 2658/* 0x06d4 */ std %f32,[%g2-96] 2659/* 0x06d8 */ fmuld %f42,%f60,%f30 2660/* 0x06dc */ ldd [%l7-16],%f32 2661/* 0x06e0 */ faddd %f46,%f50,%f60 2662/* 0x06e4 */ ldd [%g2-32],%f56 2663/* 0x06e8 */ prefetch [%g2+352],0 2664/* 0x06ec */ fmuld %f40,%f58,%f44 2665/* 0x06f0 */ faddd %f48,%f28,%f62 2666/* 0x06f4 */ ldd [%l6-8],%f46 2667/* 0x06f8 */ std %f62,[%g2-80] 2668/* 0x06fc */ fmuld %f42,%f32,%f48 2669/* 0x0700 */ ldd [%l7-8],%f32 2670/* 0x0704 */ faddd %f30,%f26,%f62 2671/* 0x0708 */ ble,pt %icc,.L900000837 2672/* 0x070c */ ldd [%g2-16],%f58 2673 .L900000840: 2674/* 0x0710 342 */ fmuld %f40,%f46,%f46 2675/* 0x0714 */ faddd %f62,%f54,%f62 2676/* 0x0718 */ std %f62,[%g2-48] 2677/* 0x071c */ cmp %l5,%l3 2678/* 0x0720 */ fmuld %f42,%f32,%f50 2679/* 0x0724 */ faddd %f48,%f44,%f48 2680/* 0x0728 */ or %g0,%l7,%g4 2681/* 0x072c */ or %g0,%l6,%g3 2682/* 0x0730 */ faddd %f60,%f52,%f60 2683/* 0x0734 */ std %f60,[%g2-64] 2684/* 0x0738 */ or %g0,%o7,%l2 2685/* 0x073c */ add %l4,8,%l4 2686/* 0x0740 */ faddd %f50,%f46,%f54 2687/* 0x0744 */ faddd %f48,%f56,%f56 2688/* 0x0748 */ std 
%f56,[%g2-32] 2689/* 0x074c */ faddd %f54,%f58,%f58 2690/* 0x0750 */ bg,pn %icc,.L77000368 2691/* 0x0754 */ std %f58,[%g2-16] 2692 .L77000481: 2693/* 0x0758 342 */ ldd [%g4],%f44 2694 .L900000850: 2695/* 0x075c 342 */ ldd [%g3],%f48 2696/* 0x0760 */ fmuld %f42,%f44,%f58 2697/* 0x0764 */ sra %l4,0,%l7 2698/* 0x0768 */ add %l5,1,%l5 2699/* 0x076c */ sllx %l7,3,%g2 2700/* 0x0770 */ add %g4,8,%g4 2701/* 0x0774 */ ldd [%l2+%g2],%f56 2702/* 0x0778 */ cmp %l5,%l3 2703/* 0x077c */ add %l4,2,%l4 2704/* 0x0780 */ fmuld %f40,%f48,%f54 2705/* 0x0784 */ add %g3,8,%g3 2706/* 0x0788 */ faddd %f58,%f54,%f52 2707/* 0x078c */ faddd %f52,%f56,%f62 2708/* 0x0790 */ std %f62,[%l2+%g2] 2709/* 0x0794 */ ble,a,pt %icc,.L900000850 2710/* 0x0798 */ ldd [%g4],%f44 2711 .L77000368: 2712/* 0x079c 344 */ cmp %o5,15 2713/* 0x07a0 */ bne,pn %icc,.L77000483 2714/* 0x07a4 345 */ srl %l1,31,%g4 2715 .L77000478: 2716/* 0x07a8 345 */ add %l1,%g4,%l4 2717/* 0x07ac */ sra %l4,1,%o7 2718/* 0x07b0 */ add %o7,1,%o4 2719/* 0x07b4 */ sll %o4,1,%l6 2720/* 0x07b8 */ cmp %l6,%o2 2721/* 0x07bc */ bge,pn %icc,.L77000392 2722/* 0x07c0 */ fmovd %f0,%f42 2723 .L77000508: 2724/* 0x07c4 345 */ sra %l6,0,%l4 2725/* 0x07c8 */ sllx %l4,3,%g2 2726/* 0x07cc */ fmovd %f0,%f32 2727/* 0x07d0 */ sub %o2,1,%l5 2728/* 0x07d4 */ ldd [%g2+%i3],%f40 2729/* 0x07d8 */ add %g2,%i3,%g3 2730 .L900000849: 2731/* 0x07dc 345 */ fdtox %f40,%f10 2732/* 0x07e0 */ ldd [%g3+8],%f52 2733/* 0x07e4 */ add %l6,2,%l6 2734/* 0x07e8 */ cmp %l6,%l5 2735/* 0x07ec */ fdtox %f52,%f2 2736/* 0x07f0 */ fmovd %f10,%f30 2737/* 0x07f4 */ fmovs %f0,%f10 2738/* 0x07f8 */ fmovs %f0,%f2 2739/* 0x07fc */ fxtod %f10,%f10 2740/* 0x0800 */ fxtod %f2,%f2 2741/* 0x0804 */ fdtox %f52,%f28 2742/* 0x0808 */ faddd %f10,%f32,%f56 2743/* 0x080c */ std %f56,[%g3] 2744/* 0x0810 */ faddd %f2,%f42,%f62 2745/* 0x0814 */ std %f62,[%g3+8] 2746/* 0x0818 */ fitod %f30,%f32 2747/* 0x081c */ add %g3,16,%g3 2748/* 0x0820 */ fitod %f28,%f42 2749/* 0x0824 */ ble,a,pt %icc,.L900000849 2750/* 
0x0828 */ ldd [%g3],%f40 2751 .L77000392: 2752/* 0x082c 346 */ or %g0,0,%o5 2753 .L77000483: 2754/* 0x0830 350 */ fdtox %f34,%f6 2755/* 0x0834 */ add %l1,1,%l1 2756/* 0x0838 */ cmp %l1,%o3 2757/* 0x083c */ add %o5,1,%o5 2758/* 0x0840 */ add %l2,8,%l2 2759/* 0x0844 */ add %l0,8,%l0 2760/* 0x0848 */ fmovs %f0,%f6 2761/* 0x084c */ fxtod %f6,%f46 2762/* 0x0850 */ fmuld %f46,%f14,%f56 2763/* 0x0854 */ fmuld %f56,%f36,%f44 2764/* 0x0858 */ fdtox %f44,%f48 2765/* 0x085c */ fxtod %f48,%f58 2766/* 0x0860 */ fmuld %f58,%f38,%f54 2767/* 0x0864 */ fsubd %f56,%f54,%f40 2768/* 0x0868 */ ble,a,pt %icc,.L900000848 2769/* 0x086c 337 */ ldd [%i4],%f42 2770 .L77000378: 2771/* 0x0870 409 */ ldx [%i3+%o0],%l1 2772 .L900000852: 2773/* 0x0874 409 */ add %i3,%o0,%l4 2774/* 0x0878 */ ldx [%l4+8],%i1 2775/* 0x087c */ cmp %l1,0 2776/* 0x0880 */ bne,pn %xcc,.L77000403 2777/* 0x0884 */ or %g0,0,%g5 2778 .L77000402: 2779/* 0x0888 409 */ or %g0,0,%i3 2780/* 0x088c */ ba .L900000847 2781/* 0x0890 */ cmp %i1,0 2782 .L77000403: 2783/* 0x0894 409 */ srlx %l1,52,%o5 2784/* 0x0898 */ sethi %hi(0xfff00000),%i3 2785/* 0x089c */ sllx %i3,32,%o2 2786/* 0x08a0 */ sethi %hi(0x40000000),%o0 2787/* 0x08a4 */ sllx %o0,22,%o4 2788/* 0x08a8 */ or %g0,1023,%l0 2789/* 0x08ac */ xor %o2,-1,%o3 2790/* 0x08b0 */ sub %l0,%o5,%o7 2791/* 0x08b4 */ and %l1,%o3,%l1 2792/* 0x08b8 */ add %o7,52,%i4 2793/* 0x08bc */ or %l1,%o4,%o1 2794/* 0x08c0 */ cmp %i1,0 2795/* 0x08c4 */ srlx %o1,%i4,%i3 2796 .L900000847: 2797/* 0x08c8 409 */ bne,pn %xcc,.L77000409 2798/* 0x08cc */ or %g0,0,%o7 2799 .L77000408: 2800/* 0x08d0 409 */ ba .L900000846 2801/* 0x08d4 350 */ cmp %g1,0 2802 .L77000409: 2803/* 0x08d8 409 */ srlx %i1,52,%l2 2804/* 0x08dc */ sethi %hi(0xfff00000),%o7 2805/* 0x08e0 */ sllx %o7,32,%i4 2806/* 0x08e4 */ sethi %hi(0x40000000),%i5 2807/* 0x08e8 */ sllx %i5,22,%l6 2808/* 0x08ec */ or %g0,1023,%l5 2809/* 0x08f0 */ xor %i4,-1,%o1 2810/* 0x08f4 */ sub %l5,%l2,%g2 2811/* 0x08f8 */ and %i1,%o1,%l7 2812/* 0x08fc */ add %g2,52,%g3 
2813/* 0x0900 */ or %l7,%l6,%g4 2814/* 0x0904 350 */ cmp %g1,0 2815/* 0x0908 409 */ srlx %g4,%g3,%o7 2816 .L900000846: 2817/* 0x090c 350 */ ble,pn %icc,.L77000397 2818/* 0x0910 */ or %g0,0,%l5 2819 .L77000510: 2820/* 0x0914 409 */ sethi %hi(0xfff00000),%g4 2821/* 0x0918 */ sllx %g4,32,%o0 2822/* 0x091c 0 */ or %g0,-1,%i5 2823/* 0x0920 409 */ srl %i5,0,%l7 2824/* 0x0924 */ sethi %hi(0x40000000),%i1 2825/* 0x0928 */ sllx %i1,22,%l6 2826/* 0x092c */ sethi %hi(0xfc00),%i4 2827/* 0x0930 */ xor %o0,-1,%g2 2828/* 0x0934 */ add %i4,1023,%l2 2829/* 0x0938 */ or %g0,2,%g4 2830/* 0x093c */ or %g0,%i2,%g3 2831 .L77000395: 2832/* 0x0940 409 */ sra %g4,0,%o2 2833/* 0x0944 */ add %g4,1,%o3 2834/* 0x0948 */ sllx %o2,3,%o0 2835/* 0x094c */ sra %o3,0,%o5 2836/* 0x0950 */ ldx [%l4+%o0],%o4 2837/* 0x0954 */ sllx %o5,3,%l0 2838/* 0x0958 */ and %i3,%l7,%o1 2839/* 0x095c */ ldx [%l4+%l0],%i4 2840/* 0x0960 */ cmp %o4,0 2841/* 0x0964 */ bne,pn %xcc,.L77000415 2842/* 0x0968 350 */ and %o7,%l2,%i5 2843 .L77000414: 2844/* 0x096c 409 */ or %g0,0,%l1 2845/* 0x0970 */ ba .L900000845 2846/* 0x0974 */ add %g5,%o1,%i1 2847 .L77000415: 2848/* 0x0978 409 */ srlx %o4,52,%o3 2849/* 0x097c */ and %o4,%g2,%l1 2850/* 0x0980 */ or %g0,52,%o0 2851/* 0x0984 */ sub %o3,1023,%l0 2852/* 0x0988 */ or %l1,%l6,%o4 2853/* 0x098c */ sub %o0,%l0,%o5 2854/* 0x0990 */ srlx %o4,%o5,%l1 2855/* 0x0994 */ add %g5,%o1,%i1 2856 .L900000845: 2857/* 0x0998 409 */ srax %i3,32,%g5 2858/* 0x099c */ cmp %i4,0 2859/* 0x09a0 */ bne,pn %xcc,.L77000421 2860/* 0x09a4 350 */ sllx %i5,16,%o2 2861 .L77000420: 2862/* 0x09a8 409 */ or %g0,0,%o4 2863/* 0x09ac */ ba .L900000844 2864/* 0x09b0 350 */ add %i1,%o2,%o5 2865 .L77000421: 2866/* 0x09b4 409 */ srlx %i4,52,%o4 2867/* 0x09b8 */ or %g0,52,%o0 2868/* 0x09bc */ sub %o4,1023,%o3 2869/* 0x09c0 */ and %i4,%g2,%i3 2870/* 0x09c4 */ or %i3,%l6,%o5 2871/* 0x09c8 */ sub %o0,%o3,%l0 2872/* 0x09cc */ srlx %o5,%l0,%o4 2873/* 0x09d0 350 */ add %i1,%o2,%o5 2874 .L900000844: 2875/* 0x09d4 350 */ srax 
%o7,16,%i4 2876/* 0x09d8 */ srax %o5,32,%i5 2877/* 0x09dc */ add %i4,%i5,%o1 2878/* 0x09e0 */ add %l5,1,%l5 2879/* 0x09e4 */ and %o5,%l7,%i1 2880/* 0x09e8 */ add %g5,%o1,%g5 2881/* 0x09ec */ st %i1,[%g3] 2882/* 0x09f0 */ or %g0,%l1,%i3 2883/* 0x09f4 */ or %g0,%o4,%o7 2884/* 0x09f8 */ add %g4,2,%g4 2885/* 0x09fc */ cmp %l5,%l3 2886/* 0x0a00 */ ble,pt %icc,.L77000395 2887/* 0x0a04 */ add %g3,4,%g3 2888 .L77000397: 2889/* 0x0a08 409 */ sethi %hi(0xfc00),%l4 2890/* 0x0a0c */ sra %l5,0,%i5 2891/* 0x0a10 */ add %l4,1023,%i1 2892/* 0x0a14 */ add %g5,%i3,%l5 2893/* 0x0a18 */ and %o7,%i1,%g5 2894/* 0x0a1c */ sllx %g5,16,%l2 2895/* 0x0a20 */ sllx %i5,2,%l7 2896/* 0x0a24 413 */ sra %g1,0,%g2 2897/* 0x0a28 409 */ add %l5,%l2,%l6 2898/* 0x0a2c */ st %l6,[%i2+%l7] 2899/* 0x0a30 413 */ sllx %g2,2,%g3 2900/* 0x0a34 */ ld [%i2+%g3],%g4 2901/* 0x0a38 */ cmp %g4,0 2902/* 0x0a3c */ bgu,pn %icc,.L77000486 2903/* 0x0a40 */ cmp %l3,0 2904 .L77000427: 2905/* 0x0a44 413 */ bl,pn %icc,.L77000486 2906/* 0x0a48 */ or %g0,%l3,%i5 2907 .L77000512: 2908/* 0x0a4c 413 */ sra %l3,0,%o5 2909/* 0x0a50 */ sllx %o5,2,%l7 2910/* 0x0a54 */ ld [%l7+%i0],%o5 2911/* 0x0a58 */ add %l7,%i2,%o1 2912/* 0x0a5c */ add %l7,%i0,%i4 2913 .L900000843: 2914/* 0x0a60 413 */ ld [%o1],%i1 2915/* 0x0a64 */ cmp %i1,%o5 2916/* 0x0a68 */ bne,pn %icc,.L77000435 2917/* 0x0a6c */ sub %o1,4,%o1 2918 .L77000431: 2919/* 0x0a70 413 */ sub %i4,4,%i4 2920/* 0x0a74 */ subcc %i5,1,%i5 2921/* 0x0a78 */ bpos,a,pt %icc,.L900000843 2922/* 0x0a7c */ ld [%i4],%o5 2923 .L900000827: 2924/* 0x0a80 413 */ ba .L900000842 2925/* 0x0a84 350 */ cmp %g1,0 2926 .L77000435: 2927/* 0x0a88 413 */ sra %i5,0,%o0 2928/* 0x0a8c */ sllx %o0,2,%l1 2929/* 0x0a90 */ ld [%i0+%l1],%i3 2930/* 0x0a94 */ ld [%i2+%l1],%l0 2931/* 0x0a98 */ cmp %l0,%i3 2932/* 0x0a9c */ bleu,pt %icc,.L77000379 2933/* 0x0aa0 */ nop 2934 .L77000486: 2935/* 0x0aa4 350 */ cmp %g1,0 2936 .L900000842: 2937/* 0x0aa8 350 */ ble,pn %icc,.L77000379 2938/* 0x0aac */ add %l3,1,%g3 2939 .L77000511: 
2940/* 0x0ab0 350 */ or %g0,0,%l5 2941/* 0x0ab4 */ cmp %g3,10 2942/* 0x0ab8 */ bl,pn %icc,.L77000487 2943/* 0x0abc */ or %g0,0,%g1 2944 .L900000835: 2945/* 0x0ac0 350 */ prefetch [%i2],22 2946/* 0x0ac4 */ add %i0,4,%l2 2947/* 0x0ac8 */ prefetch [%i2+64],22 2948/* 0x0acc */ add %i2,8,%o5 2949/* 0x0ad0 */ sub %l3,7,%i0 2950/* 0x0ad4 */ prefetch [%i2+128],22 2951/* 0x0ad8 */ or %g0,2,%l5 2952/* 0x0adc */ prefetch [%i2+192],22 2953/* 0x0ae0 */ prefetch [%i2+256],22 2954/* 0x0ae4 */ prefetch [%i2+320],22 2955/* 0x0ae8 */ prefetch [%i2+384],22 2956/* 0x0aec */ ld [%l2-4],%l7 2957/* 0x0af0 */ ld [%o5-4],%l6 2958/* 0x0af4 */ prefetch [%o5+440],22 2959/* 0x0af8 */ prefetch [%o5+504],22 2960/* 0x0afc */ ld [%i2],%i2 2961/* 0x0b00 */ sub %i2,%l7,%g3 2962/* 0x0b04 */ st %g3,[%o5-8] 2963/* 0x0b08 */ srax %g3,32,%l7 2964 .L900000833: 2965/* 0x0b0c 350 */ add %l5,8,%l5 2966/* 0x0b10 */ add %o5,32,%o5 2967/* 0x0b14 */ ld [%l2],%i5 2968/* 0x0b18 */ prefetch [%o5+496],22 2969/* 0x0b1c */ cmp %l5,%i0 2970/* 0x0b20 */ add %l2,32,%l2 2971/* 0x0b24 */ sub %l6,%i5,%g5 2972/* 0x0b28 */ add %g5,%l7,%o0 2973/* 0x0b2c */ ld [%o5-32],%l4 2974/* 0x0b30 */ st %o0,[%o5-36] 2975/* 0x0b34 */ srax %o0,32,%i3 2976/* 0x0b38 */ ld [%l2-28],%i1 2977/* 0x0b3c */ sub %l4,%i1,%i4 2978/* 0x0b40 */ add %i4,%i3,%o1 2979/* 0x0b44 */ ld [%o5-28],%o3 2980/* 0x0b48 */ st %o1,[%o5-32] 2981/* 0x0b4c */ srax %o1,32,%l1 2982/* 0x0b50 */ ld [%l2-24],%o2 2983/* 0x0b54 */ sub %o3,%o2,%g2 2984/* 0x0b58 */ add %g2,%l1,%o7 2985/* 0x0b5c */ ld [%o5-24],%l0 2986/* 0x0b60 */ st %o7,[%o5-28] 2987/* 0x0b64 */ srax %o7,32,%l6 2988/* 0x0b68 */ ld [%l2-20],%o4 2989/* 0x0b6c */ sub %l0,%o4,%g1 2990/* 0x0b70 */ add %g1,%l6,%l7 2991/* 0x0b74 */ ld [%o5-20],%i2 2992/* 0x0b78 */ st %l7,[%o5-24] 2993/* 0x0b7c */ srax %l7,32,%g4 2994/* 0x0b80 */ ld [%l2-16],%g3 2995/* 0x0b84 */ sub %i2,%g3,%i5 2996/* 0x0b88 */ add %i5,%g4,%g5 2997/* 0x0b8c */ ld [%o5-16],%i1 2998/* 0x0b90 */ st %g5,[%o5-20] 2999/* 0x0b94 */ srax %g5,32,%l4 3000/* 0x0b98 
*/ ld [%l2-12],%o0 3001/* 0x0b9c */ sub %i1,%o0,%i3 3002/* 0x0ba0 */ add %i3,%l4,%i4 3003/* 0x0ba4 */ ld [%o5-12],%o2 3004/* 0x0ba8 */ st %i4,[%o5-16] 3005/* 0x0bac */ srax %i4,32,%o3 3006/* 0x0bb0 */ ld [%l2-8],%o1 3007/* 0x0bb4 */ sub %o2,%o1,%l1 3008/* 0x0bb8 */ add %l1,%o3,%g2 3009/* 0x0bbc */ ld [%o5-8],%o4 3010/* 0x0bc0 */ st %g2,[%o5-12] 3011/* 0x0bc4 */ srax %g2,32,%l0 3012/* 0x0bc8 */ ld [%l2-4],%o7 3013/* 0x0bcc */ sub %o4,%o7,%l6 3014/* 0x0bd0 */ add %l6,%l0,%g1 3015/* 0x0bd4 */ ld [%o5-4],%l6 3016/* 0x0bd8 */ st %g1,[%o5-8] 3017/* 0x0bdc */ ble,pt %icc,.L900000833 3018/* 0x0be0 */ srax %g1,32,%l7 3019 .L900000836: 3020/* 0x0be4 350 */ ld [%l2],%l0 3021/* 0x0be8 */ add %l2,4,%i0 3022/* 0x0bec */ or %g0,%o5,%i2 3023/* 0x0bf0 */ cmp %l5,%l3 3024/* 0x0bf4 */ sub %l6,%l0,%l6 3025/* 0x0bf8 */ add %l6,%l7,%g1 3026/* 0x0bfc */ st %g1,[%o5-4] 3027/* 0x0c00 */ bg,pn %icc,.L77000379 3028/* 0x0c04 */ srax %g1,32,%g1 3029 .L77000487: 3030/* 0x0c08 350 */ ld [%i2],%o4 3031 .L900000841: 3032/* 0x0c0c 350 */ ld [%i0],%i3 3033/* 0x0c10 */ add %g1,%o4,%l0 3034/* 0x0c14 */ add %l5,1,%l5 3035/* 0x0c18 */ cmp %l5,%l3 3036/* 0x0c1c */ add %i0,4,%i0 3037/* 0x0c20 */ sub %l0,%i3,%l6 3038/* 0x0c24 */ st %l6,[%i2] 3039/* 0x0c28 */ srax %l6,32,%g1 3040/* 0x0c2c */ add %i2,4,%i2 3041/* 0x0c30 */ ble,a,pt %icc,.L900000841 3042/* 0x0c34 */ ld [%i2],%o4 3043 .L77000379: 3044/* 0x0c38 405 */ ret ! Result = 3045/* 0x0c3c */ restore %g0,%g0,%g0 3046/* 0x0c40 0 */ .type mont_mulf_noconv,2 3047/* 0x0c40 0 */ .size mont_mulf_noconv,(.-mont_mulf_noconv) 3048 3049! 
Begin Disassembling Debug Info 3050 .xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0 3051 .xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o mont_mulf.c",52,0,0,0 3052 3053! End Disassembling Debug Info 3054 3055! Begin Disassembling Ident 3056 .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE) 3057 .ident "@(#)mont_mulf.c\t1.2\t01/09/24 SMI" ! (/tmp/acompAAApja4Fx:8) 3058 .ident "@(#)types.h\t1.74\t03/08/07 SMI" ! (/tmp/acompAAApja4Fx:9) 3059 .ident "@(#)isa_defs.h\t1.20\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:10) 3060 .ident "@(#)feature_tests.h\t1.18\t99/07/26 SMI" ! (/tmp/acompAAApja4Fx:11) 3061 .ident "@(#)machtypes.h\t1.13\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:12) 3062 .ident "@(#)inttypes.h\t1.2\t98/01/16 SMI" ! (/tmp/acompAAApja4Fx:13) 3063 .ident "@(#)int_types.h\t1.6\t97/08/20 SMI" ! (/tmp/acompAAApja4Fx:14) 3064 .ident "@(#)int_limits.h\t1.6\t99/08/06 SMI" ! (/tmp/acompAAApja4Fx:15) 3065 .ident "@(#)int_const.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:16) 3066 .ident "@(#)int_fmtio.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:17) 3067 .ident "@(#)types32.h\t1.4\t98/02/13 SMI" ! (/tmp/acompAAApja4Fx:18) 3068 .ident "@(#)select.h\t1.17\t01/08/15 SMI" ! (/tmp/acompAAApja4Fx:19) 3069 .ident "@(#)math.h\t2.11\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:20) 3070 .ident "@(#)math_iso.h\t1.2\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:21) 3071 .ident "@(#)floatingpoint.h\t2.5\t99/06/22 SMI" ! (/tmp/acompAAApja4Fx:22) 3072 .ident "@(#)stdio_tag.h\t1.3\t98/04/20 SMI" ! (/tmp/acompAAApja4Fx:23) 3073 .ident "@(#)ieeefp.h\t2.8 99/10/29" ! (/tmp/acompAAApja4Fx:24) 3074 .ident "acomp: Sun C 5.5 Patch 112760-07 2004/02/03" ! 
! (/tmp/acompAAApja4Fx:57)
	.ident	"iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27"	! (/tmp/acompAAApja4Fx:58)
	.ident	"cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27"	! (NO SOURCE LINE)
! End Disassembling Ident

/*
 * FZERO leaves zero in every even-numbered double register %f0 - %f62.
 * %f0 and %f2 are cleared with fzero; each remaining register then
 * receives either the sum or the product of those two zeroed registers.
 * NOTE(review): the faddd/fmuld alternation presumably spreads the work
 * over the FP add and multiply pipelines -- confirm before reordering.
 */
#define	FZERO				\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f16		;\
	fmuld	%f0, %f2, %f18		;\
	faddd	%f0, %f2, %f20		;\
	fmuld	%f0, %f2, %f22		;\
	faddd	%f0, %f2, %f24		;\
	fmuld	%f0, %f2, %f26		;\
	faddd	%f0, %f2, %f28		;\
	fmuld	%f0, %f2, %f30		;\
	faddd	%f0, %f2, %f32		;\
	fmuld	%f0, %f2, %f34		;\
	faddd	%f0, %f2, %f36		;\
	fmuld	%f0, %f2, %f38		;\
	faddd	%f0, %f2, %f40		;\
	fmuld	%f0, %f2, %f42		;\
	faddd	%f0, %f2, %f44		;\
	fmuld	%f0, %f2, %f46		;\
	faddd	%f0, %f2, %f48		;\
	fmuld	%f0, %f2, %f50		;\
	faddd	%f0, %f2, %f52		;\
	fmuld	%f0, %f2, %f54		;\
	faddd	%f0, %f2, %f56		;\
	fmuld	%f0, %f2, %f58		;\
	faddd	%f0, %f2, %f60		;\
	fmuld	%f0, %f2, %f62

#include "assym.h"

/*
 * big_savefp
 *
 * %o0 addresses the save area; the FPU_FPRS and FPU_FSR offsets into it
 * come from assym.h.
 *
 * The current %fprs is always recorded in the save area.  If FPRS_FEF is
 * already set, BSTORE_FPREGS runs (it is defined outside this file;
 * presumably it copies the FP register file into the save area with %o4
 * as scratch -- confirm against its definition) and then %fsr is stored.
 * If FPRS_FEF is clear, the bit is switched on first so that touching
 * %fsr cannot raise an fp_disabled trap, and only %fsr is stored; the
 * FPU is left enabled on return in that case.
 *
 * PSTATE_PEF is neither tested nor set here because that is done early
 * during boot.
 */
	ENTRY(big_savefp)
	rd	%fprs, %o2
	st	%o2, [%o0 + FPU_FPRS]		! record caller %fprs
	btst	FPRS_FEF, %o2			! FPU currently enabled?
	bz,pn	%icc, .fpu_off_save		! no, enable it and keep only %fsr
	nop
	BSTORE_FPREGS(%o0, %o4)			! yes, save the FP registers
	stx	%fsr, [%o0 + FPU_FSR]		! then %fsr
	retl
	nop
.fpu_off_save:
	wr	%g0, FPRS_FEF, %fprs		! turn the FPU on
	stx	%fsr, [%o0 + FPU_FSR]		! %fsr is reachable now
	retl
	nop
	SET_SIZE(big_savefp)


/*
 * big_restorefp
 *
 * %o0 addresses a save area laid out as big_savefp writes it (FPU_FSR
 * and FPU_FPRS offsets).
 *
 * %fsr is reloaded first.  If the saved %fprs has FPRS_FEF set,
 * BLOAD_FPREGS runs (defined outside this file; presumably it reloads
 * the FP register file from the save area with %o2 as scratch -- confirm
 * against its definition) and %fprs is set back to the saved value.
 * Otherwise the FP registers are zeroed with FZERO so that no values are
 * left behind, and %fprs is written as zero.
 */
	ENTRY(big_restorefp)
	ldx	[%o0 + FPU_FSR], %fsr		! reload %fsr
	ld	[%o0 + FPU_FPRS], %o1		! saved %fprs
	btst	FPRS_FEF, %o1			! was the FPU enabled at save time?
	bz,pn	%icc, .fpu_off_restore		! no, scrub instead of reloading
	nop
	BLOAD_FPREGS(%o0, %o2)			! yes, reload the FP registers
	wr	%o1, 0, %fprs			! and the saved %fprs
	retl
	nop
.fpu_off_restore:
	FZERO					! zero out to avoid leaks
	wr	%g0, 0, %fprs			! FPU back off
	retl
	nop
	SET_SIZE(big_restorefp)

#endif /* lint || __lint */