1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* 27 * This file is mostly a result of compiling the mont_mulf.c file to generate an 28 * assembly output and then hand-editing that output to replace the 29 * compiler-generated loop for the 512-bit case (nlen == 16) in the 30 * mont_mulf_noconv routine with a hand-crafted version. This file also 31 * has big_savefp() and big_restorefp() routines added by hand. 32 */ 33 34#include <sys/asm_linkage.h> 35#include <sys/trap.h> 36#include <sys/stack.h> 37#include <sys/privregs.h> 38#include <sys/regset.h> 39#include <sys/vis.h> 40#include <sys/machthread.h> 41#include <sys/machtrap.h> 42#include <sys/machsig.h> 43 44 .section ".text",#alloc,#execinstr 45 .file "mont_mulf.s" 46 47 .section ".bss",#alloc,#write 48Bbss.bss: 49 50 .section ".data",#alloc,#write 51Ddata.data: 52 53 .section ".rodata",#alloc 54! 55! CONSTANT POOL 56! 57Drodata.rodata: 58 .global TwoTo16 59 .align 8 60! 61! CONSTANT POOL 62! 
63 .global TwoTo16 /* Read-only double-precision constants shared by the routines in this file (cleanup reads Zero; conv_i32_to_d32_and_d16 reads Zero, TwoToMinus16 and TwoTo16). Each one is emitted as two 32-bit words, most significant word first; the decimal .word values are the IEEE-754 high words: TwoTo16 = 0x40F00000 (65536.0), TwoToMinus16 = 0x3EF00000 (1.0/65536.0), Zero = 0, TwoTo32 = 0x41F00000 (65536.0 * 65536.0), TwoToMinus32 = 0x3DF00000 (1.0 / (65536.0 * 65536.0)). These match the static const double definitions echoed from mont_mulf.c further down. */ 64TwoTo16: 65 .word 1089470464 66 .word 0 67 .type TwoTo16,#object 68 .size TwoTo16,8 69 .global TwoToMinus16 70! 71! CONSTANT POOL 72! 73 .global TwoToMinus16 74TwoToMinus16: 75 .word 1055916032 76 .word 0 77 .type TwoToMinus16,#object 78 .size TwoToMinus16,8 79 .global Zero 80! 81! CONSTANT POOL 82! 83 .global Zero 84Zero: 85 .word 0 86 .word 0 87 .type Zero,#object 88 .size Zero,8 89 .global TwoTo32 90! 91! CONSTANT POOL 92! 93 .global TwoTo32 94TwoTo32: 95 .word 1106247680 96 .word 0 97 .type TwoTo32,#object 98 .size TwoTo32,8 99 .global TwoToMinus32 100! 101! CONSTANT POOL 102! 103 .global TwoToMinus32 104TwoToMinus32: 105 .word 1039138816 106 .word 0 107 .type TwoToMinus32,#object 108 .size TwoToMinus32,8 109 110 .section ".text",#alloc,#execinstr 111/* 000000 0 */ .register %g3,#scratch 112/* 000000 */ .register %g2,#scratch 113/* 000000 0 */ .align 32 114! FILE mont_mulf.c 115 116! 1 !/* 117! 2 ! * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 118! 3 ! * Use is subject to license terms. 119! 4 ! */ 120! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI" 121! 9 !/* 122! 10 ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time 123! 11 ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time 124! 12 ! * (i.e. cc <compileer_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c ) 125! 13 ! */ 126! 15 !#include <sys/types.h> 127! 16 !#include <math.h> 128! 18 !static const double TwoTo16 = 65536.0; 129! 19 !static const double TwoToMinus16 = 1.0/65536.0; 130! 20 !static const double Zero = 0.0; 131! 21 !static const double TwoTo32 = 65536.0 * 65536.0; 132! 22 !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0); 133! 24 !#ifdef RF_INLINE_MACROS 134! 26 !double upper32(double); 135! 27 !double lower32(double, double); 136! 28 !double mod(double, double, double); 137! 30 !#else 138! 32 !static double 139! 33 !upper32(double x) 140! 34 !{ 141! 35 ! return (floor(x * TwoToMinus32)); 142! 36 !} 143! 
39 !/* ARGSUSED */ 144! 40 !static double 145! 41 !lower32(double x, double y) 146! 42 !{ 147! 43 ! return (x - TwoTo32 * floor(x * TwoToMinus32)); 148! 44 !} 149! 46 !static double 150! 47 !mod(double x, double oneoverm, double m) 151! 48 !{ 152! 49 ! return (x - m * floor(x * oneoverm)); 153! 50 !} 154! 52 !#endif 155! 55 !static void 156! 56 !cleanup(double *dt, int from, int tlen) 157! 57 !{ 158 159! 160! SUBROUTINE cleanup 161! 162! OFFSET SOURCE LINE LABEL INSTRUCTION 163 /* cleanup(dt, from, tlen) -- file-local leaf routine (no .global directive, no save; it returns with retl). Arguments arrive in %o0 (dt), %o1 (from) and %o2 (tlen). It walks dt[2*from] up to, but not including, dt[2*tlen], two doubles per pass. Each value is converted to a 64-bit integer with fdtox. Its low 32 bits are turned back into a double (the high word of the integer is overwritten with the high word of Zero via fmovs, then fxtod), added to the carry left by the previous pair, and stored back. Its high 32 bits are converted with fitod and become the carry for the next pair. %f8 holds Zero; %f18 and %f16 hold the two running carries (tmp and tmp1 in the C source) and both start as Zero. The routine returns straight away when 2*from >= 2*tlen. */ 164 cleanup: 165/* 000000 57 */ sra %o1,0,%o4 166/* 0x0004 */ sra %o2,0,%o5 167 168! 58 ! int i; 169! 59 ! double tmp, tmp1, x, x1; 170! 61 ! tmp = tmp1 = Zero; 171 172/* 0x0008 61 */ sll %o5,1,%g5 173 174! 63 ! for (i = 2 * from; i < 2 * tlen; i += 2) { 175 176/* 0x000c 63 */ sll %o4,1,%g3 177/* 0x0010 */ cmp %g3,%g5 178/* 0x0014 */ bge,pn %icc,.L77000188 179/* 0x0018 0 */ sethi %hi(Zero),%o3 180 .L77000197: 181/* 0x001c 63 */ ldd [%o3+%lo(Zero)],%f8 182/* 0x0020 */ sra %g3,0,%o1 183/* 0x0024 */ sub %g5,1,%g2 184/* 0x0028 */ sllx %o1,3,%g4 185 186! 64 ! x = dt[i]; 187 188/* 0x002c 64 */ ldd [%g4+%o0],%f10 189/* 0x0030 63 */ add %g4,%o0,%g1 190/* 0x0034 */ fmovd %f8,%f18 191/* 0x0038 */ fmovd %f8,%f16 192 193! 65 ! x1 = dt[i + 1]; 194! 66 ! dt[i] = lower32(x, Zero) + tmp; 195 196 .L900000110: 197/* 0x003c 66 */ fdtox %f10,%f0 198/* 0x0040 65 */ ldd [%g1+8],%f12 199 200! 67 ! dt[i + 1] = lower32(x1, Zero) + tmp1; 201! 68 ! tmp = upper32(x); 202! 69 ! 
tmp1 = upper32(x1); 203 204/* 0x0044 69 */ add %g3,2,%g3 205/* 0x0048 */ cmp %g3,%g2 206/* 0x004c 67 */ fdtox %f12,%f2 207/* 0x0050 68 */ fmovd %f0,%f4 208/* 0x0054 66 */ fmovs %f8,%f0 209/* 0x0058 67 */ fmovs %f8,%f2 210/* 0x005c 66 */ fxtod %f0,%f0 211/* 0x0060 67 */ fxtod %f2,%f2 212/* 0x0064 69 */ fdtox %f12,%f6 213/* 0x0068 66 */ faddd %f0,%f18,%f10 214/* 0x006c */ std %f10,[%g1] 215/* 0x0070 67 */ faddd %f2,%f16,%f14 216/* 0x0074 */ std %f14,[%g1+8] 217/* 0x0078 68 */ fitod %f4,%f18 218/* 0x007c 69 */ add %g1,16,%g1 219/* 0x0080 */ fitod %f6,%f16 220/* 0x0084 */ ble,a,pt %icc,.L900000110 221/* 0x0088 64 */ ldd [%g1],%f10 222 .L77000188: 223/* 0x008c 69 */ retl ! Result = 224/* 0x0090 */ nop 225/* 0x0094 0 */ .type cleanup,2 226/* 0x0094 0 */ .size cleanup,(.-cleanup) 227 228 .section ".text",#alloc,#execinstr 229/* 000000 0 */ .align 8 230/* 000000 */ .skip 24 231/* 0x0018 */ .align 32 232 233! 70 ! } 234! 71 !} 235! 75 !#ifdef _KERNEL 236! 76 !/* 237! 77 ! * This only works if 0 <= d < 2^53 238! 78 ! */ 239! 79 !uint64_t 240! 80 !double2uint64_t(double* d) 241! 81 !{ 242! 82 ! uint64_t x; 243! 83 ! uint64_t exp; 244! 84 ! uint64_t man; 245! 86 ! x = *((uint64_t *)d); 246 247! 248! SUBROUTINE double2uint64_t 249! 250! OFFSET SOURCE LINE LABEL INSTRUCTION 251 /* double2uint64_t(d) -- global leaf routine (returns with retl). %o0 points at a double on entry and carries the integer result on return. The raw 64-bit pattern is fetched with an integer load (ldx), so no floating-point instruction is used. A pattern of all zero bits returns 0. Otherwise the top 12 bits (sign and exponent) are masked off with ~0xfff0000000000000, bit 52 (0x40000000 << 22, the implicit leading one) is ORed in, and the result is shifted right by 1023 - (bits >> 52) + 52, which is 52 - exp for exp = (bits >> 52) - 1023. The echoed C source states that this only works for 0 <= d < 2^53. */ 252 .global double2uint64_t 253 double2uint64_t: 254/* 000000 86 */ ldx [%o0],%o2 255 256! 87 ! if (x == 0) { 257 258/* 0x0004 87 */ cmp %o2,0 259/* 0x0008 */ bne,pn %xcc,.L900000206 260/* 0x000c 94 */ sethi %hi(0xfff00000),%o5 261 .L77000202: 262/* 0x0010 94 */ retl ! Result = %o0 263 264! 88 ! return (0ULL); 265 266/* 0x0014 88 */ or %g0,0,%o0 267 268! 89 ! } 269! 90 ! exp = (x >> 52) - 1023; 270! 91 ! man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL; 271! 92 ! x = man >> (52 - exp); 272! 94 ! 
return (x); 273 274 .L900000206: 275/* 0x0018 94 */ sllx %o5,32,%o4 276/* 0x001c */ srlx %o2,52,%o0 277/* 0x0020 */ sethi %hi(0x40000000),%o1 278/* 0x0024 */ or %g0,1023,%g5 279/* 0x0028 */ sllx %o1,22,%g4 280/* 0x002c */ xor %o4,-1,%o3 281/* 0x0030 */ sub %g5,%o0,%g3 282/* 0x0034 */ and %o2,%o3,%g2 283/* 0x0038 */ or %g2,%g4,%o5 284/* 0x003c */ add %g3,52,%g1 285/* 0x0040 */ retl ! Result = %o0 286/* 0x0044 */ srlx %o5,%g1,%o0 287/* 0x0048 0 */ .type double2uint64_t,2 288/* 0x0048 0 */ .size double2uint64_t,(.-double2uint64_t) 289 290 .section ".text",#alloc,#execinstr 291/* 000000 0 */ .align 8 292/* 000000 */ .skip 24 293/* 0x0018 */ .align 32 294 295! 95 !} 296! 96 !#else 297! 97 !/* 298! 98 ! * This only works if 0 <= d < 2^63 299! 99 ! */ 300! 100 !uint64_t 301! 101 !double2uint64_t(double* d) 302! 102 !{ 303! 103 ! return ((int64_t)(*d)); 304! 104 !} 305! 105 !#endif 306! 107 !/* ARGSUSED */ 307! 108 !void 308! 109 !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen) 309! 110 !{ 310 311! 312! SUBROUTINE conv_d16_to_i32 313! 314! OFFSET SOURCE LINE LABEL INSTRUCTION 315 /* conv_d16_to_i32(i32, d16, tmp, ilen) -- global routine that opens its own register window (save ... ret/restore). After the save, %i0 = i32, %i1 = d16 and %i3 = ilen. The tmp argument in %i2 is never read: %i2 is overwritten at once with the bit pattern of d16[1]. Every d16[] entry is loaded as raw bits with ldx and turned into an integer by an inlined copy of the double2uint64_t bit manipulation, which is why the 0xfff00000 / 0x40000000 / 1023 / 52 sequences repeat. As the interleaved C source shows, consecutive entries a and b are combined as (a & 0xffffffff) + ((b & 0xffff) << 16), with the excess above 32 bits carried into the next output word. %l6 holds the 0xffffffff mask and %l2 the 0xffff mask (sethi %hi(0xfc00) plus 1023). The loop at .L77000208 stores one 32-bit word per pass through %g1, and the code at .L77000210 stores the final word. */ 316 .global conv_d16_to_i32 317 conv_d16_to_i32: 318/* 000000 110 */ save %sp,-176,%sp 319 320! 111 ! int i; 321! 112 ! int64_t t, t1, /* using int64_t and not uint64_t */ 322! 113 ! a, b, c, d; /* because more efficient code is */ 323! 114 ! /* generated this way, and there */ 324! 115 ! /* is no overflow */ 325! 116 ! t1 = 0; 326! 117 ! 
a = double2uint64_t(&(d16[0])); 327 328/* 0x0004 117 */ ldx [%i1],%o0 329/* 0x0008 118 */ ldx [%i1+8],%i2 330/* 0x000c 117 */ cmp %o0,0 331/* 0x0010 */ bne,pn %xcc,.L77000216 332/* 0x0014 */ or %g0,0,%i4 333 .L77000215: 334/* 0x0018 117 */ ba .L900000316 335/* 0x001c 118 */ cmp %i2,0 336 .L77000216: 337/* 0x0020 117 */ srlx %o0,52,%o5 338/* 0x0024 */ sethi %hi(0xfff00000),%i4 339/* 0x0028 */ sllx %i4,32,%o2 340/* 0x002c */ sethi %hi(0x40000000),%o7 341/* 0x0030 */ sllx %o7,22,%o3 342/* 0x0034 */ or %g0,1023,%o4 343/* 0x0038 */ xor %o2,-1,%g5 344/* 0x003c */ sub %o4,%o5,%l0 345/* 0x0040 */ and %o0,%g5,%o1 346/* 0x0044 */ add %l0,52,%l1 347/* 0x0048 */ or %o1,%o3,%g4 348 349! 118 ! b = double2uint64_t(&(d16[1])); 350 351/* 0x004c 118 */ cmp %i2,0 352/* 0x0050 117 */ srlx %g4,%l1,%i4 353 .L900000316: 354/* 0x0054 118 */ bne,pn %xcc,.L77000222 355/* 0x0058 134 */ sub %i3,1,%l3 356 .L77000221: 357/* 0x005c 118 */ or %g0,0,%i2 358/* 0x0060 */ ba .L900000315 359/* 0x0064 116 */ or %g0,0,%o3 360 .L77000222: 361/* 0x0068 118 */ srlx %i2,52,%l6 362/* 0x006c */ sethi %hi(0xfff00000),%g4 363/* 0x0070 */ sllx %g4,32,%i5 364/* 0x0074 */ sethi %hi(0x40000000),%l5 365/* 0x0078 */ xor %i5,-1,%l4 366/* 0x007c */ or %g0,1023,%l2 367/* 0x0080 */ and %i2,%l4,%l7 368/* 0x0084 */ sllx %l5,22,%i2 369/* 0x0088 */ sub %l2,%l6,%g1 370/* 0x008c */ or %l7,%i2,%g3 371/* 0x0090 */ add %g1,52,%g2 372/* 0x0094 116 */ or %g0,0,%o3 373/* 0x0098 118 */ srlx %g3,%g2,%i2 374 375! 119 ! 
for (i = 0; i < ilen - 1; i++) { 376 377 .L900000315: 378/* 0x009c 119 */ cmp %l3,0 379/* 0x00a0 */ ble,pn %icc,.L77000210 380/* 0x00a4 */ or %g0,0,%l4 381 .L77000245: 382/* 0x00a8 118 */ sethi %hi(0xfff00000),%l7 383/* 0x00ac */ or %g0,-1,%l6 384/* 0x00b0 */ sllx %l7,32,%l3 385/* 0x00b4 */ srl %l6,0,%l6 386/* 0x00b8 */ sethi %hi(0x40000000),%l1 387/* 0x00bc */ sethi %hi(0xfc00),%l2 388/* 0x00c0 */ xor %l3,-1,%l7 389/* 0x00c4 */ sllx %l1,22,%l3 390/* 0x00c8 */ sub %i3,2,%l5 391/* 0x00cc */ add %l2,1023,%l2 392/* 0x00d0 */ or %g0,2,%g2 393/* 0x00d4 */ or %g0,%i0,%g1 394 395! 120 ! c = double2uint64_t(&(d16[2 * i + 2])); 396 397 .L77000208: 398/* 0x00d8 120 */ sra %g2,0,%g3 399/* 0x00dc 123 */ add %g2,1,%o2 400/* 0x00e0 120 */ sllx %g3,3,%i3 401 402! 121 ! t1 += a & 0xffffffff; 403! 122 ! t = (a >> 32); 404! 123 ! d = double2uint64_t(&(d16[2 * i + 3])); 405 406/* 0x00e4 123 */ sra %o2,0,%g5 407/* 0x00e8 120 */ ldx [%i1+%i3],%o5 408/* 0x00ec 123 */ sllx %g5,3,%o0 409/* 0x00f0 121 */ and %i4,%l6,%g4 410/* 0x00f4 123 */ ldx [%i1+%o0],%i3 411/* 0x00f8 120 */ cmp %o5,0 412/* 0x00fc */ bne,pn %xcc,.L77000228 413/* 0x0100 124 */ and %i2,%l2,%i5 414 .L77000227: 415/* 0x0104 120 */ or %g0,0,%l1 416/* 0x0108 */ ba .L900000314 417/* 0x010c 121 */ add %o3,%g4,%o0 418 .L77000228: 419/* 0x0110 120 */ srlx %o5,52,%o7 420/* 0x0114 */ and %o5,%l7,%o5 421/* 0x0118 */ or %g0,52,%l0 422/* 0x011c */ sub %o7,1023,%o4 423/* 0x0120 */ or %o5,%l3,%l1 424/* 0x0124 */ sub %l0,%o4,%o1 425/* 0x0128 */ srlx %l1,%o1,%l1 426/* 0x012c 121 */ add %o3,%g4,%o0 427 .L900000314: 428/* 0x0130 122 */ srax %i4,32,%g3 429/* 0x0134 123 */ cmp %i3,0 430/* 0x0138 */ bne,pn %xcc,.L77000234 431/* 0x013c 124 */ sllx %i5,16,%g5 432 .L77000233: 433/* 0x0140 123 */ or %g0,0,%o2 434/* 0x0144 */ ba .L900000313 435/* 0x0148 124 */ add %o0,%g5,%o7 436 .L77000234: 437/* 0x014c 123 */ srlx %i3,52,%o2 438/* 0x0150 */ and %i3,%l7,%i4 439/* 0x0154 */ sub %o2,1023,%o1 440/* 0x0158 */ or %g0,52,%g4 441/* 0x015c */ sub 
%g4,%o1,%i5 442/* 0x0160 */ or %i4,%l3,%i3 443/* 0x0164 */ srlx %i3,%i5,%o2 444 445! 124 ! t1 += (b & 0xffff) << 16; 446 447/* 0x0168 124 */ add %o0,%g5,%o7 448 449! 125 ! t += (b >> 16) + (t1 >> 32); 450 451 .L900000313: 452/* 0x016c 125 */ srax %i2,16,%l0 453/* 0x0170 */ srax %o7,32,%o4 454/* 0x0174 */ add %l0,%o4,%o3 455 456! 126 ! i32[i] = t1 & 0xffffffff; 457! 127 ! t1 = t; 458! 128 ! a = c; 459! 129 ! b = d; 460 461/* 0x0178 129 */ add %l4,1,%l4 462/* 0x017c 126 */ and %o7,%l6,%o5 463/* 0x0180 125 */ add %g3,%o3,%o3 464/* 0x0184 126 */ st %o5,[%g1] 465/* 0x0188 128 */ or %g0,%l1,%i4 466/* 0x018c 129 */ or %g0,%o2,%i2 467/* 0x0190 */ add %g2,2,%g2 468/* 0x0194 */ cmp %l4,%l5 469/* 0x0198 */ ble,pt %icc,.L77000208 470/* 0x019c */ add %g1,4,%g1 471 472! 130 ! } 473! 131 ! t1 += a & 0xffffffff; 474! 132 ! t = (a >> 32); 475! 133 ! t1 += (b & 0xffff) << 16; 476! 134 ! i32[i] = t1 & 0xffffffff; 477 478 .L77000210: 479/* 0x01a0 134 */ sra %l4,0,%l4 480/* 0x01a4 */ sethi %hi(0xfc00),%i1 481/* 0x01a8 */ add %o3,%i4,%l2 482/* 0x01ac */ add %i1,1023,%i5 483/* 0x01b0 */ and %i2,%i5,%l5 484/* 0x01b4 */ sllx %l4,2,%i2 485/* 0x01b8 */ sllx %l5,16,%l6 486/* 0x01bc */ add %l2,%l6,%l7 487/* 0x01c0 */ st %l7,[%i0+%i2] 488/* 0x01c4 129 */ ret ! Result = 489/* 0x01c8 */ restore %g0,%g0,%g0 490/* 0x01cc 0 */ .type conv_d16_to_i32,2 491/* 0x01cc 0 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) 492 493 .section ".text",#alloc,#execinstr 494/* 000000 0 */ .align 8 495! 496! CONSTANT POOL 497! /* Conversion constant read by conv_i32_to_d32: the double whose high word is 0x43300000 (decimal 1127219200) and low word is 0, i.e. 2^52, followed by a separate copy of that high word at offset 8. */ 498 ___const_seg_900000401: 499/* 000000 0 */ .word 1127219200,0 500/* 0x0008 */ .word 1127219200 501/* 0x000c 0 */ .type ___const_seg_900000401,1 502/* 0x000c 0 */ .size ___const_seg_900000401,(.-___const_seg_900000401) 503/* 0x000c 0 */ .align 8 504/* 0x0010 */ .skip 24 505/* 0x0028 */ .align 32 506 507! 135 !} 508! 138 !void 509! 139 !conv_i32_to_d32(double *d32, uint32_t *i32, int len) 510! 140 !{ 511 512! 513! SUBROUTINE conv_i32_to_d32 514! 515! 
OFFSET SOURCE LINE LABEL INSTRUCTION 516 /* conv_i32_to_d32(d32, i32, len) -- global leaf routine (no save; returns with retl). On entry %o0 = d32, %o1 = i32, %o2 = len; it returns at once when len <= 0. Each 32-bit input word is loaded into the odd (low) half of a floating-point register pair whose even (high) half holds 0x43300000, and the double 2^52 held in %f16 is subtracted with fsubd. The difference is the input word, taken as unsigned, expressed as a double. When len >= 10 the prefetching loop at .L900000405 converts eight words per pass and .L900000408 converts the two words still loaded when that loop exits. Remaining words, and every input with len < 10, go through the one-word loop at .L900000409. */ 517 .global conv_i32_to_d32 518 conv_i32_to_d32: 519/* 000000 140 */ orcc %g0,%o2,%o2 520 521! 141 ! int i; 522! 143 !#pragma pipeloop(0) 523! 144 ! for (i = 0; i < len; i++) 524 525/* 0x0004 144 */ ble,pn %icc,.L77000254 526/* 0x0008 */ sub %o2,1,%o3 527 .L77000263: 528/* 0x000c 140 */ or %g0,%o0,%o2 529 530! 145 ! d32[i] = (double)(i32[i]); 531 532/* 0x0010 145 */ add %o3,1,%o5 533/* 0x0014 144 */ or %g0,0,%g5 534/* 0x0018 145 */ cmp %o5,10 535/* 0x001c */ bl,pn %icc,.L77000261 536/* 0x0020 */ sethi %hi(___const_seg_900000401),%g4 537 .L900000407: 538/* 0x0024 145 */ prefetch [%o1],0 539/* 0x0028 */ prefetch [%o0],22 540/* 0x002c */ sethi %hi(___const_seg_900000401+8),%o4 541/* 0x0030 */ or %g0,%o0,%o2 542/* 0x0034 */ prefetch [%o1+64],0 543/* 0x0038 */ add %o1,8,%o0 544/* 0x003c */ sub %o3,7,%o5 545/* 0x0040 */ prefetch [%o2+64],22 546/* 0x0044 */ or %g0,2,%g5 547/* 0x0048 */ prefetch [%o2+128],22 548/* 0x004c */ prefetch [%o2+192],22 549/* 0x0050 */ prefetch [%o1+128],0 550/* 0x0054 */ ld [%o4+%lo(___const_seg_900000401+8)],%f2 551/* 0x0058 */ ldd [%g4+%lo(___const_seg_900000401)],%f16 552/* 0x005c */ fmovs %f2,%f0 553/* 0x0060 */ prefetch [%o2+256],22 554/* 0x0064 */ prefetch [%o2+320],22 555/* 0x0068 */ ld [%o1],%f3 556/* 0x006c */ prefetch [%o1+192],0 557/* 0x0070 */ ld [%o1+4],%f1 558 .L900000405: 559/* 0x0074 145 */ prefetch [%o0+188],0 560/* 0x0078 */ fsubd %f2,%f16,%f22 561/* 0x007c */ add %g5,8,%g5 562/* 0x0080 */ add %o0,32,%o0 563/* 0x0084 */ ld [%o4+%lo(___const_seg_900000401+8)],%f4 564/* 0x0088 */ std %f22,[%o2] 565/* 0x008c */ cmp %g5,%o5 566/* 0x0090 */ ld [%o0-32],%f5 567/* 0x0094 */ fsubd %f0,%f16,%f24 568/* 0x0098 */ add %o2,64,%o2 569/* 0x009c */ fmovs %f4,%f0 570/* 0x00a0 */ std %f24,[%o2-56] 571/* 0x00a4 */ ld [%o0-28],%f1 572/* 0x00a8 */ fsubd %f4,%f16,%f26 573/* 0x00ac */ fmovs %f0,%f6 574/* 0x00b0 */ prefetch [%o2+312],22 575/* 0x00b4 */ std %f26,[%o2-48] 576/* 0x00b8 */ ld [%o0-24],%f7 577/* 
0x00bc */ fsubd %f0,%f16,%f28 578/* 0x00c0 */ fmovs %f6,%f8 579/* 0x00c4 */ std %f28,[%o2-40] 580/* 0x00c8 */ ld [%o0-20],%f9 581/* 0x00cc */ fsubd %f6,%f16,%f30 582/* 0x00d0 */ fmovs %f8,%f10 583/* 0x00d4 */ std %f30,[%o2-32] 584/* 0x00d8 */ ld [%o0-16],%f11 585/* 0x00dc */ prefetch [%o2+344],22 586/* 0x00e0 */ fsubd %f8,%f16,%f48 587/* 0x00e4 */ fmovs %f10,%f12 588/* 0x00e8 */ std %f48,[%o2-24] 589/* 0x00ec */ ld [%o0-12],%f13 590/* 0x00f0 */ fsubd %f10,%f16,%f50 591/* 0x00f4 */ fmovs %f12,%f2 592/* 0x00f8 */ std %f50,[%o2-16] 593/* 0x00fc */ ld [%o0-8],%f3 594/* 0x0100 */ fsubd %f12,%f16,%f52 595/* 0x0104 */ fmovs %f2,%f0 596/* 0x0108 */ std %f52,[%o2-8] 597/* 0x010c */ ble,pt %icc,.L900000405 598/* 0x0110 */ ld [%o0-4],%f1 599 .L900000408: 600/* 0x0114 145 */ fsubd %f2,%f16,%f18 601/* 0x0118 */ add %o2,16,%o2 602/* 0x011c */ cmp %g5,%o3 603/* 0x0120 */ std %f18,[%o2-16] 604/* 0x0124 */ fsubd %f0,%f16,%f20 605/* 0x0128 */ or %g0,%o0,%o1 606/* 0x012c */ bg,pn %icc,.L77000254 607/* 0x0130 */ std %f20,[%o2-8] 608 .L77000261: 609/* 0x0134 145 */ ld [%o1],%f15 610 .L900000409: 611/* 0x0138 145 */ sethi %hi(___const_seg_900000401+8),%o4 612/* 0x013c */ ldd [%g4+%lo(___const_seg_900000401)],%f16 613/* 0x0140 */ add %g5,1,%g5 614/* 0x0144 */ ld [%o4+%lo(___const_seg_900000401+8)],%f14 615/* 0x0148 */ add %o1,4,%o1 616/* 0x014c */ cmp %g5,%o3 617/* 0x0150 */ fsubd %f14,%f16,%f54 618/* 0x0154 */ std %f54,[%o2] 619/* 0x0158 */ add %o2,8,%o2 620/* 0x015c */ ble,a,pt %icc,.L900000409 621/* 0x0160 */ ld [%o1],%f15 622 .L77000254: 623/* 0x0164 145 */ retl ! Result = 624/* 0x0168 */ nop 625/* 0x016c 0 */ .type conv_i32_to_d32,2 626/* 0x016c 0 */ .size conv_i32_to_d32,(.-conv_i32_to_d32) 627 628 .section ".text",#alloc,#execinstr 629/* 000000 0 */ .align 8 630! 631! CONSTANT POOL 632! 
633 ___const_seg_900000501: /* Conversion constant read by conv_i32_to_d16: the double 2^52 (high word 0x43300000 = 1127219200, low word 0), then a separate copy of that high word at offset 8. */ 634/* 000000 0 */ .word 1127219200,0 635/* 0x0008 */ .word 1127219200 636/* 0x000c 0 */ .type ___const_seg_900000501,1 637/* 0x000c 0 */ .size ___const_seg_900000501,(.-___const_seg_900000501) 638/* 0x000c 0 */ .align 8 639/* 0x0010 */ .skip 24 640/* 0x0028 */ .align 32 641 642! 146 !} 643! 149 !void 644! 150 !conv_i32_to_d16(double *d16, uint32_t *i32, int len) 645! 151 !{ 646 647! 648! SUBROUTINE conv_i32_to_d16 649! 650! OFFSET SOURCE LINE LABEL INSTRUCTION 651 /* conv_i32_to_d16(d16, i32, len) -- global routine that opens a register window (save %sp,-368 ... ret/restore). After the save, %i0 = d16, %i1 = i32, %i2 = len; it returns at once when len <= 0. For each input word a it writes d16[2*i] = (double)(a & 0xffff) and d16[2*i + 1] = (double)(a >> 16). %i4 holds the 0xffff mask (sethi %hi(0xfc00) plus 1023). The two halves are computed in integer registers, stored to scratch words addressed off %sp, and reloaded into the odd half of a floating-point pair whose even half is 0x43300000; subtracting 2^52 (%f20) then yields the half as a double. When len >= 4 the prefetching loop at .L900000506 handles two words per pass, converting the halves that the previous pass left in the scratch words, and .L900000509 converts the last two words left there. Remaining words, and every input with len < 4, go through the one-word loop at .L900000510, which reloads %f20 on each pass. */ 652 .global conv_i32_to_d16 653 conv_i32_to_d16: 654/* 000000 151 */ save %sp,-368,%sp 655/* 0x0004 */ orcc %g0,%i2,%i2 656 657! 152 ! int i; 658! 153 ! uint32_t a; 659! 155 !#pragma pipeloop(0) 660! 156 ! for (i = 0; i < len; i++) { 661 662/* 0x0008 156 */ ble,pn %icc,.L77000272 663/* 0x000c */ sub %i2,1,%l6 664 .L77000281: 665/* 0x0010 156 */ sethi %hi(0xfc00),%i3 666 667! 157 ! a = i32[i]; 668 669/* 0x0014 157 */ or %g0,%i2,%l1 670/* 0x0018 156 */ add %i3,1023,%i4 671/* 0x001c 157 */ cmp %i2,4 672/* 0x0020 151 */ or %g0,%i1,%l7 673/* 0x0024 */ or %g0,%i0,%i2 674/* 0x0028 156 */ or %g0,0,%i5 675/* 0x002c */ or %g0,0,%i3 676/* 0x0030 157 */ bl,pn %icc,.L77000279 677/* 0x0034 0 */ sethi %hi(___const_seg_900000501),%i1 678 .L900000508: 679/* 0x0038 157 */ prefetch [%i0+8],22 680/* 0x003c */ prefetch [%i0+72],22 681/* 0x0040 */ or %g0,%i0,%l2 682 683! 158 ! d16[2 * i] = (double)(a & 0xffff); 684 685/* 0x0044 158 */ sethi %hi(___const_seg_900000501+8),%l1 686/* 0x0048 157 */ prefetch [%i0+136],22 687/* 0x004c */ sub %l6,1,%i0 688/* 0x0050 */ or %g0,0,%i3 689/* 0x0054 */ prefetch [%i2+200],22 690/* 0x0058 */ or %g0,2,%i5 691/* 0x005c */ prefetch [%i2+264],22 692/* 0x0060 */ prefetch [%i2+328],22 693/* 0x0064 */ prefetch [%i2+392],22 694/* 0x0068 */ ld [%l7],%l3 695/* 0x006c */ ld [%l7+4],%l4 696/* 0x0070 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20 697 698! 159 ! 
d16[2 * i + 1] = (double)(a >> 16); 699 700/* 0x0074 159 */ srl %l3,16,%o1 701/* 0x0078 158 */ and %l3,%i4,%o3 702/* 0x007c */ st %o3,[%sp+2335] 703/* 0x0080 159 */ srl %l4,16,%g4 704/* 0x0084 158 */ and %l4,%i4,%o0 705/* 0x0088 */ st %o0,[%sp+2303] 706/* 0x008c 159 */ add %l7,8,%l7 707/* 0x0090 */ st %o1,[%sp+2271] 708/* 0x0094 */ st %g4,[%sp+2239] 709/* 0x0098 157 */ prefetch [%i2+456],22 710/* 0x009c */ prefetch [%i2+520],22 711 .L900000506: 712/* 0x00a0 157 */ prefetch [%l2+536],22 713/* 0x00a4 159 */ add %i5,2,%i5 714/* 0x00a8 157 */ add %l2,32,%l2 715/* 0x00ac */ ld [%l7],%g2 716/* 0x00b0 159 */ cmp %i5,%i0 717/* 0x00b4 */ add %l7,8,%l7 718/* 0x00b8 158 */ ld [%sp+2335],%f9 719/* 0x00bc 159 */ add %i3,4,%i3 720/* 0x00c0 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f8 721/* 0x00c4 159 */ ld [%sp+2271],%f11 722/* 0x00c8 158 */ and %g2,%i4,%g3 723/* 0x00cc 159 */ fmovs %f8,%f10 724/* 0x00d0 158 */ st %g3,[%sp+2335] 725/* 0x00d4 */ fsubd %f8,%f20,%f28 726/* 0x00d8 */ std %f28,[%l2-32] 727/* 0x00dc 159 */ srl %g2,16,%g1 728/* 0x00e0 */ st %g1,[%sp+2271] 729/* 0x00e4 */ fsubd %f10,%f20,%f30 730/* 0x00e8 */ std %f30,[%l2-24] 731/* 0x00ec 157 */ ld [%l7-4],%l0 732/* 0x00f0 158 */ ld [%sp+2303],%f13 733/* 0x00f4 */ ld [%l1+%lo(___const_seg_900000501+8)],%f12 734/* 0x00f8 159 */ ld [%sp+2239],%f15 735/* 0x00fc 158 */ and %l0,%i4,%l5 736/* 0x0100 159 */ fmovs %f12,%f14 737/* 0x0104 158 */ st %l5,[%sp+2303] 738/* 0x0108 */ fsubd %f12,%f20,%f44 739/* 0x010c */ std %f44,[%l2-16] 740/* 0x0110 159 */ srl %l0,16,%o5 741/* 0x0114 */ st %o5,[%sp+2239] 742/* 0x0118 */ fsubd %f14,%f20,%f46 743/* 0x011c */ ble,pt %icc,.L900000506 744/* 0x0120 */ std %f46,[%l2-8] 745 .L900000509: 746/* 0x0124 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f0 747/* 0x0128 159 */ cmp %i5,%l6 748/* 0x012c */ add %i3,4,%i3 749/* 0x0130 158 */ ld [%sp+2335],%f1 750/* 0x0134 */ ld [%sp+2303],%f5 751/* 0x0138 159 */ fmovs %f0,%f2 752/* 0x013c */ ld [%sp+2271],%f3 753/* 0x0140 158 */ fmovs %f0,%f4 754/* 
0x0144 159 */ ld [%sp+2239],%f7 755/* 0x0148 */ fmovs %f0,%f6 756/* 0x014c 158 */ fsubd %f0,%f20,%f22 757/* 0x0150 */ std %f22,[%l2] 758/* 0x0154 159 */ fsubd %f2,%f20,%f24 759/* 0x0158 */ std %f24,[%l2+8] 760/* 0x015c 158 */ fsubd %f4,%f20,%f26 761/* 0x0160 */ std %f26,[%l2+16] 762/* 0x0164 159 */ fsubd %f6,%f20,%f20 763/* 0x0168 */ bg,pn %icc,.L77000272 764/* 0x016c */ std %f20,[%l2+24] 765 .L77000279: 766/* 0x0170 157 */ ld [%l7],%l2 767 .L900000510: 768/* 0x0174 158 */ and %l2,%i4,%o4 769/* 0x0178 */ st %o4,[%sp+2399] 770/* 0x017c 159 */ srl %l2,16,%o2 771/* 0x0180 */ st %o2,[%sp+2367] 772/* 0x0184 158 */ sethi %hi(___const_seg_900000501+8),%l1 773/* 0x0188 */ sra %i3,0,%i0 774/* 0x018c */ ld [%l1+%lo(___const_seg_900000501+8)],%f16 775/* 0x0190 */ sllx %i0,3,%o1 776/* 0x0194 159 */ add %i3,1,%o3 777/* 0x0198 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20 778/* 0x019c 159 */ sra %o3,0,%l3 779/* 0x01a0 */ add %i5,1,%i5 780/* 0x01a4 158 */ ld [%sp+2399],%f17 781/* 0x01a8 159 */ sllx %l3,3,%o0 782/* 0x01ac */ add %l7,4,%l7 783/* 0x01b0 */ fmovs %f16,%f18 784/* 0x01b4 */ cmp %i5,%l6 785/* 0x01b8 */ add %i3,2,%i3 786/* 0x01bc 158 */ fsubd %f16,%f20,%f48 787/* 0x01c0 */ std %f48,[%i2+%o1] 788/* 0x01c4 159 */ ld [%sp+2367],%f19 789/* 0x01c8 */ fsubd %f18,%f20,%f50 790/* 0x01cc */ std %f50,[%i2+%o0] 791/* 0x01d0 */ ble,a,pt %icc,.L900000510 792/* 0x01d4 157 */ ld [%l7],%l2 793 .L77000272: 794/* 0x01d8 159 */ ret ! Result = 795/* 0x01dc */ restore %g0,%g0,%g0 796/* 0x01e0 0 */ .type conv_i32_to_d16,2 797/* 0x01e0 0 */ .size conv_i32_to_d16,(.-conv_i32_to_d16) 798 799 .section ".text",#alloc,#execinstr 800/* 000000 0 */ .align 8 801! 802! CONSTANT POOL 803! /* Conversion constant read by conv_i32_to_d32_and_d16: the double 2^52 (high word 0x43300000 = 1127219200, low word 0), then a separate copy of that high word at offset 8. */ 804 ___const_seg_900000601: 805/* 000000 0 */ .word 1127219200,0 806/* 0x0008 */ .word 1127219200 807/* 0x000c 0 */ .type ___const_seg_900000601,1 808/* 0x000c 0 */ .size ___const_seg_900000601,(.-___const_seg_900000601) 809/* 0x000c 0 */ .align 8 810/* 0x0010 */ .skip 24 811/* 0x0028 */ .align 32 812 813! 160 ! 
} 814! 161 !} 815! 163 !#ifdef RF_INLINE_MACROS 816! 165 !void 817! 166 !i16_to_d16_and_d32x4(const double *, /* 1/(2^16) */ 818! 167 ! const double *, /* 2^16 */ 819! 168 ! const double *, /* 0 */ 820! 169 ! double *, /* result16 */ 821! 170 ! double *, /* result32 */ 822! 171 ! float *); /* source - should be unsigned int* */ 823! 172 ! /* converted to float* */ 824! 174 !#else 825! 177 !/* ARGSUSED */ 826! 178 !static void 827! 179 !i16_to_d16_and_d32x4(const double *dummy1, /* 1/(2^16) */ 828! 180 ! const double *dummy2, /* 2^16 */ 829! 181 ! const double *dummy3, /* 0 */ 830! 182 ! double *result16, 831! 183 ! double *result32, 832! 184 ! float *src) /* source - should be unsigned int* */ 833! 185 ! /* converted to float* */ 834! 186 !{ 835! 187 ! uint32_t *i32; 836! 188 ! uint32_t a, b, c, d; 837! 190 ! i32 = (uint32_t *)src; 838! 191 ! a = i32[0]; 839! 192 ! b = i32[1]; 840! 193 ! c = i32[2]; 841! 194 ! d = i32[3]; 842! 195 ! result16[0] = (double)(a & 0xffff); 843! 196 ! result16[1] = (double)(a >> 16); 844! 197 ! result32[0] = (double)a; 845! 198 ! result16[2] = (double)(b & 0xffff); 846! 199 ! result16[3] = (double)(b >> 16); 847! 200 ! result32[1] = (double)b; 848! 201 ! result16[4] = (double)(c & 0xffff); 849! 202 ! result16[5] = (double)(c >> 16); 850! 203 ! result32[2] = (double)c; 851! 204 ! result16[6] = (double)(d & 0xffff); 852! 205 ! result16[7] = (double)(d >> 16); 853! 206 ! result32[3] = (double)d; 854! 207 !} 855! 209 !#endif 856! 212 !void 857! 213 !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len) 858! 214 !{ 859 860! 861! SUBROUTINE conv_i32_to_d32_and_d16 862! 863! OFFSET SOURCE LINE LABEL INSTRUCTION 864 865 .global conv_i32_to_d32_and_d16 866 conv_i32_to_d32_and_d16: 867/* 000000 214 */ save %sp,-368,%sp 868 869! 215 ! int i; 870! 216 ! uint32_t a; 871! 218 !#pragma pipeloop(0) 872! 219 ! for (i = 0; i < len - 3; i += 4) { 873! 220 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero, 874! 221 ! 
&(d16[2*i]), &(d32[i]), 875! 222 ! (float *)(&(i32[i]))); 876! 223 ! } 877! 224 ! for (; i < len; i++) { 878! 225 ! a = i32[i]; 879! 226 ! d32[i] = (double)(i32[i]); 880! 227 ! d16[2 * i] = (double)(a & 0xffff); 881! 228 ! d16[2 * i + 1] = (double)(a >> 16); 882 883/* 0x0004 228 */ sub %i3,3,%i4 884/* 0x0008 219 */ cmp %i4,0 885/* 0x000c */ ble,pn %icc,.L77000289 886/* 0x0010 */ or %g0,0,%i5 887 .L77000306: 888/* 0x0014 222 */ sethi %hi(Zero),%g3 889/* 0x0018 */ sethi %hi(TwoToMinus16),%g2 890/* 0x001c */ sethi %hi(TwoTo16),%o5 891/* 0x0020 */ ldd [%g3+%lo(Zero)],%f2 892/* 0x0024 219 */ sub %i3,4,%o4 893/* 0x0028 */ or %g0,0,%o3 894/* 0x002c */ or %g0,%i0,%l6 895/* 0x0030 */ or %g0,%i2,%l5 896 .L900000615: 897/* 0x0034 222 */ fmovd %f2,%f26 898/* 0x0038 */ ld [%l5],%f27 899/* 0x003c */ sra %o3,0,%o0 900/* 0x0040 */ add %i5,4,%i5 901/* 0x0044 */ fmovd %f2,%f28 902/* 0x0048 */ ld [%l5+4],%f29 903/* 0x004c */ sllx %o0,3,%g5 904/* 0x0050 */ cmp %i5,%o4 905/* 0x0054 */ fmovd %f2,%f30 906/* 0x0058 */ ld [%l5+8],%f31 907/* 0x005c */ add %i1,%g5,%g4 908/* 0x0060 */ add %o3,8,%o3 909/* 0x0064 */ ld [%l5+12],%f3 910/* 0x0068 */ fxtod %f26,%f26 911/* 0x006c */ ldd [%g2+%lo(TwoToMinus16)],%f32 912/* 0x0070 */ fxtod %f28,%f28 913/* 0x0074 */ add %l5,16,%l5 914/* 0x0078 */ fxtod %f30,%f30 915/* 0x007c */ ldd [%o5+%lo(TwoTo16)],%f34 916/* 0x0080 */ fxtod %f2,%f2 917/* 0x0084 */ std %f2,[%l6+24] 918/* 0x0088 */ fmuld %f32,%f26,%f36 919/* 0x008c */ std %f26,[%l6] 920/* 0x0090 */ fmuld %f32,%f28,%f38 921/* 0x0094 */ std %f28,[%l6+8] 922/* 0x0098 */ fmuld %f32,%f30,%f40 923/* 0x009c */ std %f30,[%l6+16] 924/* 0x00a0 */ fmuld %f32,%f2,%f42 925/* 0x00a4 */ add %l6,32,%l6 926/* 0x00a8 */ fdtox %f36,%f36 927/* 0x00ac */ fdtox %f38,%f38 928/* 0x00b0 */ fdtox %f40,%f40 929/* 0x00b4 */ fdtox %f42,%f42 930/* 0x00b8 */ fxtod %f36,%f36 931/* 0x00bc */ std %f36,[%g4+8] 932/* 0x00c0 */ fxtod %f38,%f38 933/* 0x00c4 */ std %f38,[%g4+24] 934/* 0x00c8 */ fxtod %f40,%f40 935/* 0x00cc */ std 
%f40,[%g4+40] 936/* 0x00d0 */ fxtod %f42,%f42 937/* 0x00d4 */ std %f42,[%g4+56] 938/* 0x00d8 */ fmuld %f36,%f34,%f36 939/* 0x00dc */ fmuld %f38,%f34,%f38 940/* 0x00e0 */ fmuld %f40,%f34,%f40 941/* 0x00e4 */ fmuld %f42,%f34,%f42 942/* 0x00e8 */ fsubd %f26,%f36,%f36 943/* 0x00ec */ std %f36,[%i1+%g5] 944/* 0x00f0 */ fsubd %f28,%f38,%f38 945/* 0x00f4 */ std %f38,[%g4+16] 946/* 0x00f8 */ fsubd %f30,%f40,%f40 947/* 0x00fc */ std %f40,[%g4+32] 948/* 0x0100 */ fsubd %f2,%f42,%f42 949/* 0x0104 */ std %f42,[%g4+48] 950/* 0x0108 */ ble,a,pt %icc,.L900000615 951/* 0x010c */ ldd [%g3+%lo(Zero)],%f2 952 .L77000289: 953/* 0x0110 224 */ cmp %i5,%i3 954/* 0x0114 */ bge,pn %icc,.L77000294 955/* 0x0118 */ sethi %hi(0xfc00),%l0 956 .L77000307: 957/* 0x011c 224 */ sra %i5,0,%l2 958/* 0x0120 */ sll %i5,1,%i4 959/* 0x0124 */ sllx %l2,3,%l1 960/* 0x0128 */ sllx %l2,2,%o1 961/* 0x012c 225 */ sub %i3,%i5,%l3 962/* 0x0130 224 */ add %l0,1023,%l0 963/* 0x0134 */ add %l1,%i0,%l1 964/* 0x0138 */ add %o1,%i2,%i2 965/* 0x013c 225 */ cmp %l3,5 966/* 0x0140 */ bl,pn %icc,.L77000291 967/* 0x0144 0 */ sethi %hi(___const_seg_900000601),%l7 968 .L900000612: 969/* 0x0148 225 */ prefetch [%l1],22 970/* 0x014c */ prefetch [%l1+64],22 971/* 0x0150 */ sra %i4,0,%l6 972/* 0x0154 226 */ sethi %hi(___const_seg_900000601+8),%l2 973/* 0x0158 225 */ prefetch [%l1+128],22 974/* 0x015c */ add %l6,-2,%l5 975/* 0x0160 */ sub %i3,3,%i0 976/* 0x0164 */ prefetch [%l1+192],22 977/* 0x0168 */ sllx %l5,3,%o4 978/* 0x016c 228 */ add %i5,1,%i5 979/* 0x0170 225 */ add %i1,%o4,%o3 980/* 0x0174 */ or %g0,%i3,%g1 981/* 0x0178 */ ld [%i2],%l4 982/* 0x017c */ prefetch [%o3+16],22 983/* 0x0180 */ add %o3,16,%l3 984/* 0x0184 228 */ add %i2,4,%i2 985/* 0x0188 225 */ prefetch [%o3+80],22 986/* 0x018c 228 */ srl %l4,16,%o1 987/* 0x0190 227 */ and %l4,%l0,%o0 988/* 0x0194 225 */ prefetch [%o3+144],22 989/* 0x0198 228 */ st %o1,[%sp+2271] 990/* 0x019c 227 */ st %o0,[%sp+2239] 991/* 0x01a0 226 */ ldd 
[%l7+%lo(___const_seg_900000601)],%f32 992/* 0x01a4 228 */ ld [%l2+%lo(___const_seg_900000601+8)],%f0 993/* 0x01a8 225 */ prefetch [%o3+208],22 994/* 0x01ac */ prefetch [%o3+272],22 995/* 0x01b0 */ prefetch [%o3+336],22 996 .L900000610: 997/* 0x01b4 225 */ prefetch [%l1+192],22 998/* 0x01b8 228 */ add %i5,4,%i5 999/* 0x01bc 225 */ add %l3,64,%l3 1000/* 0x01c0 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f8 1001/* 0x01c4 228 */ cmp %i5,%i0 1002/* 0x01c8 225 */ ld [%i2],%g5 1003/* 0x01cc 228 */ add %i2,16,%i2 1004/* 0x01d0 */ add %l1,32,%l1 1005/* 0x01d4 */ add %i4,8,%i4 1006/* 0x01d8 226 */ ld [%i2-20],%f7 1007/* 0x01dc 228 */ srl %g5,16,%i3 1008/* 0x01e0 226 */ fmovs %f8,%f6 1009/* 0x01e4 228 */ st %i3,[%sp+2335] 1010/* 0x01e8 227 */ and %g5,%l0,%g4 1011/* 0x01ec */ st %g4,[%sp+2303] 1012/* 0x01f0 226 */ fsubd %f6,%f32,%f40 1013/* 0x01f4 227 */ ld [%sp+2239],%f9 1014/* 0x01f8 228 */ ld [%sp+2271],%f1 1015/* 0x01fc */ fmovs %f8,%f12 1016/* 0x0200 226 */ std %f40,[%l1-32] 1017/* 0x0204 227 */ fsubd %f8,%f32,%f42 1018/* 0x0208 */ std %f42,[%l3-64] 1019/* 0x020c 228 */ fsubd %f0,%f32,%f44 1020/* 0x0210 */ std %f44,[%l3-56] 1021/* 0x0214 227 */ fmovs %f12,%f10 1022/* 0x0218 225 */ ld [%i2-12],%g2 1023/* 0x021c 226 */ ld [%i2-16],%f1 1024/* 0x0220 228 */ srl %g2,16,%g3 1025/* 0x0224 226 */ fmovs %f12,%f0 1026/* 0x0228 225 */ prefetch [%l3+320],22 1027/* 0x022c 228 */ st %g3,[%sp+2271] 1028/* 0x0230 227 */ and %g2,%l0,%l6 1029/* 0x0234 */ st %l6,[%sp+2239] 1030/* 0x0238 226 */ fsubd %f0,%f32,%f46 1031/* 0x023c 227 */ ld [%sp+2303],%f11 1032/* 0x0240 228 */ ld [%sp+2335],%f13 1033/* 0x0244 */ fmovs %f12,%f18 1034/* 0x0248 226 */ std %f46,[%l1-24] 1035/* 0x024c 227 */ fsubd %f10,%f32,%f48 1036/* 0x0250 */ std %f48,[%l3-48] 1037/* 0x0254 228 */ fsubd %f12,%f32,%f50 1038/* 0x0258 */ std %f50,[%l3-40] 1039/* 0x025c 227 */ fmovs %f18,%f16 1040/* 0x0260 225 */ ld [%i2-8],%o5 1041/* 0x0264 226 */ ld [%i2-12],%f15 1042/* 0x0268 228 */ srl %o5,16,%l5 1043/* 0x026c 226 */ fmovs 
%f18,%f14 1044/* 0x0270 228 */ st %l5,[%sp+2335] 1045/* 0x0274 227 */ and %o5,%l0,%o4 1046/* 0x0278 */ st %o4,[%sp+2303] 1047/* 0x027c 226 */ fsubd %f14,%f32,%f52 1048/* 0x0280 227 */ ld [%sp+2239],%f17 1049/* 0x0284 228 */ ld [%sp+2271],%f19 1050/* 0x0288 225 */ prefetch [%l3+352],22 1051/* 0x028c 228 */ fmovs %f18,%f24 1052/* 0x0290 226 */ std %f52,[%l1-16] 1053/* 0x0294 227 */ fsubd %f16,%f32,%f54 1054/* 0x0298 */ std %f54,[%l3-32] 1055/* 0x029c 228 */ fsubd %f18,%f32,%f56 1056/* 0x02a0 */ std %f56,[%l3-24] 1057/* 0x02a4 227 */ fmovs %f24,%f22 1058/* 0x02a8 225 */ ld [%i2-4],%l4 1059/* 0x02ac 226 */ ld [%i2-8],%f21 1060/* 0x02b0 228 */ srl %l4,16,%o3 1061/* 0x02b4 226 */ fmovs %f24,%f20 1062/* 0x02b8 228 */ st %o3,[%sp+2271] 1063/* 0x02bc 227 */ and %l4,%l0,%o2 1064/* 0x02c0 */ st %o2,[%sp+2239] 1065/* 0x02c4 226 */ fsubd %f20,%f32,%f58 1066/* 0x02c8 227 */ ld [%sp+2303],%f23 1067/* 0x02cc 228 */ ld [%sp+2335],%f25 1068/* 0x02d0 */ fmovs %f24,%f0 1069/* 0x02d4 226 */ std %f58,[%l1-8] 1070/* 0x02d8 227 */ fsubd %f22,%f32,%f60 1071/* 0x02dc */ std %f60,[%l3-16] 1072/* 0x02e0 228 */ fsubd %f24,%f32,%f62 1073/* 0x02e4 */ bl,pt %icc,.L900000610 1074/* 0x02e8 */ std %f62,[%l3-8] 1075 .L900000613: 1076/* 0x02ec 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4 1077/* 0x02f0 228 */ add %l1,8,%l1 1078/* 0x02f4 */ cmp %i5,%g1 1079/* 0x02f8 226 */ ld [%i2-4],%f3 1080/* 0x02fc 225 */ or %g0,%g1,%i3 1081/* 0x0300 228 */ add %i4,2,%i4 1082/* 0x0304 227 */ ld [%sp+2239],%f5 1083/* 0x0308 226 */ fmovs %f4,%f2 1084/* 0x030c 228 */ ld [%sp+2271],%f1 1085/* 0x0310 226 */ fsubd %f2,%f32,%f34 1086/* 0x0314 */ std %f34,[%l1-8] 1087/* 0x0318 227 */ fsubd %f4,%f32,%f36 1088/* 0x031c */ std %f36,[%l3] 1089/* 0x0320 228 */ fsubd %f0,%f32,%f38 1090/* 0x0324 */ bge,pn %icc,.L77000294 1091/* 0x0328 */ std %f38,[%l3+8] 1092 .L77000291: 1093/* 0x032c 225 */ ld [%i2],%o2 1094 .L900000614: 1095/* 0x0330 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32 1096/* 0x0334 228 */ srl %o2,16,%l3 1097/* 
0x0338 227 */ sra %i4,0,%i0 1098/* 0x033c 228 */ st %l3,[%sp+2367] 1099/* 0x0340 227 */ and %o2,%l0,%g1 1100/* 0x0344 226 */ sethi %hi(___const_seg_900000601+8),%l2 1101/* 0x0348 227 */ st %g1,[%sp+2399] 1102/* 0x034c */ sllx %i0,3,%o0 1103/* 0x0350 228 */ add %i4,1,%l4 1104/* 0x0354 226 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4 1105/* 0x0358 228 */ sra %l4,0,%o1 1106/* 0x035c */ add %i5,1,%i5 1107/* 0x0360 226 */ ld [%i2],%f5 1108/* 0x0364 228 */ sllx %o1,3,%g5 1109/* 0x0368 */ cmp %i5,%i3 1110/* 0x036c */ ld [%sp+2367],%f9 1111/* 0x0370 */ add %i2,4,%i2 1112/* 0x0374 */ add %i4,2,%i4 1113/* 0x0378 227 */ fmovs %f4,%f6 1114/* 0x037c 226 */ fsubd %f4,%f32,%f44 1115/* 0x0380 */ std %f44,[%l1] 1116/* 0x0384 227 */ ld [%sp+2399],%f7 1117/* 0x0388 228 */ fmovs %f6,%f8 1118/* 0x038c */ add %l1,8,%l1 1119/* 0x0390 */ fsubd %f8,%f32,%f48 1120/* 0x0394 227 */ fsubd %f6,%f32,%f46 1121/* 0x0398 */ std %f46,[%i1+%o0] 1122/* 0x039c 228 */ std %f48,[%i1+%g5] 1123/* 0x03a0 */ bl,a,pt %icc,.L900000614 1124/* 0x03a4 225 */ ld [%i2],%o2 1125 .L77000294: 1126/* 0x03a8 222 */ ret ! Result = 1127/* 0x03ac */ restore %g0,%g0,%g0 1128/* 0x03b0 0 */ .type conv_i32_to_d32_and_d16,2 1129/* 0x03b0 0 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) 1130 1131 .section ".text",#alloc,#execinstr 1132/* 000000 0 */ .align 32 1133 1134! 229 ! } 1135! 230 !} 1136! 232 !extern long long c1, c2, c3, c4; 1137! 234 !static void 1138! 235 !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len) 1139! 236 !{ 1140 1141! 1142! SUBROUTINE adjust_montf_result 1143! 1144! OFFSET SOURCE LINE LABEL INSTRUCTION 1145 1146 /* adjust_montf_result(i32 = %o0, nint = %o1, len = %o2): leaf routine (returns via retl, no save). Per the C listing interleaved below: when i32[len] is nonzero, or when i32[0..len-1] compares >= nint[0..len-1] scanning from the top word down, nint is subtracted from i32 in place, 32 bits at a time, with the borrow carried in a 64-bit accumulator. Register roles set up here: %g2 = sign-extended len, %o4 = i32, %o5 = nint. */ adjust_montf_result: 1147/* 000000 236 */ sra %o2,0,%g2 1148/* 0x0004 */ or %g0,%o0,%o4 1149 1150! 237 ! int64_t acc; 1151! 238 ! int i; 1152! 240 ! if (i32[len] > 0) { 1153 1154/* 0x0008 240 */ sllx %g2,2,%g3 1155/* 0x000c */ ld [%o0+%g3],%o0 1156/* 0x0010 */ cmp %o0,0 1157/* 0x0014 */ bleu,pn %icc,.L77000316 1158/* 0x0018 236 */ or %g0,%o1,%o5 1159 1160! 241 ! 
i = -1; 1161 1162 .L77000315: 1163/* 0x001c 241 */ sub %g2,1,%g3 1164/* 0x0020 */ ba .L900000712 1165/* 0x0024 249 */ cmp %g2,0 1166 1167! 242 ! } else { 1168! 243 ! for (i = len - 1; i >= 0; i--) { 1169 1170 /* i32[len] == 0: walk both arrays downward from word len-1 looking for the first mismatch; %g3 = len-1, %o3 = running index */ .L77000316: 1171/* 0x0028 243 */ subcc %g2,1,%g3 1172/* 0x002c */ bneg,pn %icc,.L77000340 1173/* 0x0030 */ or %g0,%g3,%o3 1174 .L77000348: 1175/* 0x0034 243 */ sra %g3,0,%o1 1176/* 0x0038 */ sllx %o1,2,%g1 1177 1178! 244 ! if (i32[i] != nint[i]) break; 1179 1180/* 0x003c 244 */ ld [%g1+%o5],%g4 1181/* 0x0040 243 */ add %g1,%o4,%o2 1182/* 0x0044 */ add %g1,%o5,%o1 1183 .L900000713: 1184/* 0x0048 244 */ ld [%o2],%o0 1185/* 0x004c */ cmp %o0,%g4 1186/* 0x0050 */ bne,pn %icc,.L77000324 1187/* 0x0054 */ sub %o2,4,%o2 1188 .L77000320: 1189/* 0x0058 244 */ sub %o1,4,%o1 1190/* 0x005c */ subcc %o3,1,%o3 1191/* 0x0060 */ bpos,a,pt %icc,.L900000713 1192/* 0x0064 */ ld [%o1],%g4 1193 .L900000706: 1194/* 0x0068 244 */ ba .L900000712 1195/* 0x006c 249 */ cmp %g2,0 1196 /* mismatch at index %o3: return without subtracting when i32[i] <= nint[i] (unsigned compare) */ .L77000324: 1197/* 0x0070 244 */ sra %o3,0,%o0 1198/* 0x0074 */ sllx %o0,2,%g1 1199/* 0x0078 */ ld [%o5+%g1],%o3 1200/* 0x007c */ ld [%o4+%g1],%g5 1201/* 0x0080 */ cmp %g5,%o3 1202/* 0x0084 */ bleu,pt %icc,.L77000332 1203/* 0x0088 */ nop 1204 1205! 245 ! } 1206! 246 ! } 1207! 247 ! if ((i < 0) || (i32[i] > nint[i])) { 1208! 248 ! acc = 0; 1209! 249 ! for (i = 0; i < len; i++) { 1210 1211 .L77000340: 1212/* 0x008c 249 */ cmp %g2,0 1213 .L900000712: 1214/* 0x0090 249 */ ble,pn %icc,.L77000332 1215/* 0x0094 250 */ or %g0,%g2,%o3 1216 /* subtraction: index %o0 starts at 0 and the carry register %g2 at 0; when len (%o3) < 10 the unrolled path is skipped in favour of the one-word loop at .L77000341 */ .L77000347: 1217/* 0x0098 249 */ or %g0,0,%o0 1218 1219! 250 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]); 1220 1221/* 0x009c 250 */ cmp %o3,10 1222/* 0x00a0 */ bl,pn %icc,.L77000341 1223/* 0x00a4 249 */ or %g0,0,%g2 1224 .L900000709: 1225/* 0x00a8 250 */ prefetch [%o4],22 1226/* 0x00ac */ prefetch [%o4+64],22 1227 1228! 251 ! i32[i] = acc & 0xffffffff; 1229! 252 ! 
acc = acc >> 32; 1230 1231/* 0x00b0 252 */ add %o5,4,%o1 1232/* 0x00b4 */ add %o4,8,%o2 1233/* 0x00b8 250 */ prefetch [%o4+128],22 1234/* 0x00bc */ sub %o3,8,%o5 1235/* 0x00c0 */ or %g0,2,%o0 1236/* 0x00c4 */ prefetch [%o4+192],22 1237/* 0x00c8 */ prefetch [%o4+256],22 1238/* 0x00cc */ prefetch [%o4+320],22 1239/* 0x00d0 */ prefetch [%o4+384],22 1240/* 0x00d4 */ ld [%o2-4],%g5 1241/* 0x00d8 */ prefetch [%o2+440],22 1242/* 0x00dc */ prefetch [%o2+504],22 1243/* 0x00e0 */ ld [%o4],%g4 1244/* 0x00e4 */ ld [%o1-4],%o4 1245/* 0x00e8 */ sub %g4,%o4,%o3 1246/* 0x00ec 251 */ st %o3,[%o2-8] 1247/* 0x00f0 252 */ srax %o3,32,%g4 1248 /* unrolled body: eight words subtracted and stored per pass (index %o0 += 8, both pointers += 32 bytes); srax by 32 turns each 64-bit difference into the borrow for the next word */ .L900000707: 1249/* 0x00f4 252 */ add %o0,8,%o0 1250/* 0x00f8 */ add %o2,32,%o2 1251/* 0x00fc 250 */ ld [%o1],%g1 1252/* 0x0100 */ prefetch [%o2+496],22 1253/* 0x0104 252 */ cmp %o0,%o5 1254/* 0x0108 */ add %o1,32,%o1 1255/* 0x010c 250 */ sub %g5,%g1,%g5 1256/* 0x0110 */ add %g5,%g4,%o4 1257/* 0x0114 */ ld [%o2-32],%g4 1258/* 0x0118 251 */ st %o4,[%o2-36] 1259/* 0x011c 252 */ srax %o4,32,%g1 1260/* 0x0120 250 */ ld [%o1-28],%o3 1261/* 0x0124 */ sub %g4,%o3,%g2 1262/* 0x0128 */ add %g2,%g1,%g5 1263/* 0x012c */ ld [%o2-28],%o3 1264/* 0x0130 251 */ st %g5,[%o2-32] 1265/* 0x0134 252 */ srax %g5,32,%g4 1266/* 0x0138 250 */ ld [%o1-24],%o4 1267/* 0x013c */ sub %o3,%o4,%g1 1268/* 0x0140 */ add %g1,%g4,%g2 1269/* 0x0144 */ ld [%o2-24],%o3 1270/* 0x0148 251 */ st %g2,[%o2-28] 1271/* 0x014c 252 */ srax %g2,32,%g5 1272/* 0x0150 250 */ ld [%o1-20],%o4 1273/* 0x0154 */ sub %o3,%o4,%g4 1274/* 0x0158 */ add %g4,%g5,%g1 1275/* 0x015c */ ld [%o2-20],%o4 1276/* 0x0160 251 */ st %g1,[%o2-24] 1277/* 0x0164 252 */ srax %g1,32,%o3 1278/* 0x0168 250 */ ld [%o1-16],%g2 1279/* 0x016c */ sub %o4,%g2,%g5 1280/* 0x0170 */ add %g5,%o3,%g1 1281/* 0x0174 */ ld [%o2-16],%g4 1282/* 0x0178 251 */ st %g1,[%o2-20] 1283/* 0x017c 252 */ srax %g1,32,%o4 1284/* 0x0180 250 */ ld [%o1-12],%g2 1285/* 0x0184 */ sub %g4,%g2,%o3 1286/* 0x0188 */ add %o3,%o4,%g5 1287/* 0x018c */ ld 
[%o2-12],%g2 1288/* 0x0190 251 */ st %g5,[%o2-16] 1289/* 0x0194 252 */ srax %g5,32,%g4 1290/* 0x0198 250 */ ld [%o1-8],%g1 1291/* 0x019c */ sub %g2,%g1,%o4 1292/* 0x01a0 */ add %o4,%g4,%o3 1293/* 0x01a4 */ ld [%o2-8],%g2 1294/* 0x01a8 251 */ st %o3,[%o2-12] 1295/* 0x01ac 252 */ srax %o3,32,%g5 1296/* 0x01b0 250 */ ld [%o1-4],%g1 1297/* 0x01b4 */ sub %g2,%g1,%g4 1298/* 0x01b8 */ add %g4,%g5,%o4 1299/* 0x01bc */ ld [%o2-4],%g5 1300/* 0x01c0 251 */ st %o4,[%o2-8] 1301/* 0x01c4 252 */ ble,pt %icc,.L900000707 1302/* 0x01c8 */ srax %o4,32,%g4 1303 .L900000710: 1304/* 0x01cc 250 */ ld [%o1],%o3 1305/* 0x01d0 252 */ add %o1,4,%o5 1306/* 0x01d4 250 */ or %g0,%o2,%o4 1307/* 0x01d8 252 */ cmp %o0,%g3 1308/* 0x01dc 250 */ sub %g5,%o3,%g2 1309/* 0x01e0 */ add %g2,%g4,%g1 1310/* 0x01e4 251 */ st %g1,[%o2-4] 1311/* 0x01e8 252 */ bg,pn %icc,.L77000332 1312/* 0x01ec */ srax %g1,32,%g2 1313 /* tail loop: one word per pass, repeated while index %o0 <= %g3 (len-1) */ .L77000341: 1314/* 0x01f0 250 */ ld [%o4],%g5 1315 .L900000711: 1316/* 0x01f4 250 */ ld [%o5],%o2 1317/* 0x01f8 */ add %g2,%g5,%g4 1318/* 0x01fc 252 */ add %o0,1,%o0 1319/* 0x0200 */ cmp %o0,%g3 1320/* 0x0204 */ add %o5,4,%o5 1321/* 0x0208 250 */ sub %g4,%o2,%o1 1322/* 0x020c 251 */ st %o1,[%o4] 1323/* 0x0210 252 */ srax %o1,32,%g2 1324/* 0x0214 */ add %o4,4,%o4 1325/* 0x0218 */ ble,a,pt %icc,.L900000711 1326/* 0x021c 250 */ ld [%o4],%g5 1327 .L77000332: 1328/* 0x0220 252 */ retl ! Result = 1329/* 0x0224 */ nop 1330/* 0x0228 0 */ .type adjust_montf_result,2 1331/* 0x0228 0 */ .size adjust_montf_result,(.-adjust_montf_result) 1332 1333 .section ".text",#alloc,#execinstr 1334/* 000000 0 */ .align 32 1335 1336! 253 ! } 1337! 254 ! } 1338! 255 !} 1339! 257 !/************* 1340! 258 !static void 1341! 259 !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len) 1342! 260 !{ 1343! 261 ! int64_t acc; 1344! 262 ! int i; 1345! 264 ! c4++; 1346! 265 ! 1347! 266 ! if (i32[len] > 0) { 1348! 267 ! i = -1; 1349! 268 ! c1++; 1350! 269 ! } else { 1351! 270 ! for (i = len - 1; i >= 0; i++) { 1352! 271 ! 
if (i32[i] != nint[i]) break; 1353! 272 ! c2++; 1354! 273 ! } 1355! 274 ! } 1356! 275 ! if ((i < 0) || (i32[i] > nint[i])) { 1357! 276 ! c3++; 1358! 277 ! acc = 0; 1359! 278 ! for (i = 0; i < len; i++) { 1360! 279 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]); 1361! 280 ! i32[i] = acc & 0xffffffff; 1362! 281 ! acc = acc >> 32; 1363! 282 ! } 1364! 283 ! } 1365! 284 !} 1366! 285 !uint32_t saveresult[1000]; 1367! 286 !void printarray(char *name, uint32_t *arr, int len) 1368! 287 !{ 1369! 288 ! int i, j; 1370! 289 ! uint64_t tmp; 1371! 291 ! printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2); 1372! 292 ! for(i=j=0; i<len; i+=2,j+=2){ 1373! 293 ! if(j == 6){ 1374! 294 ! printf("\n"); 1375! 295 ! j=0; 1376! 296 ! } 1377! 297 ! tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]); 1378! 298 ! printf("0x%016llx",tmp); 1379! 299 ! if((i/2)!=(((len+1)/2)-1))printf(","); 1380! 300 ! if(j!=4)printf(" "); 1381! 301 ! } 1382! 302 ! if(j!=0) printf("\n"); 1383! 303 ! printf("};\n"); 1384! 304 !} 1385! 305 !**************/ 1386! 308 !/* 1387! 309 ! * the lengths of the input arrays should be at least the following: 1388! 310 ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] 1389! 311 ! * all of them should be different from one another 1390! 312 ! */ 1391! 313 !void mont_mulf_noconv(uint32_t *result, 1392! 314 ! double *dm1, double *dm2, double *dt, 1393! 315 ! double *dn, uint32_t *nint, 1394! 316 ! int nlen, double dn0) 1395! 317 !{ 1396 1397! 1398! SUBROUTINE mont_mulf_noconv 1399! 1400! OFFSET SOURCE LINE LABEL INSTRUCTION 1401 1402 .global mont_mulf_noconv 1403 mont_mulf_noconv: 1404/* 000000 317 */ save %sp,-176,%sp 1405/* 0x0004 */ ldx [%fp+2223],%g1 1406/* 0x0008 0 */ sethi %hi(Zero),%l5 1407/* 0x000c 317 */ or %g0,%i2,%l0 1408 1409! 318 ! int i, j, jj; 1410! 319 ! double digit, m2j, a, b; 1411! 320 ! double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; 1412! 322 ! pdm1 = &(dm1[0]); 1413! 323 ! pdm2 = &(dm2[0]); 1414! 324 ! 
pdn = &(dn[0]); 1415! 325 ! pdm2[2 * nlen] = Zero; 1416 1417/* 0x0010 325 */ ldd [%l5+%lo(Zero)],%f0 1418/* 0x0014 317 */ or %g0,%i0,%i2 1419/* 0x0018 325 */ sll %g1,1,%o3 1420 1421! 327 ! if (nlen != 16) { 1422 1423/* 0x001c 327 */ cmp %g1,16 1424/* 0x0020 325 */ sra %o3,0,%i0 1425/* 0x0024 */ sllx %i0,3,%o0 1426/* 0x0028 317 */ or %g0,%i5,%i0 1427/* 0x002c 327 */ bne,pn %icc,.L77000476 1428/* 0x0030 325 */ std %f0,[%l0+%o0] 1429 .L77000488: 1430/* 0x0034 0 */ sethi %hi(TwoToMinus16),%o2 1431/* 0x0038 0 */ sethi %hi(TwoTo16),%l3 1432 1433! 328 ! for (i = 0; i < 4 * nlen + 2; i++) 1434! 329 ! dt[i] = Zero; 1435! 330 ! a = dt[0] = pdm1[0] * pdm2[0]; 1436! 331 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); 1437! 333 ! pdtj = &(dt[0]); 1438! 334 ! for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) { 1439! 335 ! m2j = pdm2[j]; 1440! 336 ! a = pdtj[0] + pdn[0] * digit; 1441! 337 ! b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16; 1442! 338 ! pdtj[1] = b; 1443! 340 !#pragma pipeloop(0) 1444! 341 ! for (i = 1; i < nlen; i++) { 1445! 342 ! pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit; 1446! 343 ! } 1447! 344 ! if (jj == 15) { 1448! 345 ! cleanup(dt, j / 2 + 1, 2 * nlen + 1); 1449! 346 ! jj = 0; 1450! 347 ! } 1451! 349 ! digit = mod(lower32(b, Zero) * dn0, 1452! 350 ! TwoToMinus16, TwoTo16); 1453! 351 ! } 1454! 352 ! } else { 1455! 353 ! a = dt[0] = pdm1[0] * pdm2[0]; 1456 1457/* 0x003c 353 */ ldd [%i1],%f40 1458 1459! 355 ! dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] = 1460! 356 ! dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = 1461! 357 ! dt[54] = dt[53] = dt[52] = dt[51] = dt[50] = 1462! 358 ! dt[49] = dt[48] = dt[47] = dt[46] = dt[45] = 1463! 359 ! dt[44] = dt[43] = dt[42] = dt[41] = dt[40] = 1464! 360 ! dt[39] = dt[38] = dt[37] = dt[36] = dt[35] = 1465! 361 ! dt[34] = dt[33] = dt[32] = dt[31] = dt[30] = 1466! 362 ! dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = 1467! 363 ! dt[24] = dt[23] = dt[22] = dt[21] = dt[20] = 1468! 364 ! 
dt[19] = dt[18] = dt[17] = dt[16] = dt[15] = 1469! 365 ! dt[14] = dt[13] = dt[12] = dt[11] = dt[10] = 1470! 366 ! dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] = 1471! 367 ! dt[3] = dt[2] = dt[1] = Zero; 1472! 369 ! pdn_0 = pdn[0]; 1473! 370 ! pdm1_0 = pdm1[0]; 1474! 372 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); 1475! 373 ! pdtj = &(dt[0]); 1476 1477/* 0x0040 373 */ or %g0,%i3,%o3 1478 1479! 375 ! for (j = 0; j < 32; j++, pdtj++) { 1480 1481/* 0x0044 375 */ or %g0,0,%l1 1482/* 0x0048 353 */ ldd [%l0],%f42 1483/* 0x004c 372 */ ldd [%o2+%lo(TwoToMinus16)],%f44 1484/* 0x0050 */ ldd [%l3+%lo(TwoTo16)],%f46 1485/* 0x0054 367 */ std %f0,[%i3+8] 1486/* 0x0058 353 */ fmuld %f40,%f42,%f38 1487/* 0x005c */ std %f38,[%i3] 1488/* 0x0060 367 */ std %f0,[%i3+16] 1489/* 0x0064 */ std %f0,[%i3+24] 1490/* 0x0068 */ std %f0,[%i3+32] 1491/* 0x006c 372 */ fdtox %f38,%f4 1492/* 0x0070 367 */ std %f0,[%i3+40] 1493/* 0x0074 */ std %f0,[%i3+48] 1494/* 0x0078 */ std %f0,[%i3+56] 1495/* 0x007c 372 */ fmovs %f0,%f4 1496/* 0x0080 367 */ std %f0,[%i3+64] 1497/* 0x0084 */ std %f0,[%i3+72] 1498/* 0x0088 372 */ fxtod %f4,%f52 1499/* 0x008c 367 */ std %f0,[%i3+80] 1500/* 0x0090 */ std %f0,[%i3+88] 1501/* 0x0094 */ std %f0,[%i3+96] 1502/* 0x0098 */ std %f0,[%i3+104] 1503/* 0x009c 372 */ fmuld %f52,%f14,%f60 1504/* 0x00a0 367 */ std %f0,[%i3+112] 1505/* 0x00a4 */ std %f0,[%i3+120] 1506/* 0x00a8 */ std %f0,[%i3+128] 1507/* 0x00ac */ std %f0,[%i3+136] 1508/* 0x00b0 372 */ fmuld %f60,%f44,%f62 1509/* 0x00b4 367 */ std %f0,[%i3+144] 1510/* 0x00b8 */ std %f0,[%i3+152] 1511/* 0x00bc */ std %f0,[%i3+160] 1512/* 0x00c0 */ std %f0,[%i3+168] 1513/* 0x00c4 372 */ fdtox %f62,%f32 1514/* 0x00c8 367 */ std %f0,[%i3+176] 1515/* 0x00cc */ std %f0,[%i3+184] 1516/* 0x00d0 */ std %f0,[%i3+192] 1517/* 0x00d4 */ std %f0,[%i3+200] 1518/* 0x00d8 372 */ fxtod %f32,%f50 1519/* 0x00dc 367 */ std %f0,[%i3+208] 1520/* 0x00e0 */ std %f0,[%i3+216] 1521/* 0x00e4 */ std %f0,[%i3+224] 1522/* 0x00e8 */ std 
%f0,[%i3+232] 1523/* 0x00ec 372 */ fmuld %f50,%f46,%f34 1524/* 0x00f0 367 */ std %f0,[%i3+240] 1525/* 0x00f4 */ std %f0,[%i3+248] 1526/* 0x00f8 */ std %f0,[%i3+256] 1527/* 0x00fc */ std %f0,[%i3+264] 1528/* 0x0100 372 */ fsubd %f60,%f34,%f40 1529/* 0x0104 367 */ std %f0,[%i3+272] 1530/* 0x0108 */ std %f0,[%i3+280] 1531/* 0x010c */ std %f0,[%i3+288] 1532/* 0x0110 */ std %f0,[%i3+296] 1533/* 0x0114 */ std %f0,[%i3+304] 1534/* 0x0118 */ std %f0,[%i3+312] 1535/* 0x011c */ std %f0,[%i3+320] 1536/* 0x0120 */ std %f0,[%i3+328] 1537/* 0x0124 */ std %f0,[%i3+336] 1538/* 0x0128 */ std %f0,[%i3+344] 1539/* 0x012c */ std %f0,[%i3+352] 1540/* 0x0130 */ std %f0,[%i3+360] 1541/* 0x0134 */ std %f0,[%i3+368] 1542/* 0x0138 375 */ sub %g1,1,%l3 1543/* 0x013c */ add %i3,8,%o7 1544/* 0x0140 367 */ std %f0,[%i3+376] 1545/* 0x0144 */ std %f0,[%i3+384] 1546/* 0x0148 */ std %f0,[%i3+392] 1547/* 0x014c */ std %f0,[%i3+400] 1548/* 0x0150 */ std %f0,[%i3+408] 1549/* 0x0154 */ std %f0,[%i3+416] 1550/* 0x0158 */ std %f0,[%i3+424] 1551/* 0x015c */ std %f0,[%i3+432] 1552/* 0x0160 */ std %f0,[%i3+440] 1553/* 0x0164 */ std %f0,[%i3+448] 1554/* 0x0168 */ std %f0,[%i3+456] 1555/* 0x016c */ std %f0,[%i3+464] 1556/* 0x0170 */ std %f0,[%i3+472] 1557/* 0x0174 */ std %f0,[%i3+480] 1558/* 0x0178 */ std %f0,[%i3+488] 1559/* 0x017c */ std %f0,[%i3+496] 1560/* 0x0180 */ std %f0,[%i3+504] 1561/* 0x0184 */ std %f0,[%i3+512] 1562/* 0x0188 */ std %f0,[%i3+520] 1563 1564!BEGIN HAND CODED PART 1565 1566! 
cheetah schedule, no even-odd trick 1567 1568 1569 add %i3,%g0,%o5 1570 1571 fmovd %f40,%f0 1572 fmovd %f14,%f2 1573 fmovd %f44,%f8 1574 sethi %hi(TwoTo32),%l5 1575 fmovd %f46,%f10 1576 sethi %hi(TwoToMinus32),%g5 1577 ldd [%i3],%f6 1578 ldd [%l0],%f4 1579 1580 ldd [%i1],%f40 1581 ldd [%i1+8],%f42 1582 ldd [%i1+16],%f52 1583 ldd [%i1+48],%f54 1584 ldd [%i1+56],%f36 1585 ldd [%i1+64],%f56 1586 ldd [%i1+104],%f48 1587 ldd [%i1+112],%f58 1588 1589 ldd [%i4],%f44 1590 ldd [%i4+8],%f46 1591 ldd [%i4+104],%f50 1592 ldd [%i4+112],%f60 1593 1594 1595 .L99999999: 1596!1 1597 ldd [%i1+24],%f20 1598 fmuld %f0,%f44,%f12 1599!2 1600 ldd [%i4+24],%f22 1601 fmuld %f42,%f4,%f16 1602!3 1603 ldd [%i1+40],%f24 1604 fmuld %f46,%f0,%f18 1605!4 1606 ldd [%i4+40],%f26 1607 fmuld %f20,%f4,%f20 1608!5 1609 ldd [%l0+8],%f38 1610 faddd %f12,%f6,%f12 1611 fmuld %f22,%f0,%f22 1612!6 1613 add %l0,8,%l0 1614 ldd [%i4+56],%f30 1615 fmuld %f24,%f4,%f24 1616!7 1617 ldd [%i1+72],%f32 1618 faddd %f16,%f18,%f16 1619 fmuld %f26,%f0,%f26 1620!8 1621 ldd [%i3+16],%f18 1622 fmuld %f40,%f38,%f14 1623!9 1624 ldd [%i4+72],%f34 1625 faddd %f20,%f22,%f20 1626 fmuld %f8,%f12,%f12 1627!10 1628 ldd [%i3+48],%f22 1629 fmuld %f36,%f4,%f28 1630!11 1631 ldd [%i3+8],%f6 1632 faddd %f16,%f18,%f16 1633 fmuld %f30,%f0,%f30 1634!12 1635 std %f16,[%i3+16] 1636 faddd %f24,%f26,%f24 1637 fmuld %f32,%f4,%f32 1638!13 1639 ldd [%i3+80],%f26 1640 faddd %f12,%f14,%f12 1641 fmuld %f34,%f0,%f34 1642!14 1643 ldd [%i1+88],%f16 1644 faddd %f20,%f22,%f20 1645!15 1646 ldd [%i4+88],%f18 1647 faddd %f28,%f30,%f28 1648!16 1649 ldd [%i3+112],%f30 1650 faddd %f32,%f34,%f32 1651!17 1652 ldd [%i3+144],%f34 1653 faddd %f12,%f6,%f6 1654 fmuld %f16,%f4,%f16 1655!18 1656 std %f20,[%i3+48] 1657 faddd %f24,%f26,%f24 1658 fmuld %f18,%f0,%f18 1659!19 1660 std %f24,[%i3+80] 1661 faddd %f28,%f30,%f28 1662 fmuld %f48,%f4,%f20 1663!20 1664 std %f28,[%i3+112] 1665 faddd %f32,%f34,%f32 1666 fmuld %f50,%f0,%f22 1667!21 1668 ldd [%i1+120],%f24 1669 fdtox 
%f6,%f12 1670!22 1671 std %f32,[%i3+144] 1672 faddd %f16,%f18,%f16 1673!23 1674 ldd [%i4+120],%f26 1675!24 1676 ldd [%i3+176],%f18 1677 faddd %f20,%f22,%f20 1678 fmuld %f24,%f4,%f24 1679!25 1680 ldd [%i4+16],%f30 1681 fmovs %f11,%f12 1682!26 1683 ldd [%i1+32],%f32 1684 fmuld %f26,%f0,%f26 1685!27 1686 ldd [%i4+32],%f34 1687 fmuld %f52,%f4,%f28 1688!28 1689 ldd [%i3+208],%f22 1690 faddd %f16,%f18,%f16 1691 fmuld %f30,%f0,%f30 1692!29 1693 std %f16,[%i3+176] 1694 fxtod %f12,%f12 1695 fmuld %f32,%f4,%f32 1696!30 1697 ldd [%i4+48],%f18 1698 faddd %f24,%f26,%f24 1699 fmuld %f34,%f0,%f34 1700!31 1701 ldd [%i3+240],%f26 1702 faddd %f20,%f22,%f20 1703!32 1704 std %f20,[%i3+208] 1705 faddd %f28,%f30,%f28 1706 fmuld %f54,%f4,%f16 1707!33 1708 ldd [%i3+32],%f30 1709 fmuld %f12,%f2,%f14 1710!34 1711 ldd [%i4+64],%f22 1712 faddd %f32,%f34,%f32 1713 fmuld %f18,%f0,%f18 1714!35 1715 ldd [%i3+64],%f34 1716 faddd %f24,%f26,%f24 1717!36 1718 std %f24,[%i3+240] 1719 faddd %f28,%f30,%f28 1720 fmuld %f56,%f4,%f20 1721!37 1722 std %f28,[%i3+32] 1723 fmuld %f14,%f8,%f12 1724!38 1725 ldd [%i1+80],%f24 1726 faddd %f32,%f34,%f34 ! yes, tmp52! 1727 fmuld %f22,%f0,%f22 1728!39 1729 ldd [%i4+80],%f26 1730 faddd %f16,%f18,%f16 1731!40 1732 ldd [%i1+96],%f28 1733 fmuld %f58,%f4,%f32 1734!41 1735 ldd [%i4+96],%f30 1736 fdtox %f12,%f12 1737 fmuld %f24,%f4,%f24 1738!42 1739 std %f34,[%i3+64] ! yes, tmp52! 
1740 faddd %f20,%f22,%f20 1741 fmuld %f26,%f0,%f26 1742!43 1743 ldd [%i3+96],%f18 1744 fmuld %f28,%f4,%f28 1745!44 1746 ldd [%i3+128],%f22 1747 fmovd %f38,%f4 1748 fmuld %f30,%f0,%f30 1749!45 1750 fxtod %f12,%f12 1751 fmuld %f60,%f0,%f34 1752!46 1753 add %i3,8,%i3 1754 faddd %f24,%f26,%f24 1755!47 1756 ldd [%i3+160-8],%f26 1757 faddd %f16,%f18,%f16 1758!48 1759 std %f16,[%i3+96-8] 1760 faddd %f28,%f30,%f28 1761!49 1762 ldd [%i3+192-8],%f30 1763 faddd %f32,%f34,%f32 1764 fmuld %f12,%f10,%f12 1765!50 1766 ldd [%i3+224-8],%f34 1767 faddd %f20,%f22,%f20 1768!51 1769 std %f20,[%i3+128-8] 1770 faddd %f24,%f26,%f24 1771!52 1772 add %l1,1,%l1 1773 std %f24,[%i3+160-8] 1774 faddd %f28,%f30,%f28 1775!53 1776 cmp %l1,15 1777 std %f28,[%i3+192-8] 1778 fsubd %f14,%f12,%f0 1779!54 1780 faddd %f32,%f34,%f32 1781 ble,pt %icc,.L99999999 1782 std %f32,[%i3+224-8] 1783 1784 1785! 1786 ldd [%g5+%lo(TwoToMinus32)],%f8 1787! 1788 ldd [%i3+8],%f16 1789! 1790 ldd [%i3+16],%f20 1791! 1792 fmuld %f8,%f16,%f18 1793 ldd [%i3+24],%f24 1794! 1795 fmuld %f8,%f20,%f22 1796 ldd [%i3+32],%f28 1797! 1798 fmuld %f8,%f24,%f26 1799 ldd [%l5+%lo(TwoTo32)],%f10 1800! 1801 fmuld %f8,%f28,%f30 1802! 1803 fdtox %f18,%f18 1804! 1805 fdtox %f22,%f22 1806! 1807 fdtox %f26,%f26 1808 ldd [%i3+40],%f32 1809! 1810 fdtox %f30,%f30 1811 ldd [%i3+48],%f56 1812! 1813 fxtod %f18,%f18 1814 fmuld %f8,%f32,%f34 1815 ldd [%i3+56],%f36 1816! 1817 fxtod %f22,%f22 1818 fmuld %f8,%f56,%f58 1819 ldd [%i3+64],%f38 1820! 1821 fxtod %f26,%f26 1822 fmuld %f8,%f36,%f60 1823! 1824 fxtod %f30,%f30 1825 fmuld %f8,%f38,%f62 1826! 1827 fdtox %f34,%f34 1828 fmuld %f10,%f18,%f40 1829! 1830 fdtox %f58,%f58 1831 fmuld %f10,%f22,%f42 1832! 1833 fdtox %f60,%f60 1834 fmuld %f10,%f26,%f44 1835! 1836 fdtox %f62,%f62 1837 fmuld %f10,%f30,%f46 1838! 1839 fxtod %f34,%f34 1840! 1841 fxtod %f58,%f58 1842! 1843 fxtod %f60,%f60 1844! 1845 fxtod %f62,%f62 1846! 1847 fsubd %f16,%f40,%f40 1848 fmuld %f10,%f34,%f48 1849! 
1850 fsubd %f20,%f42,%f42 1851 fmuld %f10,%f58,%f50 1852! 1853 fsubd %f24,%f44,%f44 1854 fmuld %f10,%f60,%f52 1855! 1856 fsubd %f28,%f46,%f46 1857 fmuld %f10,%f62,%f54 1858! 1859 std %f40,[%i3+8] 1860! 1861 std %f42,[%i3+16] 1862! 1863 faddd %f18,%f44,%f44 1864 std %f44,[%i3+24] 1865! 1866 faddd %f22,%f46,%f46 1867 std %f46,[%i3+32] 1868! 1869 1870 1871 1872 fsubd %f32,%f48,%f48 1873 ldd [%i3+64+8],%f16 1874! 1875 fsubd %f56,%f50,%f50 1876 ldd [%i3+64+16],%f20 1877! 1878 fsubd %f36,%f52,%f52 1879 ldd [%i3+64+24],%f24 1880! 1881 fsubd %f38,%f54,%f54 1882 ldd [%i3+64+32],%f28 1883! 1884 faddd %f26,%f48,%f48 1885 fmuld %f8,%f16,%f18 1886 std %f48,[%i3+40] 1887! 1888 faddd %f30,%f50,%f50 1889 fmuld %f8,%f20,%f22 1890 std %f50,[%i3+48] 1891! 1892 faddd %f34,%f52,%f52 1893 fmuld %f8,%f24,%f26 1894 std %f52,[%i3+56] 1895! 1896 faddd %f58,%f54,%f54 1897 fmuld %f8,%f28,%f30 1898 std %f54,[%i3+64] 1899! 1900 1901 1902 fdtox %f18,%f18 1903! 1904 fdtox %f22,%f22 1905! 1906 fdtox %f26,%f26 1907 ldd [%i3+64+40],%f32 1908! 1909 fdtox %f30,%f30 1910 ldd [%i3+64+48],%f56 1911! 1912 fxtod %f18,%f18 1913 fmuld %f8,%f32,%f34 1914 ldd [%i3+64+56],%f36 1915! 1916 fxtod %f22,%f22 1917 fmuld %f8,%f56,%f58 1918 ldd [%i3+64+64],%f38 1919! 1920 fxtod %f26,%f26 1921 fmuld %f8,%f36,%f12 1922! 1923 fxtod %f30,%f30 1924 fmuld %f8,%f38,%f14 1925! 1926 fdtox %f34,%f34 1927 fmuld %f10,%f18,%f40 1928! 1929 fdtox %f58,%f58 1930 fmuld %f10,%f22,%f42 1931! 1932 fdtox %f12,%f12 1933 fmuld %f10,%f26,%f44 1934! 1935 fdtox %f14,%f14 1936 fmuld %f10,%f30,%f46 1937! 1938 fxtod %f34,%f34 1939! 1940 fxtod %f58,%f58 1941! 1942 fxtod %f12,%f12 1943! 1944 fxtod %f14,%f14 1945! 1946 fsubd %f16,%f40,%f40 1947 fmuld %f10,%f34,%f48 1948! 1949 fsubd %f20,%f42,%f42 1950 fmuld %f10,%f58,%f50 1951! 1952 fsubd %f24,%f44,%f44 1953 fmuld %f10,%f12,%f52 1954! 1955 fsubd %f28,%f46,%f46 1956 fmuld %f10,%f14,%f54 1957! 1958 faddd %f60,%f40,%f40 1959 std %f40,[%i3+64+8] 1960! 
1961 faddd %f62,%f42,%f42 1962 std %f42,[%i3+64+16] 1963! 1964 faddd %f18,%f44,%f44 1965 std %f44,[%i3+64+24] 1966! 1967 faddd %f22,%f46,%f46 1968 std %f46,[%i3+64+32] 1969! 1970 1971 1972 1973 fsubd %f32,%f48,%f48 1974 ldd [%i3+64+64+8],%f16 1975! 1976 fsubd %f56,%f50,%f50 1977 ldd [%i3+64+64+16],%f20 1978! 1979 fsubd %f36,%f52,%f52 1980 ldd [%i3+64+64+24],%f24 1981! 1982 fsubd %f38,%f54,%f54 1983 ldd [%i3+64+64+32],%f28 1984! 1985 faddd %f26,%f48,%f48 1986 fmuld %f8,%f16,%f18 1987 std %f48,[%i3+64+40] 1988! 1989 faddd %f30,%f50,%f50 1990 fmuld %f8,%f20,%f22 1991 std %f50,[%i3+64+48] 1992! 1993 faddd %f34,%f52,%f52 1994 fmuld %f8,%f24,%f26 1995 std %f52,[%i3+64+56] 1996! 1997 faddd %f58,%f54,%f54 1998 fmuld %f8,%f28,%f30 1999 std %f54,[%i3+64+64] 2000! 2001 2002 2003 2004 fdtox %f18,%f18 2005! 2006 fdtox %f22,%f22 2007! 2008 fdtox %f26,%f26 2009 ldd [%i3+64+64+40],%f32 2010! 2011 fdtox %f30,%f30 2012 ldd [%i3+64+64+48],%f56 2013! 2014 fxtod %f18,%f18 2015 fmuld %f8,%f32,%f34 2016 ldd [%i3+64+64+56],%f36 2017! 2018 fxtod %f22,%f22 2019 fmuld %f8,%f56,%f58 2020 ldd [%i3+64+64+64],%f38 2021! 2022 fxtod %f26,%f26 2023 fmuld %f8,%f36,%f60 2024! 2025 fxtod %f30,%f30 2026 fmuld %f8,%f38,%f62 2027! 2028 fdtox %f34,%f34 2029 fmuld %f10,%f18,%f40 2030! 2031 fdtox %f58,%f58 2032 fmuld %f10,%f22,%f42 2033! 2034 fdtox %f60,%f60 2035 fmuld %f10,%f26,%f44 2036! 2037 fdtox %f62,%f62 2038 fmuld %f10,%f30,%f46 2039! 2040 fxtod %f34,%f34 2041! 2042 fxtod %f58,%f58 2043! 2044 fxtod %f60,%f60 2045! 2046 fxtod %f62,%f62 2047! 2048 fsubd %f16,%f40,%f40 2049 fmuld %f10,%f34,%f48 2050! 2051 fsubd %f20,%f42,%f42 2052 fmuld %f10,%f58,%f50 2053! 2054 fsubd %f24,%f44,%f44 2055 fmuld %f10,%f60,%f52 2056! 2057 fsubd %f28,%f46,%f46 2058 fmuld %f10,%f62,%f54 2059! 2060 faddd %f12,%f40,%f40 2061 std %f40,[%i3+64+64+8] 2062! 2063 faddd %f14,%f42,%f42 2064 std %f42,[%i3+64+64+16] 2065! 2066 faddd %f18,%f44,%f44 2067 std %f44,[%i3+64+64+24] 2068! 
2069 faddd %f22,%f46,%f46 2070 std %f46,[%i3+64+64+32] 2071! 2072 2073 2074 fsubd %f32,%f48,%f48 2075 ldd [%i3+64+64+64+8],%f16 2076! 2077 fsubd %f56,%f50,%f50 2078 ldd [%i3+64+64+64+16],%f20 2079! 2080 fsubd %f36,%f52,%f52 2081 ldd [%i3+64+64+64+24],%f24 2082! 2083 fsubd %f38,%f54,%f54 2084 ldd [%i3+64+64+64+32],%f28 2085! 2086 faddd %f26,%f48,%f48 2087 fmuld %f8,%f16,%f18 2088 std %f48,[%i3+64+64+40] 2089! 2090 faddd %f30,%f50,%f50 2091 fmuld %f8,%f20,%f22 2092 std %f50,[%i3+64+64+48] 2093! 2094 faddd %f34,%f52,%f52 2095 fmuld %f8,%f24,%f26 2096 std %f52,[%i3+64+64+56] 2097! 2098 faddd %f58,%f54,%f54 2099 fmuld %f8,%f28,%f30 2100 std %f54,[%i3+64+64+64] 2101! 2102 2103 2104 fdtox %f18,%f18 2105! 2106 fdtox %f22,%f22 2107! 2108 fdtox %f26,%f26 2109 ldd [%i3+64+64+64+40],%f32 2110! 2111 fdtox %f30,%f30 2112 ldd [%i3+64+64+64+48],%f56 2113! 2114 fxtod %f18,%f18 2115 fmuld %f8,%f32,%f34 2116 ldd [%i3+64+64+64+56],%f36 2117! 2118 fxtod %f22,%f22 2119 fmuld %f8,%f56,%f58 2120 ldd [%i3+64+64+64+64],%f38 2121! 2122 fxtod %f26,%f26 2123 fmuld %f8,%f36,%f12 2124! 2125 fxtod %f30,%f30 2126 fmuld %f8,%f38,%f14 2127! 2128 fdtox %f34,%f34 2129 fmuld %f10,%f18,%f40 2130! 2131 fdtox %f58,%f58 2132 fmuld %f10,%f22,%f42 2133! 2134 fdtox %f12,%f12 2135 fmuld %f10,%f26,%f44 2136! 2137 fdtox %f14,%f14 2138 fmuld %f10,%f30,%f46 2139! 2140 sethi %hi(TwoToMinus16),%g5 2141 fxtod %f34,%f34 2142! 2143 sethi %hi(TwoTo16),%l5 2144 fxtod %f58,%f58 2145! 2146 fxtod %f12,%f12 2147! 2148 fxtod %f14,%f14 2149! 2150 fsubd %f16,%f40,%f16 2151 fmuld %f10,%f34,%f48 2152 ldd [%g5+%lo(TwoToMinus16)],%f8 2153! 2154 fsubd %f20,%f42,%f20 2155 fmuld %f10,%f58,%f50 2156 ldd [%i1],%f40 ! should be %f40 2157! 2158 fsubd %f24,%f44,%f24 2159 fmuld %f10,%f12,%f52 2160 ldd [%i1+8],%f42 ! should be %f42 2161! 2162 fsubd %f28,%f46,%f28 2163 fmuld %f10,%f14,%f54 2164 ldd [%i4],%f44 ! should be %f44 2165! 2166 faddd %f60,%f16,%f16 2167 std %f16,[%i3+64+64+64+8] 2168! 
2169 faddd %f62,%f20,%f20 2170 std %f20,[%i3+64+64+64+16] 2171! 2172 faddd %f18,%f24,%f24 2173 std %f24,[%i3+64+64+64+24] 2174! 2175 faddd %f22,%f28,%f28 2176 std %f28,[%i3+64+64+64+32] 2177! 2178 fsubd %f32,%f48,%f32 2179 ldd [%i4+8],%f46 ! should be %f46 2180! 2181 fsubd %f56,%f50,%f56 2182 ldd [%i1+104],%f48 ! should be %f48 2183! 2184 fsubd %f36,%f52,%f36 2185 ldd [%i4+104],%f50 ! should be %f50 2186! 2187 fsubd %f38,%f54,%f38 2188 ldd [%i1+16],%f52 ! should be %f52 2189! 2190 faddd %f26,%f32,%f32 2191 std %f32,[%i3+64+64+64+40] 2192! 2193 faddd %f30,%f56,%f56 2194 std %f56,[%i3+64+64+64+48] 2195! 2196 faddd %f34,%f36,%f36 2197 std %f36,[%i3+64+64+64+56] 2198! 2199 faddd %f58,%f38,%f38 2200 std %f38,[%i3+64+64+64+64] 2201! 2202 std %f12,[%i3+64+64+64+64+8] 2203! 2204 std %f14,[%i3+64+64+64+64+16] 2205! 2206 2207 ldd [%l5+%lo(TwoTo16)],%f10 2208 ldd [%i1+48],%f54 2209 ldd [%i1+56],%f36 2210 ldd [%i1+64],%f56 2211 ldd [%i1+112],%f58 2212 2213 ldd [%i4+104],%f50 2214 ldd [%i4+112],%f60 2215 2216 2217 .L99999998: 2218!1 2219 ldd [%i1+24],%f20 2220 fmuld %f0,%f44,%f12 2221!2 2222 ldd [%i4+24],%f22 2223 fmuld %f42,%f4,%f16 2224!3 2225 ldd [%i1+40],%f24 2226 fmuld %f46,%f0,%f18 2227!4 2228 ldd [%i4+40],%f26 2229 fmuld %f20,%f4,%f20 2230!5 2231 ldd [%l0+8],%f38 2232 faddd %f12,%f6,%f12 2233 fmuld %f22,%f0,%f22 2234!6 2235 add %l0,8,%l0 2236 ldd [%i4+56],%f30 2237 fmuld %f24,%f4,%f24 2238!7 2239 ldd [%i1+72],%f32 2240 faddd %f16,%f18,%f16 2241 fmuld %f26,%f0,%f26 2242!8 2243 ldd [%i3+16],%f18 2244 fmuld %f40,%f38,%f14 2245!9 2246 ldd [%i4+72],%f34 2247 faddd %f20,%f22,%f20 2248 fmuld %f8,%f12,%f12 2249!10 2250 ldd [%i3+48],%f22 2251 fmuld %f36,%f4,%f28 2252!11 2253 ldd [%i3+8],%f6 2254 faddd %f16,%f18,%f16 2255 fmuld %f30,%f0,%f30 2256!12 2257 std %f16,[%i3+16] 2258 faddd %f24,%f26,%f24 2259 fmuld %f32,%f4,%f32 2260!13 2261 ldd [%i3+80],%f26 2262 faddd %f12,%f14,%f12 2263 fmuld %f34,%f0,%f34 2264!14 2265 ldd [%i1+88],%f16 2266 faddd %f20,%f22,%f20 2267!15 2268 ldd 
[%i4+88],%f18 2269 faddd %f28,%f30,%f28 2270!16 2271 ldd [%i3+112],%f30 2272 faddd %f32,%f34,%f32 2273!17 2274 ldd [%i3+144],%f34 2275 faddd %f12,%f6,%f6 2276 fmuld %f16,%f4,%f16 2277!18 2278 std %f20,[%i3+48] 2279 faddd %f24,%f26,%f24 2280 fmuld %f18,%f0,%f18 2281!19 2282 std %f24,[%i3+80] 2283 faddd %f28,%f30,%f28 2284 fmuld %f48,%f4,%f20 2285!20 2286 std %f28,[%i3+112] 2287 faddd %f32,%f34,%f32 2288 fmuld %f50,%f0,%f22 2289!21 2290 ldd [%i1+120],%f24 2291 fdtox %f6,%f12 2292!22 2293 std %f32,[%i3+144] 2294 faddd %f16,%f18,%f16 2295!23 2296 ldd [%i4+120],%f26 2297!24 2298 ldd [%i3+176],%f18 2299 faddd %f20,%f22,%f20 2300 fmuld %f24,%f4,%f24 2301!25 2302 ldd [%i4+16],%f30 2303 fmovs %f11,%f12 2304!26 2305 ldd [%i1+32],%f32 2306 fmuld %f26,%f0,%f26 2307!27 2308 ldd [%i4+32],%f34 2309 fmuld %f52,%f4,%f28 2310!28 2311 ldd [%i3+208],%f22 2312 faddd %f16,%f18,%f16 2313 fmuld %f30,%f0,%f30 2314!29 2315 std %f16,[%i3+176] 2316 fxtod %f12,%f12 2317 fmuld %f32,%f4,%f32 2318!30 2319 ldd [%i4+48],%f18 2320 faddd %f24,%f26,%f24 2321 fmuld %f34,%f0,%f34 2322!31 2323 ldd [%i3+240],%f26 2324 faddd %f20,%f22,%f20 2325!32 2326 std %f20,[%i3+208] 2327 faddd %f28,%f30,%f28 2328 fmuld %f54,%f4,%f16 2329!33 2330 ldd [%i3+32],%f30 2331 fmuld %f12,%f2,%f14 2332!34 2333 ldd [%i4+64],%f22 2334 faddd %f32,%f34,%f32 2335 fmuld %f18,%f0,%f18 2336!35 2337 ldd [%i3+64],%f34 2338 faddd %f24,%f26,%f24 2339!36 2340 std %f24,[%i3+240] 2341 faddd %f28,%f30,%f28 2342 fmuld %f56,%f4,%f20 2343!37 2344 std %f28,[%i3+32] 2345 fmuld %f14,%f8,%f12 2346!38 2347 ldd [%i1+80],%f24 2348 faddd %f32,%f34,%f34 ! yes, tmp52! 2349 fmuld %f22,%f0,%f22 2350!39 2351 ldd [%i4+80],%f26 2352 faddd %f16,%f18,%f16 2353!40 2354 ldd [%i1+96],%f28 2355 fmuld %f58,%f4,%f32 2356!41 2357 ldd [%i4+96],%f30 2358 fdtox %f12,%f12 2359 fmuld %f24,%f4,%f24 2360!42 2361 std %f34,[%i3+64] ! yes, tmp52! 
2362 faddd %f20,%f22,%f20 2363 fmuld %f26,%f0,%f26 2364!43 2365 ldd [%i3+96],%f18 2366 fmuld %f28,%f4,%f28 2367!44 2368 ldd [%i3+128],%f22 2369 fmovd %f38,%f4 2370 fmuld %f30,%f0,%f30 2371!45 2372 fxtod %f12,%f12 2373 fmuld %f60,%f0,%f34 2374!46 2375 add %i3,8,%i3 2376 faddd %f24,%f26,%f24 2377!47 2378 ldd [%i3+160-8],%f26 2379 faddd %f16,%f18,%f16 2380!48 2381 std %f16,[%i3+96-8] 2382 faddd %f28,%f30,%f28 2383!49 2384 ldd [%i3+192-8],%f30 2385 faddd %f32,%f34,%f32 2386 fmuld %f12,%f10,%f12 2387!50 2388 ldd [%i3+224-8],%f34 2389 faddd %f20,%f22,%f20 2390!51 2391 std %f20,[%i3+128-8] 2392 faddd %f24,%f26,%f24 2393!52 2394 add %l1,1,%l1 2395 std %f24,[%i3+160-8] 2396 faddd %f28,%f30,%f28 2397!53 2398 cmp %l1,31 2399 std %f28,[%i3+192-8] 2400 fsubd %f14,%f12,%f0 2401!54 2402 faddd %f32,%f34,%f32 2403 ble,pt %icc,.L99999998 2404 std %f32,[%i3+224-8] 2405!55 2406 std %f6,[%i3] 2407 2408 add %o5,%g0,%i3 2409 2410 2411!END HAND CODED PART 2412 .L900000828: 2413/* 0x03e4 405 */ ba .L900000852 2414/* 0x03e8 409 */ ldx [%i3+%o0],%l1 2415 2416! 406 ! } 2417! 407 ! } 2418! 409 ! conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1); 2419! 411 !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/ 2420! 413 ! 
adjust_montf_result(result, nint, nlen); 2421 2422 .L77000476: 2423/* 0x03ec 413 */ sll %g1,2,%l3 2424/* 0x03f0 0 */ sethi %hi(TwoTo16),%g5 2425/* 0x03f4 413 */ add %l3,2,%l2 2426/* 0x03f8 328 */ cmp %l2,0 2427/* 0x03fc */ ble,pn %icc,.L77000482 2428/* 0x0400 0 */ sethi %hi(TwoToMinus16),%o2 2429 .L77000514: 2430/* 0x0404 329 */ add %l3,2,%l2 2431/* 0x0408 328 */ add %l3,1,%o4 2432/* 0x040c */ or %g0,0,%l3 2433/* 0x0410 329 */ cmp %l2,8 2434/* 0x0414 */ bl,pn %icc,.L77000477 2435/* 0x0418 328 */ or %g0,%i3,%l1 2436 .L900000831: 2437/* 0x041c 329 */ prefetch [%i3],22 2438/* 0x0420 */ sub %o4,7,%l4 2439/* 0x0424 */ or %g0,0,%l3 2440/* 0x0428 */ or %g0,%i3,%l1 2441 .L900000829: 2442/* 0x042c 329 */ prefetch [%l1+528],22 2443/* 0x0430 */ std %f0,[%l1] 2444/* 0x0434 */ add %l3,8,%l3 2445/* 0x0438 */ add %l1,64,%l1 2446/* 0x043c */ std %f0,[%l1-56] 2447/* 0x0440 */ cmp %l3,%l4 2448/* 0x0444 */ std %f0,[%l1-48] 2449/* 0x0448 */ std %f0,[%l1-40] 2450/* 0x044c */ prefetch [%l1+496],22 2451/* 0x0450 */ std %f0,[%l1-32] 2452/* 0x0454 */ std %f0,[%l1-24] 2453/* 0x0458 */ std %f0,[%l1-16] 2454/* 0x045c */ ble,pt %icc,.L900000829 2455/* 0x0460 */ std %f0,[%l1-8] 2456 .L900000832: 2457/* 0x0464 329 */ cmp %l3,%o4 2458/* 0x0468 */ bg,pn %icc,.L77000482 2459/* 0x046c */ nop 2460 .L77000477: 2461/* 0x0470 329 */ add %l3,1,%l3 2462 .L900000851: 2463/* 0x0474 329 */ std %f0,[%l1] 2464/* 0x0478 */ cmp %l3,%o4 2465/* 0x047c */ add %l1,8,%l1 2466/* 0x0480 */ ble,pt %icc,.L900000851 2467/* 0x0484 */ add %l3,1,%l3 2468 .L77000482: 2469/* 0x0488 330 */ ldd [%i1],%f40 2470/* 0x048c 334 */ cmp %o3,0 2471/* 0x0490 */ sub %g1,1,%l3 2472/* 0x0494 330 */ ldd [%l0],%f42 2473/* 0x0498 331 */ ldd [%o2+%lo(TwoToMinus16)],%f36 2474/* 0x049c */ ldd [%g5+%lo(TwoTo16)],%f38 2475/* 0x04a0 330 */ fmuld %f40,%f42,%f52 2476/* 0x04a4 331 */ fdtox %f52,%f8 2477/* 0x04a8 */ fmovs %f0,%f8 2478/* 0x04ac */ fxtod %f8,%f62 2479/* 0x04b0 */ fmuld %f62,%f14,%f60 2480/* 0x04b4 */ fmuld %f60,%f36,%f32 2481/* 0x04b8 */ 
fdtox %f32,%f50 2482/* 0x04bc */ fxtod %f50,%f34 2483/* 0x04c0 */ fmuld %f34,%f38,%f46 2484/* 0x04c4 */ fsubd %f60,%f46,%f40 2485/* 0x04c8 334 */ ble,pn %icc,.L77000378 2486/* 0x04cc 330 */ std %f52,[%i3] 2487 .L77000509: 2488/* 0x04d0 345 */ add %o3,1,%g5 2489/* 0x04d4 */ sll %g5,1,%o2 2490/* 0x04d8 */ or %g0,0,%l1 2491/* 0x04dc 337 */ ldd [%i4],%f42 2492/* 0x04e0 345 */ sub %o3,1,%o3 2493/* 0x04e4 */ or %g0,0,%o5 2494/* 0x04e8 */ or %g0,%i3,%l2 2495/* 0x04ec */ add %i4,8,%o1 2496/* 0x04f0 */ add %i1,8,%g5 2497 .L900000848: 2498/* 0x04f4 337 */ fmuld %f40,%f42,%f34 2499/* 0x04f8 */ ldd [%l0+8],%f32 2500/* 0x04fc 341 */ cmp %g1,1 2501/* 0x0500 337 */ ldd [%i1],%f50 2502/* 0x0504 */ ldd [%l2],%f46 2503/* 0x0508 */ ldd [%l2+8],%f44 2504/* 0x050c */ fmuld %f50,%f32,%f60 2505/* 0x0510 335 */ ldd [%l0],%f42 2506/* 0x0514 337 */ faddd %f46,%f34,%f48 2507/* 0x0518 */ faddd %f44,%f60,%f58 2508/* 0x051c */ fmuld %f36,%f48,%f54 2509/* 0x0520 */ faddd %f58,%f54,%f34 2510/* 0x0524 341 */ ble,pn %icc,.L77000368 2511/* 0x0528 338 */ std %f34,[%l2+8] 2512 .L77000507: 2513/* 0x052c 341 */ or %g0,1,%l5 2514/* 0x0530 */ or %g0,2,%l4 2515/* 0x0534 */ or %g0,%g5,%g4 2516/* 0x0538 342 */ cmp %l3,12 2517/* 0x053c */ bl,pn %icc,.L77000481 2518/* 0x0540 341 */ or %g0,%o1,%g3 2519 .L900000839: 2520/* 0x0544 342 */ prefetch [%i1+8],0 2521/* 0x0548 */ prefetch [%i1+72],0 2522/* 0x054c */ add %i4,40,%l6 2523/* 0x0550 */ add %i1,40,%l7 2524/* 0x0554 */ prefetch [%l2+16],0 2525/* 0x0558 */ or %g0,%l2,%o7 2526/* 0x055c */ sub %l3,7,%i5 2527/* 0x0560 */ prefetch [%l2+80],0 2528/* 0x0564 */ add %l2,80,%g2 2529/* 0x0568 */ or %g0,2,%l4 2530/* 0x056c */ prefetch [%i1+136],0 2531/* 0x0570 */ or %g0,5,%l5 2532/* 0x0574 */ prefetch [%i1+200],0 2533/* 0x0578 */ prefetch [%l2+144],0 2534/* 0x057c */ ldd [%i4+8],%f52 2535/* 0x0580 */ ldd [%i4+16],%f44 2536/* 0x0584 */ ldd [%i4+24],%f56 2537/* 0x0588 */ fmuld %f40,%f52,%f48 2538/* 0x058c */ fmuld %f40,%f44,%f46 2539/* 0x0590 */ fmuld %f40,%f56,%f44 2540/* 
0x0594 */ ldd [%l2+48],%f56 2541/* 0x0598 */ prefetch [%l2+208],0 2542/* 0x059c */ prefetch [%l2+272],0 2543/* 0x05a0 */ prefetch [%l2+336],0 2544/* 0x05a4 */ prefetch [%l2+400],0 2545/* 0x05a8 */ ldd [%i1+8],%f32 2546/* 0x05ac */ ldd [%i1+16],%f60 2547/* 0x05b0 */ ldd [%i1+24],%f50 2548/* 0x05b4 */ fmuld %f42,%f32,%f62 2549/* 0x05b8 */ ldd [%i1+32],%f32 2550/* 0x05bc */ fmuld %f42,%f60,%f58 2551/* 0x05c0 */ ldd [%l2+16],%f52 2552/* 0x05c4 */ ldd [%l2+32],%f54 2553/* 0x05c8 */ faddd %f62,%f48,%f60 2554/* 0x05cc */ fmuld %f42,%f50,%f48 2555/* 0x05d0 */ faddd %f58,%f46,%f62 2556/* 0x05d4 */ ldd [%i4+32],%f46 2557/* 0x05d8 */ ldd [%l2+64],%f58 2558 .L900000837: 2559/* 0x05dc 342 */ prefetch [%l7+192],0 2560/* 0x05e0 */ fmuld %f40,%f46,%f46 2561/* 0x05e4 */ faddd %f60,%f52,%f60 2562/* 0x05e8 */ ldd [%l6],%f52 2563/* 0x05ec */ std %f60,[%g2-64] 2564/* 0x05f0 */ fmuld %f42,%f32,%f50 2565/* 0x05f4 */ add %l5,8,%l5 2566/* 0x05f8 */ ldd [%l7],%f60 2567/* 0x05fc */ faddd %f48,%f44,%f48 2568/* 0x0600 */ cmp %l5,%i5 2569/* 0x0604 */ ldd [%g2],%f32 2570/* 0x0608 */ add %g2,128,%g2 2571/* 0x060c */ prefetch [%g2+256],0 2572/* 0x0610 */ fmuld %f40,%f52,%f52 2573/* 0x0614 */ faddd %f62,%f54,%f44 2574/* 0x0618 */ ldd [%l6+8],%f54 2575/* 0x061c */ std %f44,[%g2-176] 2576/* 0x0620 */ fmuld %f42,%f60,%f44 2577/* 0x0624 */ add %l6,64,%l6 2578/* 0x0628 */ ldd [%l7+8],%f60 2579/* 0x062c */ faddd %f50,%f46,%f50 2580/* 0x0630 */ add %l7,64,%l7 2581/* 0x0634 */ add %l4,16,%l4 2582/* 0x0638 */ ldd [%g2-112],%f46 2583/* 0x063c */ fmuld %f40,%f54,%f54 2584/* 0x0640 */ faddd %f48,%f56,%f62 2585/* 0x0644 */ ldd [%l6-48],%f56 2586/* 0x0648 */ std %f62,[%g2-160] 2587/* 0x064c */ fmuld %f42,%f60,%f48 2588/* 0x0650 */ ldd [%l7-48],%f60 2589/* 0x0654 */ faddd %f44,%f52,%f52 2590/* 0x0658 */ ldd [%g2-96],%f30 2591/* 0x065c */ prefetch [%g2+288],0 2592/* 0x0660 */ fmuld %f40,%f56,%f56 2593/* 0x0664 */ faddd %f50,%f58,%f62 2594/* 0x0668 */ ldd [%l6-40],%f58 2595/* 0x066c */ std %f62,[%g2-144] 2596/* 
0x0670 */ fmuld %f42,%f60,%f50 2597/* 0x0674 */ ldd [%l7-40],%f62 2598/* 0x0678 */ faddd %f48,%f54,%f54 2599/* 0x067c */ ldd [%g2-80],%f28 2600/* 0x0680 */ prefetch [%l7+160],0 2601/* 0x0684 */ fmuld %f40,%f58,%f48 2602/* 0x0688 */ faddd %f52,%f32,%f44 2603/* 0x068c */ ldd [%l6-32],%f58 2604/* 0x0690 */ std %f44,[%g2-128] 2605/* 0x0694 */ fmuld %f42,%f62,%f44 2606/* 0x0698 */ ldd [%l7-32],%f60 2607/* 0x069c */ faddd %f50,%f56,%f56 2608/* 0x06a0 */ ldd [%g2-64],%f52 2609/* 0x06a4 */ prefetch [%g2+320],0 2610/* 0x06a8 */ fmuld %f40,%f58,%f50 2611/* 0x06ac */ faddd %f54,%f46,%f32 2612/* 0x06b0 */ ldd [%l6-24],%f62 2613/* 0x06b4 */ std %f32,[%g2-112] 2614/* 0x06b8 */ fmuld %f42,%f60,%f46 2615/* 0x06bc */ ldd [%l7-24],%f60 2616/* 0x06c0 */ faddd %f44,%f48,%f48 2617/* 0x06c4 */ ldd [%g2-48],%f54 2618/* 0x06c8 */ fmuld %f40,%f62,%f26 2619/* 0x06cc */ faddd %f56,%f30,%f32 2620/* 0x06d0 */ ldd [%l6-16],%f58 2621/* 0x06d4 */ std %f32,[%g2-96] 2622/* 0x06d8 */ fmuld %f42,%f60,%f30 2623/* 0x06dc */ ldd [%l7-16],%f32 2624/* 0x06e0 */ faddd %f46,%f50,%f60 2625/* 0x06e4 */ ldd [%g2-32],%f56 2626/* 0x06e8 */ prefetch [%g2+352],0 2627/* 0x06ec */ fmuld %f40,%f58,%f44 2628/* 0x06f0 */ faddd %f48,%f28,%f62 2629/* 0x06f4 */ ldd [%l6-8],%f46 2630/* 0x06f8 */ std %f62,[%g2-80] 2631/* 0x06fc */ fmuld %f42,%f32,%f48 2632/* 0x0700 */ ldd [%l7-8],%f32 2633/* 0x0704 */ faddd %f30,%f26,%f62 2634/* 0x0708 */ ble,pt %icc,.L900000837 2635/* 0x070c */ ldd [%g2-16],%f58 2636 .L900000840: 2637/* 0x0710 342 */ fmuld %f40,%f46,%f46 2638/* 0x0714 */ faddd %f62,%f54,%f62 2639/* 0x0718 */ std %f62,[%g2-48] 2640/* 0x071c */ cmp %l5,%l3 2641/* 0x0720 */ fmuld %f42,%f32,%f50 2642/* 0x0724 */ faddd %f48,%f44,%f48 2643/* 0x0728 */ or %g0,%l7,%g4 2644/* 0x072c */ or %g0,%l6,%g3 2645/* 0x0730 */ faddd %f60,%f52,%f60 2646/* 0x0734 */ std %f60,[%g2-64] 2647/* 0x0738 */ or %g0,%o7,%l2 2648/* 0x073c */ add %l4,8,%l4 2649/* 0x0740 */ faddd %f50,%f46,%f54 2650/* 0x0744 */ faddd %f48,%f56,%f56 2651/* 0x0748 */ std 
%f56,[%g2-32] 2652/* 0x074c */ faddd %f54,%f58,%f58 2653/* 0x0750 */ bg,pn %icc,.L77000368 2654/* 0x0754 */ std %f58,[%g2-16] 2655 .L77000481: 2656/* 0x0758 342 */ ldd [%g4],%f44 2657 .L900000850: 2658/* 0x075c 342 */ ldd [%g3],%f48 2659/* 0x0760 */ fmuld %f42,%f44,%f58 2660/* 0x0764 */ sra %l4,0,%l7 2661/* 0x0768 */ add %l5,1,%l5 2662/* 0x076c */ sllx %l7,3,%g2 2663/* 0x0770 */ add %g4,8,%g4 2664/* 0x0774 */ ldd [%l2+%g2],%f56 2665/* 0x0778 */ cmp %l5,%l3 2666/* 0x077c */ add %l4,2,%l4 2667/* 0x0780 */ fmuld %f40,%f48,%f54 2668/* 0x0784 */ add %g3,8,%g3 2669/* 0x0788 */ faddd %f58,%f54,%f52 2670/* 0x078c */ faddd %f52,%f56,%f62 2671/* 0x0790 */ std %f62,[%l2+%g2] 2672/* 0x0794 */ ble,a,pt %icc,.L900000850 2673/* 0x0798 */ ldd [%g4],%f44 2674 .L77000368: 2675/* 0x079c 344 */ cmp %o5,15 2676/* 0x07a0 */ bne,pn %icc,.L77000483 2677/* 0x07a4 345 */ srl %l1,31,%g4 2678 .L77000478: 2679/* 0x07a8 345 */ add %l1,%g4,%l4 2680/* 0x07ac */ sra %l4,1,%o7 2681/* 0x07b0 */ add %o7,1,%o4 2682/* 0x07b4 */ sll %o4,1,%l6 2683/* 0x07b8 */ cmp %l6,%o2 2684/* 0x07bc */ bge,pn %icc,.L77000392 2685/* 0x07c0 */ fmovd %f0,%f42 2686 .L77000508: 2687/* 0x07c4 345 */ sra %l6,0,%l4 2688/* 0x07c8 */ sllx %l4,3,%g2 2689/* 0x07cc */ fmovd %f0,%f32 2690/* 0x07d0 */ sub %o2,1,%l5 2691/* 0x07d4 */ ldd [%g2+%i3],%f40 2692/* 0x07d8 */ add %g2,%i3,%g3 2693 .L900000849: 2694/* 0x07dc 345 */ fdtox %f40,%f10 2695/* 0x07e0 */ ldd [%g3+8],%f52 2696/* 0x07e4 */ add %l6,2,%l6 2697/* 0x07e8 */ cmp %l6,%l5 2698/* 0x07ec */ fdtox %f52,%f2 2699/* 0x07f0 */ fmovd %f10,%f30 2700/* 0x07f4 */ fmovs %f0,%f10 2701/* 0x07f8 */ fmovs %f0,%f2 2702/* 0x07fc */ fxtod %f10,%f10 2703/* 0x0800 */ fxtod %f2,%f2 2704/* 0x0804 */ fdtox %f52,%f28 2705/* 0x0808 */ faddd %f10,%f32,%f56 2706/* 0x080c */ std %f56,[%g3] 2707/* 0x0810 */ faddd %f2,%f42,%f62 2708/* 0x0814 */ std %f62,[%g3+8] 2709/* 0x0818 */ fitod %f30,%f32 2710/* 0x081c */ add %g3,16,%g3 2711/* 0x0820 */ fitod %f28,%f42 2712/* 0x0824 */ ble,a,pt %icc,.L900000849 2713/* 
0x0828 */ ldd [%g3],%f40 2714 .L77000392: 2715/* 0x082c 346 */ or %g0,0,%o5 2716 .L77000483: 2717/* 0x0830 350 */ fdtox %f34,%f6 2718/* 0x0834 */ add %l1,1,%l1 2719/* 0x0838 */ cmp %l1,%o3 2720/* 0x083c */ add %o5,1,%o5 2721/* 0x0840 */ add %l2,8,%l2 2722/* 0x0844 */ add %l0,8,%l0 2723/* 0x0848 */ fmovs %f0,%f6 2724/* 0x084c */ fxtod %f6,%f46 2725/* 0x0850 */ fmuld %f46,%f14,%f56 2726/* 0x0854 */ fmuld %f56,%f36,%f44 2727/* 0x0858 */ fdtox %f44,%f48 2728/* 0x085c */ fxtod %f48,%f58 2729/* 0x0860 */ fmuld %f58,%f38,%f54 2730/* 0x0864 */ fsubd %f56,%f54,%f40 2731/* 0x0868 */ ble,a,pt %icc,.L900000848 2732/* 0x086c 337 */ ldd [%i4],%f42 2733 .L77000378: 2734/* 0x0870 409 */ ldx [%i3+%o0],%l1 2735 .L900000852: 2736/* 0x0874 409 */ add %i3,%o0,%l4 2737/* 0x0878 */ ldx [%l4+8],%i1 2738/* 0x087c */ cmp %l1,0 2739/* 0x0880 */ bne,pn %xcc,.L77000403 2740/* 0x0884 */ or %g0,0,%g5 2741 .L77000402: 2742/* 0x0888 409 */ or %g0,0,%i3 2743/* 0x088c */ ba .L900000847 2744/* 0x0890 */ cmp %i1,0 2745 .L77000403: 2746/* 0x0894 409 */ srlx %l1,52,%o5 2747/* 0x0898 */ sethi %hi(0xfff00000),%i3 2748/* 0x089c */ sllx %i3,32,%o2 2749/* 0x08a0 */ sethi %hi(0x40000000),%o0 2750/* 0x08a4 */ sllx %o0,22,%o4 2751/* 0x08a8 */ or %g0,1023,%l0 2752/* 0x08ac */ xor %o2,-1,%o3 2753/* 0x08b0 */ sub %l0,%o5,%o7 2754/* 0x08b4 */ and %l1,%o3,%l1 2755/* 0x08b8 */ add %o7,52,%i4 2756/* 0x08bc */ or %l1,%o4,%o1 2757/* 0x08c0 */ cmp %i1,0 2758/* 0x08c4 */ srlx %o1,%i4,%i3 2759 .L900000847: 2760/* 0x08c8 409 */ bne,pn %xcc,.L77000409 2761/* 0x08cc */ or %g0,0,%o7 2762 .L77000408: 2763/* 0x08d0 409 */ ba .L900000846 2764/* 0x08d4 350 */ cmp %g1,0 2765 .L77000409: 2766/* 0x08d8 409 */ srlx %i1,52,%l2 2767/* 0x08dc */ sethi %hi(0xfff00000),%o7 2768/* 0x08e0 */ sllx %o7,32,%i4 2769/* 0x08e4 */ sethi %hi(0x40000000),%i5 2770/* 0x08e8 */ sllx %i5,22,%l6 2771/* 0x08ec */ or %g0,1023,%l5 2772/* 0x08f0 */ xor %i4,-1,%o1 2773/* 0x08f4 */ sub %l5,%l2,%g2 2774/* 0x08f8 */ and %i1,%o1,%l7 2775/* 0x08fc */ add %g2,52,%g3 
2776/* 0x0900 */ or %l7,%l6,%g4 2777/* 0x0904 350 */ cmp %g1,0 2778/* 0x0908 409 */ srlx %g4,%g3,%o7 2779 .L900000846: 2780/* 0x090c 350 */ ble,pn %icc,.L77000397 2781/* 0x0910 */ or %g0,0,%l5 2782 .L77000510: 2783/* 0x0914 409 */ sethi %hi(0xfff00000),%g4 2784/* 0x0918 */ sllx %g4,32,%o0 2785/* 0x091c 0 */ or %g0,-1,%i5 2786/* 0x0920 409 */ srl %i5,0,%l7 2787/* 0x0924 */ sethi %hi(0x40000000),%i1 2788/* 0x0928 */ sllx %i1,22,%l6 2789/* 0x092c */ sethi %hi(0xfc00),%i4 2790/* 0x0930 */ xor %o0,-1,%g2 2791/* 0x0934 */ add %i4,1023,%l2 2792/* 0x0938 */ or %g0,2,%g4 2793/* 0x093c */ or %g0,%i2,%g3 2794 .L77000395: 2795/* 0x0940 409 */ sra %g4,0,%o2 2796/* 0x0944 */ add %g4,1,%o3 2797/* 0x0948 */ sllx %o2,3,%o0 2798/* 0x094c */ sra %o3,0,%o5 2799/* 0x0950 */ ldx [%l4+%o0],%o4 2800/* 0x0954 */ sllx %o5,3,%l0 2801/* 0x0958 */ and %i3,%l7,%o1 2802/* 0x095c */ ldx [%l4+%l0],%i4 2803/* 0x0960 */ cmp %o4,0 2804/* 0x0964 */ bne,pn %xcc,.L77000415 2805/* 0x0968 350 */ and %o7,%l2,%i5 2806 .L77000414: 2807/* 0x096c 409 */ or %g0,0,%l1 2808/* 0x0970 */ ba .L900000845 2809/* 0x0974 */ add %g5,%o1,%i1 2810 .L77000415: 2811/* 0x0978 409 */ srlx %o4,52,%o3 2812/* 0x097c */ and %o4,%g2,%l1 2813/* 0x0980 */ or %g0,52,%o0 2814/* 0x0984 */ sub %o3,1023,%l0 2815/* 0x0988 */ or %l1,%l6,%o4 2816/* 0x098c */ sub %o0,%l0,%o5 2817/* 0x0990 */ srlx %o4,%o5,%l1 2818/* 0x0994 */ add %g5,%o1,%i1 2819 .L900000845: 2820/* 0x0998 409 */ srax %i3,32,%g5 2821/* 0x099c */ cmp %i4,0 2822/* 0x09a0 */ bne,pn %xcc,.L77000421 2823/* 0x09a4 350 */ sllx %i5,16,%o2 2824 .L77000420: 2825/* 0x09a8 409 */ or %g0,0,%o4 2826/* 0x09ac */ ba .L900000844 2827/* 0x09b0 350 */ add %i1,%o2,%o5 2828 .L77000421: 2829/* 0x09b4 409 */ srlx %i4,52,%o4 2830/* 0x09b8 */ or %g0,52,%o0 2831/* 0x09bc */ sub %o4,1023,%o3 2832/* 0x09c0 */ and %i4,%g2,%i3 2833/* 0x09c4 */ or %i3,%l6,%o5 2834/* 0x09c8 */ sub %o0,%o3,%l0 2835/* 0x09cc */ srlx %o5,%l0,%o4 2836/* 0x09d0 350 */ add %i1,%o2,%o5 2837 .L900000844: 2838/* 0x09d4 350 */ srax 
%o7,16,%i4 2839/* 0x09d8 */ srax %o5,32,%i5 2840/* 0x09dc */ add %i4,%i5,%o1 2841/* 0x09e0 */ add %l5,1,%l5 2842/* 0x09e4 */ and %o5,%l7,%i1 2843/* 0x09e8 */ add %g5,%o1,%g5 2844/* 0x09ec */ st %i1,[%g3] 2845/* 0x09f0 */ or %g0,%l1,%i3 2846/* 0x09f4 */ or %g0,%o4,%o7 2847/* 0x09f8 */ add %g4,2,%g4 2848/* 0x09fc */ cmp %l5,%l3 2849/* 0x0a00 */ ble,pt %icc,.L77000395 2850/* 0x0a04 */ add %g3,4,%g3 2851 .L77000397: 2852/* 0x0a08 409 */ sethi %hi(0xfc00),%l4 2853/* 0x0a0c */ sra %l5,0,%i5 2854/* 0x0a10 */ add %l4,1023,%i1 2855/* 0x0a14 */ add %g5,%i3,%l5 2856/* 0x0a18 */ and %o7,%i1,%g5 2857/* 0x0a1c */ sllx %g5,16,%l2 2858/* 0x0a20 */ sllx %i5,2,%l7 2859/* 0x0a24 413 */ sra %g1,0,%g2 2860/* 0x0a28 409 */ add %l5,%l2,%l6 2861/* 0x0a2c */ st %l6,[%i2+%l7] 2862/* 0x0a30 413 */ sllx %g2,2,%g3 2863/* 0x0a34 */ ld [%i2+%g3],%g4 2864/* 0x0a38 */ cmp %g4,0 2865/* 0x0a3c */ bgu,pn %icc,.L77000486 2866/* 0x0a40 */ cmp %l3,0 2867 .L77000427: 2868/* 0x0a44 413 */ bl,pn %icc,.L77000486 2869/* 0x0a48 */ or %g0,%l3,%i5 2870 .L77000512: 2871/* 0x0a4c 413 */ sra %l3,0,%o5 2872/* 0x0a50 */ sllx %o5,2,%l7 2873/* 0x0a54 */ ld [%l7+%i0],%o5 2874/* 0x0a58 */ add %l7,%i2,%o1 2875/* 0x0a5c */ add %l7,%i0,%i4 2876 .L900000843: 2877/* 0x0a60 413 */ ld [%o1],%i1 2878/* 0x0a64 */ cmp %i1,%o5 2879/* 0x0a68 */ bne,pn %icc,.L77000435 2880/* 0x0a6c */ sub %o1,4,%o1 2881 .L77000431: 2882/* 0x0a70 413 */ sub %i4,4,%i4 2883/* 0x0a74 */ subcc %i5,1,%i5 2884/* 0x0a78 */ bpos,a,pt %icc,.L900000843 2885/* 0x0a7c */ ld [%i4],%o5 2886 .L900000827: 2887/* 0x0a80 413 */ ba .L900000842 2888/* 0x0a84 350 */ cmp %g1,0 2889 .L77000435: 2890/* 0x0a88 413 */ sra %i5,0,%o0 2891/* 0x0a8c */ sllx %o0,2,%l1 2892/* 0x0a90 */ ld [%i0+%l1],%i3 2893/* 0x0a94 */ ld [%i2+%l1],%l0 2894/* 0x0a98 */ cmp %l0,%i3 2895/* 0x0a9c */ bleu,pt %icc,.L77000379 2896/* 0x0aa0 */ nop 2897 .L77000486: 2898/* 0x0aa4 350 */ cmp %g1,0 2899 .L900000842: 2900/* 0x0aa8 350 */ ble,pn %icc,.L77000379 2901/* 0x0aac */ add %l3,1,%g3 2902 .L77000511: 
2903/* 0x0ab0 350 */ or %g0,0,%l5 2904/* 0x0ab4 */ cmp %g3,10 2905/* 0x0ab8 */ bl,pn %icc,.L77000487 2906/* 0x0abc */ or %g0,0,%g1 2907 .L900000835: 2908/* 0x0ac0 350 */ prefetch [%i2],22 2909/* 0x0ac4 */ add %i0,4,%l2 2910/* 0x0ac8 */ prefetch [%i2+64],22 2911/* 0x0acc */ add %i2,8,%o5 2912/* 0x0ad0 */ sub %l3,7,%i0 2913/* 0x0ad4 */ prefetch [%i2+128],22 2914/* 0x0ad8 */ or %g0,2,%l5 2915/* 0x0adc */ prefetch [%i2+192],22 2916/* 0x0ae0 */ prefetch [%i2+256],22 2917/* 0x0ae4 */ prefetch [%i2+320],22 2918/* 0x0ae8 */ prefetch [%i2+384],22 2919/* 0x0aec */ ld [%l2-4],%l7 2920/* 0x0af0 */ ld [%o5-4],%l6 2921/* 0x0af4 */ prefetch [%o5+440],22 2922/* 0x0af8 */ prefetch [%o5+504],22 2923/* 0x0afc */ ld [%i2],%i2 2924/* 0x0b00 */ sub %i2,%l7,%g3 2925/* 0x0b04 */ st %g3,[%o5-8] 2926/* 0x0b08 */ srax %g3,32,%l7 2927 .L900000833: 2928/* 0x0b0c 350 */ add %l5,8,%l5 2929/* 0x0b10 */ add %o5,32,%o5 2930/* 0x0b14 */ ld [%l2],%i5 2931/* 0x0b18 */ prefetch [%o5+496],22 2932/* 0x0b1c */ cmp %l5,%i0 2933/* 0x0b20 */ add %l2,32,%l2 2934/* 0x0b24 */ sub %l6,%i5,%g5 2935/* 0x0b28 */ add %g5,%l7,%o0 2936/* 0x0b2c */ ld [%o5-32],%l4 2937/* 0x0b30 */ st %o0,[%o5-36] 2938/* 0x0b34 */ srax %o0,32,%i3 2939/* 0x0b38 */ ld [%l2-28],%i1 2940/* 0x0b3c */ sub %l4,%i1,%i4 2941/* 0x0b40 */ add %i4,%i3,%o1 2942/* 0x0b44 */ ld [%o5-28],%o3 2943/* 0x0b48 */ st %o1,[%o5-32] 2944/* 0x0b4c */ srax %o1,32,%l1 2945/* 0x0b50 */ ld [%l2-24],%o2 2946/* 0x0b54 */ sub %o3,%o2,%g2 2947/* 0x0b58 */ add %g2,%l1,%o7 2948/* 0x0b5c */ ld [%o5-24],%l0 2949/* 0x0b60 */ st %o7,[%o5-28] 2950/* 0x0b64 */ srax %o7,32,%l6 2951/* 0x0b68 */ ld [%l2-20],%o4 2952/* 0x0b6c */ sub %l0,%o4,%g1 2953/* 0x0b70 */ add %g1,%l6,%l7 2954/* 0x0b74 */ ld [%o5-20],%i2 2955/* 0x0b78 */ st %l7,[%o5-24] 2956/* 0x0b7c */ srax %l7,32,%g4 2957/* 0x0b80 */ ld [%l2-16],%g3 2958/* 0x0b84 */ sub %i2,%g3,%i5 2959/* 0x0b88 */ add %i5,%g4,%g5 2960/* 0x0b8c */ ld [%o5-16],%i1 2961/* 0x0b90 */ st %g5,[%o5-20] 2962/* 0x0b94 */ srax %g5,32,%l4 2963/* 0x0b98 
*/ ld [%l2-12],%o0 2964/* 0x0b9c */ sub %i1,%o0,%i3 2965/* 0x0ba0 */ add %i3,%l4,%i4 2966/* 0x0ba4 */ ld [%o5-12],%o2 2967/* 0x0ba8 */ st %i4,[%o5-16] 2968/* 0x0bac */ srax %i4,32,%o3 2969/* 0x0bb0 */ ld [%l2-8],%o1 2970/* 0x0bb4 */ sub %o2,%o1,%l1 2971/* 0x0bb8 */ add %l1,%o3,%g2 2972/* 0x0bbc */ ld [%o5-8],%o4 2973/* 0x0bc0 */ st %g2,[%o5-12] 2974/* 0x0bc4 */ srax %g2,32,%l0 2975/* 0x0bc8 */ ld [%l2-4],%o7 2976/* 0x0bcc */ sub %o4,%o7,%l6 2977/* 0x0bd0 */ add %l6,%l0,%g1 2978/* 0x0bd4 */ ld [%o5-4],%l6 2979/* 0x0bd8 */ st %g1,[%o5-8] 2980/* 0x0bdc */ ble,pt %icc,.L900000833 2981/* 0x0be0 */ srax %g1,32,%l7 2982 .L900000836: 2983/* 0x0be4 350 */ ld [%l2],%l0 2984/* 0x0be8 */ add %l2,4,%i0 2985/* 0x0bec */ or %g0,%o5,%i2 2986/* 0x0bf0 */ cmp %l5,%l3 2987/* 0x0bf4 */ sub %l6,%l0,%l6 2988/* 0x0bf8 */ add %l6,%l7,%g1 2989/* 0x0bfc */ st %g1,[%o5-4] 2990/* 0x0c00 */ bg,pn %icc,.L77000379 2991/* 0x0c04 */ srax %g1,32,%g1 2992 .L77000487: 2993/* 0x0c08 350 */ ld [%i2],%o4 2994 .L900000841: 2995/* 0x0c0c 350 */ ld [%i0],%i3 2996/* 0x0c10 */ add %g1,%o4,%l0 2997/* 0x0c14 */ add %l5,1,%l5 2998/* 0x0c18 */ cmp %l5,%l3 2999/* 0x0c1c */ add %i0,4,%i0 3000/* 0x0c20 */ sub %l0,%i3,%l6 3001/* 0x0c24 */ st %l6,[%i2] 3002/* 0x0c28 */ srax %l6,32,%g1 3003/* 0x0c2c */ add %i2,4,%i2 3004/* 0x0c30 */ ble,a,pt %icc,.L900000841 3005/* 0x0c34 */ ld [%i2],%o4 3006 .L77000379: 3007/* 0x0c38 405 */ ret ! Result = 3008/* 0x0c3c */ restore %g0,%g0,%g0 3009/* 0x0c40 0 */ .type mont_mulf_noconv,2 3010/* 0x0c40 0 */ .size mont_mulf_noconv,(.-mont_mulf_noconv) 3011 3012! 
Begin Disassembling Debug Info 3013 .xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0 3014 .xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o mont_mulf.c",52,0,0,0 3015 3016! End Disassembling Debug Info 3017 3018! Begin Disassembling Ident 3019 .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE) 3020 .ident "@(#)mont_mulf.c\t1.2\t01/09/24 SMI" ! (/tmp/acompAAApja4Fx:8) 3021 .ident "@(#)types.h\t1.74\t03/08/07 SMI" ! (/tmp/acompAAApja4Fx:9) 3022 .ident "@(#)isa_defs.h\t1.20\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:10) 3023 .ident "@(#)feature_tests.h\t1.18\t99/07/26 SMI" ! (/tmp/acompAAApja4Fx:11) 3024 .ident "@(#)machtypes.h\t1.13\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:12) 3025 .ident "@(#)inttypes.h\t1.2\t98/01/16 SMI" ! (/tmp/acompAAApja4Fx:13) 3026 .ident "@(#)int_types.h\t1.6\t97/08/20 SMI" ! (/tmp/acompAAApja4Fx:14) 3027 .ident "@(#)int_limits.h\t1.6\t99/08/06 SMI" ! (/tmp/acompAAApja4Fx:15) 3028 .ident "@(#)int_const.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:16) 3029 .ident "@(#)int_fmtio.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:17) 3030 .ident "@(#)types32.h\t1.4\t98/02/13 SMI" ! (/tmp/acompAAApja4Fx:18) 3031 .ident "@(#)select.h\t1.17\t01/08/15 SMI" ! (/tmp/acompAAApja4Fx:19) 3032 .ident "@(#)math.h\t2.11\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:20) 3033 .ident "@(#)math_iso.h\t1.2\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:21) 3034 .ident "@(#)floatingpoint.h\t2.5\t99/06/22 SMI" ! (/tmp/acompAAApja4Fx:22) 3035 .ident "@(#)stdio_tag.h\t1.3\t98/04/20 SMI" ! (/tmp/acompAAApja4Fx:23) 3036 .ident "@(#)ieeefp.h\t2.8 99/10/29" ! (/tmp/acompAAApja4Fx:24) 3037 .ident "acomp: Sun C 5.5 Patch 112760-07 2004/02/03" ! 
(/tmp/acompAAApja4Fx:57)
3038 .ident "iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (/tmp/acompAAApja4Fx:58)
3039 .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
3040! End Disassembling Ident
3041
/*
 * FZERO: clear the 32 even-numbered floating-point registers %f0, %f2,
 * ..., %f62.  %f0 and %f2 are cleared directly with fzero; each of the
 * remaining 30 registers is then written with either the sum (faddd) or
 * the product (fmuld) of those two zeroed registers.
 *
 * NOTE(review): the strict faddd/fmuld alternation presumably spreads the
 * work across separate FP add and multiply units; nothing in this file
 * states that, so confirm before relying on it.
 *
 * The macro is a single ';'-separated statement list; it takes no
 * arguments and uses no integer registers.
 */
3042#define FZERO \
3043 fzero %f0 ;\
3044 fzero %f2 ;\
3045 faddd %f0, %f2, %f4 ;\
3046 fmuld %f0, %f2, %f6 ;\
3047 faddd %f0, %f2, %f8 ;\
3048 fmuld %f0, %f2, %f10 ;\
3049 faddd %f0, %f2, %f12 ;\
3050 fmuld %f0, %f2, %f14 ;\
3051 faddd %f0, %f2, %f16 ;\
3052 fmuld %f0, %f2, %f18 ;\
3053 faddd %f0, %f2, %f20 ;\
3054 fmuld %f0, %f2, %f22 ;\
3055 faddd %f0, %f2, %f24 ;\
3056 fmuld %f0, %f2, %f26 ;\
3057 faddd %f0, %f2, %f28 ;\
3058 fmuld %f0, %f2, %f30 ;\
3059 faddd %f0, %f2, %f32 ;\
3060 fmuld %f0, %f2, %f34 ;\
3061 faddd %f0, %f2, %f36 ;\
3062 fmuld %f0, %f2, %f38 ;\
3063 faddd %f0, %f2, %f40 ;\
3064 fmuld %f0, %f2, %f42 ;\
3065 faddd %f0, %f2, %f44 ;\
3066 fmuld %f0, %f2, %f46 ;\
3067 faddd %f0, %f2, %f48 ;\
3068 fmuld %f0, %f2, %f50 ;\
3069 faddd %f0, %f2, %f52 ;\
3070 fmuld %f0, %f2, %f54 ;\
3071 faddd %f0, %f2, %f56 ;\
3072 fmuld %f0, %f2, %f58 ;\
3073 faddd %f0, %f2, %f60 ;\
3074 fmuld %f0, %f2, %f62
3075
3076#include "assym.h"
3077
3078/*
3079 * In the routine below, we check/set FPRS_FEF bit since
3080 * we don't want to take a fp_disabled trap. We need not
3081 * check/set PSTATE_PEF bit as it is done early during boot.
3082 */
/*
 * big_savefp: record the caller's floating-point state in a save area.
 *
 *   %o0  in       base address of the save area.  It is addressed at
 *                 offsets FPU_FPRS and FPU_FSR, and is also handed to
 *                 BSTORE_FPREGS.  Those symbols are defined outside this
 *                 section (presumably via assym.h -- confirm).
 *   %o2  scratch  holds the %fprs value read on entry.
 *   %o4  passed as the second argument of BSTORE_FPREGS (presumably a
 *                 scratch register for that macro -- confirm).
 *
 * The incoming %fprs is always stored at [%o0 + FPU_FPRS].
 *   - FPRS_FEF already set: the FP registers are stored with
 *     BSTORE_FPREGS, then %fsr is stored at [%o0 + FPU_FSR].
 *   - FPRS_FEF clear: %fprs is overwritten with FPRS_FEF so that the
 *     following stx of %fsr does not trap; the FP registers themselves
 *     are not stored on this path.
 * %fprs is not written again before either retl, so FPRS_FEF is set on
 * return from both paths.
 */
3083 ENTRY(big_savefp)
3084 rd %fprs, %o2
3085 st %o2, [%o0 + FPU_FPRS]
3086 andcc %o2, FPRS_FEF, %g0 ! is FPRS_FEF set?
/*
 * The ",a" (annul) bit only cancels the delay slot when the branch is not
 * taken; the slot is a nop either way, so the annul bit has no visible
 * effect here.
 */
3087 bnz,a,pt %icc, .fregs_save ! yes, go to save
3088 nop
/* FPRS_FEF was clear: "wr %g0, FPRS_FEF" writes (0 xor FPRS_FEF) to %fprs. */
3089 wr %g0, FPRS_FEF, %fprs ! else, set the bit
3090 stx %fsr, [%o0 + FPU_FSR] ! store %fsr
3091 retl
3092 nop
3093.fregs_save:
/* FPRS_FEF was already set: store the FP registers, then %fsr. */
3094 BSTORE_FPREGS(%o0, %o4)
3095 stx %fsr, [%o0 + FPU_FSR] ! store %fsr
3096 retl
3097 nop
3098 SET_SIZE(big_savefp)
3099
3100
/*
 * big_restorefp: reinstate the floating-point state recorded by
 * big_savefp.
 *
 *   %o0  in       base address of the save area (same layout as used by
 *                 big_savefp above).
 *   %o1  scratch  holds the %fprs value loaded from [%o0 + FPU_FPRS].
 *   %o2  passed as the second argument of BLOAD_FPREGS (presumably a
 *                 scratch register for that macro -- confirm).
 *
 * %fsr is reloaded from [%o0 + FPU_FSR] first, on both paths.
 *   - Saved %fprs had FPRS_FEF set: the FP registers are reloaded with
 *     BLOAD_FPREGS and the saved %fprs value is written back.
 *   - Saved %fprs had FPRS_FEF clear: no register image was stored by
 *     big_savefp on that path, so the even-numbered FP registers are
 *     zeroed with FZERO and %fprs is then written with 0.
 */
3101 ENTRY(big_restorefp)
3102 ldx [%o0 + FPU_FSR], %fsr ! restore %fsr
3103 ld [%o0 + FPU_FPRS], %o1
3104 andcc %o1, FPRS_FEF, %g0 ! is FPRS_FEF set in saved %fprs?
3105 bnz,pt %icc, .fregs_restore ! yes, go to restore
3106 nop
/* FZERO runs before %fprs is written, i.e. before FPRS_FEF is cleared. */
3107 FZERO ! zero out to avoid leaks
/* "wr %g0, 0" writes (0 xor 0) = 0 to %fprs. */
3108 wr %g0, 0, %fprs
3109 retl
3110 nop
3111.fregs_restore:
3112 BLOAD_FPREGS(%o0, %o2)
/* "wr %o1, 0" writes (%o1 xor 0), i.e. the saved %fprs value, unchanged. */
3113 wr %o1, 0, %fprs
3114 retl
3115 nop
3116 SET_SIZE(big_restorefp)
3117