/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vexpf.S"

#include "libm.h"

	RO_DATA
	.align	64
!!  .CONST_TBL: 256 double-precision entries, each emitted as a
!!  (high word, low word) pair of .word values, 8 bytes per entry:
!!
!!  2^(i/256) - ((i & 0xf0) << 44), i = [0, 255]
!!
!!  __vexpf indexes the table with (k & 255) << 3 and combines the entry
!!  with the fpackfix-ed k through fpadd32.  The (i & 0xf0) << 44 bias is
!!  presumably there to cancel the bits of k that fpackfix places below
!!  the exponent field -- TODO confirm against the VIS fpackfix definition.
.CONST_TBL:
	.word	0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf
	.word	0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281
	.word	0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc
	.word	0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1
	.word	0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89
	.word	0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836
	.word	0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0
	.word	0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919
	.word	0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85
	.word	0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec
	.word	0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5
	.word	0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e
	.word	0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6
	.word	0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab
	.word	0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e
	.word	0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2
	.word	0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0
	.word	0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f
	.word	0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c
	.word	0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b
	.word	0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027
	.word	0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d
	.word	0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819
	.word	0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1
	.word	0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a
	.word	0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75
	.word	0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29
	.word	0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70
	.word	0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13
	.word	0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f
	.word	0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589
	.word	0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b
	.word	0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd
	.word	0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32
	.word	0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d
	.word	0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b
	.word	0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a
	.word	0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef
	.word	0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4
	.word	0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173
	.word	0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175
	.word	0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024
	.word	0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a
	.word	0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4
	.word	0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232
	.word	0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237
	.word	0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2
	.word	0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7
	.word	0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114
	.word	0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff
	.word	0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee
	.word	0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef
	.word	0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27
	.word	0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2
	.word	0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf
	.word	0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc
	.word	0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03
	.word	0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93
	.word	0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71
	.word	0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4
	.word	0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd
	.word	0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7
	.word	0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6
	.word	0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538
	.word	0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e
	.word	0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645
	.word	0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5
	.word	0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87
	.word	0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a
	.word	0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd
	.word	0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09
	.word	0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6
	.word	0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb
	.word	0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0
	.word	0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491
	.word	0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9
	.word	0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7
	.word	0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21
	.word	0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436
	.word	0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f
	.word	0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778
	.word	0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9
	.word	0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a
	.word	0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2
	.word	0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5
	.word	0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3
	.word	0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2
	.word	0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d
	.word	0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5
	.word	0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e
	.word	0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb
	.word	0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8
	.word	0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052
	.word	0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59
	.word	0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba
	.word	0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774
	.word	0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff
	.word	0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952
	.word	0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1
	.word	0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a
	.word	0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4
	.word	0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f
	.word	0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207
	.word	0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d
	.word	0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c
	.word	0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22
	.word	0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933
	.word	0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db
	.word	0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675
	.word	0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74
	.word	0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968
	.word	0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6
	.word	0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3
	.word	0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075
	.word	0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315
	.word	0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658
	.word	0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17
	.word	0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12
	.word	0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76
	.word	0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740
	.word	0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e
	.word	0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510
	.word	0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a
	.word	0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274
	.word	0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8
	.word	0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89
	.word	0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514
	.word	0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9

	! Scalar constants that follow the 2048-byte table (256 entries of
	! 8 bytes).  __vexpf loads them at fixed byte offsets from .CONST_TBL,
	! so their order and size must stay as they are.  The first pair is
	! selected with offset 2048 + sign, where sign is 0 or 4.
	.word	0x7149f2ca, 0x0da24260	! +2048: 1.0e30f, +2052: 1.0e-30f
	.word	0x3ecebfbe, 0x9d182250	! +2056: KA2 = 3.66556671660783833261e-06
	.word	0x3f662e43, 0xe2528362	! +2064: KA1 = 2.70760782821392980564e-03
	.word	0x40771547, 0x652b82fe	! +2072: K256ONLN2 = 369.3299304675746271
	.word	0x42aeac4f, 0x42b17218	! +2080: THRESHOLD = 87.3365402f
					! +2084: THRESHOLDL = 88.7228394f
! local storage indices

! Eight 4-byte scratch slots, addressed as [%fp+tmpN].  The code stores
! each fdtoi result (k) from a floating-point register into its slot and
! reloads it into an integer register to form the table index.
! Keep comments off the #define lines themselves: the macro text is
! substituted verbatim into instruction operands.
#define tmp0		STACK_BIAS-32
#define tmp1		STACK_BIAS-28
#define tmp2		STACK_BIAS-24
#define tmp3		STACK_BIAS-20
#define tmp4		STACK_BIAS-16
#define tmp5		STACK_BIAS-12
#define tmp6		STACK_BIAS-8
#define tmp7		STACK_BIAS-4

! sizeof temp storage - must be a multiple of 16 for V9
! (8 slots * 4 bytes = 0x20; subtracted from %sp by the save in __vexpf)
#define tmps 		0x20

! Register aliases.
! I5_THRESHOLD holds the integer bit pattern of THRESHOLD (0x42aeac4f),
! loaded from .CONST_TBL+2080.
! NOTE: the numeric prefix in the two %g alias names does not match the
! register each one expands to: G1_CONST_TBL is %g5 (base address of
! .CONST_TBL, set by PIC_SET) and G5_CONST is %g1 (the 0x7fffffff mask
! used to compute ax).
#define I5_THRESHOLD	%i5
#define G1_CONST_TBL	%g5
#define G5_CONST	%g1

! Double-precision constants, loaded once from .CONST_TBL+2072, +2056
! and +2064 respectively.
#define F62_K256ONLN2	%f62
#define F60_KA2		%f60
#define F58_KA1		%f58

! Single-precision upper bound (88.7228394f), loaded from .CONST_TBL+2084.
#define THRESHOLDL	%f0

! register use
! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey

! i5  0x42aeac4f (87.3365402f)

! g5  CONST_TBL   (alias G1_CONST_TBL)
! g1  0x7fffffff  (alias G5_CONST)

! f62 K256ONLN2 = 369.3299304675746271
! f60 KA2 = 3.66556671660783833261e-06
! f58 KA1 = 2.70760782821392980564e-03


!		!!!!!  Algorithm  !!!!!
!
!  double y, dtmp, drez;
!  int k, sign, Xi;
!  float X, Y;
!  int THRESHOLD = 0x42aeac4f; /* 87.3365402f */
!  float THRESHOLDL = 88.7228394f;
!  double KA2 = 3.66556671660783833261e-06;
!  double KA1 = 2.70760782821392980564e-03;
!  double K256ONLN2 = 369.3299304675746271;
!  char *CONST_TBL;
!
!  X  = px[0];
!  Xi = ((int*)px)[0];
!  ax = Xi & 0x7fffffff;
!
!  if (ax > THRESHOLD) {
!    sign = ((unsigned)Xi >> 29) & 4;
!    if (ax >= 0x7f800000) {      /* Inf or NaN */
!      if (ax > 0x7f800000) {     /* NaN */
!        Y = X * X;               /* NaN -> NaN */
!        return Y;
!      }
!      Y = (sign) ? zero : X;     /* +Inf -> +Inf , -Inf -> zero */
!      return Y;
!    }
!
!    if ( X < 0.0f || X >= THRESHOLDL ) {
!      Y = ((float*)(CONST_TBL + 2048 + sign))[0];
!         /* X >= THRESHOLDL : Y = 1.0e+30f */
!         /* X < -THRESHOLD  : Y = 1.0e-30f */
!      Y =  Y * Y;
!         /* X >= THRESHOLDL : +Inf + overflow  */
!         /* X < -THRESHOLD  : +0 + underflow */
!      return Y;
!    }
!  }
!  vis_write_gsr(12 << 3);
!  y = (double) X;
!  y = K256ONLN2 * y;
!  k = (int) y;
!  dtmp = (double) k;
!  y -= dtmp;
!  dtmp = y * KA2;
!  dtmp += KA1;
!  y *= dtmp;              /* i.e. y = (y * KA2 + KA1) * y, applied once */
!  ((int*)&drez)[0] = k;
!  ((int*)&drez)[1] = 0;
!  ((float*)&drez)[0] = vis_fpackfix(drez);
!  k &= 255;
!  k <<= 3;
!  dtmp = ((double*)(CONST_TBL + k))[0];
!  drez = vis_fpadd32(drez,dtmp);
!  y *= drez;
!  y += drez;
!  Y = (float) y;
!
!
!  fstod %f16,%f40			! y = (double) X
!  fmuld F62_K256ONLN2,%f40,%f40	! y *= K256ONLN2
!  fdtoi %f40,%f16			! k = (int) y
!  st  %f16,[%fp+tmp0]			! store k
!  fitod %f16,%f34			! dtmp = (double) k
!  fpackfix  %f16,%f16			! ((float*)&drez)[0] = vis_fpackfix(drez)
!  fsubd %f40,%f34,%f40			! y -= dtmp
!  fmuld F60_KA2,%f40,%f34		! dtmp = y * KA2
!  faddd F58_KA1,%f34,%f34		! dtmp += KA1
!  ld  [%fp+tmp0],%o0			! load k
!  fmuld %f34,%f40,%f40			! y *= dtmp
!  and %o0,255,%o0			! k &= 255
!  sll  %o0,3,%o0			! k <<= 3
!  ldd [G1_CONST_TBL+%o0],%f34		! dtmp = ((double*)(CONST_TBL + k))[0]
!  fpadd32 %f16,%f34,%f34		! drez = vis_fpadd32(drez,dtmp)
!  fmuld %f34,%f40,%f40			! y *= drez
!  faddd %f34,%f40,%f40			! y += drez
!  fdtos %f40,%f26			! (float) y
!--------------------------------------------------------------------

	ENTRY(__vexpf)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,.CONST_TBL,g5)

	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
	wr	%g0,0x60,%gsr

	sll	%i2,2,%i2
	sll	%i4,2,%i4

	ldd	[G1_CONST_TBL+2056],F60_KA2
	sethi	%hi(0x7ffffc00),G5_CONST
	ldd	[G1_CONST_TBL+2064],F58_KA1
	add	G5_CONST,1023,G5_CONST
	ldd	[G1_CONST_TBL+2072],F62_K256ONLN2
	ld	[G1_CONST_TBL+2080],I5_THRESHOLD
	ld	[G1_CONST_TBL+2084],THRESHOLDL

	subcc	%i0,8,%i0
	bneg,pn	%icc,.tail
	fzeros	%f3

.main_loop_preload:

! preload 8 elements and get absolute values
	ld	[%i1],%l0		! (0) Xi = ((int*)px)[0]
	fzeros	%f5
	ld	[%i1],%f16		! (0) X = px[0]
	fzeros	%f7
	add	%i1,%i2,%o5		! px += stridex
	ld	[%o5],%l1		! (1) Xi = ((int*)px)[0]
	and	%l0,G5_CONST,%l0	! (0) ax = Xi & 0x7fffffff
	fzeros	%f9
	ld	[%o5],%f2		! (1) X = px[0]
	fzeros	%f11
	add	%o5,%i2,%i1		! px += stridex
	ld	[%i1],%l2		! (2) Xi = ((int*)px)[0]
	and	%l1,G5_CONST,%l1	! (1) ax = Xi & 0x7fffffff
	fzeros	%f13
	ld	[%i1],%f4		! (2) X = px[0]
	fzeros	%f15
	add	%i1,%i2,%o5		! px += stridex
	ld	[%o5],%l3		! (3) Xi = ((int*)px)[0]
	and	%l2,G5_CONST,%l2	! (2) ax = Xi & 0x7fffffff
	fzeros	%f17
	ld	[%o5],%f6		! (3) X = px[0]
	add	%o5,%i2,%o0		! px += stridex
	ld	[%o0],%l4		! (4) Xi = ((int*)px)[0]
	and	%l3,G5_CONST,%l3	! (3) ax = Xi & 0x7fffffff
	add	%o0,%i2,%o1		! px += stridex
	ld	[%o1],%l5		! (5) Xi = ((int*)px)[0]
	add	%o1,%i2,%o2		! px += stridex
	ld	[%o2],%l6		! (6) Xi = ((int*)px)[0]
	and	%l4,G5_CONST,%l4	! (4) ax = Xi & 0x7fffffff
	add	%o2,%i2,%o3		! px += stridex
	ld	[%o3],%l7		! (7) Xi = ((int*)px)[0]
	add	%o3,%i2,%i1		! px += stridex
	and	%l5,G5_CONST,%l5	! (5) ax = Xi & 0x7fffffff
	and	%l6,G5_CONST,%l6	! (6) ax = Xi & 0x7fffffff
	ba	.main_loop
	and	%l7,G5_CONST,%l7	! (7) ax = Xi & 0x7fffffff

	.align	16
.main_loop:
	cmp	%l0,I5_THRESHOLD
	bg,pn	%icc,.spec0		! (0) if (ax > THRESHOLD)
	lda	[%o0]%asi,%f8		! (4) X = px[0]
	fstod	%f16,%f40		! (0) y = (double) X
.spec0_cont:
	cmp	%l1,I5_THRESHOLD
	bg,pn	%icc,.spec1		! (1) if (ax > THRESHOLD)
	lda	[%o1]%asi,%f10		! (5) X = px[0]
	fstod	%f2,%f42		! (1) y = (double) X
.spec1_cont:
	cmp	%l2,I5_THRESHOLD
	bg,pn	%icc,.spec2		! (2) if (ax > THRESHOLD)
	lda	[%o2]%asi,%f12		! (6) X = px[0]
	fstod	%f4,%f44		! (2) y = (double) X
.spec2_cont:
	cmp	%l3,I5_THRESHOLD
	bg,pn	%icc,.spec3		! (3) if (ax > THRESHOLD)
	lda	[%o3]%asi,%f14		! (7) X = px[0]
	fstod	%f6,%f46		! (3) y = (double) X
.spec3_cont:
	cmp	%l4,I5_THRESHOLD
	bg,pn	%icc,.spec4		! (4) if (ax > THRESHOLD)
	fmuld	F62_K256ONLN2,%f40,%f40	! (0) y *= K256ONLN2
	fstod	%f8,%f48		! (4) y = (double) X
.spec4_cont:
	cmp	%l5,I5_THRESHOLD
	bg,pn	%icc,.spec5		! (5) if (ax > THRESHOLD)
	fmuld	F62_K256ONLN2,%f42,%f42	! (1) y *= K256ONLN2
	fstod	%f10,%f50		! (5) y = (double) X
.spec5_cont:
	cmp	%l6,I5_THRESHOLD
	bg,pn	%icc,.spec6		! (6) if (ax > THRESHOLD)
	fmuld	F62_K256ONLN2,%f44,%f44	! (2) y *= K256ONLN2
	fstod	%f12,%f52		! (6) y = (double) X
.spec6_cont:
	cmp	%l7,I5_THRESHOLD
	bg,pn	%icc,.spec7		! (7) if (ax > THRESHOLD)
	fmuld	F62_K256ONLN2,%f46,%f46	! (3) y *= K256ONLN2
	fstod	%f14,%f54		! (7) y = (double) X
.spec7_cont:
	fdtoi	%f40,%f16		! (0) k = (int) y
	st	%f16,[%fp+tmp0]
	fmuld	F62_K256ONLN2,%f48,%f48	! (4) y *= K256ONLN2

	fdtoi	%f42,%f2		! (1) k = (int) y
	st	%f2,[%fp+tmp1]
	fmuld	F62_K256ONLN2,%f50,%f50	! (5) y *= K256ONLN2

	fdtoi	%f44,%f4		! (2) k = (int) y
	st	%f4,[%fp+tmp2]
	fmuld	F62_K256ONLN2,%f52,%f52	! (6) y *= K256ONLN2

	fdtoi	%f46,%f6		! (3) k = (int) y
	st	%f6,[%fp+tmp3]
	fmuld	F62_K256ONLN2,%f54,%f54	! (7) y *= K256ONLN2

	fdtoi	%f48,%f8		! (4) k = (int) y
	st	%f8,[%fp+tmp4]

	fdtoi	%f50,%f10		! (5) k = (int) y
	st	%f10,[%fp+tmp5]

	fitod	%f16,%f34		! (0) dtmp = (double) k
	fpackfix	%f16,%f16	! (0) ((float*)&drez)[0] = vis_fpackfix(drez)
	nop
	nop

	fdtoi	%f52,%f12		! (6) k = (int) y
	st	%f12,[%fp+tmp6]

	fdtoi	%f54,%f14		! (7) k = (int) y
	st	%f14,[%fp+tmp7]

	lda	[%i1]%asi,%l0		! (8) Xi = ((int*)px)[0]
	add	%i1,%i2,%o5		! px += stridex
	fitod	%f2,%f18		! (1) dtmp = (double) k
	fpackfix	%f2,%f2		! (1) ((float*)&drez)[0] = vis_fpackfix(drez)

	lda	[%o5]%asi,%l1		! (9) Xi = ((int*)px)[0]
	add	%o5,%i2,%i1		! px += stridex
	fitod	%f4,%f20		! (2) dtmp = (double) k
	fpackfix	%f4,%f4		! (2) ((float*)&drez)[0] = vis_fpackfix(drez)

	lda	[%i1]%asi,%l2		! (10) Xi = ((int*)px)[0]
	add	%i1,%i2,%o5		! px += stridex
	fitod	%f6,%f22		! (3) dtmp = (double) k
	fpackfix	%f6,%f6		! (3) ((float*)&drez)[0] = vis_fpackfix(drez)

	lda	[%o5]%asi,%l3		! (11) Xi = ((int*)px)[0]
	add	%o5,%i2,%i1		! px += stridex
	fitod	%f8,%f24		! (4) dtmp = (double) k
	fpackfix	%f8,%f8		! (4) ((float*)&drez)[0] = vis_fpackfix(drez)

	fitod	%f10,%f26		! (5) dtmp = (double) k
	fpackfix	%f10,%f10	! (5) ((float*)&drez)[0] = vis_fpackfix(drez)

	fitod	%f12,%f28		! (6) dtmp = (double) k
	fpackfix	%f12,%f12	! (6) ((float*)&drez)[0] = vis_fpackfix(drez)

	fitod	%f14,%f30		! (7) dtmp = (double) k
	fpackfix	%f14,%f14	! (7) ((float*)&drez)[0] = vis_fpackfix(drez)

	ld	[%fp+tmp0],%o0		! (0) load k
	and	%l0,G5_CONST,%l0	! (8) ax = Xi & 0x7fffffff
	fsubd	%f40,%f34,%f40		! (0) y -= dtmp

	ld	[%fp+tmp1],%o1		! (1) load k
	and	%l1,G5_CONST,%l1	! (9) ax = Xi & 0x7fffffff
	fsubd	%f42,%f18,%f42		! (1) y -= dtmp

	ld	[%fp+tmp2],%o2		! (2) load k
	and	%l2,G5_CONST,%l2	! (10) ax = Xi & 0x7fffffff
	and	%o0,255,%o0		! (0) k &= 255
	fsubd	%f44,%f20,%f44		! (2) y -= dtmp

	ld	[%fp+tmp3],%o3		! (3) load k
	and	%o1,255,%o1		! (1) k &= 255
	fsubd	%f46,%f22,%f46		! (3) y -= dtmp

	sll	%o0,3,%o0		! (0) k <<= 3
	sll	%o1,3,%o1		! (1) k <<= 3
	fmuld	F60_KA2,%f40,%f34	! (0) dtmp = y * KA2
	fsubd	%f48,%f24,%f48		! (4) y -= dtmp

	and	%l3,G5_CONST,%l3	! (11) ax = Xi & 0x7fffffff
	and	%o2,255,%o2		! (2) k &= 255
	fmuld	F60_KA2,%f42,%f18	! (1) dtmp = y * KA2
	fsubd	%f50,%f26,%f50		! (5) y -= dtmp

	sll	%o2,3,%o2		! (2) k <<= 3
	fmuld	F60_KA2,%f44,%f20	! (2) dtmp = y * KA2
	fsubd	%f52,%f28,%f52		! (6) y -= dtmp

	ld	[%fp+tmp4],%o4		! (4) load k
	and	%o3,255,%o3		! (3) k &= 255
	fmuld	F60_KA2,%f46,%f22	! (3) dtmp = y * KA2
	fsubd	%f54,%f30,%f54		! (7) y -= dtmp

	ld	[%fp+tmp5],%o5		! (5) load k
	sll	%o3,3,%o3		! (3) k <<= 3
	fmuld	F60_KA2,%f48,%f24	! (4) dtmp = y * KA2
	faddd	F58_KA1,%f34,%f34	! (0) dtmp += KA1

	ld	[%fp+tmp6],%o7		! (6) load k
	and	%o4,255,%o4		! (4) k &= 255
	fmuld	F60_KA2,%f50,%f26	! (5) dtmp = y * KA2
	faddd	F58_KA1,%f18,%f18	! (1) dtmp += KA1

	ld	[%fp+tmp7],%l4		! (7) load k
	and	%o5,255,%o5		! (5) k &= 255
	fmuld	F60_KA2,%f52,%f28	! (6) dtmp = y * KA2
	faddd	F58_KA1,%f20,%f20	! (2) dtmp += KA1

	sll	%o5,3,%o5		! (5) k <<= 3
	fmuld	F60_KA2,%f54,%f30	! (7) dtmp = y * KA2
	faddd	F58_KA1,%f22,%f22	! (3) dtmp += KA1

	fmuld	%f34,%f40,%f40		! (0) y *= dtmp
	ldd	[G1_CONST_TBL+%o0],%f34	! (0) dtmp = ((double*)(CONST_TBL + k))[0]
	and	%l4,255,%l4		! (7) k &= 255
	faddd	F58_KA1,%f24,%f24	! (4) dtmp += KA1

	fmuld	%f18,%f42,%f42		! (1) y *= dtmp
	ldd	[G1_CONST_TBL+%o1],%f18	! (1) dtmp = ((double*)(CONST_TBL + k))[0]
	sll	%l4,3,%l4		! (7) k <<= 3
	faddd	F58_KA1,%f26,%f26	! (5) dtmp += KA1

	fmuld	%f20,%f44,%f44		! (2) y *= dtmp
	ldd	[G1_CONST_TBL+%o2],%f20	! (2) dtmp = ((double*)(CONST_TBL + k))[0]
	faddd	F58_KA1,%f28,%f28	! (6) dtmp += KA1

	fmuld	%f22,%f46,%f46		! (3) y *= dtmp
	ldd	[G1_CONST_TBL+%o3],%f22	! (3) dtmp = ((double*)(CONST_TBL + k))[0]
	sll	%o4,3,%o4		! (4) k <<= 3
	faddd	F58_KA1,%f30,%f30	! (7) dtmp += KA1

	fmuld	%f24,%f48,%f48		! (4) y *= dtmp
	ldd	[G1_CONST_TBL+%o4],%f24	! (4) dtmp = ((double*)(CONST_TBL + k))[0]
	and	%o7,255,%o7		! (6) k &= 255
	fpadd32 %f16,%f34,%f34		! (0) drez = vis_fpadd32(drez,dtmp)

	fmuld	%f26,%f50,%f50		! (5) y *= dtmp
	ldd	[G1_CONST_TBL+%o5],%f26	! (5) dtmp = ((double*)(CONST_TBL + k))[0]
	sll	%o7,3,%o7		! (6) k <<= 3
	fpadd32 %f2,%f18,%f18		! (1) drez = vis_fpadd32(drez,dtmp)

	fmuld	%f28,%f52,%f52		! (6) y *= dtmp
	ldd	[G1_CONST_TBL+%o7],%f28	! (6) dtmp = ((double*)(CONST_TBL + k))[0]
	sll	%i2,2,%o0
	fpadd32 %f4,%f20,%f20		! (2) drez = vis_fpadd32(drez,dtmp)

	fmuld	%f30,%f54,%f54		! (7) y *= dtmp
	ldd	[G1_CONST_TBL+%l4],%f30	! (7) dtmp = ((double*)(CONST_TBL + k))[0]
	sub	%i1,%o0,%o0
	fpadd32 %f6,%f22,%f22		! (3) drez = vis_fpadd32(drez,dtmp)

	lda	[%i1]%asi,%l4		! (12) Xi = ((int*)px)[0]
	add	%i1,%i2,%o1		! px += stridex
	fpadd32 %f8,%f24,%f24		! (4) drez = vis_fpadd32(drez,dtmp)
	fmuld	%f34,%f40,%f40		! (0) y *= drez

	lda	[%o1]%asi,%l5		! (13) Xi = ((int*)px)[0]
	add	%o1,%i2,%o2		! px += stridex
	fpadd32 %f10,%f26,%f26		! (5)  drez = vis_fpadd32(drez,dtmp)
	fmuld	%f18,%f42,%f42		! (1)  y *= drez

	lda	[%o2]%asi,%l6		! (14) Xi = ((int*)px)[0]
	add	%o2,%i2,%o3		! px += stridex
	fpadd32 %f12,%f28,%f28		! (6)  drez = vis_fpadd32(drez,dtmp)
	fmuld	%f20,%f44,%f44		! (2)  y *= drez

	lda	[%o3]%asi,%l7		! (15) Xi = ((int*)px)[0]
	add	%o3,%i2,%i1		! px += stridex
	fpadd32 %f14,%f30,%f30		! (7)  drez = vis_fpadd32(drez,dtmp)
	fmuld	%f22,%f46,%f46		! (3)  y *= drez

	lda	[%o0]%asi,%f16		! (8)  X = px[0]
	add	%o0,%i2,%o5
	fmuld	%f24,%f48,%f48		! (4)  y *= drez
	faddd	%f34,%f40,%f40		! (0)  y += drez

	lda	[%o5]%asi,%f2		! (9)  X = px[0]
	add	%o5,%i2,%o0
	fmuld	%f26,%f50,%f50		! (5)  y *= drez
	faddd	%f18,%f42,%f42		! (1)  y += drez

	lda	[%o0]%asi,%f4		! (10) X = px[0]
	add	%o0,%i2,%o5
	fmuld	%f28,%f52,%f52		! (6)  y *= drez
	faddd	%f20,%f44,%f44		! (2)  y += drez

	lda	[%o5]%asi,%f6		! (11) X = px[0]
	add	%o5,%i2,%o0
	fmuld	%f30,%f54,%f54		! (7)  y *= drez
	faddd	%f22,%f46,%f46		! (3)  y += drez

	and	%l4,G5_CONST,%l4	! (12) ax = Xi & 0x7fffffff
	faddd	%f24,%f48,%f48		! (4)  y += drez

	and	%l5,G5_CONST,%l5	! (13) ax = Xi & 0x7fffffff
	faddd	%f26,%f50,%f50		! (5)  y += drez

	and	%l6,G5_CONST,%l6	! (14) ax = Xi & 0x7fffffff
	faddd	%f28,%f52,%f52		! (6)  y += drez

	and	%l7,G5_CONST,%l7	! (15) ax = Xi & 0x7fffffff
	faddd	%f30,%f54,%f54		! (7)  y += drez

	fdtos	%f40,%f26		! (0) (float) y
	st	%f26,[%i3]
	add	%i3,%i4,%o4		! py += stridey

	fdtos	%f42,%f18		! (1) (float) y
	st	%f18,[%o4]
	add	%o4,%i4,%i3		! py += stridey

	fdtos	%f44,%f20		! (2) (float) y
	st	%f20,[%i3]
	add	%i3,%i4,%o4		! py += stridey

	fdtos	%f46,%f22		! (3) (float) y
	st	%f22,[%o4]
	add	%o4,%i4,%i3		! py += stridey

	fdtos	%f48,%f24		! (4) (float) y
	st	%f24,[%i3]
	subcc	%i0,8,%i0
	add	%i3,%i4,%o4		! py += stridey

	fdtos	%f50,%f26		! (5) (float) y
	st	%f26,[%o4]
	add	%o4,%i4,%o5		! py += stridey
	add	%i4,%i4,%o7

	fdtos	%f52,%f28		! (6) (float) y
	st	%f28,[%o5]
	add	%o5,%i4,%o4		! py += stridey
	add	%o5,%o7,%i3		! py += stridey

	fdtos	%f54,%f30		! (7) (float) y
	st	%f30,[%o4]
	bpos,pt	%icc,.main_loop
	nop
.after_main_loop:
	sll	%i2,3,%o2
	sub	%i1,%o2,%i1

.tail:
	add	%i0,8,%i0
	subcc	%i0,1,%i0
	bneg,pn	%icc,.exit

	ld	[%i1],%l0
	ld	[%i1],%f2
	add	%i1,%i2,%i1

.tail_loop:
	and	%l0,G5_CONST,%l1
	cmp	%l1,I5_THRESHOLD
	bg,pn	%icc,.tail_spec
	nop
.tail_spec_cont:
	fstod	%f2,%f40
	fmuld	F62_K256ONLN2,%f40,%f40
	fdtoi	%f40,%f2
	st	%f2,[%fp+tmp0]
	fitod	%f2,%f16
	fpackfix	%f2,%f2
	fsubd	%f40,%f16,%f40
	fmuld	F60_KA2,%f40,%f16
	faddd	F58_KA1,%f16,%f16
	ld	[%fp+tmp0],%o0
	fmuld	%f16,%f40,%f40
	and	%o0,255,%o0
	sll	%o0,3,%o0
	ldd	[G1_CONST_TBL+%o0],%f16
	fpadd32 %f2,%f16,%f16
	lda	[%i1]%asi,%l0
	fmuld	%f16,%f40,%f40
	lda	[%i1]%asi,%f2
	faddd	%f16,%f40,%f40
	add	%i1,%i2,%i1
	fdtos	%f40,%f16
	st	%f16,[%i3]
	add	%i3,%i4,%i3
	subcc	%i0,1,%i0
	bpos,pt	%icc,.tail_loop
	nop

.exit:
	ret
	restore

.tail_spec:
	sethi	%hi(0x7f800000),%o4
	cmp	%l1,%o4
	bl,pt	%icc,.tail_spec_out_of_range
	nop

	srl	%l0,29,%l0
	ble,pn	%icc,.tail_spec_inf
	andcc	%l0,4,%g0

! NaN -> NaN

	fmuls	%f2,%f2,%f2
	ba	.tail_spec_exit
	st	%f2,[%i3]

.tail_spec_inf:
	be,a,pn	%icc,.tail_spec_exit
	st	%f2,[%i3]

	ba	.tail_spec_exit
	st	%f3,[%i3]

.tail_spec_out_of_range:
	fcmpes	%fcc0,%f2,%f3
	fcmpes	%fcc1,%f2,THRESHOLDL
	fbl,pn	%fcc0,1f		! if ( X < 0.0f )
	nop
	fbl,pt	%fcc1,.tail_spec_cont	! if ( X < THRESHOLDL )
	nop
1:
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f2
	fmuls	%f2,%f2,%f2
	st	%f2,[%i3]

.tail_spec_exit:
	lda	[%i1]%asi,%l0
	lda	[%i1]%asi,%f2
	add	%i1,%i2,%i1

	subcc	%i0,1,%i0
	bpos,pt	%icc,.tail_loop
	add	%i3,%i4,%i3
	ba	.exit
	nop

	.align	16
.spec0:
	sethi	%hi(0x7f800000),%o5
	cmp	%l0,%o5
	bl,pt	%icc,.spec0_out_of_range
	sll	%i2,3,%o4

	ble,pn	%icc,.spec0_inf
	sub	%i1,%o4,%o4

! NaN -> NaN

	fmuls	%f16,%f16,%f16
	ba	.spec0_exit
	st	%f16,[%i3]

.spec0_inf:
	ld	[%o4],%l0
	srl	%l0,29,%l0
	andcc	%l0,4,%l0
	be,a,pn	%icc,.spec0_exit
	st	%f16,[%i3]

	ba	.spec0_exit
	st	%f3,[%i3]

.spec0_out_of_range:
	fcmpes	%fcc0,%f16,%f3
	fcmpes	%fcc1,%f16,THRESHOLDL
	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
	fstod	%f16,%f40			! (0) y = (double) X
	fbl,a,pt	%fcc1,.spec0_cont	! if ( X < THRESHOLDL )
	fstod	%f16,%f40			! (0) y = (double) X
1:
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f16
	fmuls	%f16,%f16,%f16
	st	%f16,[%i3]

.spec0_exit:
	fmovs	%f2,%f16
	mov	%l1,%l0
	fmovs	%f4,%f2
	mov	%l2,%l1
	fmovs	%f6,%f4
	mov	%l3,%l2
	fmovs	%f8,%f6
	mov	%l4,%l3
	mov	%l5,%l4
	mov	%l6,%l5
	mov	%l7,%l6
	lda	[%i1]%asi,%l7
	add	%i1,%i2,%i1
	mov	%o1,%o0
	mov	%o2,%o1
	mov	%o3,%o2
	and	%l7,G5_CONST,%l7
	add	%o2,%i2,%o3

	subcc	%i0,1,%i0
	bpos,pt	%icc,.main_loop
	add	%i3,%i4,%i3
	ba	.after_main_loop
	nop

	.align	16
.spec1:
	sethi	%hi(0x7f800000),%o5
	cmp	%l1,%o5
	bge,pn	%icc,1f
	nop
	fcmpes	%fcc0,%f2,%f3
	fcmpes	%fcc1,%f2,THRESHOLDL
	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
	fstod	%f2,%f42			! (1) y = (double) X
	fbl,a,pt	%fcc1,.spec1_cont	! if ( X < THRESHOLDL )
	fstod	%f2,%f42			! (1) y = (double) X
1:
	fmuld	F62_K256ONLN2,%f40,%f40
	fdtoi	%f40,%f16
	st	%f16,[%fp+tmp0]
	fitod	%f16,%f34
	fpackfix	%f16,%f16
	fsubd	%f40,%f34,%f40
	fmuld	F60_KA2,%f40,%f34
	faddd	F58_KA1,%f34,%f34
	ld	[%fp+tmp0],%o0
	fmuld	%f34,%f40,%f40
	and	%o0,255,%o0
	sll	%o0,3,%o0
	ldd	[G1_CONST_TBL+%o0],%f34
	fpadd32 %f16,%f34,%f34
	fmuld	%f34,%f40,%f40
	faddd	%f34,%f40,%f40
	fdtos	%f40,%f26
	st	%f26,[%i3]
	add	%i3,%i4,%i3

	cmp	%l1,%o5
	bl,pt	%icc,.spec1_out_of_range
	sll	%i2,3,%o4

	ble,pn	%icc,.spec1_inf
	sub	%i1,%o4,%o4

! NaN -> NaN

	fmuls	%f2,%f2,%f2
	ba	.spec1_exit
	st	%f2,[%i3]

.spec1_inf:
	add	%o4,%i2,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	andcc	%l0,4,%l0
	be,a,pn	%icc,.spec1_exit
	st	%f2,[%i3]

	ba	.spec1_exit
	st	%f3,[%i3]

.spec1_out_of_range:
	sub	%i1,%o4,%o4
	add	%o4,%i2,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f2
	fmuls	%f2,%f2,%f2
	st	%f2,[%i3]

.spec1_exit:
	fmovs	%f4,%f16
	mov	%l2,%l0
	fmovs	%f6,%f2
	mov	%l3,%l1
	fmovs	%f8,%f4
	mov	%l4,%l2
	fmovs	%f10,%f6
	mov	%l5,%l3
	mov	%l6,%l4
	mov	%l7,%l5
	lda	[%i1]%asi,%l6
	add	%i1,%i2,%i1
	lda	[%i1]%asi,%l7
	add	%i1,%i2,%i1
	and	%l6,G5_CONST,%l6
	and	%l7,G5_CONST,%l7
	mov	%o2,%o0
	mov	%o3,%o1
	add	%o1,%i2,%o2
	add	%o2,%i2,%o3

	subcc	%i0,2,%i0
	bpos,pt	%icc,.main_loop
	add	%i3,%i4,%i3
	ba	.after_main_loop
	nop

	.align	16
.spec2:
	sethi	%hi(0x7f800000),%o5
	cmp	%l2,%o5
	bge,pn	%icc,1f
	nop
	fcmpes	%fcc0,%f4,%f3
	fcmpes	%fcc1,%f4,THRESHOLDL
	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
	fstod	%f4,%f44			! (2) y = (double) X
	fbl,a,pt	%fcc1,.spec2_cont	! if ( X < THRESHOLDL )
	fstod	%f4,%f44			! (2) y = (double) X
1:
	fmuld	F62_K256ONLN2,%f40,%f40

	fmuld	F62_K256ONLN2,%f42,%f42

	fdtoi	%f40,%f16
	st	%f16,[%fp+tmp0]

	fdtoi	%f42,%f2
	st	%f2,[%fp+tmp1]

	fitod	%f16,%f34
	fpackfix	%f16,%f16

	fitod	%f2,%f18
	fpackfix	%f2,%f2

	fsubd	%f40,%f34,%f40

	fsubd	%f42,%f18,%f42

	fmuld	F60_KA2,%f40,%f34

	fmuld	F60_KA2,%f42,%f18

	faddd	F58_KA1,%f34,%f34

	faddd	F58_KA1,%f18,%f18

	ld	[%fp+tmp0],%o0
	fmuld	%f34,%f40,%f40

	ld	[%fp+tmp1],%o1
	fmuld	%f18,%f42,%f42

	and	%o0,255,%o0

	and	%o1,255,%o1

	sll	%o0,3,%o0

	sll	%o1,3,%o1

	ldd	[G1_CONST_TBL+%o0],%f34

	ldd	[G1_CONST_TBL+%o1],%f18

	fpadd32 %f16,%f34,%f34

	fpadd32 %f2,%f18,%f18

	fmuld	%f34,%f40,%f40

	fmuld	%f18,%f42,%f42

	faddd	%f34,%f40,%f40

	faddd	%f18,%f42,%f42

	fdtos	%f40,%f26
	st	%f26,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f42,%f18
	st	%f18,[%o4]
	add	%o4,%i4,%i3

	cmp	%l2,%o5
	sll	%i2,1,%o5
	bl,pt	%icc,.spec2_out_of_range
	sll	%i2,2,%o4

	ble,pn	%icc,.spec2_inf
	add	%o4,%o5,%o4

! NaN -> NaN

	fmuls	%f4,%f4,%f4
	ba	.spec2_exit
	st	%f4,[%i3]

.spec2_inf:
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	andcc	%l0,4,%l0
	be,a,pn	%icc,.spec2_exit
	st	%f4,[%i3]

	ba	.spec2_exit
	st	%f3,[%i3]

.spec2_out_of_range:
	add	%o4,%o5,%o4
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f2
	fmuls	%f2,%f2,%f2
	st	%f2,[%i3]

.spec2_exit:
	fmovs	%f6,%f16
	mov	%l3,%l0
	mov	%o3,%o0
	fmovs	%f8,%f2
	mov	%l4,%l1
	add	%o0,%i2,%o1
	fmovs	%f10,%f4
	mov	%l5,%l2
	add	%o1,%i2,%o2
	fmovs	%f12,%f6
	mov	%l6,%l3
	mov	%l7,%l4
	lda	[%i1]%asi,%l5
	add	%i1,%i2,%i1
	add	%o2,%i2,%o3
	lda	[%i1]%asi,%l6
	add	%i1,%i2,%i1
	lda	[%i1]%asi,%l7
	add	%i1,%i2,%i1
	and	%l5,G5_CONST,%l5
	and	%l6,G5_CONST,%l6
	and	%l7,G5_CONST,%l7

	subcc	%i0,3,%i0
	bpos,pt	%icc,.main_loop
	add	%i3,%i4,%i3
	ba	.after_main_loop
	nop
.spec3:
	sethi	%hi(0x7f800000),%o5
	cmp	%l3,%o5
	bge,pn	%icc,1f
	nop
	fcmpes	%fcc0,%f6,%f3
	fcmpes	%fcc1,%f6,THRESHOLDL
	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
	fstod	%f6,%f46			! (3) y = (double) X
	fbl,a,pt	%fcc1,.spec3_cont	! if ( X < THRESHOLDL )
	fstod	%f6,%f46			! (3) y = (double) X
1:
	fmuld	F62_K256ONLN2,%f40,%f40

	fmuld	F62_K256ONLN2,%f42,%f42

	fmuld	F62_K256ONLN2,%f44,%f44

	fdtoi	%f40,%f16
	st	%f16,[%fp+tmp0]

	fdtoi	%f42,%f2
	st	%f2,[%fp+tmp1]

	fdtoi	%f44,%f4
	st	%f4,[%fp+tmp2]

	fitod	%f16,%f34
	fpackfix	%f16,%f16

	fitod	%f2,%f18
	fpackfix	%f2,%f2

	fitod	%f4,%f20
	fpackfix	%f4,%f4

	fsubd	%f40,%f34,%f40

	fsubd	%f42,%f18,%f42

	fsubd	%f44,%f20,%f44

	fmuld	F60_KA2,%f40,%f34

	fmuld	F60_KA2,%f42,%f18

	fmuld	F60_KA2,%f44,%f20

	faddd	F58_KA1,%f34,%f34

	faddd	F58_KA1,%f18,%f18

	faddd	F58_KA1,%f20,%f20

	ld	[%fp+tmp0],%o0
	fmuld	%f34,%f40,%f40

	ld	[%fp+tmp1],%o1
	fmuld	%f18,%f42,%f42

	ld	[%fp+tmp2],%o2
	fmuld	%f20,%f44,%f44

	and	%o0,255,%o0
	and	%o1,255,%o1

	and	%o2,255,%o2
	sll	%o0,3,%o0

	sll	%o1,3,%o1
	sll	%o2,3,%o2

	ldd	[G1_CONST_TBL+%o0],%f34

	ldd	[G1_CONST_TBL+%o1],%f18

	ldd	[G1_CONST_TBL+%o2],%f20

	fpadd32 %f16,%f34,%f34

	fpadd32 %f2,%f18,%f18

	fpadd32 %f4,%f20,%f20

	fmuld	%f34,%f40,%f40

	fmuld	%f18,%f42,%f42

	fmuld	%f20,%f44,%f44

	faddd	%f34,%f40,%f40

	faddd	%f18,%f42,%f42

	faddd	%f20,%f44,%f44

	fdtos	%f40,%f26
	st	%f26,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f42,%f18
	st	%f18,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f44,%f20
	st	%f20,[%i3]
	add	%i3,%i4,%i3

	cmp	%l3,%o5
	bl,pt	%icc,.spec3_out_of_range
	sll	%i2,2,%o4

	ble,pn	%icc,.spec3_inf
	add	%o4,%i2,%o4

! NaN -> NaN

	fmuls	%f6,%f6,%f6
	ba	.spec3_exit
	st	%f6,[%i3]

.spec3_inf:
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	andcc	%l0,4,%l0
	be,a,pn	%icc,.spec3_exit
	st	%f6,[%i3]

	ba	.spec3_exit
	st	%f3,[%i3]

.spec3_out_of_range:
	add	%o4,%i2,%o4
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f2
	fmuls	%f2,%f2,%f2
	st	%f2,[%i3]

.spec3_exit:
	fmovs	%f8,%f16
	mov	%l4,%l0
	fmovs	%f10,%f2
	mov	%l5,%l1
	fmovs	%f12,%f4
	mov	%l6,%l2
	fmovs	%f14,%f6
	mov	%l7,%l3
	mov	%i1,%o0
	lda	[%o0]%asi,%l4
	add	%o0,%i2,%o1
	lda	[%o1]%asi,%l5
	add	%o1,%i2,%o2
	lda	[%o2]%asi,%l6
	add	%o2,%i2,%o3
	lda	[%o3]%asi,%l7
	add	%o3,%i2,%i1
	and	%l4,G5_CONST,%l4
	and	%l5,G5_CONST,%l5
	and	%l6,G5_CONST,%l6
	and	%l7,G5_CONST,%l7

	subcc	%i0,4,%i0
	bpos,pt	%icc,.main_loop
	add	%i3,%i4,%i3
	ba	.after_main_loop
	nop

	.align	16
! .spec4 - special-case path for input element 4 (float in %f8, integer
! image in %l4).
!
! Entry test:
!   %l4 >= 0x7f800000 (signed)      -> handled here (label 1)
!   X < 0.0f  (%f8 < %f3)           -> handled here (label 1)
!   X < THRESHOLDL                  -> false alarm: the annulled delay slot
!                                      converts X to double in %f48 and
!                                      control resumes at .spec4_cont
!   otherwise                       -> handled here (label 1)
!
! At label 1 the four elements in front of the special one (doubles
! %f40,%f42,%f44,%f46) are finished and stored, a special result is
! stored for element 4, and the loop state is rebuilt from element 5 on.
!
! Registers as used below: %i1 input pointer, %i2 input step in bytes,
! %i3 output pointer, %i4 output step in bytes, %i0 remaining count.
.spec4:
! %o5 = 0x7f800000, the single-precision +Inf bit pattern.  It is not
! written again in this block, so the compare near the end can reuse it.
	sethi	%hi(0x7f800000),%o5
	cmp	%l4,%o5
	bge,pn	%icc,1f
	nop
	fcmpes	%fcc0,%f8,%f3
	fcmpes	%fcc1,%f8,THRESHOLDL
	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
	fstod	%f8,%f48			! (4) y = (double) X
	fbl,a,pt	%fcc1,.spec4_cont	! if ( X < THRESHOLDL )
	fstod	%f8,%f48			! (4) y = (double) X
1:
! Scale elements 1..3 by F62_K256ONLN2.  %f40 is not scaled in this block.
! NOTE(review): presumably element 0 was already scaled before entry.
	fmuld	F62_K256ONLN2,%f42,%f42

	fmuld	F62_K256ONLN2,%f44,%f44

	fmuld	F62_K256ONLN2,%f46,%f46

! k = (int) y for each element; the integer is also spilled to a stack
! slot so that the integer unit can build a table index from it.
	fdtoi	%f40,%f16
	st	%f16,[%fp+tmp0]

	fdtoi	%f42,%f2
	st	%f2,[%fp+tmp1]

	fdtoi	%f44,%f4
	st	%f4,[%fp+tmp2]

	fdtoi	%f46,%f6
	st	%f6,[%fp+tmp3]

! (double) k goes to a scratch double; fpackfix then repacks k in place.
! NOTE(review): the fpackfix result depends on the GSR scale factor, which
! is set up outside this block.
	fitod	%f16,%f34
	fpackfix	%f16,%f16

	fitod	%f2,%f18
	fpackfix	%f2,%f2

	fitod	%f4,%f20
	fpackfix	%f4,%f4

	fitod	%f6,%f22
	fpackfix	%f6,%f6

! y = y - (double) k
	fsubd	%f40,%f34,%f40

	fsubd	%f42,%f18,%f42

	fsubd	%f44,%f20,%f44

	fsubd	%f46,%f22,%f46

! p = (F58_KA1 + F60_KA2 * y) * y, built in three steps
	fmuld	F60_KA2,%f40,%f34

	fmuld	F60_KA2,%f42,%f18

	fmuld	F60_KA2,%f44,%f20

	fmuld	F60_KA2,%f46,%f22

	faddd	F58_KA1,%f34,%f34

	faddd	F58_KA1,%f18,%f18

	faddd	F58_KA1,%f20,%f20

	faddd	F58_KA1,%f22,%f22

	ld	[%fp+tmp0],%o0
	fmuld	%f34,%f40,%f40

	ld	[%fp+tmp1],%o1
	fmuld	%f18,%f42,%f42

	ld	[%fp+tmp2],%o2
	fmuld	%f20,%f44,%f44

	ld	[%fp+tmp3],%o3
	fmuld	%f22,%f46,%f46

! Table byte offset = (k & 255) * 8: one eight-byte entry per index.
	and	%o0,255,%o0
	and	%o1,255,%o1

	and	%o2,255,%o2
	and	%o3,255,%o3

	sll	%o0,3,%o0
	sll	%o1,3,%o1

	sll	%o2,3,%o2
	sll	%o3,3,%o3

	ldd	[G1_CONST_TBL+%o0],%f34

	ldd	[G1_CONST_TBL+%o1],%f18

	ldd	[G1_CONST_TBL+%o2],%f20

	ldd	[G1_CONST_TBL+%o3],%f22

! Partitioned 32-bit add of the packed k into the table entry T.
! NOTE(review): presumably this folds the power-of-two part of the result
! into the exponent field of T - confirm against the GSR setup.
	fpadd32 %f16,%f34,%f34

	fpadd32 %f2,%f18,%f18

	fpadd32 %f4,%f20,%f20

	fpadd32 %f6,%f22,%f22

! result = T + T * p
	fmuld	%f34,%f40,%f40

	fmuld	%f18,%f42,%f42

	fmuld	%f20,%f44,%f44

	fmuld	%f22,%f46,%f46

	faddd	%f34,%f40,%f40

	faddd	%f18,%f42,%f42

	faddd	%f20,%f44,%f44

	faddd	%f22,%f46,%f46

! Round to single and store the four results, stepping the output address
! by %i4 and alternating between %i3 and %o4 as the current pointer.
! Afterwards %i3 addresses the output slot of element 4.
	fdtos	%f40,%f26
	st	%f26,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f42,%f18
	st	%f18,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f44,%f20
	st	%f20,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f46,%f22
	st	%f22,[%o4]
	add	%o4,%i4,%i3

! Classify element 4 by its integer image: below 0x7f800000 means finite
! but out of range, equal means infinity, above means NaN.
! The first delay slot sets %o4 = 4 * %i2; the second one turns it into
! %i1 - 4 * %i2, the address the input is reloaded from.
	cmp	%l4,%o5
	bl,pt	%icc,.spec4_out_of_range
	sll	%i2,2,%o4

	ble,pn	%icc,.spec4_inf
	sub	%i1,%o4,%o4

! NaN -> NaN

	fmuls	%f8,%f8,%f8
	ba	.spec4_exit
	st	%f8,[%i3]

! Infinity: reload the raw word and test its sign bit (bit 31 moved to
! bit 2).  Sign clear: store the input itself (annulled slot, taken only).
! Sign set: store %f3, the register used as 0.0f in the entry test.
.spec4_inf:
	ld	[%o4],%l0
	srl	%l0,29,%l0
	andcc	%l0,4,%l0
	be,a,pn	%icc,.spec4_exit
	st	%f8,[%i3]

	ba	.spec4_exit
	st	%f3,[%i3]

! Finite but out of range: the sign bit selects one of two single words
! located 2048 bytes into the constant table (just past the 256 entries);
! its square is stored as the result.
! NOTE(review): presumably a huge and a tiny constant, so that the square
! overflows or underflows - the two words are not visible here.
.spec4_out_of_range:
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f2
	fmuls	%f2,%f2,%f2
	st	%f2,[%i3]

! Rebuild the loop state after five consumed elements: elements 5..7
! (%f10,%f12,%f14 / %l5..%l7) become elements 0..2, the word at %i1
! becomes element 3 (integer copy in %l3, float copy in %f6), and the four
! words after it supply %l4..%l7 with their addresses left in %o0..%o3.
! %i1 advances by five steps of %i2 in total.
.spec4_exit:
	fmovs	%f10,%f16
	mov	%l5,%l0
	fmovs	%f12,%f2
	mov	%l6,%l1
	fmovs	%f14,%f4
	mov	%l7,%l2
	lda	[%i1]%asi,%l3
	lda	[%i1]%asi,%f6
	add	%i1,%i2,%o0
	lda	[%o0]%asi,%l4
	add	%o0,%i2,%o1
	lda	[%o1]%asi,%l5
	add	%o1,%i2,%o2
	lda	[%o2]%asi,%l6
	add	%o2,%i2,%o3
	lda	[%o3]%asi,%l7
	add	%o3,%i2,%i1
! NOTE(review): G5_CONST is defined elsewhere; presumably a sign-clearing
! mask applied to every freshly loaded integer image.
	and	%l3,G5_CONST,%l3
	and	%l4,G5_CONST,%l4
	and	%l5,G5_CONST,%l5
	and	%l6,G5_CONST,%l6
	and	%l7,G5_CONST,%l7

! Five elements consumed; the delay slot steps the output pointer past
! the special result on both the taken and the fall-through path.
	subcc	%i0,5,%i0
	bpos,pt	%icc,.main_loop
	add	%i3,%i4,%i3
	ba	.after_main_loop
	nop

	.align 16
! .spec5 - special-case path for input element 5 (float in %f10, integer
! image in %l5).
!
! Entry test:
!   %l5 >= 0x7f800000 (signed)      -> handled here (label 1)
!   X < 0.0f  (%f10 < %f3)          -> handled here (label 1)
!   X < THRESHOLDL                  -> false alarm: the annulled delay slot
!                                      converts X to double in %f50 and
!                                      control resumes at .spec5_cont
!   otherwise                       -> handled here (label 1)
!
! At label 1 the five elements in front of the special one (doubles
! %f40..%f48) are finished and stored, a special result is stored for
! element 5, and the loop state is rebuilt from element 6 on.
! The arithmetic is the same per element as in the other spec blocks, but
! integer index work is interleaved with the floating-point operations.
.spec5:
! %o5 = 0x7f800000, the single-precision +Inf bit pattern.  Nothing in
! this block writes %o5 again, so the compare near the end reuses it.
	sethi	%hi(0x7f800000),%o5
	cmp	%l5,%o5
	bge,pn	%icc,1f
	nop
	fcmpes	%fcc0,%f10,%f3
	fcmpes	%fcc1,%f10,THRESHOLDL
	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
	fstod	%f10,%f50			! (5) y = (double) X
	fbl,a,pt	%fcc1,.spec5_cont	! if ( X < THRESHOLDL )
	fstod	%f10,%f50			! (5) y = (double) X
1:
! Scale elements 2..4 by F62_K256ONLN2.  %f40 and %f42 are not scaled here.
! NOTE(review): presumably elements 0 and 1 were scaled before entry.
	fmuld	F62_K256ONLN2,%f44,%f44

	fmuld	F62_K256ONLN2,%f46,%f46

! k = (int) y, also spilled to the stack for the integer unit.
	fdtoi	%f40,%f16
	st	%f16,[%fp+tmp0]
	fmuld	F62_K256ONLN2,%f48,%f48

	fdtoi	%f42,%f2
	st	%f2,[%fp+tmp1]

	fdtoi	%f44,%f4
	st	%f4,[%fp+tmp2]

	fdtoi	%f46,%f6
	st	%f6,[%fp+tmp3]

	fdtoi	%f48,%f8
	st	%f8,[%fp+tmp4]

! (double) k goes to a scratch double; fpackfix then repacks k in place.
! NOTE(review): the fpackfix result depends on the GSR scale factor, which
! is set up outside this block.
	fitod	%f16,%f34
	fpackfix	%f16,%f16

	fitod	%f2,%f18
	fpackfix	%f2,%f2

	fitod	%f4,%f20
	fpackfix	%f4,%f4

	fitod	%f6,%f22
	fpackfix	%f6,%f6

	fitod	%f8,%f24
	fpackfix	%f8,%f8

! From here on three things are interleaved:
!   y = y - (double) k
!   p = (F58_KA1 + F60_KA2 * y) * y
!   table byte offset = (k & 255) * 8, computed in %o0..%o4
	ld	[%fp+tmp0],%o0
	fsubd	%f40,%f34,%f40

	ld	[%fp+tmp1],%o1
	fsubd	%f42,%f18,%f42

	ld	[%fp+tmp2],%o2
	and	%o0,255,%o0
	fsubd	%f44,%f20,%f44

	ld	[%fp+tmp3],%o3
	and	%o1,255,%o1
	fsubd	%f46,%f22,%f46

	sll	%o0,3,%o0
	sll	%o1,3,%o1
	fmuld	F60_KA2,%f40,%f34
	fsubd	%f48,%f24,%f48

	and	%o2,255,%o2
	fmuld	F60_KA2,%f42,%f18

	sll	%o2,3,%o2
	fmuld	F60_KA2,%f44,%f20

	ld	[%fp+tmp4],%o4
	and	%o3,255,%o3
	fmuld	F60_KA2,%f46,%f22

	sll	%o3,3,%o3
	fmuld	F60_KA2,%f48,%f24
	faddd	F58_KA1,%f34,%f34

	and	%o4,255,%o4
	faddd	F58_KA1,%f18,%f18

	faddd	F58_KA1,%f20,%f20

	faddd	F58_KA1,%f22,%f22

! Each scratch double is consumed by the multiply before the table entry T
! is loaded into the same register.
	fmuld	%f34,%f40,%f40
	ldd	[G1_CONST_TBL+%o0],%f34
	faddd	F58_KA1,%f24,%f24

	fmuld	%f18,%f42,%f42
	ldd	[G1_CONST_TBL+%o1],%f18

	fmuld	%f20,%f44,%f44
	ldd	[G1_CONST_TBL+%o2],%f20

	fmuld	%f22,%f46,%f46
	ldd	[G1_CONST_TBL+%o3],%f22
	sll	%o4,3,%o4

! Partitioned 32-bit add of the packed k into the table entry T.
! NOTE(review): presumably this folds the power-of-two part of the result
! into the exponent field of T - confirm against the GSR setup.
	fmuld	%f24,%f48,%f48
	ldd	[G1_CONST_TBL+%o4],%f24
	fpadd32 %f16,%f34,%f34

	fpadd32 %f2,%f18,%f18

	fpadd32 %f4,%f20,%f20

	fpadd32 %f6,%f22,%f22

! result = T + T * p
	fpadd32 %f8,%f24,%f24
	fmuld	%f34,%f40,%f40

	fmuld	%f18,%f42,%f42

	fmuld	%f20,%f44,%f44

	fmuld	%f22,%f46,%f46

	fmuld	%f24,%f48,%f48
	faddd	%f34,%f40,%f40

	faddd	%f18,%f42,%f42

	faddd	%f20,%f44,%f44

	faddd	%f22,%f46,%f46

	faddd	%f24,%f48,%f48

! Round to single and store the five results, stepping the output address
! by %i4.  Afterwards %i3 addresses the output slot of element 5.
	fdtos	%f40,%f26
	st	%f26,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f42,%f18
	st	%f18,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f44,%f20
	st	%f20,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f46,%f22
	st	%f22,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f48,%f24
	st	%f24,[%i3]
	add	%i3,%i4,%i3

! Classify element 5 by its integer image: below 0x7f800000 means finite
! but out of range, equal means infinity, above means NaN.
! The first delay slot sets %o4 = 4 * %i2; the second reduces it to
! 3 * %i2 (the out-of-range path does the same subtraction itself).
	cmp	%l5,%o5
	bl,pt	%icc,.spec5_out_of_range
	sll	%i2,2,%o4

	ble,pn	%icc,.spec5_inf
	sub	%o4,%i2,%o4

! NaN -> NaN

	fmuls	%f10,%f10,%f10
	ba	.spec5_exit
	st	%f10,[%i3]

! Infinity: reload the raw word from %i1 - 3 * %i2 and test its sign bit
! (bit 31 moved to bit 2).  Sign clear: store the input itself (annulled
! slot, taken only).  Sign set: store %f3, the register used as 0.0f in
! the entry test.
.spec5_inf:
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	andcc	%l0,4,%l0
	be,a,pn	%icc,.spec5_exit
	st	%f10,[%i3]

	ba	.spec5_exit
	st	%f3,[%i3]

! Finite but out of range: the sign bit selects one of two single words
! located 2048 bytes into the constant table (just past the 256 entries);
! its square is stored as the result.
! NOTE(review): presumably a huge and a tiny constant, so that the square
! overflows or underflows - the two words are not visible here.
.spec5_out_of_range:
	sub	%o4,%i2,%o4
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f2
	fmuls	%f2,%f2,%f2
	st	%f2,[%i3]

! Rebuild the loop state after six consumed elements: elements 6 and 7
! (%f12,%f14 / %l6,%l7) become elements 0 and 1, the next two words become
! elements 2 and 3 (integer copies in %l2,%l3, float copies in %f4,%f6),
! and the four words after them supply %l4..%l7 with their addresses left
! in %o0..%o3.  %i1 advances by six steps of %i2 in total.
.spec5_exit:
	fmovs	%f12,%f16
	mov	%l6,%l0
	fmovs	%f14,%f2
	mov	%l7,%l1
	lda	[%i1]%asi,%l2
	lda	[%i1]%asi,%f4
	add	%i1,%i2,%i1
	lda	[%i1]%asi,%l3
	lda	[%i1]%asi,%f6
	add	%i1,%i2,%o0
	lda	[%o0]%asi,%l4
	add	%o0,%i2,%o1
	lda	[%o1]%asi,%l5
	add	%o1,%i2,%o2
	lda	[%o2]%asi,%l6
	add	%o2,%i2,%o3
	lda	[%o3]%asi,%l7
	add	%o3,%i2,%i1
! NOTE(review): G5_CONST is defined elsewhere; presumably a sign-clearing
! mask applied to every freshly loaded integer image.
	and	%l2,G5_CONST,%l2
	and	%l3,G5_CONST,%l3
	and	%l4,G5_CONST,%l4
	and	%l5,G5_CONST,%l5
	and	%l6,G5_CONST,%l6
	and	%l7,G5_CONST,%l7

! Six elements consumed; the delay slot steps the output pointer past
! the special result on both the taken and the fall-through path.
	subcc	%i0,6,%i0
	bpos,pt	%icc,.main_loop
	add	%i3,%i4,%i3
	ba	.after_main_loop
	nop
! .spec6 - special-case path for input element 6 (float in %f12, integer
! image in %l6).
!
! Entry test:
!   %l6 >= 0x7f800000 (signed)      -> handled here (label 1)
!   X < 0.0f  (%f12 < %f3)          -> handled here (label 1)
!   X < THRESHOLDL                  -> false alarm: the annulled delay slot
!                                      converts X to double in %f52 and
!                                      control resumes at .spec6_cont
!   otherwise                       -> handled here (label 1)
!
! At label 1 the six elements in front of the special one (doubles
! %f40..%f50) are finished and stored, a special result is stored for
! element 6, and the loop state is rebuilt from element 7 on.
!
! The entry point is aligned to 16 bytes like the neighbouring spec4, spec5
! and spec7 entries.  The padding follows an unconditional branch and its
! delay slot, so it is never executed.
	.align	16
.spec6:
	sethi	%hi(0x7f800000),%o5
	cmp	%l6,%o5
	bge,pn	%icc,1f
	nop
	fcmpes	%fcc0,%f12,%f3
	fcmpes	%fcc1,%f12,THRESHOLDL
	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
	fstod	%f12,%f52			! (6) y = (double) X
	fbl,a,pt	%fcc1,.spec6_cont	! if ( X < THRESHOLDL )
	fstod	%f12,%f52			! (6) y = (double) X
1:
! Scale elements 3..5 by F62_K256ONLN2.  %f40..%f44 are not scaled here.
! NOTE(review): presumably elements 0..2 were scaled before entry.
	fmuld	F62_K256ONLN2,%f46,%f46

! k = (int) y, also spilled to the stack for the integer unit.
	fdtoi	%f40,%f16
	st	%f16,[%fp+tmp0]
	fmuld	F62_K256ONLN2,%f48,%f48

	fdtoi	%f42,%f2
	st	%f2,[%fp+tmp1]
	fmuld	F62_K256ONLN2,%f50,%f50

	fdtoi	%f44,%f4
	st	%f4,[%fp+tmp2]

	fdtoi	%f46,%f6
	st	%f6,[%fp+tmp3]

	fdtoi	%f48,%f8
	st	%f8,[%fp+tmp4]

	fdtoi	%f50,%f10
	st	%f10,[%fp+tmp5]

! (double) k goes to a scratch double; fpackfix then repacks k in place.
! NOTE(review): the fpackfix result depends on the GSR scale factor, which
! is set up outside this block.
	fitod	%f16,%f34
	fpackfix	%f16,%f16

	fitod	%f2,%f18
	fpackfix	%f2,%f2

	fitod	%f4,%f20
	fpackfix	%f4,%f4

	fitod	%f6,%f22
	fpackfix	%f6,%f6

	fitod	%f8,%f24
	fpackfix	%f8,%f8

	fitod	%f10,%f26
	fpackfix	%f10,%f10

! From here on three things are interleaved:
!   y = y - (double) k
!   p = (F58_KA1 + F60_KA2 * y) * y
!   table byte offset = (k & 255) * 8, computed in %o0..%o5
	ld	[%fp+tmp0],%o0
	fsubd	%f40,%f34,%f40

	ld	[%fp+tmp1],%o1
	fsubd	%f42,%f18,%f42

	ld	[%fp+tmp2],%o2
	and	%o0,255,%o0
	fsubd	%f44,%f20,%f44

	ld	[%fp+tmp3],%o3
	and	%o1,255,%o1
	fsubd	%f46,%f22,%f46

	sll	%o0,3,%o0
	sll	%o1,3,%o1
	fmuld	F60_KA2,%f40,%f34
	fsubd	%f48,%f24,%f48

	and	%o2,255,%o2
	fmuld	F60_KA2,%f42,%f18
	fsubd	%f50,%f26,%f50

	sll	%o2,3,%o2
	fmuld	F60_KA2,%f44,%f20

	ld	[%fp+tmp4],%o4
	and	%o3,255,%o3
	fmuld	F60_KA2,%f46,%f22

! This load overwrites the 0x7f800000 constant held in %o5, which is why
! the constant is rebuilt before the final classification.
	ld	[%fp+tmp5],%o5
	sll	%o3,3,%o3
	fmuld	F60_KA2,%f48,%f24
	faddd	F58_KA1,%f34,%f34

	and	%o4,255,%o4
	fmuld	F60_KA2,%f50,%f26
	faddd	F58_KA1,%f18,%f18

	and	%o5,255,%o5
	faddd	F58_KA1,%f20,%f20

	sll	%o5,3,%o5
	faddd	F58_KA1,%f22,%f22

! Each scratch double is consumed by the multiply before the table entry T
! is loaded into the same register.
	fmuld	%f34,%f40,%f40
	ldd	[G1_CONST_TBL+%o0],%f34
	faddd	F58_KA1,%f24,%f24

	fmuld	%f18,%f42,%f42
	ldd	[G1_CONST_TBL+%o1],%f18
	faddd	F58_KA1,%f26,%f26

	fmuld	%f20,%f44,%f44
	ldd	[G1_CONST_TBL+%o2],%f20

	fmuld	%f22,%f46,%f46
	ldd	[G1_CONST_TBL+%o3],%f22
	sll	%o4,3,%o4

! Partitioned 32-bit add of the packed k into the table entry T.
! NOTE(review): presumably this folds the power-of-two part of the result
! into the exponent field of T - confirm against the GSR setup.
	fmuld	%f24,%f48,%f48
	ldd	[G1_CONST_TBL+%o4],%f24
	fpadd32 %f16,%f34,%f34

	fmuld	%f26,%f50,%f50
	ldd	[G1_CONST_TBL+%o5],%f26
	fpadd32 %f2,%f18,%f18

	fpadd32 %f4,%f20,%f20

	fpadd32 %f6,%f22,%f22

! result = T + T * p
	fpadd32 %f8,%f24,%f24
	fmuld	%f34,%f40,%f40

	fpadd32 %f10,%f26,%f26
	fmuld	%f18,%f42,%f42

	fmuld	%f20,%f44,%f44

	fmuld	%f22,%f46,%f46

	fmuld	%f24,%f48,%f48
	faddd	%f34,%f40,%f40

	fmuld	%f26,%f50,%f50
	faddd	%f18,%f42,%f42

	faddd	%f20,%f44,%f44

	faddd	%f22,%f46,%f46

	faddd	%f24,%f48,%f48

	faddd	%f26,%f50,%f50

! Round to single and store the six results, stepping the output address
! by %i4 and alternating between %i3 and %o4 as the current pointer.
! Afterwards %i3 addresses the output slot of element 6.
	fdtos	%f40,%f26
	st	%f26,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f42,%f18
	st	%f18,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f44,%f20
	st	%f20,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f46,%f22
	st	%f22,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f48,%f24
	st	%f24,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f50,%f26
	st	%f26,[%o4]
	add	%o4,%i4,%i3

! Classify element 6 by its integer image: below 0x7f800000 means finite
! but out of range, equal means infinity, above means NaN.
! The first delay slot sets %o4 = 2 * %i2; the second turns it into
! %i1 - 2 * %i2, the address the input is reloaded from.
	sethi	%hi(0x7f800000),%o5
	cmp	%l6,%o5
	bl,pt	%icc,.spec6_out_of_range
	sll	%i2,1,%o4

	ble,pn	%icc,.spec6_inf
	sub	%i1,%o4,%o4

! NaN -> NaN

	fmuls	%f12,%f12,%f12
	ba	.spec6_exit
	st	%f12,[%i3]

! Infinity: reload the raw word and test its sign bit (bit 31 moved to
! bit 2).  Sign clear: store the input itself (annulled slot, taken only).
! Sign set: store %f3, the register used as 0.0f in the entry test.
.spec6_inf:
	ld	[%o4],%l0
	srl	%l0,29,%l0
	andcc	%l0,4,%l0
	be,a,pn	%icc,.spec6_exit
	st	%f12,[%i3]

	ba	.spec6_exit
	st	%f3,[%i3]

! Finite but out of range: the sign bit selects one of two single words
! located 2048 bytes into the constant table (just past the 256 entries);
! its square is stored as the result.
! NOTE(review): presumably a huge and a tiny constant, so that the square
! overflows or underflows - the two words are not visible here.
.spec6_out_of_range:
	sub	%i1,%o4,%o4
	ld	[%o4],%l0
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f2
	fmuls	%f2,%f2,%f2
	st	%f2,[%i3]

! Rebuild the loop state after seven consumed elements: element 7
! (%f14 / %l7) becomes element 0, the next three words become elements
! 1..3 (integer copies in %l1..%l3, float copies in %f2,%f4,%f6), and the
! four words after them supply %l4..%l7 with their addresses left in
! %o0..%o3.  %i1 advances by seven steps of %i2 in total.
.spec6_exit:
	fmovs	%f14,%f16
	mov	%l7,%l0
	lda	[%i1]%asi,%l1
	lda	[%i1]%asi,%f2
	add	%i1,%i2,%i1
	lda	[%i1]%asi,%l2
	lda	[%i1]%asi,%f4
	add	%i1,%i2,%i1
	lda	[%i1]%asi,%l3
	lda	[%i1]%asi,%f6
	add	%i1,%i2,%o0
	lda	[%o0]%asi,%l4
	add	%o0,%i2,%o1
	lda	[%o1]%asi,%l5
	add	%o1,%i2,%o2
	lda	[%o2]%asi,%l6
	add	%o2,%i2,%o3
	lda	[%o3]%asi,%l7
	add	%o3,%i2,%i1
! NOTE(review): G5_CONST is defined elsewhere; presumably a sign-clearing
! mask applied to every freshly loaded integer image.
	and	%l1,G5_CONST,%l1
	and	%l2,G5_CONST,%l2
	and	%l3,G5_CONST,%l3
	and	%l4,G5_CONST,%l4
	and	%l5,G5_CONST,%l5
	and	%l6,G5_CONST,%l6
	and	%l7,G5_CONST,%l7

! Seven elements consumed; the delay slot steps the output pointer past
! the special result on both the taken and the fall-through path.
	subcc	%i0,7,%i0
	bpos,pt	%icc,.main_loop
	add	%i3,%i4,%i3
	ba	.after_main_loop
	nop

	.align	16
! .spec7 - special-case path for input element 7 (float in %f14, integer
! image in %l7).
!
! Entry test:
!   %l7 >= 0x7f800000 (signed)      -> handled here (label 1)
!   X < 0.0f  (%f14 < %f3)          -> handled here (label 1)
!   X < THRESHOLDL                  -> false alarm: the annulled delay slot
!                                      converts X to double in %f54 and
!                                      control resumes at .spec7_cont
!   otherwise                       -> handled here (label 1)
!
! At label 1 the seven elements in front of the special one (doubles
! %f40..%f52) are finished and stored and a special result is stored for
! element 7.  All eight elements of the group are then consumed, so no
! register slide is needed on exit.
.spec7:
	sethi	%hi(0x7f800000),%o5
	cmp	%l7,%o5
	bge,pn	%icc,1f
	nop
	fcmpes	%fcc0,%f14,%f3
	fcmpes	%fcc1,%f14,THRESHOLDL
	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
	fstod	%f14,%f54			! (7) y = (double) X
	fbl,a,pt	%fcc1,.spec7_cont	! if ( X < THRESHOLDL )
	fstod	%f14,%f54			! (7) y = (double) X
1:
! k = (int) y, also spilled to the stack for the integer unit.
! Elements 4..6 are scaled by F62_K256ONLN2 along the way; %f40..%f46 are
! not scaled here.
! NOTE(review): presumably elements 0..3 were scaled before entry.
	fdtoi	%f40,%f16
	st	%f16,[%fp+tmp0]
	fmuld	F62_K256ONLN2,%f48,%f48

	fdtoi	%f42,%f2
	st	%f2,[%fp+tmp1]
	fmuld	F62_K256ONLN2,%f50,%f50

	fdtoi	%f44,%f4
	st	%f4,[%fp+tmp2]
	fmuld	F62_K256ONLN2,%f52,%f52

	fdtoi	%f46,%f6
	st	%f6,[%fp+tmp3]

	fdtoi	%f48,%f8
	st	%f8,[%fp+tmp4]

	fdtoi	%f50,%f10
	st	%f10,[%fp+tmp5]

	fdtoi	%f52,%f12
	st	%f12,[%fp+tmp6]

! (double) k goes to a scratch double; fpackfix then repacks k in place.
! NOTE(review): the fpackfix result depends on the GSR scale factor, which
! is set up outside this block.
	fitod	%f16,%f34
	fpackfix	%f16,%f16

	fitod	%f2,%f18
	fpackfix	%f2,%f2

	fitod	%f4,%f20
	fpackfix	%f4,%f4

	fitod	%f6,%f22
	fpackfix	%f6,%f6

	fitod	%f8,%f24
	fpackfix	%f8,%f8

	fitod	%f10,%f26
	fpackfix	%f10,%f10

	fitod	%f12,%f28
	fpackfix	%f12,%f12

! From here on three things are interleaved:
!   y = y - (double) k
!   p = (F58_KA1 + F60_KA2 * y) * y
!   table byte offset = (k & 255) * 8, computed in %o0..%o5 and %o7
	ld	[%fp+tmp0],%o0
	fsubd	%f40,%f34,%f40

	ld	[%fp+tmp1],%o1
	fsubd	%f42,%f18,%f42

	ld	[%fp+tmp2],%o2
	and	%o0,255,%o0
	fsubd	%f44,%f20,%f44

	ld	[%fp+tmp3],%o3
	and	%o1,255,%o1
	fsubd	%f46,%f22,%f46

	sll	%o0,3,%o0
	sll	%o1,3,%o1
	fmuld	F60_KA2,%f40,%f34
	fsubd	%f48,%f24,%f48

	and	%o2,255,%o2
	fmuld	F60_KA2,%f42,%f18
	fsubd	%f50,%f26,%f50

	sll	%o2,3,%o2
	fmuld	F60_KA2,%f44,%f20
	fsubd	%f52,%f28,%f52

	ld	[%fp+tmp4],%o4
	and	%o3,255,%o3
	fmuld	F60_KA2,%f46,%f22

! This load overwrites the 0x7f800000 constant held in %o5, which is why
! the constant is rebuilt before the final classification.
	ld	[%fp+tmp5],%o5
	sll	%o3,3,%o3
	fmuld	F60_KA2,%f48,%f24
	faddd	F58_KA1,%f34,%f34

! %o7 serves as the seventh index register.
! NOTE(review): this is only safe if no call is issued and the routine
! does not return through %o7 - confirm against the function prologue.
	ld	[%fp+tmp6],%o7
	and	%o4,255,%o4
	fmuld	F60_KA2,%f50,%f26
	faddd	F58_KA1,%f18,%f18

	and	%o5,255,%o5
	fmuld	F60_KA2,%f52,%f28
	faddd	F58_KA1,%f20,%f20

	sll	%o5,3,%o5
	faddd	F58_KA1,%f22,%f22

! Each scratch double is consumed by the multiply before the table entry T
! is loaded into the same register.
	fmuld	%f34,%f40,%f40
	ldd	[G1_CONST_TBL+%o0],%f34
	faddd	F58_KA1,%f24,%f24

	fmuld	%f18,%f42,%f42
	ldd	[G1_CONST_TBL+%o1],%f18
	faddd	F58_KA1,%f26,%f26

	fmuld	%f20,%f44,%f44
	ldd	[G1_CONST_TBL+%o2],%f20
	faddd	F58_KA1,%f28,%f28

	fmuld	%f22,%f46,%f46
	ldd	[G1_CONST_TBL+%o3],%f22
	sll	%o4,3,%o4

! Partitioned 32-bit add of the packed k into the table entry T.
! NOTE(review): presumably this folds the power-of-two part of the result
! into the exponent field of T - confirm against the GSR setup.
	fmuld	%f24,%f48,%f48
	ldd	[G1_CONST_TBL+%o4],%f24
	and	%o7,255,%o7
	fpadd32 %f16,%f34,%f34

	fmuld	%f26,%f50,%f50
	ldd	[G1_CONST_TBL+%o5],%f26
	sll	%o7,3,%o7
	fpadd32 %f2,%f18,%f18

	fmuld	%f28,%f52,%f52
	ldd	[G1_CONST_TBL+%o7],%f28
	fpadd32 %f4,%f20,%f20

	fpadd32 %f6,%f22,%f22

! result = T + T * p
	fpadd32 %f8,%f24,%f24
	fmuld	%f34,%f40,%f40

	fpadd32 %f10,%f26,%f26
	fmuld	%f18,%f42,%f42

	fpadd32 %f12,%f28,%f28
	fmuld	%f20,%f44,%f44

	fmuld	%f22,%f46,%f46

	fmuld	%f24,%f48,%f48
	faddd	%f34,%f40,%f40

	fmuld	%f26,%f50,%f50
	faddd	%f18,%f42,%f42

	fmuld	%f28,%f52,%f52
	faddd	%f20,%f44,%f44

	faddd	%f22,%f46,%f46

	faddd	%f24,%f48,%f48

	faddd	%f26,%f50,%f50

	faddd	%f28,%f52,%f52

! Round to single and store the seven results, stepping the output address
! by %i4.  Afterwards %i3 addresses the output slot of element 7.
	fdtos	%f40,%f26
	st	%f26,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f42,%f18
	st	%f18,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f44,%f20
	st	%f20,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f46,%f22
	st	%f22,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f48,%f24
	st	%f24,[%i3]
	add	%i3,%i4,%o4

	fdtos	%f50,%f26
	st	%f26,[%o4]
	add	%o4,%i4,%i3

	fdtos	%f52,%f28
	st	%f28,[%i3]
	add	%i3,%i4,%i3

! Classify element 7 by its integer image: below 0x7f800000 means finite
! but out of range, equal means infinity, above means NaN.
! The first delay slot sets %o4 = %i1 - %i2, the address the input is
! reloaded from; the second delay slot already performs that reload for
! the infinity path (it also runs, unused, on the NaN fall-through).
	sethi	%hi(0x7f800000),%o5
	cmp	%l7,%o5
	bl,pt	%icc,.spec7_out_of_range
	sub	%i1,%i2,%o4

	ble,pn	%icc,.spec7_inf
	ld	[%o4],%l0

! NaN -> NaN

	fmuls	%f14,%f14,%f14
	ba	.spec7_exit
	st	%f14,[%i3]

! Infinity: test the sign bit of the reloaded word (bit 31 moved to
! bit 2).  Sign clear: store the input itself (annulled slot, taken only).
! Sign set: store %f3, the register used as 0.0f in the entry test.
.spec7_inf:
	srl	%l0,29,%l0
	andcc	%l0,4,%l0
	be,a,pn	%icc,.spec7_exit
	st	%f14,[%i3]

	ba	.spec7_exit
	st	%f3,[%i3]

! Finite but out of range: the sign bit selects one of two single words
! located 2048 bytes into the constant table (just past the 256 entries);
! its square is stored as the result.
! NOTE(review): presumably a huge and a tiny constant, so that the square
! overflows or underflows - the two words are not visible here.
.spec7_out_of_range:
	ld	[%o4],%l0
	srl	%l0,29,%l0
	and	%l0,4,%l0
	add	%l0,2048,%l0
	ld	[G1_CONST_TBL+%l0],%f2
	fmuls	%f2,%f2,%f2
	st	%f2,[%i3]

! Eight elements consumed.  While the counter stays non-negative, go back
! through .main_loop_preload; otherwise continue at .tail.  The delay slot
! steps the output pointer past the special result on both paths.
.spec7_exit:
	subcc	%i0,8,%i0
	bpos,pt	%icc,.main_loop_preload
	add	%i3,%i4,%i3

	ba	.tail
	nop
	SET_SIZE(__vexpf)