/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <siginfo.h>
#include <ucontext.h>
#include <thread.h>
#include <math.h>
#if defined(__SUNPRO_C)
#include <sunmath.h>
#endif
#include <fenv.h>
#include "fex_handler.h"
#include "fenv_inlines.h"

#if defined(__amd64)
#define test_sse_hw	1
#else
/*
 * The following variable lives in libc on Solaris 10, where it
 * gets set to a nonzero value at startup time on systems with SSE.
 */
extern int _sse_hw;
#define test_sse_hw	_sse_hw
#endif

static int accrued = 0;
static thread_key_t accrued_key;
static mutex_t accrued_key_lock = DEFAULTMUTEX;

int *
__fex_accrued()
{
	int		*p;

	if (thr_main())
		return &accrued;
	else {
		p = NULL;
		mutex_lock(&accrued_key_lock);
		if (thr_getspecific(accrued_key, (void **)&p) != 0 &&
			thr_keycreate(&accrued_key, free) != 0) {
			mutex_unlock(&accrued_key_lock);
			return NULL;
		}
		mutex_unlock(&accrued_key_lock);
		if (!p) {
			if ((p = (int*) malloc(sizeof(int))) == NULL)
				return NULL;
			if (thr_setspecific(accrued_key, (void *)p) != 0) {
				(void)free(p);
				return NULL;
			}
			*p = 0;
		}
		return p;
	}
}

void
__fenv_getfsr(unsigned long *fsr)
{
	unsigned int	cwsw, mxcsr;

	__fenv_getcwsw(&cwsw);
	/* clear reserved bits for no particularly good reason */
	cwsw &= ~0xe0c00000u;
	if (test_sse_hw) {
		/* pick up exception flags (excluding denormal operand
		   flag) from mxcsr */
		__fenv_getmxcsr(&mxcsr);
		cwsw |= (mxcsr & 0x3d);
	}
	cwsw |= *__fex_accrued();
	*fsr = cwsw ^ 0x003f0000u;
}

void
__fenv_setfsr(const unsigned long *fsr)
{
	unsigned int	cwsw, mxcsr;
	int				te;

	/* save accrued exception flags corresponding to enabled exceptions */
	cwsw = (unsigned int)*fsr;
	te = __fenv_get_te(cwsw);
	*__fex_accrued() = cwsw & te;
	cwsw = (cwsw & ~te) ^ 0x003f0000;
	if (test_sse_hw) {
		/* propagate rounding direction, masks, and exception flags
		   (excluding denormal operand mask and flag) to mxcsr */
		__fenv_getmxcsr(&mxcsr);
		mxcsr = (mxcsr & ~0x7ebd) | ((cwsw >> 13) & 0x6000) |
			((cwsw >> 9) & 0x1e80) | (cwsw & 0x3d);
		__fenv_setmxcsr(&mxcsr);
	}
	__fenv_setcwsw(&cwsw);
}

/* Offsets into the fp environment save area (assumes 32-bit protected mode) */
#define CW	0	/* control word */
#define SW	1	/* status word */
#define TW	2	/* tag word */
#define IP	3	/* instruction pointer */
#define OP	4	/* opcode */
#define EA	5	/* operand address */

/* macro for accessing fp registers in the save area */
#if defined(__amd64)
#define fpreg(u,x)	*(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.st)
#else
#define fpreg(u,x)	*(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[7])
#endif

/*
*  Fix sip->si_code; the Solaris x86 kernel can get it wrong
*/
void
__fex_get_x86_exc(siginfo_t *sip, ucontext_t *uap)
{
	unsigned	sw, cw;

	sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status;
#if defined(__amd64)
	cw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw;
#else
	cw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[CW];
#endif
	if ((sw & FE_INVALID) && !(cw & (1 << fp_trap_invalid)))
		/* store 0 for stack fault, FPE_FLTINV for IEEE invalid op */
		sip->si_code = ((sw & 0x40)? 0 : FPE_FLTINV);
	else if ((sw & FE_DIVBYZERO) && !(cw & (1 << fp_trap_division)))
		sip->si_code = FPE_FLTDIV;
	else if ((sw & FE_OVERFLOW) && !(cw & (1 << fp_trap_overflow)))
		sip->si_code = FPE_FLTOVF;
	else if ((sw & FE_UNDERFLOW) && !(cw & (1 << fp_trap_underflow)))
		sip->si_code = FPE_FLTUND;
	else if ((sw & FE_INEXACT) && !(cw & (1 << fp_trap_inexact)))
		sip->si_code = FPE_FLTRES;
	else
		sip->si_code = 0;
}

static enum fp_class_type
my_fp_classf(float *x)
{
	int		i = *(int*)x & ~0x80000000;

	if (i < 0x7f800000) {
		if (i < 0x00800000)
			return ((i == 0)? fp_zero : fp_subnormal);
		return fp_normal;
	}
	else if (i == 0x7f800000)
		return fp_infinity;
	else if (i & 0x400000)
		return fp_quiet;
	else
		return fp_signaling;
}

static enum fp_class_type
my_fp_class(double *x)
{
	int		i = *(1+(int*)x) & ~0x80000000;

	if (i < 0x7ff00000) {
		if (i < 0x00100000)
			return (((i | *(int*)x) == 0)? fp_zero : fp_subnormal);
		return fp_normal;
	}
	else if (i == 0x7ff00000 && *(int*)x == 0)
		return fp_infinity;
	else if (i & 0x80000)
		return fp_quiet;
	else
		return fp_signaling;
}

static enum fp_class_type
my_fp_classl(long double *x)
{
	int		i = *(2+(int*)x) & 0x7fff;

	if (i < 0x7fff) {
		if (i < 1) {
			if (*(1+(int*)x) < 0) return fp_normal; /* pseudo-denormal */
			return (((*(1+(int*)x) | *(int*)x) == 0)?
				fp_zero : fp_subnormal);
		}
		return ((*(1+(int*)x) < 0)? fp_normal :
			(enum fp_class_type) -1); /* unsupported format */
	}
	else if (*(1+(int*)x) == 0x80000000 && *(int*)x == 0)
		return fp_infinity;
	else if (*(1+(unsigned*)x) >= 0xc0000000)
		return fp_quiet;
	else if (*(1+(int*)x) < 0)
		return fp_signaling;
	else
		return (enum fp_class_type) -1; /* unsupported format */
}

/*
*  Determine which type of invalid operation exception occurred
*/
enum fex_exception
__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap)
{
	unsigned			op;
	unsigned long			ea;
	enum fp_class_type	t1, t2;

	/* get the opcode and data address */
#if defined(__amd64)
	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16;
	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp;
#else
	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16;
	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA];
#endif

	/* if the instruction is fld, the source must be snan (it can't be
	   an unsupported format, since fldt doesn't raise any exceptions) */
	switch (op & 0x7f8) {
	case 0x100:
	case 0x140:
	case 0x180:
	case 0x500:
	case 0x540:
	case 0x580:
		return fex_inv_snan;
	}

	/* otherwise st is one of the operands; see if it's snan */
	t1 = my_fp_classl(&fpreg(uap, 0));
	if (t1 == fp_signaling)
		return fex_inv_snan;
	else if (t1 == (enum fp_class_type) -1)
		return (enum fex_exception) -1;

	/* determine the class of the second operand if there is one */
	t2 = fp_normal;
	switch (op & 0x7e0) {
	case 0x600:
	case 0x620:
	case 0x640:
	case 0x660:
	case 0x680:
	case 0x6a0:
		/* short memory operand */
		if (!ea)
			return (enum fex_exception) -1;
		if (*(short *)ea == 0)
			t2 = fp_zero;
		break;

	case 0x200:
	case 0x220:
	case 0x240:
	case 0x260:
	case 0x280:
	case 0x2a0:
		/* int memory operand */
		if (!ea)
			return (enum fex_exception) -1;
		if (*(int *)ea == 0)
			t2 = fp_zero;
		break;

	case 0x000:
	case 0x020:
	case 0x040:
	case 0x060:
	case 0x080:
	case 0x0a0:
		/* single precision memory operand */
		if (!ea)
			return (enum fex_exception) -1;
		t2 = my_fp_classf((float *)ea);
		break;

	case 0x400:
	case 0x420:
	case 0x440:
	case 0x460:
	case 0x480:
	case 0x4a0:
		/* double precision memory operand */
		if (!ea)
			return (enum fex_exception) -1;
		t2 = my_fp_class((double *)ea);
		break;

	case 0x0c0:
	case 0x0e0:
	case 0x3e0:
	case 0x4c0:
	case 0x4e0:
	case 0x5e0:
	case 0x6c0:
	case 0x6e0:
	case 0x7e0:
		/* register operand determined by opcode */
		switch (op & 0x7f8) {
		case 0x3e0:
		case 0x3f8:
		case 0x5f0:
		case 0x5f8:
		case 0x7e0:
		case 0x7f8:
			/* weed out nonexistent opcodes */
			break;

		default:
			t2 = my_fp_classl(&fpreg(uap, op & 7));
		}
		break;

	case 0x1e0:
	case 0x2e0:
		/* special forms */
		switch (op) {
		case 0x1f1: /* fyl2x */
		case 0x1f3: /* fpatan */
		case 0x1f5: /* fprem1 */
		case 0x1f8: /* fprem */
		case 0x1f9: /* fyl2xp1 */
		case 0x1fd: /* fscale */
		case 0x2e9: /* fucompp */
			t2 = my_fp_classl(&fpreg(uap, 1));
			break;
		}
		break;
	}

	/* see if the second op is snan */
	if (t2 == fp_signaling)
		return fex_inv_snan;
	else if (t2 == (enum fp_class_type) -1)
		return (enum fex_exception) -1;

	/* determine the type of operation */
	switch (op & 0x7f8) {
	case 0x000:
	case 0x020:
	case 0x028:
	case 0x040:
	case 0x060:
	case 0x068:
	case 0x080:
	case 0x0a0:
	case 0x0a8:
	case 0x0c0:
	case 0x0e0:
	case 0x0e8:
	case 0x400:
	case 0x420:
	case 0x428:
	case 0x440:
	case 0x460:
	case 0x468:
	case 0x480:
	case 0x4a0:
	case 0x4a8:
	case 0x4c0:
	case 0x4e0:
	case 0x4e8:
	case 0x6c0:
	case 0x6e0:
	case 0x6e8:
		/* fadd, fsub, fsubr */
		if (t1 == fp_infinity && t2 == fp_infinity)
			return fex_inv_isi;
		break;

	case 0x008:
	case 0x048:
	case 0x088:
	case 0x0c8:
	case 0x208:
	case 0x248:
	case 0x288:
	case 0x408:
	case 0x448:
	case 0x488:
	case 0x4c8:
	case 0x608:
	case 0x648:
	case 0x688:
	case 0x6c8:
		/* fmul */
		if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero &&
		  t1 == fp_infinity))
			return fex_inv_zmi;
		break;

	case 0x030:
	case 0x038:
	case 0x070:
	case 0x078:
	case 0x0b0:
	case 0x0b8:
	case 0x0f0:
	case 0x0f8:
	case 0x230:
	case 0x238:
	case 0x270:
	case 0x278:
	case 0x2b0:
	case 0x2b8:
	case 0x430:
	case 0x438:
	case 0x470:
	case 0x478:
	case 0x4b0:
	case 0x4b8:
	case 0x4f0:
	case 0x4f8:
	case 0x630:
	case 0x638:
	case 0x670:
	case 0x678:
	case 0x6b0:
	case 0x6b8:
	case 0x6f0:
	case 0x6f8:
		/* fdiv */
		if (t1 == fp_zero && t2 == fp_zero)
			return fex_inv_zdz;
		else if (t1 == fp_infinity && t2 == fp_infinity)
			return fex_inv_idi;
		break;

	case 0x1f0:
	case 0x1f8:
		/* fsqrt, other special ops */
		return fex_inv_sqrt;

	case 0x010:
	case 0x018:
	case 0x050:
	case 0x058:
	case 0x090:
	case 0x098:
	case 0x0d0:
	case 0x0d8:
	case 0x210:
	case 0x218:
	case 0x250:
	case 0x258:
	case 0x290:
	case 0x298:
	case 0x2e8:
	case 0x3f0:
	case 0x410:
	case 0x418:
	case 0x450:
	case 0x458:
	case 0x490:
	case 0x498:
	case 0x4d0:
	case 0x4d8:
	case 0x5e0:
	case 0x5e8:
	case 0x610:
	case 0x618:
	case 0x650:
	case 0x658:
	case 0x690:
	case 0x698:
	case 0x6d0:
	case 0x6d8:
	case 0x7f0:
		/* fcom */
		if (t1 == fp_quiet || t2 == fp_quiet)
			return fex_inv_cmp;
		break;

	case 0x1e0:
		/* ftst */
		if (op == 0x1e4 && t1 == fp_quiet)
			return fex_inv_cmp;
		break;

	case 0x310:
	case 0x318:
	case 0x350:
	case 0x358:
	case 0x390:
	case 0x398:
	case 0x710:
	case 0x718:
	case 0x730:
	case 0x738:
	case 0x750:
	case 0x758:
	case 0x770:
	case 0x778:
	case 0x790:
	case 0x798:
	case 0x7b0:
	case 0x7b8:
		/* fist, fbst */
		return fex_inv_int;
	}

	return (enum fex_exception) -1;
}

/* scale factors for exponent unwrapping */
static const long double
	two12288 = 1.139165225263043370845938579315932009e+3699l,	/* 2^12288 */
	twom12288 = 8.778357852076208839765066529179033145e-3700l,	/* 2^-12288 */
	twom12288mulp = 8.778357852076208839289190796475222545e-3700l;
		/* (")*(1-2^-64) */

/* inline templates */
extern long double f2xm1(long double);
extern long double fyl2x(long double, long double);
extern long double fptan(long double);
extern long double fpatan(long double, long double);
extern long double fxtract(long double);
extern long double fprem1(long double, long double);
extern long double fprem(long double, long double);
extern long double fyl2xp1(long double, long double);
extern long double fsqrt(long double);
extern long double fsincos(long double);
extern long double frndint(long double);
extern long double fscale(long double, long double);
extern long double fsin(long double);
extern long double fcos(long double);

/*
*  Get the operands, generate the default untrapped result with
*  exceptions, and set a code indicating the type of operation
*/
void
__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info)
{
	fex_numeric_t			t;
	long double			op2v, x;
	unsigned int			cwsw, ex, sw, op;
	unsigned long			ea;
	volatile int			c;

	/* get the exception type, status word, opcode, and data address */
	ex = sip->si_code;
	sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status;
#if defined(__amd64)
	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16;
	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp;
#else
	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16;
	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA];
#endif

	/* initialize res to the default untrapped result and ex to the
	   corresponding flags (assume trapping is disabled and flags
	   are clear) */

	/* single operand instructions */
	info->op = fex_cnvt;
	info->op2.type = fex_nodata;
	switch (op & 0x7f8) {
	/* load instructions */
	case 0x100:
	case 0x140:
	case 0x180:
		if (!ea) {
			info->op = fex_other;
			info->op1.type = info->op2.type = info->res.type = fex_nodata;
			info->flags = 0;
			return;
		}
		info->op1.type = fex_float;
		info->op1.val.f = *(float *)ea;
		info->res.type = fex_ldouble;
		info->res.val.q = (long double) info->op1.val.f;
		goto done;

	case 0x500:
	case 0x540:
	case 0x580:
		if (!ea) {
			info->op = fex_other;
			info->op1.type = info->op2.type = info->res.type = fex_nodata;
			info->flags = 0;
			return;
		}
		info->op1.type = fex_double;
		info->op1.val.d = *(double *)ea;
		info->res.type = fex_ldouble;
		info->res.val.q = (long double) info->op1.val.d;
		goto done;

	/* store instructions */
	case 0x110:
	case 0x118:
	case 0x150:
	case 0x158:
	case 0x190:
	case 0x198:
		info->res.type = fex_float;
		if (ex == FPE_FLTRES && (op & 8) != 0) {
			/* inexact, stack popped */
			if (!ea) {
				info->op = fex_other;
				info->op1.type = info->op2.type = info->res.type = fex_nodata;
				info->flags = 0;
				return;
			}
			info->op1.type = fex_nodata;
			info->res.val.f = *(float *)ea;
			info->flags = FE_INEXACT;
			return;
		}
		info->op1.type = fex_ldouble;
		info->op1.val.q = fpreg(uap, 0);
		info->res.val.f = (float) info->op1.val.q;
		goto done;

	case 0x310:
	case 0x318:
	case 0x350:
	case 0x358:
	case 0x390:
	case 0x398:
		info->res.type = fex_int;
		if (ex == FPE_FLTRES && (op & 8) != 0) {
			/* inexact, stack popped */
			if (!ea) {
				info->op = fex_other;
				info->op1.type = info->op2.type = info->res.type = fex_nodata;
				info->flags = 0;
				return;
			}
			info->op1.type = fex_nodata;
			info->res.val.i = *(int *)ea;
			info->flags = FE_INEXACT;
			return;
		}
		info->op1.type = fex_ldouble;
		info->op1.val.q = fpreg(uap, 0);
		info->res.val.i = (int) info->op1.val.q;
		goto done;

	case 0x510:
	case 0x518:
	case 0x550:
	case 0x558:
	case 0x590:
	case 0x598:
		info->res.type = fex_double;
		if (ex == FPE_FLTRES && (op & 8) != 0) {
			/* inexact, stack popped */
			if (!ea) {
				info->op = fex_other;
				info->op1.type = info->op2.type = info->res.type = fex_nodata;
				info->flags = 0;
				return;
			}
			info->op1.type = fex_nodata;
			info->res.val.d = *(double *)ea;
			info->flags = FE_INEXACT;
			return;
		}
		info->op1.type = fex_ldouble;
		info->op1.val.q = fpreg(uap, 0);
		info->res.val.d = (double) info->op1.val.q;
		goto done;

	case 0x710:
	case 0x718:
	case 0x750:
	case 0x758:
	case 0x790:
	case 0x798:
		info->res.type = fex_int;
		if (ex == FPE_FLTRES && (op & 8) != 0) {
			/* inexact, stack popped */
			if (!ea) {
				info->op = fex_other;
				info->op1.type = info->op2.type = info->res.type = fex_nodata;
				info->flags = 0;
				return;
			}
			info->op1.type = fex_nodata;
			info->res.val.i = *(short *)ea;
			info->flags = FE_INEXACT;
			return;
		}
		info->op1.type = fex_ldouble;
		info->op1.val.q = fpreg(uap, 0);
		info->res.val.i = (short) info->op1.val.q;
		goto done;

	case 0x730:
	case 0x770:
	case 0x7b0:
		/* fbstp; don't bother */
		info->op = fex_other;
		info->op1.type = info->res.type = fex_nodata;
		info->flags = 0;
		return;

	case 0x738:
	case 0x778:
	case 0x7b8:
		info->res.type = fex_llong;
		if (ex == FPE_FLTRES) {
			/* inexact, stack popped */
			if (!ea) {
				info->op = fex_other;
				info->op1.type = info->op2.type = info->res.type = fex_nodata;
				info->flags = 0;
				return;
			}
			info->op1.type = fex_nodata;
			info->res.val.l = *(long long *)ea;
			info->flags = FE_INEXACT;
			return;
		}
		info->op1.type = fex_ldouble;
		info->op1.val.q = fpreg(uap, 0);
		info->res.val.l = (long long) info->op1.val.q;
		goto done;
	}

	/* all other ops (except compares) have destinations on the stack
	   so overflow, underflow, and inexact will stomp their operands */
	if (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == FPE_FLTRES) {
		/* find the trapped result */
		info->op1.type = info->op2.type = fex_nodata;
		info->res.type = fex_ldouble;
		switch (op & 0x7f8) {
		case 0x1f0:
			/* fptan pushes 1.0 afterward, so result is in st(1) */
			info->res.val.q = ((op == 0x1f2)? fpreg(uap, 1) :
				fpreg(uap, 0));
			break;

		case 0x4c0:
		case 0x4c8:
		case 0x4e0:
		case 0x4e8:
		case 0x4f0:
		case 0x4f8:
			info->res.val.q = fpreg(uap, op & 7);
			break;

		case 0x6c0:
		case 0x6c8:
		case 0x6e0:
		case 0x6e8:
		case 0x6f0:
		case 0x6f8:
			/* stack was popped afterward */
			info->res.val.q = fpreg(uap, (op - 1) & 7);
			break;

		default:
			info->res.val.q = fpreg(uap, 0);
		}

		/* reconstruct default untrapped result */
		if (ex == FPE_FLTOVF) {
			/* generate an overflow with the sign of the result */
			x = two12288;
			*(4+(short*)&x) |= (*(4+(short*)&info->res.val.q) & 0x8000);
			info->res.val.q = x * two12288;
			info->flags = FE_OVERFLOW | FE_INEXACT;
			__fenv_getcwsw(&cwsw);
			cwsw &= ~FE_ALL_EXCEPT;
			__fenv_setcwsw(&cwsw);
		}
		else if (ex == FPE_FLTUND) {
			/* undo the scaling; we can't distinguish a chopped result
			   from an exact one without futzing around to trap all in-
			   exact exceptions so as to keep the flag clear, so we just
			   punt */
			if (sw & 0x200) /* result was rounded up */
				info->res.val.q = (info->res.val.q * twom12288) * twom12288mulp;
			else
				info->res.val.q = (info->res.val.q * twom12288) * twom12288;
			__fenv_getcwsw(&cwsw);
			info->flags = (cwsw & FE_INEXACT) | FE_UNDERFLOW;
			cwsw &= ~FE_ALL_EXCEPT;
			__fenv_setcwsw(&cwsw);
		}
		else
			info->flags = FE_INEXACT;

		/* determine the operation code */
		switch (op) {
		case 0x1f0: /* f2xm1 */
		case 0x1f1: /* fyl2x */
		case 0x1f2: /* fptan */
		case 0x1f3: /* fpatan */
		case 0x1f5: /* fprem1 */
		case 0x1f8: /* fprem */
		case 0x1f9: /* fyl2xp1 */
		case 0x1fb: /* fsincos */
		case 0x1fc: /* frndint */
		case 0x1fd: /* fscale */
		case 0x1fe: /* fsin */
		case 0x1ff: /* fcos */
			info->op = fex_other;
			return;

		case 0x1fa: /* fsqrt */
			info->op = fex_sqrt;
			return;
		}

		info->op = fex_other;
		switch (op & 0x7c0) {
		case 0x000:
		case 0x040:
		case 0x080:
		case 0x0c0:
		case 0x200:
		case 0x240:
		case 0x280:
		case 0x400:
		case 0x440:
		case 0x480:
		case 0x4c0:
		case 0x600:
		case 0x640:
		case 0x680:
		case 0x6c0:
			switch (op & 0x38) {
			case 0x00:
				info->op = fex_add;
				break;

			case 0x08:
				info->op = fex_mul;
				break;

			case 0x20:
			case 0x28:
				info->op = fex_sub;
				break;

			case 0x30:
			case 0x38:
				info->op = fex_div;
				break;
			}
		}
		return;
	}

	/* for other exceptions, the operands are preserved, so we can
	   just emulate the operation with traps disabled */

	/* one operand is always in st */
	info->op1.type = fex_ldouble;
	info->op1.val.q = fpreg(uap, 0);

	/* oddball instructions */
	info->op = fex_other;
	switch (op) {
	case 0x1e4: /* ftst */
		info->op = fex_cmp;
		info->op2.type = fex_ldouble;
		info->op2.val.q = 0.0l;
		info->res.type = fex_nodata;
		c = (info->op1.val.q < info->op2.val.q);
		goto done;

	case 0x1f0: /* f2xm1 */
		info->res.type = fex_ldouble;
		info->res.val.q = f2xm1(info->op1.val.q);
		goto done;

	case 0x1f1: /* fyl2x */
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, 1);
		info->res.type = fex_ldouble;
		info->res.val.q = fyl2x(info->op1.val.q, info->op2.val.q);
		goto done;

	case 0x1f2: /* fptan */
		info->res.type = fex_ldouble;
		info->res.val.q = fptan(info->op1.val.q);
		goto done;

	case 0x1f3: /* fpatan */
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, 1);
		info->res.type = fex_ldouble;
		info->res.val.q = fpatan(info->op1.val.q, info->op2.val.q);
		goto done;

	case 0x1f4: /* fxtract */
		info->res.type = fex_ldouble;
		info->res.val.q = fxtract(info->op1.val.q);
		goto done;

	case 0x1f5: /* fprem1 */
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, 1);
		info->res.type = fex_ldouble;
		info->res.val.q = fprem1(info->op1.val.q, info->op2.val.q);
		goto done;

	case 0x1f8: /* fprem */
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, 1);
		info->res.type = fex_ldouble;
		info->res.val.q = fprem(info->op1.val.q, info->op2.val.q);
		goto done;

	case 0x1f9: /* fyl2xp1 */
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, 1);
		info->res.type = fex_ldouble;
		info->res.val.q = fyl2xp1(info->op1.val.q, info->op2.val.q);
		goto done;

	case 0x1fa: /* fsqrt */
		info->op = fex_sqrt;
		info->res.type = fex_ldouble;
		info->res.val.q = fsqrt(info->op1.val.q);
		goto done;

	case 0x1fb: /* fsincos */
		info->res.type = fex_ldouble;
		info->res.val.q = fsincos(info->op1.val.q);
		goto done;

	case 0x1fc: /* frndint */
		info->res.type = fex_ldouble;
		info->res.val.q = frndint(info->op1.val.q);
		goto done;

	case 0x1fd: /* fscale */
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, 1);
		info->res.type = fex_ldouble;
		info->res.val.q = fscale(info->op1.val.q, info->op2.val.q);
		goto done;

	case 0x1fe: /* fsin */
		info->res.type = fex_ldouble;
		info->res.val.q = fsin(info->op1.val.q);
		goto done;

	case 0x1ff: /* fcos */
		info->res.type = fex_ldouble;
		info->res.val.q = fcos(info->op1.val.q);
		goto done;

	case 0x2e9: /* fucompp */
		info->op = fex_cmp;
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, 1);
		info->res.type = fex_nodata;
		c = (info->op1.val.q == info->op2.val.q);
		goto done;
	}

	/* fucom[p], fcomi[p], fucomi[p] */
	switch (op & 0x7f8) {
	case 0x3e8:
	case 0x5e0:
	case 0x5e8:
	case 0x7e8: /* unordered compares */
		info->op = fex_cmp;
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, op & 7);
		info->res.type = fex_nodata;
		c = (info->op1.val.q == info->op2.val.q);
		goto done;

	case 0x3f0:
	case 0x7f0: /* ordered compares */
		info->op = fex_cmp;
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, op & 7);
		info->res.type = fex_nodata;
		c = (info->op1.val.q < info->op2.val.q);
		goto done;
	}

	/* all other instructions come in groups of the form
	   fadd, fmul, fcom, fcomp, fsub, fsubr, fdiv, fdivr */

	/* get the second operand */
	switch (op & 0x7c0) {
	case 0x000:
	case 0x040:
	case 0x080:
		if (!ea) {
			info->op = fex_other;
			info->op1.type = info->op2.type = info->res.type = fex_nodata;
			info->flags = 0;
			return;
		}
		info->op2.type = fex_float;
		info->op2.val.f = *(float *)ea;
		op2v = (long double) info->op2.val.f;
		break;

	case 0x0c0:
		info->op2.type = fex_ldouble;
		op2v = info->op2.val.q = fpreg(uap, op & 7);
		break;

	case 0x200:
	case 0x240:
	case 0x280:
		if (!ea) {
			info->op = fex_other;
			info->op1.type = info->op2.type = info->res.type = fex_nodata;
			info->flags = 0;
			return;
		}
		info->op2.type = fex_int;
		info->op2.val.i = *(int *)ea;
		op2v = (long double) info->op2.val.i;
		break;

	case 0x400:
	case 0x440:
	case 0x480:
		if (!ea) {
			info->op = fex_other;
			info->op1.type = info->op2.type = info->res.type = fex_nodata;
			info->flags = 0;
			return;
		}
		info->op2.type = fex_double;
		info->op2.val.d = *(double *)ea;
		op2v = (long double) info->op2.val.d;
		break;

	case 0x4c0:
	case 0x6c0:
		info->op2.type = fex_ldouble;
		info->op2.val.q = fpreg(uap, op & 7);
		t = info->op1;
		info->op1 = info->op2;
		info->op2 = t;
		op2v = info->op2.val.q;
		break;

	case 0x600:
	case 0x640:
	case 0x680:
		if (!ea) {
			info->op = fex_other;
			info->op1.type = info->op2.type = info->res.type = fex_nodata;
			info->flags = 0;
			return;
		}
		info->op2.type = fex_int;
		info->op2.val.i = *(short *)ea;
		op2v = (long double) info->op2.val.i;
		break;

	default:
		info->op = fex_other;
		info->op1.type = info->op2.type = info->res.type = fex_nodata;
		info->flags = 0;
		return;
	}

	/* distinguish different operations in the group */
	info->res.type = fex_ldouble;
	switch (op & 0x38) {
	case 0x00:
		info->op = fex_add;
		info->res.val.q = info->op1.val.q + op2v;
		break;

	case 0x08:
		info->op = fex_mul;
		info->res.val.q = info->op1.val.q * op2v;
		break;

	case 0x10:
	case 0x18:
		info->op = fex_cmp;
		info->res.type = fex_nodata;
		c = (info->op1.val.q < op2v);
		break;

	case 0x20:
		info->op = fex_sub;
		info->res.val.q = info->op1.val.q - op2v;
		break;

	case 0x28:
		info->op = fex_sub;
		info->res.val.q = op2v - info->op1.val.q;
		t = info->op1;
		info->op1 = info->op2;
		info->op2 = t;
		break;

	case 0x30:
		info->op = fex_div;
		info->res.val.q = info->op1.val.q / op2v;
		break;

	case 0x38:
		info->op = fex_div;
		info->res.val.q = op2v / info->op1.val.q;
		t = info->op1;
		info->op1 = info->op2;
		info->op2 = t;
		break;

	default:
		info->op = fex_other;
		info->op1.type = info->op2.type = info->res.type = fex_nodata;
		info->flags = 0;
		return;
	}

done:
	__fenv_getcwsw(&cwsw);
	info->flags = cwsw & FE_ALL_EXCEPT;
	cwsw &= ~FE_ALL_EXCEPT;
	__fenv_setcwsw(&cwsw);
}

/* pop the saved stack */
static void pop(ucontext_t *uap)
{
	unsigned top;

	fpreg(uap, 0) = fpreg(uap, 1);
	fpreg(uap, 1) = fpreg(uap, 2);
	fpreg(uap, 2) = fpreg(uap, 3);
	fpreg(uap, 3) = fpreg(uap, 4);
	fpreg(uap, 4) = fpreg(uap, 5);
	fpreg(uap, 5) = fpreg(uap, 6);
	fpreg(uap, 6) = fpreg(uap, 7);
#if defined(__amd64)
	top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw >> 10)
		& 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw |= (3 << top);
	top = (top + 2) & 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw =
		(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw & ~0x3800)
		| (top << 10);
#else
	top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] >> 10)
		& 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] |= (3 << top);
	top = (top + 2) & 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] =
		(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] & ~0x3800)
		| (top << 10);
#endif
}

/* push x onto the saved stack */
static void push(long double x, ucontext_t *uap)
{
	unsigned top;

	fpreg(uap, 7) = fpreg(uap, 6);
	fpreg(uap, 6) = fpreg(uap, 5);
	fpreg(uap, 5) = fpreg(uap, 4);
	fpreg(uap, 4) = fpreg(uap, 3);
	fpreg(uap, 3) = fpreg(uap, 2);
	fpreg(uap, 2) = fpreg(uap, 1);
	fpreg(uap, 1) = fpreg(uap, 0);
	fpreg(uap, 0) = x;
#if defined(__amd64)
	top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw >> 10)
		& 0xe;
	top = (top - 2) & 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw &= ~(3 << top);
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw =
		(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw & ~0x3800)
		| (top << 10);
#else
	top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] >> 10)
		& 0xe;
	top = (top - 2) & 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] &= ~(3 << top);
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] =
		(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] & ~0x3800)
		| (top << 10);
#endif
}

/* scale factors for exponent wrapping */
static const float
	fun = 7.922816251e+28f,	/* 2^96 */
	fov = 1.262177448e-29f;	/* 2^-96 */
static const double
	dun = 1.552518092300708935e+231,	/* 2^768 */
	dov = 6.441148769597133308e-232;	/* 2^-768 */

/*
*  Store the specified result; if no result is given but the exception
*  is underflow or overflow, use the default trapped result
*/
void
__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info)
{
	fex_numeric_t	r;
	unsigned long		ex, op, ea, stack;

	/* get the exception type, opcode, and data address */
	ex = sip->si_code;
#if defined(__amd64)
	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16;
	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; /*???*/
#else
	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16;
	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA];
#endif

	/* if the instruction is a compare, set the condition codes
	   to unordered and update the stack */
	switch (op & 0x7f8) {
	case 0x010:
	case 0x050:
	case 0x090:
	case 0x0d0:
	case 0x210:
	case 0x250:
	case 0x290:
	case 0x410:
	case 0x450:
	case 0x490:
	case 0x4d0:
	case 0x5e0:
	case 0x610:
	case 0x650:
	case 0x690:
		/* f[u]com */
#if defined(__amd64)
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
#else
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
#endif
		return;

	case 0x018:
	case 0x058:
	case 0x098:
	case 0x0d8:
	case 0x218:
	case 0x258:
	case 0x298:
	case 0x418:
	case 0x458:
	case 0x498:
	case 0x4d8:
	case 0x5e8:
	case 0x618:
	case 0x658:
	case 0x698:
	case 0x6d0:
		/* f[u]comp */
#if defined(__amd64)
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
#else
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
#endif
		pop(uap);
		return;

	case 0x2e8:
	case 0x6d8:
		/* f[u]compp */
#if defined(__amd64)
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
#else
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
#endif
		pop(uap);
		pop(uap);
		return;

	case 0x1e0:
		if (op == 0x1e4) { /* ftst */
#if defined(__amd64)
			uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
#else
			uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
#endif
			return;
		}
		break;

	case 0x3e8:
	case 0x3f0:
		/* f[u]comi */
#if defined(__amd64)
		uap->uc_mcontext.gregs[REG_PS] |= 0x45;
#else
		uap->uc_mcontext.gregs[EFL] |= 0x45;
#endif
		return;

	case 0x7e8:
	case 0x7f0:
		/* f[u]comip */
#if defined(__amd64)
		uap->uc_mcontext.gregs[REG_PS] |= 0x45;
#else
		uap->uc_mcontext.gregs[EFL] |= 0x45;
#endif
		pop(uap);
		return;
	}

	/* if there is no result available and the exception is overflow
	   or underflow, use the wrapped result */
	r = info->res;
	if (r.type == fex_nodata) {
		if (ex == FPE_FLTOVF || ex == FPE_FLTUND) {
			/* for store instructions, do the scaling and store */
			switch (op & 0x7f8) {
			case 0x110:
			case 0x118:
			case 0x150:
			case 0x158:
			case 0x190:
			case 0x198:
				if (!ea)
					return;
				if (ex == FPE_FLTOVF)
					*(float *)ea = (fpreg(uap, 0) * fov) * fov;
				else
					*(float *)ea = (fpreg(uap, 0) * fun) * fun;
				if ((op & 8) != 0)
					pop(uap);
				break;

			case 0x510:
			case 0x518:
			case 0x550:
			case 0x558:
			case 0x590:
			case 0x598:
				if (!ea)
					return;
				if (ex == FPE_FLTOVF)
					*(double *)ea = (fpreg(uap, 0) * dov) * dov;
				else
					*(double *)ea = (fpreg(uap, 0) * dun) * dun;
				if ((op & 8) != 0)
					pop(uap);
				break;
			}
		}
#ifdef DEBUG
		else if (ex != FPE_FLTRES)
			printf("No result supplied, stack may be hosed\n");
#endif
		return;
	}

	/* otherwise convert the supplied result to the correct type,
	   put it in the destination, and update the stack as need be */

	/* store instructions */
	switch (op & 0x7f8) {
	case 0x110:
	case 0x118:
	case 0x150:
	case 0x158:
	case 0x190:
	case 0x198:
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(float *)ea = (float) r.val.i;
			break;

		case fex_llong:
			*(float *)ea = (float) r.val.l;
			break;

		case fex_float:
			*(float *)ea = r.val.f;
			break;

		case fex_double:
			*(float *)ea = (float) r.val.d;
			break;

		case fex_ldouble:
			*(float *)ea = (float) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES && (op & 8) != 0)
			pop(uap);
		return;

	case 0x310:
	case 0x318:
	case 0x350:
	case 0x358:
	case 0x390:
	case 0x398:
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(int *)ea = r.val.i;
			break;

		case fex_llong:
			*(int *)ea = (int) r.val.l;
			break;

		case fex_float:
			*(int *)ea = (int) r.val.f;
			break;

		case fex_double:
			*(int *)ea = (int) r.val.d;
			break;

		case fex_ldouble:
			*(int *)ea = (int) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES && (op & 8) != 0)
			pop(uap);
		return;

	case 0x510:
	case 0x518:
	case 0x550:
	case 0x558:
	case 0x590:
	case 0x598:
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(double *)ea = (double) r.val.i;
			break;

		case fex_llong:
			*(double *)ea = (double) r.val.l;
			break;

		case fex_float:
			*(double *)ea = (double) r.val.f;
			break;

		case fex_double:
			*(double *)ea = r.val.d;
			break;

		case fex_ldouble:
			*(double *)ea = (double) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES && (op & 8) != 0)
			pop(uap);
		return;

	case 0x710:
	case 0x718:
	case 0x750:
	case 0x758:
	case 0x790:
	case 0x798:
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(short *)ea = (short) r.val.i;
			break;

		case fex_llong:
			*(short *)ea = (short) r.val.l;
			break;

		case fex_float:
			*(short *)ea = (short) r.val.f;
			break;

		case fex_double:
			*(short *)ea = (short) r.val.d;
			break;

		case fex_ldouble:
			*(short *)ea = (short) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES && (op & 8) != 0)
			pop(uap);
		return;

	case 0x730:
	case 0x770:
	case 0x7b0:
		/* fbstp; don't bother */
		if (ea && ex != FPE_FLTRES)
			pop(uap);
		return;

	case 0x738:
	case 0x778:
	case 0x7b8:
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(long long *)ea = (long long) r.val.i;
			break;

		case fex_llong:
			*(long long *)ea = r.val.l;
			break;

		case fex_float:
			*(long long *)ea = (long long) r.val.f;
			break;

		case fex_double:
			*(long long *)ea = (long long) r.val.d;
			break;

		case fex_ldouble:
			*(long long *)ea = (long long) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES)
			pop(uap);
		return;
	}

	/* for all other instructions, the result goes into a register */
	switch (r.type) {
	case fex_int:
		r.val.q = (long double) r.val.i;
		break;

	case fex_llong:
		r.val.q = (long double) r.val.l;
		break;

	case fex_float:
		r.val.q = (long double) r.val.f;
		break;

	case fex_double:
		r.val.q = (long double) r.val.d;
		break;

	default:
		break;
	}

	/* for load instructions, push the result onto the stack */
	switch (op & 0x7f8) {
	case 0x100:
	case 0x140:
	case 0x180:
	case 0x500:
	case 0x540:
	case 0x580:
		if (ea)
			push(r.val.q, uap);
		return;
	}

	/* for all other instructions, if the exception is overflow,
	   underflow, or inexact, the stack has already been updated */
	stack = (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == FPE_FLTRES);
	switch (op & 0x7f8) {
	case 0x1f0: /* oddballs */
		switch (op) {
		case 0x1f1: /* fyl2x */
		case 0x1f3: /* fpatan */
		case 0x1f9: /* fyl2xp1 */
			/* pop the stack, leaving the result in st */
			if (!stack)
				pop(uap);
			fpreg(uap, 0) = r.val.q;
			return;

		case 0x1f2: /* fpatan */
			/* fptan pushes 1.0 afterward */
			if (stack)
				fpreg(uap, 1) = r.val.q;
			else {
				fpreg(uap, 0) = r.val.q;
				push(1.0L, uap);
			}
			return;

		case 0x1f4: /* fxtract */
		case 0x1fb: /* fsincos */
			/* leave the supplied result in st */
			if (stack)
				fpreg(uap, 0) = r.val.q;
			else {
				fpreg(uap, 0) = 0.0; /* punt */
				push(r.val.q, uap);
			}
			return;
		}

		/* all others leave the stack alone and the result in st */
		fpreg(uap, 0) = r.val.q;
		return;

	case 0x4c0:
	case 0x4c8:
	case 0x4e0:
	case 0x4e8:
	case 0x4f0:
	case 0x4f8:
		fpreg(uap, op & 7) = r.val.q;
		return;

	case 0x6c0:
	case 0x6c8:
	case 0x6e0:
	case 0x6e8:
	case 0x6f0:
	case 0x6f8:
		/* stack is popped afterward */
		if (stack)
			fpreg(uap, (op - 1) & 7) = r.val.q;
		else {
			fpreg(uap, op & 7) = r.val.q;
			pop(uap);
		}
		return;

	default:
		fpreg(uap, 0) = r.val.q;
		return;
	}
}