xref: /linux/arch/sh/kernel/cpu/sh2a/fpu.c (revision d3ea9fa0a563620fe9f416f94bb8927c64390917)
174d99a5eSPaul Mundt /*
274d99a5eSPaul Mundt  * Save/restore floating point context for signal handlers.
374d99a5eSPaul Mundt  *
474d99a5eSPaul Mundt  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
574d99a5eSPaul Mundt  *
674d99a5eSPaul Mundt  * This file is subject to the terms and conditions of the GNU General Public
774d99a5eSPaul Mundt  * License.  See the file "COPYING" in the main directory of this archive
874d99a5eSPaul Mundt  * for more details.
974d99a5eSPaul Mundt  *
1074d99a5eSPaul Mundt  * FIXME! These routines can be optimized in big endian case.
1174d99a5eSPaul Mundt  */
1274d99a5eSPaul Mundt #include <linux/sched.h>
1374d99a5eSPaul Mundt #include <linux/signal.h>
1474d99a5eSPaul Mundt #include <asm/processor.h>
1574d99a5eSPaul Mundt #include <asm/io.h>
169bbafce2SPaul Mundt #include <asm/fpu.h>
1774d99a5eSPaul Mundt 
/*
 * FPSCR value with every bit, including PR (precision), clear.
 *
 * The PR bit must be clear when an frchg instruction is executed, otherwise
 * the instruction is undefined; executing frchg with PR set causes a trap
 * on some SH4 implementations.
 *
 * In this file the constant is only handed to the inline assembly blocks
 * as an input operand.
 */
#define FPSCR_RCHG 0x00000000
2474d99a5eSPaul Mundt 
2574d99a5eSPaul Mundt 
2674d99a5eSPaul Mundt /*
2774d99a5eSPaul Mundt  * Save FPU registers onto task structure.
2874d99a5eSPaul Mundt  */
2974d99a5eSPaul Mundt void
30*d3ea9fa0SStuart Menefy save_fpu(struct task_struct *tsk)
3174d99a5eSPaul Mundt {
3274d99a5eSPaul Mundt 	unsigned long dummy;
3374d99a5eSPaul Mundt 
3474d99a5eSPaul Mundt 	enable_fpu();
3574d99a5eSPaul Mundt 	asm volatile("sts.l	fpul, @-%0\n\t"
3674d99a5eSPaul Mundt 		     "sts.l	fpscr, @-%0\n\t"
3774d99a5eSPaul Mundt 		     "fmov.s	fr15, @-%0\n\t"
3874d99a5eSPaul Mundt 		     "fmov.s	fr14, @-%0\n\t"
3974d99a5eSPaul Mundt 		     "fmov.s	fr13, @-%0\n\t"
4074d99a5eSPaul Mundt 		     "fmov.s	fr12, @-%0\n\t"
4174d99a5eSPaul Mundt 		     "fmov.s	fr11, @-%0\n\t"
4274d99a5eSPaul Mundt 		     "fmov.s	fr10, @-%0\n\t"
4374d99a5eSPaul Mundt 		     "fmov.s	fr9, @-%0\n\t"
4474d99a5eSPaul Mundt 		     "fmov.s	fr8, @-%0\n\t"
4574d99a5eSPaul Mundt 		     "fmov.s	fr7, @-%0\n\t"
4674d99a5eSPaul Mundt 		     "fmov.s	fr6, @-%0\n\t"
4774d99a5eSPaul Mundt 		     "fmov.s	fr5, @-%0\n\t"
4874d99a5eSPaul Mundt 		     "fmov.s	fr4, @-%0\n\t"
4974d99a5eSPaul Mundt 		     "fmov.s	fr3, @-%0\n\t"
5074d99a5eSPaul Mundt 		     "fmov.s	fr2, @-%0\n\t"
5174d99a5eSPaul Mundt 		     "fmov.s	fr1, @-%0\n\t"
5274d99a5eSPaul Mundt 		     "fmov.s	fr0, @-%0\n\t"
5374d99a5eSPaul Mundt 		     "lds	%3, fpscr\n\t"
5474d99a5eSPaul Mundt 		     : "=r" (dummy)
5574d99a5eSPaul Mundt 		     : "0" ((char *)(&tsk->thread.fpu.hard.status)),
5674d99a5eSPaul Mundt 		       "r" (FPSCR_RCHG),
5774d99a5eSPaul Mundt 		       "r" (FPSCR_INIT)
5874d99a5eSPaul Mundt 		     : "memory");
5974d99a5eSPaul Mundt 
6074d99a5eSPaul Mundt 	disable_fpu();
6174d99a5eSPaul Mundt }
6274d99a5eSPaul Mundt 
6374d99a5eSPaul Mundt static void
6474d99a5eSPaul Mundt restore_fpu(struct task_struct *tsk)
6574d99a5eSPaul Mundt {
6674d99a5eSPaul Mundt 	unsigned long dummy;
6774d99a5eSPaul Mundt 
6874d99a5eSPaul Mundt 	enable_fpu();
6974d99a5eSPaul Mundt 	asm volatile("fmov.s	@%0+, fr0\n\t"
7074d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr1\n\t"
7174d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr2\n\t"
7274d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr3\n\t"
7374d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr4\n\t"
7474d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr5\n\t"
7574d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr6\n\t"
7674d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr7\n\t"
7774d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr8\n\t"
7874d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr9\n\t"
7974d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr10\n\t"
8074d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr11\n\t"
8174d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr12\n\t"
8274d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr13\n\t"
8374d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr14\n\t"
8474d99a5eSPaul Mundt 		     "fmov.s	@%0+, fr15\n\t"
8574d99a5eSPaul Mundt 		     "lds.l	@%0+, fpscr\n\t"
8674d99a5eSPaul Mundt 		     "lds.l	@%0+, fpul\n\t"
8774d99a5eSPaul Mundt 		     : "=r" (dummy)
8874d99a5eSPaul Mundt 		     : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
8974d99a5eSPaul Mundt 		     : "memory");
9074d99a5eSPaul Mundt 	disable_fpu();
9174d99a5eSPaul Mundt }
9274d99a5eSPaul Mundt 
9374d99a5eSPaul Mundt /*
9474d99a5eSPaul Mundt  * Load the FPU with signalling NANS.  This bit pattern we're using
9574d99a5eSPaul Mundt  * has the property that no matter wether considered as single or as
9674d99a5eSPaul Mundt  * double precission represents signaling NANS.
9774d99a5eSPaul Mundt  */
9874d99a5eSPaul Mundt 
9974d99a5eSPaul Mundt static void
10074d99a5eSPaul Mundt fpu_init(void)
10174d99a5eSPaul Mundt {
10274d99a5eSPaul Mundt 	enable_fpu();
10374d99a5eSPaul Mundt 	asm volatile("lds	%0, fpul\n\t"
10474d99a5eSPaul Mundt 		     "fsts	fpul, fr0\n\t"
10574d99a5eSPaul Mundt 		     "fsts	fpul, fr1\n\t"
10674d99a5eSPaul Mundt 		     "fsts	fpul, fr2\n\t"
10774d99a5eSPaul Mundt 		     "fsts	fpul, fr3\n\t"
10874d99a5eSPaul Mundt 		     "fsts	fpul, fr4\n\t"
10974d99a5eSPaul Mundt 		     "fsts	fpul, fr5\n\t"
11074d99a5eSPaul Mundt 		     "fsts	fpul, fr6\n\t"
11174d99a5eSPaul Mundt 		     "fsts	fpul, fr7\n\t"
11274d99a5eSPaul Mundt 		     "fsts	fpul, fr8\n\t"
11374d99a5eSPaul Mundt 		     "fsts	fpul, fr9\n\t"
11474d99a5eSPaul Mundt 		     "fsts	fpul, fr10\n\t"
11574d99a5eSPaul Mundt 		     "fsts	fpul, fr11\n\t"
11674d99a5eSPaul Mundt 		     "fsts	fpul, fr12\n\t"
11774d99a5eSPaul Mundt 		     "fsts	fpul, fr13\n\t"
11874d99a5eSPaul Mundt 		     "fsts	fpul, fr14\n\t"
11974d99a5eSPaul Mundt 		     "fsts	fpul, fr15\n\t"
12074d99a5eSPaul Mundt 		     "lds	%2, fpscr\n\t"
12174d99a5eSPaul Mundt 		     : /* no output */
12274d99a5eSPaul Mundt 		     : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
12374d99a5eSPaul Mundt 	disable_fpu();
12474d99a5eSPaul Mundt }
12574d99a5eSPaul Mundt 
12674d99a5eSPaul Mundt /*
12774d99a5eSPaul Mundt  *	Emulate arithmetic ops on denormalized number for some FPU insns.
12874d99a5eSPaul Mundt  */
12974d99a5eSPaul Mundt 
/*
 * denormal_mulf - denormalized float * float, on raw IEEE-754 bit patterns
 * @hx: single-precision bits of the operand treated as the denormal
 * @hy: single-precision bits of the other operand
 *
 * Returns the bit pattern of the product.  A signed zero is returned when
 * @hx has no magnitude bits or when @hy is itself below the smallest
 * normal number.  Bits that do not fit into the result mantissa are simply
 * dropped (no rounding).
 */
static int denormal_mulf(int hx, int hy)
{
	const unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int mag_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	unsigned int mant_y, result;
	unsigned long long prod, probe;
	int exp, msb;

	if (mag_x == 0 || mag_y < 0x00800000)
		return sign;

	exp = (mag_y & 0x7f800000) >> 23;
	mag_x &= 0x007fffff;
	/* restore the implicit leading one of the normal operand */
	mant_y = (mag_y & 0x007fffff) | 0x00800000;
	prod = (unsigned long long)mag_x * mant_y;

	/* msb: index of the highest set bit of the product */
	msb = -1;
	for (probe = prod; probe; probe >>= 1)
		msb++;

	/* FIXME: use guard bits */
	exp += msb - 126 - 46;
	if (exp > 0) {
		/* normal result: keep the 23 bits below the leading one */
		result = ((int) (prod >> (msb - 23)) & 0x007fffff) | (exp << 23);
	} else if (exp + 22 >= 0) {
		/* denormal result: the leading one stays in the mantissa */
		result = (int) (prod >> (msb - 22 - exp)) & 0x007fffff;
	} else {
		result = 0;
	}

	return result | sign;
}
16274d99a5eSPaul Mundt 
16374d99a5eSPaul Mundt /* denormalized double * double */
/*
 * mult64 - full 64 x 64 -> 128 bit unsigned multiplication
 * @x, @y:  factors
 * @highp:  receives the upper 64 bits of the product
 * @lowp:   receives the lower 64 bits of the product
 *
 * The product is assembled from the four 32 x 32 bit partial products.
 */
static void mult64(unsigned long long x, unsigned long long y,
		unsigned long long *highp, unsigned long long *lowp)
{
	const unsigned long long x_hi = x >> 32;
	const unsigned long long x_lo = x & 0xffffffffLL;
	const unsigned long y_hi = (unsigned long) (y >> 32);
	const unsigned long y_lo = (unsigned long) (y & 0xffffffffLL);

	const unsigned long long hi_hi = x_hi * y_hi;
	const unsigned long long lo_hi = x_lo * y_hi;
	const unsigned long long hi_lo = x_hi * y_lo;
	const unsigned long long lo_lo = x_lo * y_lo;

	unsigned long long low, high, sum;

	/* everything that lands in the upper word without needing a carry */
	high = hi_hi + (lo_hi >> 32) + (hi_lo >> 32);

	/* add the cross terms into the lower word, propagating carries */
	sum = lo_lo + (lo_hi << 32);
	if (sum < lo_lo)
		high++;
	low = sum;

	sum = low + (hi_lo << 32);
	if (sum < low)
		high++;
	low = sum;

	*lowp = low;
	*highp = high;
}
18974d99a5eSPaul Mundt 
/*
 * rshift64 - low 64 bits of the 128-bit value (mh:ml) shifted right by n
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count
 *
 * Shift counts are handled explicitly at the edges so that no 64-bit
 * operand is ever shifted by 64 or more, which is undefined in C:
 *   n <= 0    -> no shift, @ml is returned
 *   n >= 128  -> everything is shifted out, 0 is returned
 */
static inline long long rshift64(unsigned long long mh,
		unsigned long long ml, int n)
{
	if (n <= 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}
19774d99a5eSPaul Mundt 
/*
 * denormal_muld - denormalized double * double, on raw IEEE-754 bit patterns
 * @hx: double-precision bits of the operand treated as the denormal
 * @hy: double-precision bits of the other operand
 *
 * Returns the bit pattern of the product.  A signed zero is returned when
 * @hx has no magnitude bits or when @hy is itself below the smallest
 * normal number.  Bits that do not fit into the result mantissa are simply
 * dropped (no rounding).
 */
static long long denormal_muld(long long hx, long long hy)
{
	unsigned long long ix, iy;
	unsigned long long prod_hi, prod_lo, probe;
	int exp, msb;

	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;
	if (ix == 0 || iy < 0x0010000000000000LL)
		return ((hx ^ hy) & 0x8000000000000000LL);

	exp = (iy & 0x7ff0000000000000LL) >> 52;
	ix &= 0x000fffffffffffffLL;
	/* restore the implicit leading one of the normal operand */
	iy = (iy & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(ix, iy, &prod_hi, &prod_lo);

	/* msb: index of the highest set bit of the 128-bit product */
	if (prod_hi) {
		probe = prod_hi;
		msb = 63;
	} else {
		probe = prod_lo;
		msb = -1;
	}
	for (; probe; probe >>= 1)
		msb++;

	/* FIXME: use guard bits */
	exp += msb - 1022 - 52 * 2;
	if (exp > 0) {
		/* normal result: keep the 52 bits below the leading one */
		ix = (rshift64(prod_hi, prod_lo, msb - 52) & 0x000fffffffffffffLL)
			| ((long long)exp << 52);
	} else if (exp + 51 >= 0) {
		/* denormal result: the leading one stays in the mantissa */
		ix = rshift64(prod_hi, prod_lo, msb - 51 - exp)
			& 0x000fffffffffffffLL;
	} else {
		ix = 0;
	}

	ix |= (hx ^ hy) & 0x8000000000000000LL;
	return ix;
}
23574d99a5eSPaul Mundt 
/*
 * denormal_subf1 - ix - iy where iy: denormal and ix, iy >= 0
 * @ix: single-precision magnitude bits of the minuend
 * @iy: single-precision magnitude bits of the (denormal) subtrahend
 *
 * Returns the bit pattern of the difference.  When @ix is denormal as well
 * the result is the plain integer difference and may be negative.  When
 * @iy is too small to affect @ix, @ix is returned unchanged.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int mant, exp;

	/* two denormals share one scale: subtract the raw patterns */
	if (ix < 0x00800000)
		return ix - iy;

	exp = (ix & 0x7f800000) >> 23;
	if (exp > 32)
		return ix;

	/* align iy with the scale of ix */
	iy >>= exp - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) - iy;

	/* renormalise; stop early if the result drops into the denormals */
	for (; mant < 0x00800000; mant <<= 1)
		if (--exp == 0)
			return mant;

	return (exp << 23) | (mant & 0x007fffff);
}
26274d99a5eSPaul Mundt 
/*
 * denormal_addf1 - ix + iy where iy: denormal and ix, iy >= 0
 * @ix: single-precision magnitude bits of the first addend
 * @iy: single-precision magnitude bits of the (denormal) second addend
 *
 * Returns the bit pattern of the sum.  When @iy is too small to affect
 * @ix, @ix is returned unchanged.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int mant, exp;

	/* two denormals share one scale: add the raw patterns */
	if (ix < 0x00800000)
		return ix + iy;

	exp = (ix & 0x7f800000) >> 23;
	if (exp > 32)
		return ix;

	/* align iy with the scale of ix */
	iy >>= exp - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) + iy;
	if (mant >= 0x01000000) {
		/* carry out of the mantissa: bump the exponent */
		mant >>= 1;
		exp++;
	}

	return (exp << 23) | (mant & 0x007fffff);
}
28874d99a5eSPaul Mundt 
/*
 * denormal_addf - hx + hy on raw single-precision bit patterns
 * @hx: bits of the first operand
 * @hy: bits of the second operand
 *
 * The magnitudes are combined with denormal_addf1() when the signs agree
 * and with denormal_subf1() when they differ; in both cases the operand
 * below the smallest normal number is passed as the second argument.
 * Returns the bit pattern of the result.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix = hx & 0x7fffffff;
	unsigned int iy = hy & 0x7fffffff;
	int sign = hx & 0x80000000;
	const int y_is_denormal = iy < 0x00800000;

	if (!((hx ^ hy) & 0x80000000)) {
		/* same sign: add magnitudes, keep the sign of hx */
		if (y_is_denormal)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
		return sign | ix;
	}

	/* opposite signs: subtract magnitudes */
	if (y_is_denormal) {
		ix = denormal_subf1(ix, iy);
		if ((int) ix < 0) {
			/* |hy| was the larger one: result takes the sign of hy */
			ix = -ix;
			sign ^= 0x80000000;
		}
	} else {
		/* computed as |hy| - |hx|, so the result takes the sign of hy */
		ix = denormal_subf1(iy, ix);
		sign ^= 0x80000000;
	}

	return sign | ix;
}
32074d99a5eSPaul Mundt 
/*
 * denormal_subd1 - ix - iy where iy: denormal and ix, iy >= 0
 * @ix: double-precision magnitude bits of the minuend
 * @iy: double-precision magnitude bits of the (denormal) subtrahend
 *
 * Returns the bit pattern of the difference.  When @ix is denormal as well
 * the result is the plain integer difference and may be negative.  When
 * @iy is too small to affect @ix, @ix is returned unchanged.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	int exp;

	/* two denormals share one scale: subtract the raw patterns */
	if (ix < 0x0010000000000000LL)
		return ix - iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	if (exp > 64)
		return ix;

	/* align iy with the scale of ix */
	iy >>= exp - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) - iy;

	/* renormalise; stop early if the result drops into the denormals */
	for (; mant < 0x0010000000000000LL; mant <<= 1)
		if (--exp == 0)
			return mant;

	return ((long long)exp << 52) | (mant & 0x000fffffffffffffLL);
}
34774d99a5eSPaul Mundt 
/*
 * denormal_addd1 - ix + iy where iy: denormal and ix, iy >= 0
 * @ix: double-precision magnitude bits of the first addend
 * @iy: double-precision magnitude bits of the (denormal) second addend
 *
 * Returns the bit pattern of the sum.  When @iy is too small to affect
 * @ix, @ix is returned unchanged.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	long long exp;

	/* two denormals share one scale: add the raw patterns */
	if (ix < 0x0010000000000000LL)
		return ix + iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	if (exp > 64)
		return ix;

	/* align iy with the scale of ix */
	iy >>= exp - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) + iy;
	if (mant >= 0x0020000000000000LL) {
		/* carry out of the mantissa: bump the exponent */
		mant >>= 1;
		exp++;
	}

	return (exp << 52) | (mant & 0x000fffffffffffffLL);
}
37374d99a5eSPaul Mundt 
/*
 * denormal_addd - hx + hy on raw double-precision bit patterns
 * @hx: bits of the first operand
 * @hy: bits of the second operand
 *
 * The magnitudes are combined with denormal_addd1() when the signs agree
 * and with denormal_subd1() when they differ; in both cases the operand
 * below the smallest normal number is passed as the second argument.
 * Returns the bit pattern of the result.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* opposite signs: subtract magnitudes */
		if (iy < 0x0010000000000000LL) {
			ix = denormal_subd1(ix, iy);
			/*
			 * The difference is a 64-bit quantity, so its sign
			 * must be read from bit 63.  A cast to int would
			 * look at bit 31 of the low word instead.
			 */
			if ((long long) ix < 0) {
				ix = -ix;
				sign ^= 0x8000000000000000LL;
			}
		} else {
			/* computed as |hy| - |hx|: result takes the sign of hy */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* same sign: add magnitudes, keep the sign of hx */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
40574d99a5eSPaul Mundt 
40674d99a5eSPaul Mundt /**
40774d99a5eSPaul Mundt  *	denormal_to_double - Given denormalized float number,
40874d99a5eSPaul Mundt  *	                     store double float
40974d99a5eSPaul Mundt  *
41074d99a5eSPaul Mundt  *	@fpu: Pointer to sh_fpu_hard structure
41174d99a5eSPaul Mundt  *	@n: Index to FP register
41274d99a5eSPaul Mundt  */
41374d99a5eSPaul Mundt static void
41474d99a5eSPaul Mundt denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
41574d99a5eSPaul Mundt {
41674d99a5eSPaul Mundt 	unsigned long du, dl;
41774d99a5eSPaul Mundt 	unsigned long x = fpu->fpul;
41874d99a5eSPaul Mundt 	int exp = 1023 - 126;
41974d99a5eSPaul Mundt 
42074d99a5eSPaul Mundt 	if (x != 0 && (x & 0x7f800000) == 0) {
42174d99a5eSPaul Mundt 		du = (x & 0x80000000);
42274d99a5eSPaul Mundt 		while ((x & 0x00800000) == 0) {
42374d99a5eSPaul Mundt 			x <<= 1;
42474d99a5eSPaul Mundt 			exp--;
42574d99a5eSPaul Mundt 		}
42674d99a5eSPaul Mundt 		x &= 0x007fffff;
42774d99a5eSPaul Mundt 		du |= (exp << 20) | (x >> 3);
42874d99a5eSPaul Mundt 		dl = x << 29;
42974d99a5eSPaul Mundt 
43074d99a5eSPaul Mundt 		fpu->fp_regs[n] = du;
43174d99a5eSPaul Mundt 		fpu->fp_regs[n+1] = dl;
43274d99a5eSPaul Mundt 	}
43374d99a5eSPaul Mundt }
43474d99a5eSPaul Mundt 
43574d99a5eSPaul Mundt /**
43674d99a5eSPaul Mundt  *	ieee_fpe_handler - Handle denormalized number exception
43774d99a5eSPaul Mundt  *
43874d99a5eSPaul Mundt  *	@regs: Pointer to register structure
43974d99a5eSPaul Mundt  *
44074d99a5eSPaul Mundt  *	Returns 1 when it's handled (should not cause exception).
44174d99a5eSPaul Mundt  */
44274d99a5eSPaul Mundt static int
44374d99a5eSPaul Mundt ieee_fpe_handler (struct pt_regs *regs)
44474d99a5eSPaul Mundt {
44574d99a5eSPaul Mundt 	unsigned short insn = *(unsigned short *) regs->pc;
44674d99a5eSPaul Mundt 	unsigned short finsn;
44774d99a5eSPaul Mundt 	unsigned long nextpc;
44874d99a5eSPaul Mundt 	int nib[4] = {
44974d99a5eSPaul Mundt 		(insn >> 12) & 0xf,
45074d99a5eSPaul Mundt 		(insn >> 8) & 0xf,
45174d99a5eSPaul Mundt 		(insn >> 4) & 0xf,
45274d99a5eSPaul Mundt 		insn & 0xf};
45374d99a5eSPaul Mundt 
45474d99a5eSPaul Mundt 	if (nib[0] == 0xb ||
45574d99a5eSPaul Mundt 	    (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
45674d99a5eSPaul Mundt 		regs->pr = regs->pc + 4;
45774d99a5eSPaul Mundt 	if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
45874d99a5eSPaul Mundt 		nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
45974d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
46074d99a5eSPaul Mundt 	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
46174d99a5eSPaul Mundt 		if (regs->sr & 1)
46274d99a5eSPaul Mundt 			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
46374d99a5eSPaul Mundt 		else
46474d99a5eSPaul Mundt 			nextpc = regs->pc + 4;
46574d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
46674d99a5eSPaul Mundt 	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
46774d99a5eSPaul Mundt 		if (regs->sr & 1)
46874d99a5eSPaul Mundt 			nextpc = regs->pc + 4;
46974d99a5eSPaul Mundt 		else
47074d99a5eSPaul Mundt 			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
47174d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
47274d99a5eSPaul Mundt 	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
47374d99a5eSPaul Mundt 		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
47474d99a5eSPaul Mundt 		nextpc = regs->regs[nib[1]];
47574d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
47674d99a5eSPaul Mundt 	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
47774d99a5eSPaul Mundt 		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
47874d99a5eSPaul Mundt 		nextpc = regs->pc + 4 + regs->regs[nib[1]];
47974d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
48074d99a5eSPaul Mundt 	} else if (insn == 0x000b) { /* rts */
48174d99a5eSPaul Mundt 		nextpc = regs->pr;
48274d99a5eSPaul Mundt 		finsn = *(unsigned short *) (regs->pc + 2);
48374d99a5eSPaul Mundt 	} else {
48474d99a5eSPaul Mundt 		nextpc = regs->pc + 2;
48574d99a5eSPaul Mundt 		finsn = insn;
48674d99a5eSPaul Mundt 	}
48774d99a5eSPaul Mundt 
48874d99a5eSPaul Mundt #define FPSCR_FPU_ERROR (1 << 17)
48974d99a5eSPaul Mundt 
49074d99a5eSPaul Mundt 	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
49174d99a5eSPaul Mundt 		struct task_struct *tsk = current;
49274d99a5eSPaul Mundt 
49374d99a5eSPaul Mundt 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
49474d99a5eSPaul Mundt 			/* FPU error */
49574d99a5eSPaul Mundt 			denormal_to_double (&tsk->thread.fpu.hard,
49674d99a5eSPaul Mundt 					    (finsn >> 8) & 0xf);
49774d99a5eSPaul Mundt 		} else
49874d99a5eSPaul Mundt 			return 0;
49974d99a5eSPaul Mundt 
50074d99a5eSPaul Mundt 		regs->pc = nextpc;
50174d99a5eSPaul Mundt 		return 1;
50274d99a5eSPaul Mundt 	} else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
50374d99a5eSPaul Mundt 		struct task_struct *tsk = current;
50474d99a5eSPaul Mundt 		int fpscr;
50574d99a5eSPaul Mundt 		int n, m, prec;
50674d99a5eSPaul Mundt 		unsigned int hx, hy;
50774d99a5eSPaul Mundt 
50874d99a5eSPaul Mundt 		n = (finsn >> 8) & 0xf;
50974d99a5eSPaul Mundt 		m = (finsn >> 4) & 0xf;
51074d99a5eSPaul Mundt 		hx = tsk->thread.fpu.hard.fp_regs[n];
51174d99a5eSPaul Mundt 		hy = tsk->thread.fpu.hard.fp_regs[m];
51274d99a5eSPaul Mundt 		fpscr = tsk->thread.fpu.hard.fpscr;
51374d99a5eSPaul Mundt 		prec = fpscr & (1 << 19);
51474d99a5eSPaul Mundt 
51574d99a5eSPaul Mundt 		if ((fpscr & FPSCR_FPU_ERROR)
51674d99a5eSPaul Mundt 		     && (prec && ((hx & 0x7fffffff) < 0x00100000
51774d99a5eSPaul Mundt 				   || (hy & 0x7fffffff) < 0x00100000))) {
51874d99a5eSPaul Mundt 			long long llx, lly;
51974d99a5eSPaul Mundt 
52074d99a5eSPaul Mundt 			/* FPU error because of denormal */
52174d99a5eSPaul Mundt 			llx = ((long long) hx << 32)
52274d99a5eSPaul Mundt 			       | tsk->thread.fpu.hard.fp_regs[n+1];
52374d99a5eSPaul Mundt 			lly = ((long long) hy << 32)
52474d99a5eSPaul Mundt 			       | tsk->thread.fpu.hard.fp_regs[m+1];
52574d99a5eSPaul Mundt 			if ((hx & 0x7fffffff) >= 0x00100000)
52674d99a5eSPaul Mundt 				llx = denormal_muld(lly, llx);
52774d99a5eSPaul Mundt 			else
52874d99a5eSPaul Mundt 				llx = denormal_muld(llx, lly);
52974d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
53074d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
53174d99a5eSPaul Mundt 		} else if ((fpscr & FPSCR_FPU_ERROR)
53274d99a5eSPaul Mundt 		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
53374d99a5eSPaul Mundt 				   || (hy & 0x7fffffff) < 0x00800000))) {
53474d99a5eSPaul Mundt 			/* FPU error because of denormal */
53574d99a5eSPaul Mundt 			if ((hx & 0x7fffffff) >= 0x00800000)
53674d99a5eSPaul Mundt 				hx = denormal_mulf(hy, hx);
53774d99a5eSPaul Mundt 			else
53874d99a5eSPaul Mundt 				hx = denormal_mulf(hx, hy);
53974d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n] = hx;
54074d99a5eSPaul Mundt 		} else
54174d99a5eSPaul Mundt 			return 0;
54274d99a5eSPaul Mundt 
54374d99a5eSPaul Mundt 		regs->pc = nextpc;
54474d99a5eSPaul Mundt 		return 1;
54574d99a5eSPaul Mundt 	} else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
54674d99a5eSPaul Mundt 		struct task_struct *tsk = current;
54774d99a5eSPaul Mundt 		int fpscr;
54874d99a5eSPaul Mundt 		int n, m, prec;
54974d99a5eSPaul Mundt 		unsigned int hx, hy;
55074d99a5eSPaul Mundt 
55174d99a5eSPaul Mundt 		n = (finsn >> 8) & 0xf;
55274d99a5eSPaul Mundt 		m = (finsn >> 4) & 0xf;
55374d99a5eSPaul Mundt 		hx = tsk->thread.fpu.hard.fp_regs[n];
55474d99a5eSPaul Mundt 		hy = tsk->thread.fpu.hard.fp_regs[m];
55574d99a5eSPaul Mundt 		fpscr = tsk->thread.fpu.hard.fpscr;
55674d99a5eSPaul Mundt 		prec = fpscr & (1 << 19);
55774d99a5eSPaul Mundt 
55874d99a5eSPaul Mundt 		if ((fpscr & FPSCR_FPU_ERROR)
55974d99a5eSPaul Mundt 		     && (prec && ((hx & 0x7fffffff) < 0x00100000
56074d99a5eSPaul Mundt 				   || (hy & 0x7fffffff) < 0x00100000))) {
56174d99a5eSPaul Mundt 			long long llx, lly;
56274d99a5eSPaul Mundt 
56374d99a5eSPaul Mundt 			/* FPU error because of denormal */
56474d99a5eSPaul Mundt 			llx = ((long long) hx << 32)
56574d99a5eSPaul Mundt 			       | tsk->thread.fpu.hard.fp_regs[n+1];
56674d99a5eSPaul Mundt 			lly = ((long long) hy << 32)
56774d99a5eSPaul Mundt 			       | tsk->thread.fpu.hard.fp_regs[m+1];
56874d99a5eSPaul Mundt 			if ((finsn & 0xf00f) == 0xf000)
56974d99a5eSPaul Mundt 				llx = denormal_addd(llx, lly);
57074d99a5eSPaul Mundt 			else
57174d99a5eSPaul Mundt 				llx = denormal_addd(llx, lly ^ (1LL << 63));
57274d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
57374d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
57474d99a5eSPaul Mundt 		} else if ((fpscr & FPSCR_FPU_ERROR)
57574d99a5eSPaul Mundt 		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
57674d99a5eSPaul Mundt 				   || (hy & 0x7fffffff) < 0x00800000))) {
57774d99a5eSPaul Mundt 			/* FPU error because of denormal */
57874d99a5eSPaul Mundt 			if ((finsn & 0xf00f) == 0xf000)
57974d99a5eSPaul Mundt 				hx = denormal_addf(hx, hy);
58074d99a5eSPaul Mundt 			else
58174d99a5eSPaul Mundt 				hx = denormal_addf(hx, hy ^ 0x80000000);
58274d99a5eSPaul Mundt 			tsk->thread.fpu.hard.fp_regs[n] = hx;
58374d99a5eSPaul Mundt 		} else
58474d99a5eSPaul Mundt 			return 0;
58574d99a5eSPaul Mundt 
58674d99a5eSPaul Mundt 		regs->pc = nextpc;
58774d99a5eSPaul Mundt 		return 1;
58874d99a5eSPaul Mundt 	}
58974d99a5eSPaul Mundt 
59074d99a5eSPaul Mundt 	return 0;
59174d99a5eSPaul Mundt }
59274d99a5eSPaul Mundt 
59374d99a5eSPaul Mundt BUILD_TRAP_HANDLER(fpu_error)
59474d99a5eSPaul Mundt {
59574d99a5eSPaul Mundt 	struct task_struct *tsk = current;
59674d99a5eSPaul Mundt 	TRAP_HANDLER_DECL;
59774d99a5eSPaul Mundt 
598*d3ea9fa0SStuart Menefy 	__unlazy_fpu(tsk, regs);
59974d99a5eSPaul Mundt 	if (ieee_fpe_handler(regs)) {
60074d99a5eSPaul Mundt 		tsk->thread.fpu.hard.fpscr &=
60174d99a5eSPaul Mundt 			~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
60274d99a5eSPaul Mundt 		grab_fpu(regs);
60374d99a5eSPaul Mundt 		restore_fpu(tsk);
604*d3ea9fa0SStuart Menefy 		task_thread_info(tsk)->status |= TS_USEDFPU;
60574d99a5eSPaul Mundt 		return;
60674d99a5eSPaul Mundt 	}
60774d99a5eSPaul Mundt 
60874d99a5eSPaul Mundt 	force_sig(SIGFPE, tsk);
60974d99a5eSPaul Mundt }
61074d99a5eSPaul Mundt 
61174d99a5eSPaul Mundt BUILD_TRAP_HANDLER(fpu_state_restore)
61274d99a5eSPaul Mundt {
61374d99a5eSPaul Mundt 	struct task_struct *tsk = current;
61474d99a5eSPaul Mundt 	TRAP_HANDLER_DECL;
61574d99a5eSPaul Mundt 
61674d99a5eSPaul Mundt 	grab_fpu(regs);
61774d99a5eSPaul Mundt 	if (!user_mode(regs)) {
61874d99a5eSPaul Mundt 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
61974d99a5eSPaul Mundt 		return;
62074d99a5eSPaul Mundt 	}
62174d99a5eSPaul Mundt 
62274d99a5eSPaul Mundt 	if (used_math()) {
62374d99a5eSPaul Mundt 		/* Using the FPU again.  */
62474d99a5eSPaul Mundt 		restore_fpu(tsk);
62574d99a5eSPaul Mundt 	} else	{
62674d99a5eSPaul Mundt 		/* First time FPU user.  */
62774d99a5eSPaul Mundt 		fpu_init();
62874d99a5eSPaul Mundt 		set_used_math();
62974d99a5eSPaul Mundt 	}
630*d3ea9fa0SStuart Menefy 	task_thread_info(tsk)->status |= TS_USEDFPU;
63174d99a5eSPaul Mundt }
632