xref: /freebsd/sys/dev/syscons/plasma/fp16.c (revision a50b01d224ab38985b9d47fe2e27a4ae546fc62b)
1*a50b01d2SDag-Erling Smørgrav /*-
2*a50b01d2SDag-Erling Smørgrav  * Copyright (c) 2015 Dag-Erling Smørgrav
3*a50b01d2SDag-Erling Smørgrav  * All rights reserved.
4*a50b01d2SDag-Erling Smørgrav  *
5*a50b01d2SDag-Erling Smørgrav  * Redistribution and use in source and binary forms, with or without
6*a50b01d2SDag-Erling Smørgrav  * modification, are permitted provided that the following conditions
7*a50b01d2SDag-Erling Smørgrav  * are met:
8*a50b01d2SDag-Erling Smørgrav  * 1. Redistributions of source code must retain the above copyright
9*a50b01d2SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer.
10*a50b01d2SDag-Erling Smørgrav  * 2. Redistributions in binary form must reproduce the above copyright
11*a50b01d2SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer in the
12*a50b01d2SDag-Erling Smørgrav  *    documentation and/or other materials provided with the distribution.
13*a50b01d2SDag-Erling Smørgrav  *
14*a50b01d2SDag-Erling Smørgrav  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15*a50b01d2SDag-Erling Smørgrav  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16*a50b01d2SDag-Erling Smørgrav  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17*a50b01d2SDag-Erling Smørgrav  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18*a50b01d2SDag-Erling Smørgrav  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19*a50b01d2SDag-Erling Smørgrav  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20*a50b01d2SDag-Erling Smørgrav  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21*a50b01d2SDag-Erling Smørgrav  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22*a50b01d2SDag-Erling Smørgrav  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23*a50b01d2SDag-Erling Smørgrav  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24*a50b01d2SDag-Erling Smørgrav  * SUCH DAMAGE.
25*a50b01d2SDag-Erling Smørgrav  *
26*a50b01d2SDag-Erling Smørgrav  * $FreeBSD$
27*a50b01d2SDag-Erling Smørgrav  */
28*a50b01d2SDag-Erling Smørgrav 
29*a50b01d2SDag-Erling Smørgrav #ifdef _KERNEL
30*a50b01d2SDag-Erling Smørgrav #include <sys/libkern.h>
31*a50b01d2SDag-Erling Smørgrav #else
32*a50b01d2SDag-Erling Smørgrav #include <stdio.h>
33*a50b01d2SDag-Erling Smørgrav #include <strings.h>
34*a50b01d2SDag-Erling Smørgrav #endif
35*a50b01d2SDag-Erling Smørgrav 
36*a50b01d2SDag-Erling Smørgrav #include "fp16.h"
37*a50b01d2SDag-Erling Smørgrav 
38*a50b01d2SDag-Erling Smørgrav /*
39*a50b01d2SDag-Erling Smørgrav  * Compute the quare root of x, using Newton's method with 2^(log2(x)/2)
40*a50b01d2SDag-Erling Smørgrav  * as the initial estimate.
41*a50b01d2SDag-Erling Smørgrav  */
42*a50b01d2SDag-Erling Smørgrav fp16_t
43*a50b01d2SDag-Erling Smørgrav fp16_sqrt(fp16_t x)
44*a50b01d2SDag-Erling Smørgrav {
45*a50b01d2SDag-Erling Smørgrav 	fp16_t y, delta;
46*a50b01d2SDag-Erling Smørgrav 	signed int log2x;
47*a50b01d2SDag-Erling Smørgrav 
48*a50b01d2SDag-Erling Smørgrav 	/* special case */
49*a50b01d2SDag-Erling Smørgrav 	if (x == 0)
50*a50b01d2SDag-Erling Smørgrav 		return (0);
51*a50b01d2SDag-Erling Smørgrav 
52*a50b01d2SDag-Erling Smørgrav 	/* shift toward 0 by half the logarithm */
53*a50b01d2SDag-Erling Smørgrav 	log2x = flsl(x) - 1;
54*a50b01d2SDag-Erling Smørgrav 	if (log2x >= 16) {
55*a50b01d2SDag-Erling Smørgrav 		y = x >> (log2x - 16) / 2;
56*a50b01d2SDag-Erling Smørgrav 	} else {
57*a50b01d2SDag-Erling Smørgrav #if 0
58*a50b01d2SDag-Erling Smørgrav 		y = x << (16 - log2x) / 2;
59*a50b01d2SDag-Erling Smørgrav #else
60*a50b01d2SDag-Erling Smørgrav 		/* XXX for now, return 0 for anything < 1 */
61*a50b01d2SDag-Erling Smørgrav 		return (0);
62*a50b01d2SDag-Erling Smørgrav #endif
63*a50b01d2SDag-Erling Smørgrav 	}
64*a50b01d2SDag-Erling Smørgrav 	while (y > 0) {
65*a50b01d2SDag-Erling Smørgrav 		/* delta = y^2 / 2y */
66*a50b01d2SDag-Erling Smørgrav 		delta = fp16_div(fp16_sub(fp16_mul(y, y), x), y * 2);
67*a50b01d2SDag-Erling Smørgrav 		if (delta == 0)
68*a50b01d2SDag-Erling Smørgrav 			break;
69*a50b01d2SDag-Erling Smørgrav 		y = fp16_sub(y, delta);
70*a50b01d2SDag-Erling Smørgrav 	}
71*a50b01d2SDag-Erling Smørgrav 	return (y);
72*a50b01d2SDag-Erling Smørgrav }
73*a50b01d2SDag-Erling Smørgrav 
74*a50b01d2SDag-Erling Smørgrav static fp16_t fp16_cos_table[256] = {
75*a50b01d2SDag-Erling Smørgrav 	65536,	65534,	65531,	65524,	65516,	65505,	65491,	65475,
76*a50b01d2SDag-Erling Smørgrav 	65457,	65436,	65412,	65386,	65358,	65327,	65294,	65258,
77*a50b01d2SDag-Erling Smørgrav 	65220,	65179,	65136,	65091,	65043,	64992,	64939,	64884,
78*a50b01d2SDag-Erling Smørgrav 	64826,	64766,	64703,	64638,	64571,	64501,	64428,	64353,
79*a50b01d2SDag-Erling Smørgrav 	64276,	64197,	64115,	64030,	63943,	63854,	63762,	63668,
80*a50b01d2SDag-Erling Smørgrav 	63571,	63473,	63371,	63268,	63162,	63053,	62942,	62829,
81*a50b01d2SDag-Erling Smørgrav 	62714,	62596,	62475,	62353,	62228,	62100,	61971,	61839,
82*a50b01d2SDag-Erling Smørgrav 	61705,	61568,	61429,	61288,	61144,	60998,	60850,	60700,
83*a50b01d2SDag-Erling Smørgrav 	60547,	60392,	60235,	60075,	59913,	59749,	59583,	59414,
84*a50b01d2SDag-Erling Smørgrav 	59243,	59070,	58895,	58718,	58538,	58356,	58172,	57986,
85*a50b01d2SDag-Erling Smørgrav 	57797,	57606,	57414,	57219,	57022,	56822,	56621,	56417,
86*a50b01d2SDag-Erling Smørgrav 	56212,	56004,	55794,	55582,	55368,	55152,	54933,	54713,
87*a50b01d2SDag-Erling Smørgrav 	54491,	54266,	54040,	53811,	53581,	53348,	53114,	52877,
88*a50b01d2SDag-Erling Smørgrav 	52639,	52398,	52155,	51911,	51665,	51416,	51166,	50914,
89*a50b01d2SDag-Erling Smørgrav 	50660,	50403,	50146,	49886,	49624,	49360,	49095,	48828,
90*a50b01d2SDag-Erling Smørgrav 	48558,	48288,	48015,	47740,	47464,	47186,	46906,	46624,
91*a50b01d2SDag-Erling Smørgrav 	46340,	46055,	45768,	45480,	45189,	44897,	44603,	44308,
92*a50b01d2SDag-Erling Smørgrav 	44011,	43712,	43412,	43110,	42806,	42501,	42194,	41885,
93*a50b01d2SDag-Erling Smørgrav 	41575,	41263,	40950,	40636,	40319,	40002,	39682,	39362,
94*a50b01d2SDag-Erling Smørgrav 	39039,	38716,	38390,	38064,	37736,	37406,	37075,	36743,
95*a50b01d2SDag-Erling Smørgrav 	36409,	36074,	35738,	35400,	35061,	34721,	34379,	34036,
96*a50b01d2SDag-Erling Smørgrav 	33692,	33346,	32999,	32651,	32302,	31952,	31600,	31247,
97*a50b01d2SDag-Erling Smørgrav 	30893,	30538,	30181,	29824,	29465,	29105,	28745,	28383,
98*a50b01d2SDag-Erling Smørgrav 	28020,	27656,	27291,	26925,	26557,	26189,	25820,	25450,
99*a50b01d2SDag-Erling Smørgrav 	25079,	24707,	24334,	23960,	23586,	23210,	22833,	22456,
100*a50b01d2SDag-Erling Smørgrav 	22078,	21699,	21319,	20938,	20557,	20175,	19792,	19408,
101*a50b01d2SDag-Erling Smørgrav 	19024,	18638,	18253,	17866,	17479,	17091,	16702,	16313,
102*a50b01d2SDag-Erling Smørgrav 	15923,	15533,	15142,	14751,	14359,	13966,	13573,	13179,
103*a50b01d2SDag-Erling Smørgrav 	12785,	12390,	11995,	11600,	11204,	10807,	10410,	10013,
104*a50b01d2SDag-Erling Smørgrav 	 9616,	 9218,	 8819,	 8421,	 8022,	 7623,	 7223,	 6823,
105*a50b01d2SDag-Erling Smørgrav 	 6423,	 6023,	 5622,	 5222,	 4821,	 4420,	 4018,	 3617,
106*a50b01d2SDag-Erling Smørgrav 	 3215,	 2814,	 2412,	 2010,	 1608,	 1206,	  804,	  402,
107*a50b01d2SDag-Erling Smørgrav };
108*a50b01d2SDag-Erling Smørgrav 
109*a50b01d2SDag-Erling Smørgrav /*
110*a50b01d2SDag-Erling Smørgrav  * Compute the cosine of theta.
111*a50b01d2SDag-Erling Smørgrav  */
112*a50b01d2SDag-Erling Smørgrav fp16_t
113*a50b01d2SDag-Erling Smørgrav fp16_cos(fp16_t theta)
114*a50b01d2SDag-Erling Smørgrav {
115*a50b01d2SDag-Erling Smørgrav 	unsigned int i;
116*a50b01d2SDag-Erling Smørgrav 
117*a50b01d2SDag-Erling Smørgrav 	i = 1024 * (theta % FP16_2PI) / FP16_2PI;
118*a50b01d2SDag-Erling Smørgrav 	switch (i / 256) {
119*a50b01d2SDag-Erling Smørgrav 	case 0:
120*a50b01d2SDag-Erling Smørgrav 		return (fp16_cos_table[i % 256]);
121*a50b01d2SDag-Erling Smørgrav 	case 1:
122*a50b01d2SDag-Erling Smørgrav 		return (-fp16_cos_table[255 - i % 256]);
123*a50b01d2SDag-Erling Smørgrav 	case 2:
124*a50b01d2SDag-Erling Smørgrav 		return (-fp16_cos_table[i % 256]);
125*a50b01d2SDag-Erling Smørgrav 	case 3:
126*a50b01d2SDag-Erling Smørgrav 		return (fp16_cos_table[255 - i % 256]);
127*a50b01d2SDag-Erling Smørgrav 	default:
128*a50b01d2SDag-Erling Smørgrav 		/* inconceivable! */
129*a50b01d2SDag-Erling Smørgrav 		return (0);
130*a50b01d2SDag-Erling Smørgrav 	}
131*a50b01d2SDag-Erling Smørgrav }
132