xref: /freebsd/sys/dev/syscons/plasma/fp16.c (revision 95ee2897e98f5d444f26ed2334cc7c439f9c16c6)
1a50b01d2SDag-Erling Smørgrav /*-
2a50b01d2SDag-Erling Smørgrav  * Copyright (c) 2015 Dag-Erling Smørgrav
3a50b01d2SDag-Erling Smørgrav  * All rights reserved.
4a50b01d2SDag-Erling Smørgrav  *
5a50b01d2SDag-Erling Smørgrav  * Redistribution and use in source and binary forms, with or without
6a50b01d2SDag-Erling Smørgrav  * modification, are permitted provided that the following conditions
7a50b01d2SDag-Erling Smørgrav  * are met:
8a50b01d2SDag-Erling Smørgrav  * 1. Redistributions of source code must retain the above copyright
9a50b01d2SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer.
10a50b01d2SDag-Erling Smørgrav  * 2. Redistributions in binary form must reproduce the above copyright
11a50b01d2SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer in the
12a50b01d2SDag-Erling Smørgrav  *    documentation and/or other materials provided with the distribution.
13a50b01d2SDag-Erling Smørgrav  *
14a50b01d2SDag-Erling Smørgrav  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15a50b01d2SDag-Erling Smørgrav  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16a50b01d2SDag-Erling Smørgrav  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17a50b01d2SDag-Erling Smørgrav  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18a50b01d2SDag-Erling Smørgrav  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19a50b01d2SDag-Erling Smørgrav  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20a50b01d2SDag-Erling Smørgrav  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21a50b01d2SDag-Erling Smørgrav  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22a50b01d2SDag-Erling Smørgrav  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23a50b01d2SDag-Erling Smørgrav  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24a50b01d2SDag-Erling Smørgrav  * SUCH DAMAGE.
25a50b01d2SDag-Erling Smørgrav  */
26a50b01d2SDag-Erling Smørgrav 
27a50b01d2SDag-Erling Smørgrav #ifdef _KERNEL
28a50b01d2SDag-Erling Smørgrav #include <sys/libkern.h>
29a50b01d2SDag-Erling Smørgrav #else
30a50b01d2SDag-Erling Smørgrav #include <stdio.h>
31a50b01d2SDag-Erling Smørgrav #include <strings.h>
32a50b01d2SDag-Erling Smørgrav #endif
33a50b01d2SDag-Erling Smørgrav 
34a50b01d2SDag-Erling Smørgrav #include "fp16.h"
35a50b01d2SDag-Erling Smørgrav 
36a50b01d2SDag-Erling Smørgrav /*
37a50b01d2SDag-Erling Smørgrav  * Compute the quare root of x, using Newton's method with 2^(log2(x)/2)
38a50b01d2SDag-Erling Smørgrav  * as the initial estimate.
39a50b01d2SDag-Erling Smørgrav  */
40a50b01d2SDag-Erling Smørgrav fp16_t
fp16_sqrt(fp16_t x)41a50b01d2SDag-Erling Smørgrav fp16_sqrt(fp16_t x)
42a50b01d2SDag-Erling Smørgrav {
43a50b01d2SDag-Erling Smørgrav 	fp16_t y, delta;
44a50b01d2SDag-Erling Smørgrav 	signed int log2x;
45a50b01d2SDag-Erling Smørgrav 
46a50b01d2SDag-Erling Smørgrav 	/* special case */
47a50b01d2SDag-Erling Smørgrav 	if (x == 0)
48a50b01d2SDag-Erling Smørgrav 		return (0);
49a50b01d2SDag-Erling Smørgrav 
50a50b01d2SDag-Erling Smørgrav 	/* shift toward 0 by half the logarithm */
51a50b01d2SDag-Erling Smørgrav 	log2x = flsl(x) - 1;
52a50b01d2SDag-Erling Smørgrav 	if (log2x >= 16) {
53a50b01d2SDag-Erling Smørgrav 		y = x >> (log2x - 16) / 2;
54a50b01d2SDag-Erling Smørgrav 	} else {
55a50b01d2SDag-Erling Smørgrav #if 0
56a50b01d2SDag-Erling Smørgrav 		y = x << (16 - log2x) / 2;
57a50b01d2SDag-Erling Smørgrav #else
58a50b01d2SDag-Erling Smørgrav 		/* XXX for now, return 0 for anything < 1 */
59a50b01d2SDag-Erling Smørgrav 		return (0);
60a50b01d2SDag-Erling Smørgrav #endif
61a50b01d2SDag-Erling Smørgrav 	}
62a50b01d2SDag-Erling Smørgrav 	while (y > 0) {
63a50b01d2SDag-Erling Smørgrav 		/* delta = y^2 / 2y */
64a50b01d2SDag-Erling Smørgrav 		delta = fp16_div(fp16_sub(fp16_mul(y, y), x), y * 2);
65a50b01d2SDag-Erling Smørgrav 		if (delta == 0)
66a50b01d2SDag-Erling Smørgrav 			break;
67a50b01d2SDag-Erling Smørgrav 		y = fp16_sub(y, delta);
68a50b01d2SDag-Erling Smørgrav 	}
69a50b01d2SDag-Erling Smørgrav 	return (y);
70a50b01d2SDag-Erling Smørgrav }
71a50b01d2SDag-Erling Smørgrav 
72*23e6278eSDag-Erling Smørgrav static fp16_t fp16_sin_table[256] = {
73*23e6278eSDag-Erling Smørgrav 	    0,	  402,	  804,	 1206,	 1608,	 2010,	 2412,	 2814,
74*23e6278eSDag-Erling Smørgrav 	 3215,	 3617,	 4018,	 4420,	 4821,	 5222,	 5622,	 6023,
75*23e6278eSDag-Erling Smørgrav 	 6423,	 6823,	 7223,	 7623,	 8022,	 8421,	 8819,	 9218,
76*23e6278eSDag-Erling Smørgrav 	 9616,	10013,	10410,	10807,	11204,	11600,	11995,	12390,
77*23e6278eSDag-Erling Smørgrav 	12785,	13179,	13573,	13966,	14359,	14751,	15142,	15533,
78*23e6278eSDag-Erling Smørgrav 	15923,	16313,	16702,	17091,	17479,	17866,	18253,	18638,
79*23e6278eSDag-Erling Smørgrav 	19024,	19408,	19792,	20175,	20557,	20938,	21319,	21699,
80*23e6278eSDag-Erling Smørgrav 	22078,	22456,	22833,	23210,	23586,	23960,	24334,	24707,
81*23e6278eSDag-Erling Smørgrav 	25079,	25450,	25820,	26189,	26557,	26925,	27291,	27656,
82*23e6278eSDag-Erling Smørgrav 	28020,	28383,	28745,	29105,	29465,	29824,	30181,	30538,
83*23e6278eSDag-Erling Smørgrav 	30893,	31247,	31600,	31952,	32302,	32651,	32999,	33346,
84*23e6278eSDag-Erling Smørgrav 	33692,	34036,	34379,	34721,	35061,	35400,	35738,	36074,
85*23e6278eSDag-Erling Smørgrav 	36409,	36743,	37075,	37406,	37736,	38064,	38390,	38716,
86*23e6278eSDag-Erling Smørgrav 	39039,	39362,	39682,	40002,	40319,	40636,	40950,	41263,
87*23e6278eSDag-Erling Smørgrav 	41575,	41885,	42194,	42501,	42806,	43110,	43412,	43712,
88*23e6278eSDag-Erling Smørgrav 	44011,	44308,	44603,	44897,	45189,	45480,	45768,	46055,
89*23e6278eSDag-Erling Smørgrav 	46340,	46624,	46906,	47186,	47464,	47740,	48015,	48288,
90*23e6278eSDag-Erling Smørgrav 	48558,	48828,	49095,	49360,	49624,	49886,	50146,	50403,
91*23e6278eSDag-Erling Smørgrav 	50660,	50914,	51166,	51416,	51665,	51911,	52155,	52398,
92*23e6278eSDag-Erling Smørgrav 	52639,	52877,	53114,	53348,	53581,	53811,	54040,	54266,
93*23e6278eSDag-Erling Smørgrav 	54491,	54713,	54933,	55152,	55368,	55582,	55794,	56004,
94*23e6278eSDag-Erling Smørgrav 	56212,	56417,	56621,	56822,	57022,	57219,	57414,	57606,
95*23e6278eSDag-Erling Smørgrav 	57797,	57986,	58172,	58356,	58538,	58718,	58895,	59070,
96*23e6278eSDag-Erling Smørgrav 	59243,	59414,	59583,	59749,	59913,	60075,	60235,	60392,
97*23e6278eSDag-Erling Smørgrav 	60547,	60700,	60850,	60998,	61144,	61288,	61429,	61568,
98*23e6278eSDag-Erling Smørgrav 	61705,	61839,	61971,	62100,	62228,	62353,	62475,	62596,
99*23e6278eSDag-Erling Smørgrav 	62714,	62829,	62942,	63053,	63162,	63268,	63371,	63473,
100*23e6278eSDag-Erling Smørgrav 	63571,	63668,	63762,	63854,	63943,	64030,	64115,	64197,
101*23e6278eSDag-Erling Smørgrav 	64276,	64353,	64428,	64501,	64571,	64638,	64703,	64766,
102*23e6278eSDag-Erling Smørgrav 	64826,	64884,	64939,	64992,	65043,	65091,	65136,	65179,
103*23e6278eSDag-Erling Smørgrav 	65220,	65258,	65294,	65327,	65358,	65386,	65412,	65436,
104*23e6278eSDag-Erling Smørgrav 	65457,	65475,	65491,	65505,	65516,	65524,	65531,	65534,
105a50b01d2SDag-Erling Smørgrav };
106a50b01d2SDag-Erling Smørgrav 
107a50b01d2SDag-Erling Smørgrav /*
108*23e6278eSDag-Erling Smørgrav  * Compute the sine of theta.
109*23e6278eSDag-Erling Smørgrav  */
110*23e6278eSDag-Erling Smørgrav fp16_t
fp16_sin(fp16_t theta)111*23e6278eSDag-Erling Smørgrav fp16_sin(fp16_t theta)
112*23e6278eSDag-Erling Smørgrav {
113*23e6278eSDag-Erling Smørgrav 	unsigned int i;
114*23e6278eSDag-Erling Smørgrav 
115*23e6278eSDag-Erling Smørgrav 	i = 1024 * (theta % FP16_2PI) / FP16_2PI;
116*23e6278eSDag-Erling Smørgrav 	switch (i / 256) {
117*23e6278eSDag-Erling Smørgrav 	case 0:
118*23e6278eSDag-Erling Smørgrav 		return (fp16_sin_table[i % 256]);
119*23e6278eSDag-Erling Smørgrav 	case 1:
120*23e6278eSDag-Erling Smørgrav 		return (fp16_sin_table[255 - i % 256]);
121*23e6278eSDag-Erling Smørgrav 	case 2:
122*23e6278eSDag-Erling Smørgrav 		return (-fp16_sin_table[i % 256]);
123*23e6278eSDag-Erling Smørgrav 	case 3:
124*23e6278eSDag-Erling Smørgrav 		return (-fp16_sin_table[255 - i % 256]);
125*23e6278eSDag-Erling Smørgrav 	default:
126*23e6278eSDag-Erling Smørgrav 		/* inconceivable! */
127*23e6278eSDag-Erling Smørgrav 		return (0);
128*23e6278eSDag-Erling Smørgrav 	}
129*23e6278eSDag-Erling Smørgrav }
130*23e6278eSDag-Erling Smørgrav 
131*23e6278eSDag-Erling Smørgrav /*
132a50b01d2SDag-Erling Smørgrav  * Compute the cosine of theta.
133a50b01d2SDag-Erling Smørgrav  */
134a50b01d2SDag-Erling Smørgrav fp16_t
fp16_cos(fp16_t theta)135a50b01d2SDag-Erling Smørgrav fp16_cos(fp16_t theta)
136a50b01d2SDag-Erling Smørgrav {
137a50b01d2SDag-Erling Smørgrav 	unsigned int i;
138a50b01d2SDag-Erling Smørgrav 
139a50b01d2SDag-Erling Smørgrav 	i = 1024 * (theta % FP16_2PI) / FP16_2PI;
140a50b01d2SDag-Erling Smørgrav 	switch (i / 256) {
141a50b01d2SDag-Erling Smørgrav 	case 0:
142*23e6278eSDag-Erling Smørgrav 		return (fp16_sin_table[255 - i % 256]);
143a50b01d2SDag-Erling Smørgrav 	case 1:
144*23e6278eSDag-Erling Smørgrav 		return (-fp16_sin_table[i % 256]);
145a50b01d2SDag-Erling Smørgrav 	case 2:
146*23e6278eSDag-Erling Smørgrav 		return (-fp16_sin_table[255 - i % 256]);
147a50b01d2SDag-Erling Smørgrav 	case 3:
148*23e6278eSDag-Erling Smørgrav 		return (fp16_sin_table[i % 256]);
149a50b01d2SDag-Erling Smørgrav 	default:
150a50b01d2SDag-Erling Smørgrav 		/* inconceivable! */
151a50b01d2SDag-Erling Smørgrav 		return (0);
152a50b01d2SDag-Erling Smørgrav 	}
153a50b01d2SDag-Erling Smørgrav }
154