1a50b01d2SDag-Erling Smørgrav /*-
2a50b01d2SDag-Erling Smørgrav * Copyright (c) 2015 Dag-Erling Smørgrav
3a50b01d2SDag-Erling Smørgrav * All rights reserved.
4a50b01d2SDag-Erling Smørgrav *
5a50b01d2SDag-Erling Smørgrav * Redistribution and use in source and binary forms, with or without
6a50b01d2SDag-Erling Smørgrav * modification, are permitted provided that the following conditions
7a50b01d2SDag-Erling Smørgrav * are met:
8a50b01d2SDag-Erling Smørgrav * 1. Redistributions of source code must retain the above copyright
9a50b01d2SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer.
10a50b01d2SDag-Erling Smørgrav * 2. Redistributions in binary form must reproduce the above copyright
11a50b01d2SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer in the
12a50b01d2SDag-Erling Smørgrav * documentation and/or other materials provided with the distribution.
13a50b01d2SDag-Erling Smørgrav *
14a50b01d2SDag-Erling Smørgrav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15a50b01d2SDag-Erling Smørgrav * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16a50b01d2SDag-Erling Smørgrav * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17a50b01d2SDag-Erling Smørgrav * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18a50b01d2SDag-Erling Smørgrav * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19a50b01d2SDag-Erling Smørgrav * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20a50b01d2SDag-Erling Smørgrav * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21a50b01d2SDag-Erling Smørgrav * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22a50b01d2SDag-Erling Smørgrav * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23a50b01d2SDag-Erling Smørgrav * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24a50b01d2SDag-Erling Smørgrav * SUCH DAMAGE.
25a50b01d2SDag-Erling Smørgrav */
26a50b01d2SDag-Erling Smørgrav
27a50b01d2SDag-Erling Smørgrav #ifdef _KERNEL
28a50b01d2SDag-Erling Smørgrav #include <sys/libkern.h>
29a50b01d2SDag-Erling Smørgrav #else
30a50b01d2SDag-Erling Smørgrav #include <stdio.h>
31a50b01d2SDag-Erling Smørgrav #include <strings.h>
32a50b01d2SDag-Erling Smørgrav #endif
33a50b01d2SDag-Erling Smørgrav
34a50b01d2SDag-Erling Smørgrav #include "fp16.h"
35a50b01d2SDag-Erling Smørgrav
36a50b01d2SDag-Erling Smørgrav /*
37a50b01d2SDag-Erling Smørgrav * Compute the quare root of x, using Newton's method with 2^(log2(x)/2)
38a50b01d2SDag-Erling Smørgrav * as the initial estimate.
39a50b01d2SDag-Erling Smørgrav */
40a50b01d2SDag-Erling Smørgrav fp16_t
fp16_sqrt(fp16_t x)41a50b01d2SDag-Erling Smørgrav fp16_sqrt(fp16_t x)
42a50b01d2SDag-Erling Smørgrav {
43a50b01d2SDag-Erling Smørgrav fp16_t y, delta;
44a50b01d2SDag-Erling Smørgrav signed int log2x;
45a50b01d2SDag-Erling Smørgrav
46a50b01d2SDag-Erling Smørgrav /* special case */
47a50b01d2SDag-Erling Smørgrav if (x == 0)
48a50b01d2SDag-Erling Smørgrav return (0);
49a50b01d2SDag-Erling Smørgrav
50a50b01d2SDag-Erling Smørgrav /* shift toward 0 by half the logarithm */
51a50b01d2SDag-Erling Smørgrav log2x = flsl(x) - 1;
52a50b01d2SDag-Erling Smørgrav if (log2x >= 16) {
53a50b01d2SDag-Erling Smørgrav y = x >> (log2x - 16) / 2;
54a50b01d2SDag-Erling Smørgrav } else {
55a50b01d2SDag-Erling Smørgrav #if 0
56a50b01d2SDag-Erling Smørgrav y = x << (16 - log2x) / 2;
57a50b01d2SDag-Erling Smørgrav #else
58a50b01d2SDag-Erling Smørgrav /* XXX for now, return 0 for anything < 1 */
59a50b01d2SDag-Erling Smørgrav return (0);
60a50b01d2SDag-Erling Smørgrav #endif
61a50b01d2SDag-Erling Smørgrav }
62a50b01d2SDag-Erling Smørgrav while (y > 0) {
63a50b01d2SDag-Erling Smørgrav /* delta = y^2 / 2y */
64a50b01d2SDag-Erling Smørgrav delta = fp16_div(fp16_sub(fp16_mul(y, y), x), y * 2);
65a50b01d2SDag-Erling Smørgrav if (delta == 0)
66a50b01d2SDag-Erling Smørgrav break;
67a50b01d2SDag-Erling Smørgrav y = fp16_sub(y, delta);
68a50b01d2SDag-Erling Smørgrav }
69a50b01d2SDag-Erling Smørgrav return (y);
70a50b01d2SDag-Erling Smørgrav }
71a50b01d2SDag-Erling Smørgrav
72*23e6278eSDag-Erling Smørgrav static fp16_t fp16_sin_table[256] = {
73*23e6278eSDag-Erling Smørgrav 0, 402, 804, 1206, 1608, 2010, 2412, 2814,
74*23e6278eSDag-Erling Smørgrav 3215, 3617, 4018, 4420, 4821, 5222, 5622, 6023,
75*23e6278eSDag-Erling Smørgrav 6423, 6823, 7223, 7623, 8022, 8421, 8819, 9218,
76*23e6278eSDag-Erling Smørgrav 9616, 10013, 10410, 10807, 11204, 11600, 11995, 12390,
77*23e6278eSDag-Erling Smørgrav 12785, 13179, 13573, 13966, 14359, 14751, 15142, 15533,
78*23e6278eSDag-Erling Smørgrav 15923, 16313, 16702, 17091, 17479, 17866, 18253, 18638,
79*23e6278eSDag-Erling Smørgrav 19024, 19408, 19792, 20175, 20557, 20938, 21319, 21699,
80*23e6278eSDag-Erling Smørgrav 22078, 22456, 22833, 23210, 23586, 23960, 24334, 24707,
81*23e6278eSDag-Erling Smørgrav 25079, 25450, 25820, 26189, 26557, 26925, 27291, 27656,
82*23e6278eSDag-Erling Smørgrav 28020, 28383, 28745, 29105, 29465, 29824, 30181, 30538,
83*23e6278eSDag-Erling Smørgrav 30893, 31247, 31600, 31952, 32302, 32651, 32999, 33346,
84*23e6278eSDag-Erling Smørgrav 33692, 34036, 34379, 34721, 35061, 35400, 35738, 36074,
85*23e6278eSDag-Erling Smørgrav 36409, 36743, 37075, 37406, 37736, 38064, 38390, 38716,
86*23e6278eSDag-Erling Smørgrav 39039, 39362, 39682, 40002, 40319, 40636, 40950, 41263,
87*23e6278eSDag-Erling Smørgrav 41575, 41885, 42194, 42501, 42806, 43110, 43412, 43712,
88*23e6278eSDag-Erling Smørgrav 44011, 44308, 44603, 44897, 45189, 45480, 45768, 46055,
89*23e6278eSDag-Erling Smørgrav 46340, 46624, 46906, 47186, 47464, 47740, 48015, 48288,
90*23e6278eSDag-Erling Smørgrav 48558, 48828, 49095, 49360, 49624, 49886, 50146, 50403,
91*23e6278eSDag-Erling Smørgrav 50660, 50914, 51166, 51416, 51665, 51911, 52155, 52398,
92*23e6278eSDag-Erling Smørgrav 52639, 52877, 53114, 53348, 53581, 53811, 54040, 54266,
93*23e6278eSDag-Erling Smørgrav 54491, 54713, 54933, 55152, 55368, 55582, 55794, 56004,
94*23e6278eSDag-Erling Smørgrav 56212, 56417, 56621, 56822, 57022, 57219, 57414, 57606,
95*23e6278eSDag-Erling Smørgrav 57797, 57986, 58172, 58356, 58538, 58718, 58895, 59070,
96*23e6278eSDag-Erling Smørgrav 59243, 59414, 59583, 59749, 59913, 60075, 60235, 60392,
97*23e6278eSDag-Erling Smørgrav 60547, 60700, 60850, 60998, 61144, 61288, 61429, 61568,
98*23e6278eSDag-Erling Smørgrav 61705, 61839, 61971, 62100, 62228, 62353, 62475, 62596,
99*23e6278eSDag-Erling Smørgrav 62714, 62829, 62942, 63053, 63162, 63268, 63371, 63473,
100*23e6278eSDag-Erling Smørgrav 63571, 63668, 63762, 63854, 63943, 64030, 64115, 64197,
101*23e6278eSDag-Erling Smørgrav 64276, 64353, 64428, 64501, 64571, 64638, 64703, 64766,
102*23e6278eSDag-Erling Smørgrav 64826, 64884, 64939, 64992, 65043, 65091, 65136, 65179,
103*23e6278eSDag-Erling Smørgrav 65220, 65258, 65294, 65327, 65358, 65386, 65412, 65436,
104*23e6278eSDag-Erling Smørgrav 65457, 65475, 65491, 65505, 65516, 65524, 65531, 65534,
105a50b01d2SDag-Erling Smørgrav };
106a50b01d2SDag-Erling Smørgrav
107a50b01d2SDag-Erling Smørgrav /*
108*23e6278eSDag-Erling Smørgrav * Compute the sine of theta.
109*23e6278eSDag-Erling Smørgrav */
110*23e6278eSDag-Erling Smørgrav fp16_t
fp16_sin(fp16_t theta)111*23e6278eSDag-Erling Smørgrav fp16_sin(fp16_t theta)
112*23e6278eSDag-Erling Smørgrav {
113*23e6278eSDag-Erling Smørgrav unsigned int i;
114*23e6278eSDag-Erling Smørgrav
115*23e6278eSDag-Erling Smørgrav i = 1024 * (theta % FP16_2PI) / FP16_2PI;
116*23e6278eSDag-Erling Smørgrav switch (i / 256) {
117*23e6278eSDag-Erling Smørgrav case 0:
118*23e6278eSDag-Erling Smørgrav return (fp16_sin_table[i % 256]);
119*23e6278eSDag-Erling Smørgrav case 1:
120*23e6278eSDag-Erling Smørgrav return (fp16_sin_table[255 - i % 256]);
121*23e6278eSDag-Erling Smørgrav case 2:
122*23e6278eSDag-Erling Smørgrav return (-fp16_sin_table[i % 256]);
123*23e6278eSDag-Erling Smørgrav case 3:
124*23e6278eSDag-Erling Smørgrav return (-fp16_sin_table[255 - i % 256]);
125*23e6278eSDag-Erling Smørgrav default:
126*23e6278eSDag-Erling Smørgrav /* inconceivable! */
127*23e6278eSDag-Erling Smørgrav return (0);
128*23e6278eSDag-Erling Smørgrav }
129*23e6278eSDag-Erling Smørgrav }
130*23e6278eSDag-Erling Smørgrav
131*23e6278eSDag-Erling Smørgrav /*
132a50b01d2SDag-Erling Smørgrav * Compute the cosine of theta.
133a50b01d2SDag-Erling Smørgrav */
134a50b01d2SDag-Erling Smørgrav fp16_t
fp16_cos(fp16_t theta)135a50b01d2SDag-Erling Smørgrav fp16_cos(fp16_t theta)
136a50b01d2SDag-Erling Smørgrav {
137a50b01d2SDag-Erling Smørgrav unsigned int i;
138a50b01d2SDag-Erling Smørgrav
139a50b01d2SDag-Erling Smørgrav i = 1024 * (theta % FP16_2PI) / FP16_2PI;
140a50b01d2SDag-Erling Smørgrav switch (i / 256) {
141a50b01d2SDag-Erling Smørgrav case 0:
142*23e6278eSDag-Erling Smørgrav return (fp16_sin_table[255 - i % 256]);
143a50b01d2SDag-Erling Smørgrav case 1:
144*23e6278eSDag-Erling Smørgrav return (-fp16_sin_table[i % 256]);
145a50b01d2SDag-Erling Smørgrav case 2:
146*23e6278eSDag-Erling Smørgrav return (-fp16_sin_table[255 - i % 256]);
147a50b01d2SDag-Erling Smørgrav case 3:
148*23e6278eSDag-Erling Smørgrav return (fp16_sin_table[i % 256]);
149a50b01d2SDag-Erling Smørgrav default:
150a50b01d2SDag-Erling Smørgrav /* inconceivable! */
151a50b01d2SDag-Erling Smørgrav return (0);
152a50b01d2SDag-Erling Smørgrav }
153a50b01d2SDag-Erling Smørgrav }
154