1a50b01d2SDag-Erling Smørgrav /*- 2a50b01d2SDag-Erling Smørgrav * Copyright (c) 2015 Dag-Erling Smørgrav 3a50b01d2SDag-Erling Smørgrav * All rights reserved. 4a50b01d2SDag-Erling Smørgrav * 5a50b01d2SDag-Erling Smørgrav * Redistribution and use in source and binary forms, with or without 6a50b01d2SDag-Erling Smørgrav * modification, are permitted provided that the following conditions 7a50b01d2SDag-Erling Smørgrav * are met: 8a50b01d2SDag-Erling Smørgrav * 1. Redistributions of source code must retain the above copyright 9a50b01d2SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer. 10a50b01d2SDag-Erling Smørgrav * 2. Redistributions in binary form must reproduce the above copyright 11a50b01d2SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer in the 12a50b01d2SDag-Erling Smørgrav * documentation and/or other materials provided with the distribution. 13a50b01d2SDag-Erling Smørgrav * 14a50b01d2SDag-Erling Smørgrav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15a50b01d2SDag-Erling Smørgrav * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16a50b01d2SDag-Erling Smørgrav * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17a50b01d2SDag-Erling Smørgrav * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18a50b01d2SDag-Erling Smørgrav * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19a50b01d2SDag-Erling Smørgrav * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20a50b01d2SDag-Erling Smørgrav * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21a50b01d2SDag-Erling Smørgrav * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22a50b01d2SDag-Erling Smørgrav * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23a50b01d2SDag-Erling Smørgrav * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24a50b01d2SDag-Erling Smørgrav * SUCH DAMAGE. 25a50b01d2SDag-Erling Smørgrav * 26a50b01d2SDag-Erling Smørgrav * $FreeBSD$ 27a50b01d2SDag-Erling Smørgrav */ 28a50b01d2SDag-Erling Smørgrav 29a50b01d2SDag-Erling Smørgrav #ifdef _KERNEL 30a50b01d2SDag-Erling Smørgrav #include <sys/libkern.h> 31a50b01d2SDag-Erling Smørgrav #else 32a50b01d2SDag-Erling Smørgrav #include <stdio.h> 33a50b01d2SDag-Erling Smørgrav #include <strings.h> 34a50b01d2SDag-Erling Smørgrav #endif 35a50b01d2SDag-Erling Smørgrav 36a50b01d2SDag-Erling Smørgrav #include "fp16.h" 37a50b01d2SDag-Erling Smørgrav 38a50b01d2SDag-Erling Smørgrav /* 39a50b01d2SDag-Erling Smørgrav * Compute the quare root of x, using Newton's method with 2^(log2(x)/2) 40a50b01d2SDag-Erling Smørgrav * as the initial estimate. 41a50b01d2SDag-Erling Smørgrav */ 42a50b01d2SDag-Erling Smørgrav fp16_t 43a50b01d2SDag-Erling Smørgrav fp16_sqrt(fp16_t x) 44a50b01d2SDag-Erling Smørgrav { 45a50b01d2SDag-Erling Smørgrav fp16_t y, delta; 46a50b01d2SDag-Erling Smørgrav signed int log2x; 47a50b01d2SDag-Erling Smørgrav 48a50b01d2SDag-Erling Smørgrav /* special case */ 49a50b01d2SDag-Erling Smørgrav if (x == 0) 50a50b01d2SDag-Erling Smørgrav return (0); 51a50b01d2SDag-Erling Smørgrav 52a50b01d2SDag-Erling Smørgrav /* shift toward 0 by half the logarithm */ 53a50b01d2SDag-Erling Smørgrav log2x = flsl(x) - 1; 54a50b01d2SDag-Erling Smørgrav if (log2x >= 16) { 55a50b01d2SDag-Erling Smørgrav y = x >> (log2x - 16) / 2; 56a50b01d2SDag-Erling Smørgrav } else { 57a50b01d2SDag-Erling Smørgrav #if 0 58a50b01d2SDag-Erling Smørgrav y = x << (16 - log2x) / 2; 59a50b01d2SDag-Erling Smørgrav #else 60a50b01d2SDag-Erling Smørgrav /* XXX for now, return 0 for anything < 1 */ 61a50b01d2SDag-Erling Smørgrav return (0); 62a50b01d2SDag-Erling Smørgrav #endif 63a50b01d2SDag-Erling Smørgrav } 64a50b01d2SDag-Erling Smørgrav while (y > 0) { 65a50b01d2SDag-Erling Smørgrav /* delta = y^2 / 2y */ 66a50b01d2SDag-Erling Smørgrav delta = fp16_div(fp16_sub(fp16_mul(y, y), x), y * 2); 67a50b01d2SDag-Erling Smørgrav if (delta == 0) 68a50b01d2SDag-Erling Smørgrav break; 69a50b01d2SDag-Erling Smørgrav y = fp16_sub(y, delta); 70a50b01d2SDag-Erling Smørgrav } 71a50b01d2SDag-Erling Smørgrav return (y); 72a50b01d2SDag-Erling Smørgrav } 73a50b01d2SDag-Erling Smørgrav 74*23e6278eSDag-Erling Smørgrav static fp16_t fp16_sin_table[256] = { 75*23e6278eSDag-Erling Smørgrav 0, 402, 804, 1206, 1608, 2010, 2412, 2814, 76*23e6278eSDag-Erling Smørgrav 3215, 3617, 4018, 4420, 4821, 5222, 5622, 6023, 77*23e6278eSDag-Erling Smørgrav 6423, 6823, 7223, 7623, 8022, 8421, 8819, 9218, 78*23e6278eSDag-Erling Smørgrav 9616, 10013, 10410, 10807, 11204, 11600, 11995, 12390, 79*23e6278eSDag-Erling Smørgrav 12785, 13179, 13573, 13966, 14359, 14751, 15142, 15533, 80*23e6278eSDag-Erling Smørgrav 15923, 16313, 16702, 17091, 17479, 17866, 18253, 18638, 81*23e6278eSDag-Erling Smørgrav 19024, 19408, 19792, 20175, 20557, 20938, 21319, 21699, 82*23e6278eSDag-Erling Smørgrav 22078, 22456, 22833, 23210, 23586, 23960, 24334, 24707, 83*23e6278eSDag-Erling Smørgrav 25079, 25450, 25820, 26189, 26557, 26925, 27291, 27656, 84*23e6278eSDag-Erling Smørgrav 28020, 28383, 28745, 29105, 29465, 29824, 30181, 30538, 85*23e6278eSDag-Erling Smørgrav 30893, 31247, 31600, 31952, 32302, 32651, 32999, 33346, 86*23e6278eSDag-Erling Smørgrav 33692, 34036, 34379, 34721, 35061, 35400, 35738, 36074, 87*23e6278eSDag-Erling Smørgrav 36409, 36743, 37075, 37406, 37736, 38064, 38390, 38716, 88*23e6278eSDag-Erling Smørgrav 39039, 39362, 39682, 40002, 40319, 40636, 40950, 41263, 89*23e6278eSDag-Erling Smørgrav 41575, 41885, 42194, 42501, 42806, 43110, 43412, 43712, 90*23e6278eSDag-Erling Smørgrav 44011, 44308, 44603, 44897, 45189, 45480, 45768, 46055, 91*23e6278eSDag-Erling Smørgrav 46340, 46624, 46906, 47186, 47464, 47740, 48015, 48288, 92*23e6278eSDag-Erling Smørgrav 48558, 48828, 49095, 49360, 49624, 49886, 50146, 50403, 93*23e6278eSDag-Erling Smørgrav 50660, 50914, 51166, 51416, 51665, 51911, 52155, 52398, 94*23e6278eSDag-Erling Smørgrav 52639, 52877, 53114, 53348, 53581, 53811, 54040, 54266, 95*23e6278eSDag-Erling Smørgrav 54491, 54713, 54933, 55152, 55368, 55582, 55794, 56004, 96*23e6278eSDag-Erling Smørgrav 56212, 56417, 56621, 56822, 57022, 57219, 57414, 57606, 97*23e6278eSDag-Erling Smørgrav 57797, 57986, 58172, 58356, 58538, 58718, 58895, 59070, 98*23e6278eSDag-Erling Smørgrav 59243, 59414, 59583, 59749, 59913, 60075, 60235, 60392, 99*23e6278eSDag-Erling Smørgrav 60547, 60700, 60850, 60998, 61144, 61288, 61429, 61568, 100*23e6278eSDag-Erling Smørgrav 61705, 61839, 61971, 62100, 62228, 62353, 62475, 62596, 101*23e6278eSDag-Erling Smørgrav 62714, 62829, 62942, 63053, 63162, 63268, 63371, 63473, 102*23e6278eSDag-Erling Smørgrav 63571, 63668, 63762, 63854, 63943, 64030, 64115, 64197, 103*23e6278eSDag-Erling Smørgrav 64276, 64353, 64428, 64501, 64571, 64638, 64703, 64766, 104*23e6278eSDag-Erling Smørgrav 64826, 64884, 64939, 64992, 65043, 65091, 65136, 65179, 105*23e6278eSDag-Erling Smørgrav 65220, 65258, 65294, 65327, 65358, 65386, 65412, 65436, 106*23e6278eSDag-Erling Smørgrav 65457, 65475, 65491, 65505, 65516, 65524, 65531, 65534, 107a50b01d2SDag-Erling Smørgrav }; 108a50b01d2SDag-Erling Smørgrav 109a50b01d2SDag-Erling Smørgrav /* 110*23e6278eSDag-Erling Smørgrav * Compute the sine of theta. 111*23e6278eSDag-Erling Smørgrav */ 112*23e6278eSDag-Erling Smørgrav fp16_t 113*23e6278eSDag-Erling Smørgrav fp16_sin(fp16_t theta) 114*23e6278eSDag-Erling Smørgrav { 115*23e6278eSDag-Erling Smørgrav unsigned int i; 116*23e6278eSDag-Erling Smørgrav 117*23e6278eSDag-Erling Smørgrav i = 1024 * (theta % FP16_2PI) / FP16_2PI; 118*23e6278eSDag-Erling Smørgrav switch (i / 256) { 119*23e6278eSDag-Erling Smørgrav case 0: 120*23e6278eSDag-Erling Smørgrav return (fp16_sin_table[i % 256]); 121*23e6278eSDag-Erling Smørgrav case 1: 122*23e6278eSDag-Erling Smørgrav return (fp16_sin_table[255 - i % 256]); 123*23e6278eSDag-Erling Smørgrav case 2: 124*23e6278eSDag-Erling Smørgrav return (-fp16_sin_table[i % 256]); 125*23e6278eSDag-Erling Smørgrav case 3: 126*23e6278eSDag-Erling Smørgrav return (-fp16_sin_table[255 - i % 256]); 127*23e6278eSDag-Erling Smørgrav default: 128*23e6278eSDag-Erling Smørgrav /* inconceivable! */ 129*23e6278eSDag-Erling Smørgrav return (0); 130*23e6278eSDag-Erling Smørgrav } 131*23e6278eSDag-Erling Smørgrav } 132*23e6278eSDag-Erling Smørgrav 133*23e6278eSDag-Erling Smørgrav /* 134a50b01d2SDag-Erling Smørgrav * Compute the cosine of theta. 135a50b01d2SDag-Erling Smørgrav */ 136a50b01d2SDag-Erling Smørgrav fp16_t 137a50b01d2SDag-Erling Smørgrav fp16_cos(fp16_t theta) 138a50b01d2SDag-Erling Smørgrav { 139a50b01d2SDag-Erling Smørgrav unsigned int i; 140a50b01d2SDag-Erling Smørgrav 141a50b01d2SDag-Erling Smørgrav i = 1024 * (theta % FP16_2PI) / FP16_2PI; 142a50b01d2SDag-Erling Smørgrav switch (i / 256) { 143a50b01d2SDag-Erling Smørgrav case 0: 144*23e6278eSDag-Erling Smørgrav return (fp16_sin_table[255 - i % 256]); 145a50b01d2SDag-Erling Smørgrav case 1: 146*23e6278eSDag-Erling Smørgrav return (-fp16_sin_table[i % 256]); 147a50b01d2SDag-Erling Smørgrav case 2: 148*23e6278eSDag-Erling Smørgrav return (-fp16_sin_table[255 - i % 256]); 149a50b01d2SDag-Erling Smørgrav case 3: 150*23e6278eSDag-Erling Smørgrav return (fp16_sin_table[i % 256]); 151a50b01d2SDag-Erling Smørgrav default: 152a50b01d2SDag-Erling Smørgrav /* inconceivable! */ 153a50b01d2SDag-Erling Smørgrav return (0); 154a50b01d2SDag-Erling Smørgrav } 155a50b01d2SDag-Erling Smørgrav } 156