xref: /freebsd/usr.sbin/virtual_oss/virtual_oss/mul.c (revision 9cab9fde5edad9b409dd2317a2aec7815e6d6bed)
1 /*-
2  * Copyright (c) 2017 Hans Petter Selasky
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include <sys/queue.h>
27 
28 #include <stdint.h>
29 #include <string.h>
30 
31 #include "int.h"
32 
33 #ifndef VOSS_X3_LOG2_COMBA
34 #define	VOSS_X3_LOG2_COMBA 5
35 #endif
36 
37 #if (VOSS_X3_LOG2_COMBA < 2)
38 #error "VOSS_X3_LOG2_COMBA must be greater than 1"
39 #endif
40 
41 struct voss_x3_input_double {
42 	double	a;
43 	double	b;
44 } __aligned(16);
45 
46 /*
47  * <input size> = "stride"
48  * <output size> = 2 * "stride"
49  */
50 static void
voss_x3_multiply_sub_double(struct voss_x3_input_double * input,double * ptr_low,double * ptr_high,const size_t stride,const uint8_t toggle)51 voss_x3_multiply_sub_double(struct voss_x3_input_double *input, double *ptr_low, double *ptr_high,
52     const size_t stride, const uint8_t toggle)
53 {
54 	size_t x;
55 	size_t y;
56 
57 	if (stride >= (1UL << VOSS_X3_LOG2_COMBA)) {
58 		const size_t strideh = stride >> 1;
59 
60 		if (toggle) {
61 
62 			/* inverse step */
63 			for (x = 0; x != strideh; x++) {
64 				double a, b, c, d;
65 
66 				a = ptr_low[x];
67 				b = ptr_low[x + strideh];
68 				c = ptr_high[x];
69 				d = ptr_high[x + strideh];
70 
71 				ptr_low[x + strideh] = a + b;
72 				ptr_high[x] = a + b + c + d;
73 			}
74 
75 			voss_x3_multiply_sub_double(input, ptr_low, ptr_low + strideh, strideh, 1);
76 
77 			for (x = 0; x != strideh; x++)
78 				ptr_low[x + strideh] = -ptr_low[x + strideh];
79 
80 			voss_x3_multiply_sub_double(input + strideh, ptr_low + strideh, ptr_high + strideh, strideh, 1);
81 
82 			/* forward step */
83 			for (x = 0; x != strideh; x++) {
84 				double a, b, c, d;
85 
86 				a = ptr_low[x];
87 				b = ptr_low[x + strideh];
88 				c = ptr_high[x];
89 				d = ptr_high[x + strideh];
90 
91 				ptr_low[x + strideh] = -a - b;
92 				ptr_high[x] = c + b - d;
93 
94 				input[x + strideh].a += input[x].a;
95 				input[x + strideh].b += input[x].b;
96 			}
97 
98 			voss_x3_multiply_sub_double(input + strideh, ptr_low + strideh, ptr_high, strideh, 0);
99 		} else {
100 			voss_x3_multiply_sub_double(input + strideh, ptr_low + strideh, ptr_high, strideh, 1);
101 
102 			/* inverse step */
103 			for (x = 0; x != strideh; x++) {
104 				double a, b, c, d;
105 
106 				a = ptr_low[x];
107 				b = ptr_low[x + strideh];
108 				c = ptr_high[x];
109 				d = ptr_high[x + strideh];
110 
111 				ptr_low[x + strideh] = -a - b;
112 				ptr_high[x] = a + b + c + d;
113 
114 				input[x + strideh].a -= input[x].a;
115 				input[x + strideh].b -= input[x].b;
116 			}
117 
118 			voss_x3_multiply_sub_double(input + strideh, ptr_low + strideh, ptr_high + strideh, strideh, 0);
119 
120 			for (x = 0; x != strideh; x++)
121 				ptr_low[x + strideh] = -ptr_low[x + strideh];
122 
123 			voss_x3_multiply_sub_double(input, ptr_low, ptr_low + strideh, strideh, 0);
124 
125 			/* forward step */
126 			for (x = 0; x != strideh; x++) {
127 				double a, b, c, d;
128 
129 				a = ptr_low[x];
130 				b = ptr_low[x + strideh];
131 				c = ptr_high[x];
132 				d = ptr_high[x + strideh];
133 
134 				ptr_low[x + strideh] = b - a;
135 				ptr_high[x] = c - b - d;
136 			}
137 		}
138 	} else {
139 		for (x = 0; x != stride; x++) {
140 			double value = input[x].a;
141 
142 			for (y = 0; y != (stride - x); y++) {
143 				ptr_low[x + y] += input[y].b * value;
144 			}
145 
146 			for (; y != stride; y++) {
147 				ptr_high[x + y - stride] += input[y].b * value;
148 			}
149 		}
150 	}
151 }
152 
153 /*
154  * <input size> = "max"
155  * <output size> = 2 * "max"
156  */
157 void
voss_x3_multiply_double(const int64_t * va,const double * vb,double * pc,const size_t max)158 voss_x3_multiply_double(const int64_t *va, const double *vb, double *pc, const size_t max)
159 {
160 	struct voss_x3_input_double input[max];
161 	size_t x;
162 
163 	/* check for non-power of two */
164 	if (max & (max - 1))
165 		return;
166 
167 	/* setup input vector */
168 	for (x = 0; x != max; x++) {
169 		input[x].a = va[x];
170 		input[x].b = vb[x];
171 	}
172 
173 	/* do multiplication */
174 	voss_x3_multiply_sub_double(input, pc, pc + max, max, 1);
175 }
176