1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4
5 #include "spl_fixpt31_32.h"
6
7 static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL };
8 static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL };
9 static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL };
10
abs_i64(long long arg)11 static inline unsigned long long abs_i64(
12 long long arg)
13 {
14 if (arg > 0)
15 return (unsigned long long)arg;
16 else
17 return (unsigned long long)(-arg);
18 }
19
20 /*
21 * @brief
22 * result = dividend / divisor
23 * *remainder = dividend % divisor
24 */
spl_complete_integer_division_u64(unsigned long long dividend,unsigned long long divisor,unsigned long long * remainder)25 static inline unsigned long long spl_complete_integer_division_u64(
26 unsigned long long dividend,
27 unsigned long long divisor,
28 unsigned long long *remainder)
29 {
30 unsigned long long result;
31
32 SPL_ASSERT(divisor);
33
34 result = spl_div64_u64_rem(dividend, divisor, remainder);
35
36 return result;
37 }
38
39
40 #define FRACTIONAL_PART_MASK \
41 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
42
43 #define GET_INTEGER_PART(x) \
44 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
45
46 #define GET_FRACTIONAL_PART(x) \
47 (FRACTIONAL_PART_MASK & (x))
48
spl_fixpt_from_fraction(long long numerator,long long denominator)49 struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator)
50 {
51 struct spl_fixed31_32 res;
52
53 bool arg1_negative = numerator < 0;
54 bool arg2_negative = denominator < 0;
55
56 unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
57 unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
58
59 unsigned long long remainder;
60
61 /* determine integer part */
62
63 unsigned long long res_value = spl_complete_integer_division_u64(
64 arg1_value, arg2_value, &remainder);
65
66 SPL_ASSERT(res_value <= (unsigned long long)LONG_MAX);
67
68 /* determine fractional part */
69 {
70 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
71
72 do {
73 remainder <<= 1;
74
75 res_value <<= 1;
76
77 if (remainder >= arg2_value) {
78 res_value |= 1;
79 remainder -= arg2_value;
80 }
81 } while (--i != 0);
82 }
83
84 /* round up LSB */
85 {
86 unsigned long long summand = (remainder << 1) >= arg2_value;
87
88 SPL_ASSERT(res_value <= (unsigned long long)LLONG_MAX - summand);
89
90 res_value += summand;
91 }
92
93 res.value = (long long)res_value;
94
95 if (arg1_negative ^ arg2_negative)
96 res.value = -res.value;
97
98 return res;
99 }
100
spl_fixpt_mul(struct spl_fixed31_32 arg1,struct spl_fixed31_32 arg2)101 struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
102 {
103 struct spl_fixed31_32 res;
104
105 bool arg1_negative = arg1.value < 0;
106 bool arg2_negative = arg2.value < 0;
107
108 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
109 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
110
111 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
112 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
113
114 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
115 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
116
117 unsigned long long tmp;
118
119 res.value = arg1_int * arg2_int;
120
121 SPL_ASSERT(res.value <= (long long)LONG_MAX);
122
123 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
124
125 tmp = arg1_int * arg2_fra;
126
127 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
128
129 res.value += tmp;
130
131 tmp = arg2_int * arg1_fra;
132
133 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
134
135 res.value += tmp;
136
137 tmp = arg1_fra * arg2_fra;
138
139 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
140 (tmp >= (unsigned long long)spl_fixpt_half.value);
141
142 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
143
144 res.value += tmp;
145
146 if (arg1_negative ^ arg2_negative)
147 res.value = -res.value;
148
149 return res;
150 }
151
spl_fixpt_sqr(struct spl_fixed31_32 arg)152 struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg)
153 {
154 struct spl_fixed31_32 res;
155
156 unsigned long long arg_value = abs_i64(arg.value);
157
158 unsigned long long arg_int = GET_INTEGER_PART(arg_value);
159
160 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
161
162 unsigned long long tmp;
163
164 res.value = arg_int * arg_int;
165
166 SPL_ASSERT(res.value <= (long long)LONG_MAX);
167
168 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
169
170 tmp = arg_int * arg_fra;
171
172 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
173
174 res.value += tmp;
175
176 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
177
178 res.value += tmp;
179
180 tmp = arg_fra * arg_fra;
181
182 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
183 (tmp >= (unsigned long long)spl_fixpt_half.value);
184
185 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
186
187 res.value += tmp;
188
189 return res;
190 }
191
spl_fixpt_recip(struct spl_fixed31_32 arg)192 struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg)
193 {
194 /*
195 * @note
196 * Good idea to use Newton's method
197 */
198
199 SPL_ASSERT(arg.value);
200
201 return spl_fixpt_from_fraction(
202 spl_fixpt_one.value,
203 arg.value);
204 }
205
spl_fixpt_sinc(struct spl_fixed31_32 arg)206 struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg)
207 {
208 struct spl_fixed31_32 square;
209
210 struct spl_fixed31_32 res = spl_fixpt_one;
211
212 int n = 27;
213
214 struct spl_fixed31_32 arg_norm = arg;
215
216 if (spl_fixpt_le(
217 spl_fixpt_two_pi,
218 spl_fixpt_abs(arg))) {
219 arg_norm = spl_fixpt_sub(
220 arg_norm,
221 spl_fixpt_mul_int(
222 spl_fixpt_two_pi,
223 (int)spl_div64_s64(
224 arg_norm.value,
225 spl_fixpt_two_pi.value)));
226 }
227
228 square = spl_fixpt_sqr(arg_norm);
229
230 do {
231 res = spl_fixpt_sub(
232 spl_fixpt_one,
233 spl_fixpt_div_int(
234 spl_fixpt_mul(
235 square,
236 res),
237 n * (n - 1)));
238
239 n -= 2;
240 } while (n > 2);
241
242 if (arg.value != arg_norm.value)
243 res = spl_fixpt_div(
244 spl_fixpt_mul(res, arg_norm),
245 arg);
246
247 return res;
248 }
249
spl_fixpt_sin(struct spl_fixed31_32 arg)250 struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg)
251 {
252 return spl_fixpt_mul(
253 arg,
254 spl_fixpt_sinc(arg));
255 }
256
spl_fixpt_cos(struct spl_fixed31_32 arg)257 struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg)
258 {
259 /* TODO implement argument normalization */
260
261 const struct spl_fixed31_32 square = spl_fixpt_sqr(arg);
262
263 struct spl_fixed31_32 res = spl_fixpt_one;
264
265 int n = 26;
266
267 do {
268 res = spl_fixpt_sub(
269 spl_fixpt_one,
270 spl_fixpt_div_int(
271 spl_fixpt_mul(
272 square,
273 res),
274 n * (n - 1)));
275
276 n -= 2;
277 } while (n != 0);
278
279 return res;
280 }
281
282 /*
283 * @brief
284 * result = exp(arg),
285 * where abs(arg) < 1
286 *
287 * Calculated as Taylor series.
288 */
spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)289 static struct spl_fixed31_32 spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)
290 {
291 unsigned int n = 9;
292
293 struct spl_fixed31_32 res = spl_fixpt_from_fraction(
294 n + 2,
295 n + 1);
296 /* TODO find correct res */
297
298 SPL_ASSERT(spl_fixpt_lt(arg, spl_fixpt_one));
299
300 do
301 res = spl_fixpt_add(
302 spl_fixpt_one,
303 spl_fixpt_div_int(
304 spl_fixpt_mul(
305 arg,
306 res),
307 n));
308 while (--n != 1);
309
310 return spl_fixpt_add(
311 spl_fixpt_one,
312 spl_fixpt_mul(
313 arg,
314 res));
315 }
316
spl_fixpt_exp(struct spl_fixed31_32 arg)317 struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg)
318 {
319 /*
320 * @brief
321 * Main equation is:
322 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
323 * where m = round(x / ln(2)), r = x - m * ln(2)
324 */
325
326 if (spl_fixpt_le(
327 spl_fixpt_ln2_div_2,
328 spl_fixpt_abs(arg))) {
329 int m = spl_fixpt_round(
330 spl_fixpt_div(
331 arg,
332 spl_fixpt_ln2));
333
334 struct spl_fixed31_32 r = spl_fixpt_sub(
335 arg,
336 spl_fixpt_mul_int(
337 spl_fixpt_ln2,
338 m));
339
340 SPL_ASSERT(m != 0);
341
342 SPL_ASSERT(spl_fixpt_lt(
343 spl_fixpt_abs(r),
344 spl_fixpt_one));
345
346 if (m > 0)
347 return spl_fixpt_shl(
348 spl_fixed31_32_exp_from_taylor_series(r),
349 (unsigned char)m);
350 else
351 return spl_fixpt_div_int(
352 spl_fixed31_32_exp_from_taylor_series(r),
353 1LL << -m);
354 } else if (arg.value != 0)
355 return spl_fixed31_32_exp_from_taylor_series(arg);
356 else
357 return spl_fixpt_one;
358 }
359
spl_fixpt_log(struct spl_fixed31_32 arg)360 struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg)
361 {
362 struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one);
363 /* TODO improve 1st estimation */
364
365 struct spl_fixed31_32 error;
366
367 SPL_ASSERT(arg.value > 0);
368 /* TODO if arg is negative, return NaN */
369 /* TODO if arg is zero, return -INF */
370
371 do {
372 struct spl_fixed31_32 res1 = spl_fixpt_add(
373 spl_fixpt_sub(
374 res,
375 spl_fixpt_one),
376 spl_fixpt_div(
377 arg,
378 spl_fixpt_exp(res)));
379
380 error = spl_fixpt_sub(
381 res,
382 res1);
383
384 res = res1;
385 /* TODO determine max_allowed_error based on quality of exp() */
386 } while (abs_i64(error.value) > 100ULL);
387
388 return res;
389 }
390
391
392 /* this function is a generic helper to translate fixed point value to
393 * specified integer format that will consist of integer_bits integer part and
394 * fractional_bits fractional part. For example it is used in
395 * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
396 * part in 32 bits. It is used in hw programming (scaler)
397 */
398
spl_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits)399 static inline unsigned int spl_ux_dy(
400 long long value,
401 unsigned int integer_bits,
402 unsigned int fractional_bits)
403 {
404 /* 1. create mask of integer part */
405 unsigned int result = (1 << integer_bits) - 1;
406 /* 2. mask out fractional part */
407 unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
408 /* 3. shrink fixed point integer part to be of integer_bits width*/
409 result &= GET_INTEGER_PART(value);
410 /* 4. make space for fractional part to be filled in after integer */
411 result <<= fractional_bits;
412 /* 5. shrink fixed point fractional part to of fractional_bits width*/
413 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
414 /* 6. merge the result */
415 return result | fractional_part;
416 }
417
spl_clamp_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits,unsigned int min_clamp)418 static inline unsigned int spl_clamp_ux_dy(
419 long long value,
420 unsigned int integer_bits,
421 unsigned int fractional_bits,
422 unsigned int min_clamp)
423 {
424 unsigned int truncated_val = spl_ux_dy(value, integer_bits, fractional_bits);
425
426 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
427 return (1 << (integer_bits + fractional_bits)) - 1;
428 else if (truncated_val > min_clamp)
429 return truncated_val;
430 else
431 return min_clamp;
432 }
433
spl_fixpt_u4d19(struct spl_fixed31_32 arg)434 unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg)
435 {
436 return spl_ux_dy(arg.value, 4, 19);
437 }
438
spl_fixpt_u3d19(struct spl_fixed31_32 arg)439 unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg)
440 {
441 return spl_ux_dy(arg.value, 3, 19);
442 }
443
spl_fixpt_u2d19(struct spl_fixed31_32 arg)444 unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg)
445 {
446 return spl_ux_dy(arg.value, 2, 19);
447 }
448
spl_fixpt_u0d19(struct spl_fixed31_32 arg)449 unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg)
450 {
451 return spl_ux_dy(arg.value, 0, 19);
452 }
453
spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)454 unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)
455 {
456 return spl_clamp_ux_dy(arg.value, 0, 14, 1);
457 }
458
spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)459 unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)
460 {
461 return spl_clamp_ux_dy(arg.value, 0, 10, 1);
462 }
463
spl_fixpt_s4d19(struct spl_fixed31_32 arg)464 int spl_fixpt_s4d19(struct spl_fixed31_32 arg)
465 {
466 if (arg.value < 0)
467 return -(int)spl_ux_dy(spl_fixpt_abs(arg).value, 4, 19);
468 else
469 return spl_ux_dy(arg.value, 4, 19);
470 }
471
spl_fixpt_from_ux_dy(unsigned int value,unsigned int integer_bits,unsigned int fractional_bits)472 struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value,
473 unsigned int integer_bits,
474 unsigned int fractional_bits)
475 {
476 struct spl_fixed31_32 fixpt_value = spl_fixpt_zero;
477 struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero;
478 long long frac_mask = ((long long)1 << (long long)integer_bits) - 1;
479
480 fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
481 frac_mask = frac_mask << fractional_bits;
482 fixpt_int_value.value = value & frac_mask;
483 fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
484 fixpt_value.value |= fixpt_int_value.value;
485 return fixpt_value;
486 }
487
spl_fixpt_from_int_dy(unsigned int int_value,unsigned int frac_value,unsigned int integer_bits,unsigned int fractional_bits)488 struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
489 unsigned int frac_value,
490 unsigned int integer_bits,
491 unsigned int fractional_bits)
492 {
493 struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value);
494
495 fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
496 return fixpt_value;
497 }
498