1 /* 2 * Copyright 2012-15 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dm_services.h" 27 #include "include/fixed31_32.h" 28 29 static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL }; 30 static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL }; 31 static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL }; 32 33 static inline unsigned long long abs_i64( 34 long long arg) 35 { 36 if (arg > 0) 37 return (unsigned long long)arg; 38 else 39 return (unsigned long long)(-arg); 40 } 41 42 /* 43 * @brief 44 * result = dividend / divisor 45 * *remainder = dividend % divisor 46 */ 47 static inline unsigned long long complete_integer_division_u64( 48 unsigned long long dividend, 49 unsigned long long divisor, 50 unsigned long long *remainder) 51 { 52 unsigned long long result; 53 54 result = div64_u64_rem(dividend, divisor, remainder); 55 56 return result; 57 } 58 59 60 #define FRACTIONAL_PART_MASK \ 61 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) 62 63 #define GET_INTEGER_PART(x) \ 64 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) 65 66 #define GET_FRACTIONAL_PART(x) \ 67 (FRACTIONAL_PART_MASK & (x)) 68 69 struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator) 70 { 71 struct fixed31_32 res; 72 73 bool arg1_negative = numerator < 0; 74 bool arg2_negative = denominator < 0; 75 76 unsigned long long arg1_value = arg1_negative ? -numerator : numerator; 77 unsigned long long arg2_value = arg2_negative ? -denominator : denominator; 78 79 unsigned long long remainder; 80 81 /* determine integer part */ 82 83 unsigned long long res_value = complete_integer_division_u64( 84 arg1_value, arg2_value, &remainder); 85 86 ASSERT(res_value <= LONG_MAX); 87 88 /* determine fractional part */ 89 { 90 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; 91 92 do { 93 remainder <<= 1; 94 95 res_value <<= 1; 96 97 if (remainder >= arg2_value) { 98 res_value |= 1; 99 remainder -= arg2_value; 100 } 101 } while (--i != 0); 102 } 103 104 /* round up LSB */ 105 { 106 unsigned long long summand = (remainder << 1) >= arg2_value; 107 108 ASSERT(res_value <= LLONG_MAX - summand); 109 110 res_value += summand; 111 } 112 113 res.value = (long long)res_value; 114 115 if (arg1_negative ^ arg2_negative) 116 res.value = -res.value; 117 118 return res; 119 } 120 121 struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2) 122 { 123 struct fixed31_32 res; 124 125 bool arg1_negative = arg1.value < 0; 126 bool arg2_negative = arg2.value < 0; 127 128 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; 129 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; 130 131 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); 132 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); 133 134 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); 135 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); 136 137 unsigned long long tmp; 138 139 res.value = arg1_int * arg2_int; 140 141 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 142 143 tmp = arg1_int * arg2_fra; 144 145 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 146 147 res.value += tmp; 148 149 tmp = arg2_int * arg1_fra; 150 151 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 152 153 res.value += tmp; 154 155 tmp = arg1_fra * arg2_fra; 156 157 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 158 (tmp >= (unsigned long long)dc_fixpt_half.value); 159 160 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 161 162 res.value += tmp; 163 164 if (arg1_negative ^ arg2_negative) 165 res.value = -res.value; 166 167 return res; 168 } 169 170 struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg) 171 { 172 struct fixed31_32 res; 173 174 unsigned long long arg_value = abs_i64(arg.value); 175 176 unsigned long long arg_int = GET_INTEGER_PART(arg_value); 177 178 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); 179 180 unsigned long long tmp; 181 182 res.value = arg_int * arg_int; 183 184 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 185 186 tmp = arg_int * arg_fra; 187 188 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 189 190 res.value += tmp; 191 192 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 193 194 res.value += tmp; 195 196 tmp = arg_fra * arg_fra; 197 198 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 199 (tmp >= (unsigned long long)dc_fixpt_half.value); 200 201 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 202 203 res.value += tmp; 204 205 return res; 206 } 207 208 struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) 209 { 210 /* 211 * @note 212 * Good idea to use Newton's method 213 */ 214 return dc_fixpt_from_fraction( 215 dc_fixpt_one.value, 216 arg.value); 217 } 218 219 struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg) 220 { 221 struct fixed31_32 square; 222 223 struct fixed31_32 res = dc_fixpt_one; 224 225 int n = 27; 226 227 struct fixed31_32 arg_norm = arg; 228 229 if (dc_fixpt_le( 230 dc_fixpt_two_pi, 231 dc_fixpt_abs(arg))) { 232 arg_norm = dc_fixpt_sub( 233 arg_norm, 234 dc_fixpt_mul_int( 235 dc_fixpt_two_pi, 236 (int)div64_s64( 237 arg_norm.value, 238 dc_fixpt_two_pi.value))); 239 } 240 241 square = dc_fixpt_sqr(arg_norm); 242 243 do { 244 res = dc_fixpt_sub( 245 dc_fixpt_one, 246 dc_fixpt_div_int( 247 dc_fixpt_mul( 248 square, 249 res), 250 n * (n - 1))); 251 252 n -= 2; 253 } while (n > 2); 254 255 if (arg.value != arg_norm.value) 256 res = dc_fixpt_div( 257 dc_fixpt_mul(res, arg_norm), 258 arg); 259 260 return res; 261 } 262 263 struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg) 264 { 265 return dc_fixpt_mul( 266 arg, 267 dc_fixpt_sinc(arg)); 268 } 269 270 struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) 271 { 272 /* TODO implement argument normalization */ 273 274 const struct fixed31_32 square = dc_fixpt_sqr(arg); 275 276 struct fixed31_32 res = dc_fixpt_one; 277 278 int n = 26; 279 280 do { 281 res = dc_fixpt_sub( 282 dc_fixpt_one, 283 dc_fixpt_div_int( 284 dc_fixpt_mul( 285 square, 286 res), 287 n * (n - 1))); 288 289 n -= 2; 290 } while (n != 0); 291 292 return res; 293 } 294 295 /* 296 * @brief 297 * result = exp(arg), 298 * where abs(arg) < 1 299 * 300 * Calculated as Taylor series. 301 */ 302 static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg) 303 { 304 unsigned int n = 9; 305 306 struct fixed31_32 res = dc_fixpt_from_fraction( 307 n + 2, 308 n + 1); 309 /* TODO find correct res */ 310 311 ASSERT(dc_fixpt_lt(arg, dc_fixpt_one)); 312 313 do 314 res = dc_fixpt_add( 315 dc_fixpt_one, 316 dc_fixpt_div_int( 317 dc_fixpt_mul( 318 arg, 319 res), 320 n)); 321 while (--n != 1); 322 323 return dc_fixpt_add( 324 dc_fixpt_one, 325 dc_fixpt_mul( 326 arg, 327 res)); 328 } 329 330 struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg) 331 { 332 /* 333 * @brief 334 * Main equation is: 335 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), 336 * where m = round(x / ln(2)), r = x - m * ln(2) 337 */ 338 339 if (dc_fixpt_le( 340 dc_fixpt_ln2_div_2, 341 dc_fixpt_abs(arg))) { 342 int m = dc_fixpt_round( 343 dc_fixpt_div( 344 arg, 345 dc_fixpt_ln2)); 346 347 struct fixed31_32 r = dc_fixpt_sub( 348 arg, 349 dc_fixpt_mul_int( 350 dc_fixpt_ln2, 351 m)); 352 353 ASSERT(m != 0); 354 355 ASSERT(dc_fixpt_lt( 356 dc_fixpt_abs(r), 357 dc_fixpt_one)); 358 359 if (m > 0) 360 return dc_fixpt_shl( 361 fixed31_32_exp_from_taylor_series(r), 362 (unsigned char)m); 363 else 364 return dc_fixpt_div_int( 365 fixed31_32_exp_from_taylor_series(r), 366 1LL << -m); 367 } else if (arg.value != 0) 368 return fixed31_32_exp_from_taylor_series(arg); 369 else 370 return dc_fixpt_one; 371 } 372 373 struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg) 374 { 375 struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one); 376 /* TODO improve 1st estimation */ 377 378 struct fixed31_32 error; 379 380 ASSERT(arg.value > 0); 381 /* TODO if arg is negative, return NaN */ 382 /* TODO if arg is zero, return -INF */ 383 384 do { 385 struct fixed31_32 res1 = dc_fixpt_add( 386 dc_fixpt_sub( 387 res, 388 dc_fixpt_one), 389 dc_fixpt_div( 390 arg, 391 dc_fixpt_exp(res))); 392 393 error = dc_fixpt_sub( 394 res, 395 res1); 396 397 res = res1; 398 /* TODO determine max_allowed_error based on quality of exp() */ 399 } while (abs_i64(error.value) > 100ULL); 400 401 return res; 402 } 403 404 405 /* this function is a generic helper to translate fixed point value to 406 * specified integer format that will consist of integer_bits integer part and 407 * fractional_bits fractional part. For example it is used in 408 * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional 409 * part in 32 bits. It is used in hw programming (scaler) 410 */ 411 412 static inline unsigned int ux_dy( 413 long long value, 414 unsigned int integer_bits, 415 unsigned int fractional_bits) 416 { 417 /* 1. create mask of integer part */ 418 unsigned int result = (1 << integer_bits) - 1; 419 /* 2. mask out fractional part */ 420 unsigned int fractional_part = FRACTIONAL_PART_MASK & value; 421 /* 3. shrink fixed point integer part to be of integer_bits width*/ 422 result &= GET_INTEGER_PART(value); 423 /* 4. make space for fractional part to be filled in after integer */ 424 result <<= fractional_bits; 425 /* 5. shrink fixed point fractional part to of fractional_bits width*/ 426 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; 427 /* 6. merge the result */ 428 return result | fractional_part; 429 } 430 431 static inline unsigned int clamp_ux_dy( 432 long long value, 433 unsigned int integer_bits, 434 unsigned int fractional_bits, 435 unsigned int min_clamp) 436 { 437 unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); 438 439 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) 440 return (1 << (integer_bits + fractional_bits)) - 1; 441 else if (truncated_val > min_clamp) 442 return truncated_val; 443 else 444 return min_clamp; 445 } 446 447 unsigned int dc_fixpt_u4d19(struct fixed31_32 arg) 448 { 449 return ux_dy(arg.value, 4, 19); 450 } 451 452 unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) 453 { 454 return ux_dy(arg.value, 3, 19); 455 } 456 457 unsigned int dc_fixpt_u2d19(struct fixed31_32 arg) 458 { 459 return ux_dy(arg.value, 2, 19); 460 } 461 462 unsigned int dc_fixpt_u0d19(struct fixed31_32 arg) 463 { 464 return ux_dy(arg.value, 0, 19); 465 } 466 467 unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg) 468 { 469 return clamp_ux_dy(arg.value, 0, 14, 1); 470 } 471 472 unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg) 473 { 474 return clamp_ux_dy(arg.value, 0, 10, 1); 475 } 476 477 int dc_fixpt_s4d19(struct fixed31_32 arg) 478 { 479 if (arg.value < 0) 480 return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19); 481 else 482 return ux_dy(arg.value, 4, 19); 483 } 484 485 struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, 486 unsigned int integer_bits, 487 unsigned int fractional_bits) 488 { 489 struct fixed31_32 fixpt_value = dc_fixpt_zero; 490 struct fixed31_32 fixpt_int_value = dc_fixpt_zero; 491 long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; 492 493 fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); 494 frac_mask = frac_mask << fractional_bits; 495 fixpt_int_value.value = value & frac_mask; 496 fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); 497 fixpt_value.value |= fixpt_int_value.value; 498 return fixpt_value; 499 } 500 501 struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, 502 unsigned int frac_value, 503 unsigned int integer_bits, 504 unsigned int fractional_bits) 505 { 506 struct fixed31_32 fixpt_value = dc_fixpt_from_int(int_value); 507 508 fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); 509 return fixpt_value; 510 } 511