1 /* 2 * Copyright 2012-15 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dm_services.h" 27 #include "include/fixed31_32.h" 28 29 static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL }; 30 static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL }; 31 static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL }; 32 33 static inline unsigned long long abs_i64( 34 long long arg) 35 { 36 if (arg > 0) 37 return (unsigned long long)arg; 38 else 39 return (unsigned long long)(-arg); 40 } 41 42 /* 43 * @brief 44 * result = dividend / divisor 45 * *remainder = dividend % divisor 46 */ 47 static inline unsigned long long complete_integer_division_u64( 48 unsigned long long dividend, 49 unsigned long long divisor, 50 unsigned long long *remainder) 51 { 52 unsigned long long result; 53 54 ASSERT(divisor); 55 56 result = div64_u64_rem(dividend, divisor, remainder); 57 58 return result; 59 } 60 61 62 #define FRACTIONAL_PART_MASK \ 63 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) 64 65 #define GET_INTEGER_PART(x) \ 66 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) 67 68 #define GET_FRACTIONAL_PART(x) \ 69 (FRACTIONAL_PART_MASK & (x)) 70 71 struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator) 72 { 73 struct fixed31_32 res; 74 75 bool arg1_negative = numerator < 0; 76 bool arg2_negative = denominator < 0; 77 78 unsigned long long arg1_value = arg1_negative ? -numerator : numerator; 79 unsigned long long arg2_value = arg2_negative ? -denominator : denominator; 80 81 unsigned long long remainder; 82 83 /* determine integer part */ 84 85 unsigned long long res_value = complete_integer_division_u64( 86 arg1_value, arg2_value, &remainder); 87 88 ASSERT(res_value <= LONG_MAX); 89 90 /* determine fractional part */ 91 { 92 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; 93 94 do { 95 remainder <<= 1; 96 97 res_value <<= 1; 98 99 if (remainder >= arg2_value) { 100 res_value |= 1; 101 remainder -= arg2_value; 102 } 103 } while (--i != 0); 104 } 105 106 /* round up LSB */ 107 { 108 unsigned long long summand = (remainder << 1) >= arg2_value; 109 110 ASSERT(res_value <= LLONG_MAX - summand); 111 112 res_value += summand; 113 } 114 115 res.value = (long long)res_value; 116 117 if (arg1_negative ^ arg2_negative) 118 res.value = -res.value; 119 120 return res; 121 } 122 123 struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2) 124 { 125 struct fixed31_32 res; 126 127 bool arg1_negative = arg1.value < 0; 128 bool arg2_negative = arg2.value < 0; 129 130 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; 131 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; 132 133 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); 134 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); 135 136 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); 137 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); 138 139 unsigned long long tmp; 140 141 res.value = arg1_int * arg2_int; 142 143 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 144 145 tmp = arg1_int * arg2_fra; 146 147 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 148 149 res.value += tmp; 150 151 tmp = arg2_int * arg1_fra; 152 153 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 154 155 res.value += tmp; 156 157 tmp = arg1_fra * arg2_fra; 158 159 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 160 (tmp >= (unsigned long long)dc_fixpt_half.value); 161 162 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 163 164 res.value += tmp; 165 166 if (arg1_negative ^ arg2_negative) 167 res.value = -res.value; 168 169 return res; 170 } 171 172 struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg) 173 { 174 struct fixed31_32 res; 175 176 unsigned long long arg_value = abs_i64(arg.value); 177 178 unsigned long long arg_int = GET_INTEGER_PART(arg_value); 179 180 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); 181 182 unsigned long long tmp; 183 184 res.value = arg_int * arg_int; 185 186 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 187 188 tmp = arg_int * arg_fra; 189 190 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 191 192 res.value += tmp; 193 194 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 195 196 res.value += tmp; 197 198 tmp = arg_fra * arg_fra; 199 200 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 201 (tmp >= (unsigned long long)dc_fixpt_half.value); 202 203 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 204 205 res.value += tmp; 206 207 return res; 208 } 209 210 struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) 211 { 212 /* 213 * @note 214 * Good idea to use Newton's method 215 */ 216 217 ASSERT(arg.value); 218 219 return dc_fixpt_from_fraction( 220 dc_fixpt_one.value, 221 arg.value); 222 } 223 224 struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg) 225 { 226 struct fixed31_32 square; 227 228 struct fixed31_32 res = dc_fixpt_one; 229 230 int n = 27; 231 232 struct fixed31_32 arg_norm = arg; 233 234 if (dc_fixpt_le( 235 dc_fixpt_two_pi, 236 dc_fixpt_abs(arg))) { 237 arg_norm = dc_fixpt_sub( 238 arg_norm, 239 dc_fixpt_mul_int( 240 dc_fixpt_two_pi, 241 (int)div64_s64( 242 arg_norm.value, 243 dc_fixpt_two_pi.value))); 244 } 245 246 square = dc_fixpt_sqr(arg_norm); 247 248 do { 249 res = dc_fixpt_sub( 250 dc_fixpt_one, 251 dc_fixpt_div_int( 252 dc_fixpt_mul( 253 square, 254 res), 255 n * (n - 1))); 256 257 n -= 2; 258 } while (n > 2); 259 260 if (arg.value != arg_norm.value) 261 res = dc_fixpt_div( 262 dc_fixpt_mul(res, arg_norm), 263 arg); 264 265 return res; 266 } 267 268 struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg) 269 { 270 return dc_fixpt_mul( 271 arg, 272 dc_fixpt_sinc(arg)); 273 } 274 275 struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) 276 { 277 /* TODO implement argument normalization */ 278 279 const struct fixed31_32 square = dc_fixpt_sqr(arg); 280 281 struct fixed31_32 res = dc_fixpt_one; 282 283 int n = 26; 284 285 do { 286 res = dc_fixpt_sub( 287 dc_fixpt_one, 288 dc_fixpt_div_int( 289 dc_fixpt_mul( 290 square, 291 res), 292 n * (n - 1))); 293 294 n -= 2; 295 } while (n != 0); 296 297 return res; 298 } 299 300 /* 301 * @brief 302 * result = exp(arg), 303 * where abs(arg) < 1 304 * 305 * Calculated as Taylor series. 306 */ 307 static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg) 308 { 309 unsigned int n = 9; 310 311 struct fixed31_32 res = dc_fixpt_from_fraction( 312 n + 2, 313 n + 1); 314 /* TODO find correct res */ 315 316 ASSERT(dc_fixpt_lt(arg, dc_fixpt_one)); 317 318 do 319 res = dc_fixpt_add( 320 dc_fixpt_one, 321 dc_fixpt_div_int( 322 dc_fixpt_mul( 323 arg, 324 res), 325 n)); 326 while (--n != 1); 327 328 return dc_fixpt_add( 329 dc_fixpt_one, 330 dc_fixpt_mul( 331 arg, 332 res)); 333 } 334 335 struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg) 336 { 337 /* 338 * @brief 339 * Main equation is: 340 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), 341 * where m = round(x / ln(2)), r = x - m * ln(2) 342 */ 343 344 if (dc_fixpt_le( 345 dc_fixpt_ln2_div_2, 346 dc_fixpt_abs(arg))) { 347 int m = dc_fixpt_round( 348 dc_fixpt_div( 349 arg, 350 dc_fixpt_ln2)); 351 352 struct fixed31_32 r = dc_fixpt_sub( 353 arg, 354 dc_fixpt_mul_int( 355 dc_fixpt_ln2, 356 m)); 357 358 ASSERT(m != 0); 359 360 ASSERT(dc_fixpt_lt( 361 dc_fixpt_abs(r), 362 dc_fixpt_one)); 363 364 if (m > 0) 365 return dc_fixpt_shl( 366 fixed31_32_exp_from_taylor_series(r), 367 (unsigned char)m); 368 else 369 return dc_fixpt_div_int( 370 fixed31_32_exp_from_taylor_series(r), 371 1LL << -m); 372 } else if (arg.value != 0) 373 return fixed31_32_exp_from_taylor_series(arg); 374 else 375 return dc_fixpt_one; 376 } 377 378 struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg) 379 { 380 struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one); 381 /* TODO improve 1st estimation */ 382 383 struct fixed31_32 error; 384 385 ASSERT(arg.value > 0); 386 /* TODO if arg is negative, return NaN */ 387 /* TODO if arg is zero, return -INF */ 388 389 do { 390 struct fixed31_32 res1 = dc_fixpt_add( 391 dc_fixpt_sub( 392 res, 393 dc_fixpt_one), 394 dc_fixpt_div( 395 arg, 396 dc_fixpt_exp(res))); 397 398 error = dc_fixpt_sub( 399 res, 400 res1); 401 402 res = res1; 403 /* TODO determine max_allowed_error based on quality of exp() */ 404 } while (abs_i64(error.value) > 100ULL); 405 406 return res; 407 } 408 409 410 /* this function is a generic helper to translate fixed point value to 411 * specified integer format that will consist of integer_bits integer part and 412 * fractional_bits fractional part. For example it is used in 413 * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional 414 * part in 32 bits. It is used in hw programming (scaler) 415 */ 416 417 static inline unsigned int ux_dy( 418 long long value, 419 unsigned int integer_bits, 420 unsigned int fractional_bits) 421 { 422 /* 1. create mask of integer part */ 423 unsigned int result = (1 << integer_bits) - 1; 424 /* 2. mask out fractional part */ 425 unsigned int fractional_part = FRACTIONAL_PART_MASK & value; 426 /* 3. shrink fixed point integer part to be of integer_bits width*/ 427 result &= GET_INTEGER_PART(value); 428 /* 4. make space for fractional part to be filled in after integer */ 429 result <<= fractional_bits; 430 /* 5. shrink fixed point fractional part to of fractional_bits width*/ 431 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; 432 /* 6. merge the result */ 433 return result | fractional_part; 434 } 435 436 static inline unsigned int clamp_ux_dy( 437 long long value, 438 unsigned int integer_bits, 439 unsigned int fractional_bits, 440 unsigned int min_clamp) 441 { 442 unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); 443 444 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) 445 return (1 << (integer_bits + fractional_bits)) - 1; 446 else if (truncated_val > min_clamp) 447 return truncated_val; 448 else 449 return min_clamp; 450 } 451 452 unsigned int dc_fixpt_u4d19(struct fixed31_32 arg) 453 { 454 return ux_dy(arg.value, 4, 19); 455 } 456 457 unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) 458 { 459 return ux_dy(arg.value, 3, 19); 460 } 461 462 unsigned int dc_fixpt_u2d19(struct fixed31_32 arg) 463 { 464 return ux_dy(arg.value, 2, 19); 465 } 466 467 unsigned int dc_fixpt_u0d19(struct fixed31_32 arg) 468 { 469 return ux_dy(arg.value, 0, 19); 470 } 471 472 unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg) 473 { 474 return clamp_ux_dy(arg.value, 0, 14, 1); 475 } 476 477 unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg) 478 { 479 return clamp_ux_dy(arg.value, 0, 10, 1); 480 } 481 482 int dc_fixpt_s4d19(struct fixed31_32 arg) 483 { 484 if (arg.value < 0) 485 return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19); 486 else 487 return ux_dy(arg.value, 4, 19); 488 } 489 490 struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, 491 unsigned int integer_bits, 492 unsigned int fractional_bits) 493 { 494 struct fixed31_32 fixpt_value = dc_fixpt_zero; 495 struct fixed31_32 fixpt_int_value = dc_fixpt_zero; 496 long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; 497 498 fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); 499 frac_mask = frac_mask << fractional_bits; 500 fixpt_int_value.value = value & frac_mask; 501 fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); 502 fixpt_value.value |= fixpt_int_value.value; 503 return fixpt_value; 504 } 505 506 struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, 507 unsigned int frac_value, 508 unsigned int integer_bits, 509 unsigned int fractional_bits) 510 { 511 struct fixed31_32 fixpt_value = dc_fixpt_from_int(int_value); 512 513 fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); 514 return fixpt_value; 515 } 516