1 /* $NetBSD: amdgpu_fixpt31_32.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $ */ 2 3 /* 4 * Copyright 2012-15 Advanced Micro Devices, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: AMD 25 * 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: amdgpu_fixpt31_32.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $"); 30 31 #include "dm_services.h" 32 #include "include/fixed31_32.h" 33 34 static inline unsigned long long abs_i64( 35 long long arg) 36 { 37 if (arg > 0) 38 return (unsigned long long)arg; 39 else 40 return (unsigned long long)(-arg); 41 } 42 43 /* 44 * @brief 45 * result = dividend / divisor 46 * *remainder = dividend % divisor 47 */ 48 static inline unsigned long long complete_integer_division_u64( 49 unsigned long long dividend, 50 unsigned long long divisor, 51 unsigned long long *remainder) 52 { 53 unsigned long long result; 54 uint64_t r64; 55 56 ASSERT(divisor); 57 58 result = div64_u64_rem(dividend, divisor, &r64); 59 *remainder = r64; 60 61 return result; 62 } 63 64 65 #define FRACTIONAL_PART_MASK \ 66 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) 67 68 #define GET_INTEGER_PART(x) \ 69 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) 70 71 #define GET_FRACTIONAL_PART(x) \ 72 (FRACTIONAL_PART_MASK & (x)) 73 74 struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator) 75 { 76 struct fixed31_32 res; 77 78 bool arg1_negative = numerator < 0; 79 bool arg2_negative = denominator < 0; 80 81 unsigned long long arg1_value = arg1_negative ? -numerator : numerator; 82 unsigned long long arg2_value = arg2_negative ? -denominator : denominator; 83 84 unsigned long long remainder; 85 86 /* determine integer part */ 87 88 unsigned long long res_value = complete_integer_division_u64( 89 arg1_value, arg2_value, &remainder); 90 91 ASSERT(res_value <= LONG_MAX); 92 93 /* determine fractional part */ 94 { 95 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; 96 97 do { 98 remainder <<= 1; 99 100 res_value <<= 1; 101 102 if (remainder >= arg2_value) { 103 res_value |= 1; 104 remainder -= arg2_value; 105 } 106 } while (--i != 0); 107 } 108 109 /* round up LSB */ 110 { 111 unsigned long long summand = (remainder << 1) >= arg2_value; 112 113 ASSERT(res_value <= LLONG_MAX - summand); 114 115 res_value += summand; 116 } 117 118 res.value = (long long)res_value; 119 120 if (arg1_negative ^ arg2_negative) 121 res.value = -res.value; 122 123 return res; 124 } 125 126 struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2) 127 { 128 struct fixed31_32 res; 129 130 bool arg1_negative = arg1.value < 0; 131 bool arg2_negative = arg2.value < 0; 132 133 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; 134 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; 135 136 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); 137 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); 138 139 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); 140 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); 141 142 unsigned long long tmp; 143 144 res.value = arg1_int * arg2_int; 145 146 ASSERT(res.value <= LONG_MAX); 147 148 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 149 150 tmp = arg1_int * arg2_fra; 151 152 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 153 154 res.value += tmp; 155 156 tmp = arg2_int * arg1_fra; 157 158 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 159 160 res.value += tmp; 161 162 tmp = arg1_fra * arg2_fra; 163 164 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 165 (tmp >= (unsigned long long)dc_fixpt_half.value); 166 167 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 168 169 res.value += tmp; 170 171 if (arg1_negative ^ arg2_negative) 172 res.value = -res.value; 173 174 return res; 175 } 176 177 struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg) 178 { 179 struct fixed31_32 res; 180 181 unsigned long long arg_value = abs_i64(arg.value); 182 183 unsigned long long arg_int = GET_INTEGER_PART(arg_value); 184 185 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); 186 187 unsigned long long tmp; 188 189 res.value = arg_int * arg_int; 190 191 ASSERT(res.value <= LONG_MAX); 192 193 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 194 195 tmp = arg_int * arg_fra; 196 197 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 198 199 res.value += tmp; 200 201 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 202 203 res.value += tmp; 204 205 tmp = arg_fra * arg_fra; 206 207 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 208 (tmp >= (unsigned long long)dc_fixpt_half.value); 209 210 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 211 212 res.value += tmp; 213 214 return res; 215 } 216 217 struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) 218 { 219 /* 220 * @note 221 * Good idea to use Newton's method 222 */ 223 224 ASSERT(arg.value); 225 226 return dc_fixpt_from_fraction( 227 dc_fixpt_one.value, 228 arg.value); 229 } 230 231 struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg) 232 { 233 struct fixed31_32 square; 234 235 struct fixed31_32 res = dc_fixpt_one; 236 237 int n = 27; 238 239 struct fixed31_32 arg_norm = arg; 240 241 if (dc_fixpt_le( 242 dc_fixpt_two_pi, 243 dc_fixpt_abs(arg))) { 244 arg_norm = dc_fixpt_sub( 245 arg_norm, 246 dc_fixpt_mul_int( 247 dc_fixpt_two_pi, 248 (int)div64_s64( 249 arg_norm.value, 250 dc_fixpt_two_pi.value))); 251 } 252 253 square = dc_fixpt_sqr(arg_norm); 254 255 do { 256 res = dc_fixpt_sub( 257 dc_fixpt_one, 258 dc_fixpt_div_int( 259 dc_fixpt_mul( 260 square, 261 res), 262 n * (n - 1))); 263 264 n -= 2; 265 } while (n > 2); 266 267 if (arg.value != arg_norm.value) 268 res = dc_fixpt_div( 269 dc_fixpt_mul(res, arg_norm), 270 arg); 271 272 return res; 273 } 274 275 struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg) 276 { 277 return dc_fixpt_mul( 278 arg, 279 dc_fixpt_sinc(arg)); 280 } 281 282 struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) 283 { 284 /* TODO implement argument normalization */ 285 286 const struct fixed31_32 square = dc_fixpt_sqr(arg); 287 288 struct fixed31_32 res = dc_fixpt_one; 289 290 int n = 26; 291 292 do { 293 res = dc_fixpt_sub( 294 dc_fixpt_one, 295 dc_fixpt_div_int( 296 dc_fixpt_mul( 297 square, 298 res), 299 n * (n - 1))); 300 301 n -= 2; 302 } while (n != 0); 303 304 return res; 305 } 306 307 /* 308 * @brief 309 * result = exp(arg), 310 * where abs(arg) < 1 311 * 312 * Calculated as Taylor series. 313 */ 314 static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg) 315 { 316 unsigned int n = 9; 317 318 struct fixed31_32 res = dc_fixpt_from_fraction( 319 n + 2, 320 n + 1); 321 /* TODO find correct res */ 322 323 ASSERT(dc_fixpt_lt(arg, dc_fixpt_one)); 324 325 do 326 res = dc_fixpt_add( 327 dc_fixpt_one, 328 dc_fixpt_div_int( 329 dc_fixpt_mul( 330 arg, 331 res), 332 n)); 333 while (--n != 1); 334 335 return dc_fixpt_add( 336 dc_fixpt_one, 337 dc_fixpt_mul( 338 arg, 339 res)); 340 } 341 342 struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg) 343 { 344 /* 345 * @brief 346 * Main equation is: 347 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), 348 * where m = round(x / ln(2)), r = x - m * ln(2) 349 */ 350 351 if (dc_fixpt_le( 352 dc_fixpt_ln2_div_2, 353 dc_fixpt_abs(arg))) { 354 int m = dc_fixpt_round( 355 dc_fixpt_div( 356 arg, 357 dc_fixpt_ln2)); 358 359 struct fixed31_32 r = dc_fixpt_sub( 360 arg, 361 dc_fixpt_mul_int( 362 dc_fixpt_ln2, 363 m)); 364 365 ASSERT(m != 0); 366 367 ASSERT(dc_fixpt_lt( 368 dc_fixpt_abs(r), 369 dc_fixpt_one)); 370 371 if (m > 0) 372 return dc_fixpt_shl( 373 fixed31_32_exp_from_taylor_series(r), 374 (unsigned char)m); 375 else 376 return dc_fixpt_div_int( 377 fixed31_32_exp_from_taylor_series(r), 378 1LL << -m); 379 } else if (arg.value != 0) 380 return fixed31_32_exp_from_taylor_series(arg); 381 else 382 return dc_fixpt_one; 383 } 384 385 struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg) 386 { 387 struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one); 388 /* TODO improve 1st estimation */ 389 390 struct fixed31_32 error; 391 392 ASSERT(arg.value > 0); 393 /* TODO if arg is negative, return NaN */ 394 /* TODO if arg is zero, return -INF */ 395 396 do { 397 struct fixed31_32 res1 = dc_fixpt_add( 398 dc_fixpt_sub( 399 res, 400 dc_fixpt_one), 401 dc_fixpt_div( 402 arg, 403 dc_fixpt_exp(res))); 404 405 error = dc_fixpt_sub( 406 res, 407 res1); 408 409 res = res1; 410 /* TODO determine max_allowed_error based on quality of exp() */ 411 } while (abs_i64(error.value) > 100ULL); 412 413 return res; 414 } 415 416 417 /* this function is a generic helper to translate fixed point value to 418 * specified integer format that will consist of integer_bits integer part and 419 * fractional_bits fractional part. For example it is used in 420 * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional 421 * part in 32 bits. It is used in hw programming (scaler) 422 */ 423 424 static inline unsigned int ux_dy( 425 long long value, 426 unsigned int integer_bits, 427 unsigned int fractional_bits) 428 { 429 /* 1. create mask of integer part */ 430 unsigned int result = (1 << integer_bits) - 1; 431 /* 2. mask out fractional part */ 432 unsigned int fractional_part = FRACTIONAL_PART_MASK & value; 433 /* 3. shrink fixed point integer part to be of integer_bits width*/ 434 result &= GET_INTEGER_PART(value); 435 /* 4. make space for fractional part to be filled in after integer */ 436 result <<= fractional_bits; 437 /* 5. shrink fixed point fractional part to of fractional_bits width*/ 438 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; 439 /* 6. merge the result */ 440 return result | fractional_part; 441 } 442 443 static inline unsigned int clamp_ux_dy( 444 long long value, 445 unsigned int integer_bits, 446 unsigned int fractional_bits, 447 unsigned int min_clamp) 448 { 449 unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); 450 451 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) 452 return (1 << (integer_bits + fractional_bits)) - 1; 453 else if (truncated_val > min_clamp) 454 return truncated_val; 455 else 456 return min_clamp; 457 } 458 459 unsigned int dc_fixpt_u4d19(struct fixed31_32 arg) 460 { 461 return ux_dy(arg.value, 4, 19); 462 } 463 464 unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) 465 { 466 return ux_dy(arg.value, 3, 19); 467 } 468 469 unsigned int dc_fixpt_u2d19(struct fixed31_32 arg) 470 { 471 return ux_dy(arg.value, 2, 19); 472 } 473 474 unsigned int dc_fixpt_u0d19(struct fixed31_32 arg) 475 { 476 return ux_dy(arg.value, 0, 19); 477 } 478 479 unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg) 480 { 481 return clamp_ux_dy(arg.value, 0, 14, 1); 482 } 483 484 unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg) 485 { 486 return clamp_ux_dy(arg.value, 0, 10, 1); 487 } 488 489 int dc_fixpt_s4d19(struct fixed31_32 arg) 490 { 491 if (arg.value < 0) 492 return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19); 493 else 494 return ux_dy(arg.value, 4, 19); 495 } 496