Home | History | Annotate | Line # | Download | only in basics
      1 /*	$NetBSD: amdgpu_fixpt31_32.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2012-15 Advanced Micro Devices, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22  * OTHER DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors: AMD
     25  *
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: amdgpu_fixpt31_32.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $");
     30 
     31 #include "dm_services.h"
     32 #include "include/fixed31_32.h"
     33 
     34 static inline unsigned long long abs_i64(
     35 	long long arg)
     36 {
     37 	if (arg > 0)
     38 		return (unsigned long long)arg;
     39 	else
     40 		return (unsigned long long)(-arg);
     41 }
     42 
     43 /*
     44  * @brief
     45  * result = dividend / divisor
     46  * *remainder = dividend % divisor
     47  */
     48 static inline unsigned long long complete_integer_division_u64(
     49 	unsigned long long dividend,
     50 	unsigned long long divisor,
     51 	unsigned long long *remainder)
     52 {
     53 	unsigned long long result;
     54 	uint64_t r64;
     55 
     56 	ASSERT(divisor);
     57 
     58 	result = div64_u64_rem(dividend, divisor, &r64);
     59 	*remainder = r64;
     60 
     61 	return result;
     62 }
     63 
     64 
     65 #define FRACTIONAL_PART_MASK \
     66 	((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
     67 
     68 #define GET_INTEGER_PART(x) \
     69 	((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
     70 
     71 #define GET_FRACTIONAL_PART(x) \
     72 	(FRACTIONAL_PART_MASK & (x))
     73 
     74 struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator)
     75 {
     76 	struct fixed31_32 res;
     77 
     78 	bool arg1_negative = numerator < 0;
     79 	bool arg2_negative = denominator < 0;
     80 
     81 	unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
     82 	unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
     83 
     84 	unsigned long long remainder;
     85 
     86 	/* determine integer part */
     87 
     88 	unsigned long long res_value = complete_integer_division_u64(
     89 		arg1_value, arg2_value, &remainder);
     90 
     91 	ASSERT(res_value <= LONG_MAX);
     92 
     93 	/* determine fractional part */
     94 	{
     95 		unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
     96 
     97 		do {
     98 			remainder <<= 1;
     99 
    100 			res_value <<= 1;
    101 
    102 			if (remainder >= arg2_value) {
    103 				res_value |= 1;
    104 				remainder -= arg2_value;
    105 			}
    106 		} while (--i != 0);
    107 	}
    108 
    109 	/* round up LSB */
    110 	{
    111 		unsigned long long summand = (remainder << 1) >= arg2_value;
    112 
    113 		ASSERT(res_value <= LLONG_MAX - summand);
    114 
    115 		res_value += summand;
    116 	}
    117 
    118 	res.value = (long long)res_value;
    119 
    120 	if (arg1_negative ^ arg2_negative)
    121 		res.value = -res.value;
    122 
    123 	return res;
    124 }
    125 
    126 struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
    127 {
    128 	struct fixed31_32 res;
    129 
    130 	bool arg1_negative = arg1.value < 0;
    131 	bool arg2_negative = arg2.value < 0;
    132 
    133 	unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
    134 	unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
    135 
    136 	unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
    137 	unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
    138 
    139 	unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
    140 	unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
    141 
    142 	unsigned long long tmp;
    143 
    144 	res.value = arg1_int * arg2_int;
    145 
    146 	ASSERT(res.value <= LONG_MAX);
    147 
    148 	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
    149 
    150 	tmp = arg1_int * arg2_fra;
    151 
    152 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
    153 
    154 	res.value += tmp;
    155 
    156 	tmp = arg2_int * arg1_fra;
    157 
    158 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
    159 
    160 	res.value += tmp;
    161 
    162 	tmp = arg1_fra * arg2_fra;
    163 
    164 	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
    165 		(tmp >= (unsigned long long)dc_fixpt_half.value);
    166 
    167 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
    168 
    169 	res.value += tmp;
    170 
    171 	if (arg1_negative ^ arg2_negative)
    172 		res.value = -res.value;
    173 
    174 	return res;
    175 }
    176 
    177 struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg)
    178 {
    179 	struct fixed31_32 res;
    180 
    181 	unsigned long long arg_value = abs_i64(arg.value);
    182 
    183 	unsigned long long arg_int = GET_INTEGER_PART(arg_value);
    184 
    185 	unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
    186 
    187 	unsigned long long tmp;
    188 
    189 	res.value = arg_int * arg_int;
    190 
    191 	ASSERT(res.value <= LONG_MAX);
    192 
    193 	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
    194 
    195 	tmp = arg_int * arg_fra;
    196 
    197 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
    198 
    199 	res.value += tmp;
    200 
    201 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
    202 
    203 	res.value += tmp;
    204 
    205 	tmp = arg_fra * arg_fra;
    206 
    207 	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
    208 		(tmp >= (unsigned long long)dc_fixpt_half.value);
    209 
    210 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
    211 
    212 	res.value += tmp;
    213 
    214 	return res;
    215 }
    216 
    217 struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg)
    218 {
    219 	/*
    220 	 * @note
    221 	 * Good idea to use Newton's method
    222 	 */
    223 
    224 	ASSERT(arg.value);
    225 
    226 	return dc_fixpt_from_fraction(
    227 		dc_fixpt_one.value,
    228 		arg.value);
    229 }
    230 
    231 struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg)
    232 {
    233 	struct fixed31_32 square;
    234 
    235 	struct fixed31_32 res = dc_fixpt_one;
    236 
    237 	int n = 27;
    238 
    239 	struct fixed31_32 arg_norm = arg;
    240 
    241 	if (dc_fixpt_le(
    242 		dc_fixpt_two_pi,
    243 		dc_fixpt_abs(arg))) {
    244 		arg_norm = dc_fixpt_sub(
    245 			arg_norm,
    246 			dc_fixpt_mul_int(
    247 				dc_fixpt_two_pi,
    248 				(int)div64_s64(
    249 					arg_norm.value,
    250 					dc_fixpt_two_pi.value)));
    251 	}
    252 
    253 	square = dc_fixpt_sqr(arg_norm);
    254 
    255 	do {
    256 		res = dc_fixpt_sub(
    257 			dc_fixpt_one,
    258 			dc_fixpt_div_int(
    259 				dc_fixpt_mul(
    260 					square,
    261 					res),
    262 				n * (n - 1)));
    263 
    264 		n -= 2;
    265 	} while (n > 2);
    266 
    267 	if (arg.value != arg_norm.value)
    268 		res = dc_fixpt_div(
    269 			dc_fixpt_mul(res, arg_norm),
    270 			arg);
    271 
    272 	return res;
    273 }
    274 
    275 struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg)
    276 {
    277 	return dc_fixpt_mul(
    278 		arg,
    279 		dc_fixpt_sinc(arg));
    280 }
    281 
    282 struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg)
    283 {
    284 	/* TODO implement argument normalization */
    285 
    286 	const struct fixed31_32 square = dc_fixpt_sqr(arg);
    287 
    288 	struct fixed31_32 res = dc_fixpt_one;
    289 
    290 	int n = 26;
    291 
    292 	do {
    293 		res = dc_fixpt_sub(
    294 			dc_fixpt_one,
    295 			dc_fixpt_div_int(
    296 				dc_fixpt_mul(
    297 					square,
    298 					res),
    299 				n * (n - 1)));
    300 
    301 		n -= 2;
    302 	} while (n != 0);
    303 
    304 	return res;
    305 }
    306 
    307 /*
    308  * @brief
    309  * result = exp(arg),
    310  * where abs(arg) < 1
    311  *
    312  * Calculated as Taylor series.
    313  */
    314 static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg)
    315 {
    316 	unsigned int n = 9;
    317 
    318 	struct fixed31_32 res = dc_fixpt_from_fraction(
    319 		n + 2,
    320 		n + 1);
    321 	/* TODO find correct res */
    322 
    323 	ASSERT(dc_fixpt_lt(arg, dc_fixpt_one));
    324 
    325 	do
    326 		res = dc_fixpt_add(
    327 			dc_fixpt_one,
    328 			dc_fixpt_div_int(
    329 				dc_fixpt_mul(
    330 					arg,
    331 					res),
    332 				n));
    333 	while (--n != 1);
    334 
    335 	return dc_fixpt_add(
    336 		dc_fixpt_one,
    337 		dc_fixpt_mul(
    338 			arg,
    339 			res));
    340 }
    341 
    342 struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg)
    343 {
    344 	/*
    345 	 * @brief
    346 	 * Main equation is:
    347 	 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
    348 	 * where m = round(x / ln(2)), r = x - m * ln(2)
    349 	 */
    350 
    351 	if (dc_fixpt_le(
    352 		dc_fixpt_ln2_div_2,
    353 		dc_fixpt_abs(arg))) {
    354 		int m = dc_fixpt_round(
    355 			dc_fixpt_div(
    356 				arg,
    357 				dc_fixpt_ln2));
    358 
    359 		struct fixed31_32 r = dc_fixpt_sub(
    360 			arg,
    361 			dc_fixpt_mul_int(
    362 				dc_fixpt_ln2,
    363 				m));
    364 
    365 		ASSERT(m != 0);
    366 
    367 		ASSERT(dc_fixpt_lt(
    368 			dc_fixpt_abs(r),
    369 			dc_fixpt_one));
    370 
    371 		if (m > 0)
    372 			return dc_fixpt_shl(
    373 				fixed31_32_exp_from_taylor_series(r),
    374 				(unsigned char)m);
    375 		else
    376 			return dc_fixpt_div_int(
    377 				fixed31_32_exp_from_taylor_series(r),
    378 				1LL << -m);
    379 	} else if (arg.value != 0)
    380 		return fixed31_32_exp_from_taylor_series(arg);
    381 	else
    382 		return dc_fixpt_one;
    383 }
    384 
    385 struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg)
    386 {
    387 	struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one);
    388 	/* TODO improve 1st estimation */
    389 
    390 	struct fixed31_32 error;
    391 
    392 	ASSERT(arg.value > 0);
    393 	/* TODO if arg is negative, return NaN */
    394 	/* TODO if arg is zero, return -INF */
    395 
    396 	do {
    397 		struct fixed31_32 res1 = dc_fixpt_add(
    398 			dc_fixpt_sub(
    399 				res,
    400 				dc_fixpt_one),
    401 			dc_fixpt_div(
    402 				arg,
    403 				dc_fixpt_exp(res)));
    404 
    405 		error = dc_fixpt_sub(
    406 			res,
    407 			res1);
    408 
    409 		res = res1;
    410 		/* TODO determine max_allowed_error based on quality of exp() */
    411 	} while (abs_i64(error.value) > 100ULL);
    412 
    413 	return res;
    414 }
    415 
    416 
    417 /* this function is a generic helper to translate fixed point value to
    418  * specified integer format that will consist of integer_bits integer part and
    419  * fractional_bits fractional part. For example it is used in
    420  * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
    421  * part in 32 bits. It is used in hw programming (scaler)
    422  */
    423 
    424 static inline unsigned int ux_dy(
    425 	long long value,
    426 	unsigned int integer_bits,
    427 	unsigned int fractional_bits)
    428 {
    429 	/* 1. create mask of integer part */
    430 	unsigned int result = (1 << integer_bits) - 1;
    431 	/* 2. mask out fractional part */
    432 	unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
    433 	/* 3. shrink fixed point integer part to be of integer_bits width*/
    434 	result &= GET_INTEGER_PART(value);
    435 	/* 4. make space for fractional part to be filled in after integer */
    436 	result <<= fractional_bits;
    437 	/* 5. shrink fixed point fractional part to of fractional_bits width*/
    438 	fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
    439 	/* 6. merge the result */
    440 	return result | fractional_part;
    441 }
    442 
    443 static inline unsigned int clamp_ux_dy(
    444 	long long value,
    445 	unsigned int integer_bits,
    446 	unsigned int fractional_bits,
    447 	unsigned int min_clamp)
    448 {
    449 	unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);
    450 
    451 	if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
    452 		return (1 << (integer_bits + fractional_bits)) - 1;
    453 	else if (truncated_val > min_clamp)
    454 		return truncated_val;
    455 	else
    456 		return min_clamp;
    457 }
    458 
    459 unsigned int dc_fixpt_u4d19(struct fixed31_32 arg)
    460 {
    461 	return ux_dy(arg.value, 4, 19);
    462 }
    463 
    464 unsigned int dc_fixpt_u3d19(struct fixed31_32 arg)
    465 {
    466 	return ux_dy(arg.value, 3, 19);
    467 }
    468 
    469 unsigned int dc_fixpt_u2d19(struct fixed31_32 arg)
    470 {
    471 	return ux_dy(arg.value, 2, 19);
    472 }
    473 
    474 unsigned int dc_fixpt_u0d19(struct fixed31_32 arg)
    475 {
    476 	return ux_dy(arg.value, 0, 19);
    477 }
    478 
    479 unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg)
    480 {
    481 	return clamp_ux_dy(arg.value, 0, 14, 1);
    482 }
    483 
    484 unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg)
    485 {
    486 	return clamp_ux_dy(arg.value, 0, 10, 1);
    487 }
    488 
    489 int dc_fixpt_s4d19(struct fixed31_32 arg)
    490 {
    491 	if (arg.value < 0)
    492 		return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19);
    493 	else
    494 		return ux_dy(arg.value, 4, 19);
    495 }
    496