Home | History | Annotate | Line # | Download | only in dsc
      1 /*	$NetBSD: amdgpu_rc_calc.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $	*/
      2 
      3 
      4 /*
      5  * Copyright 2017 Advanced Micro Devices, Inc.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     23  * OTHER DEALINGS IN THE SOFTWARE.
     24  *
     25  * Authors: AMD
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: amdgpu_rc_calc.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $");
     31 
     32 #include "os_types.h"
     33 #include "rc_calc.h"
     34 #include "qp_tables.h"
     35 
/*
 * Pack a (mode, bpc, max_min) triple into a single integer usable as a
 * switch selector: mode goes in bits 16 and up, bpc in bits 8 and up, and
 * max_min in the low bits.  Every argument is parenthesized so that an
 * argument that is itself an expression is packed as a whole.
 */
#define table_hash(mode, bpc, max_min) \
	(((mode) << 16) | ((bpc) << 8) | (max_min))
     37 
/*
 * Pick one of three values according to the colour mode held in a variable
 * named `cm`, which must be in scope at the point of use: val444 for CM_444
 * or CM_RGB, val422 for CM_422, and val420 for anything else.  The whole
 * expansion is parenthesized so the macro can be embedded safely in a
 * larger expression.
 */
#define MODE_SELECT(val444, val422, val420) \
	((cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420)))
     40 
     41 
/*
 * Expand to one `case` of the selector switch in get_qp_set(): the label is
 * table_hash(mode, BPC_<bpc>, max), and the body points `table` at
 * qp_table_<mode>_<bpc>bpc_<max> and stores that array's element count in
 * `table_size`.  The caller supplies the terminating semicolon.
 */
#define TABLE_CASE(mode, bpc, max) \
	case (table_hash(mode, BPC_##bpc, max)): \
		table = qp_table_##mode##_##bpc##bpc_##max; \
		table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max) / \
			     sizeof(qp_table_##mode##_##bpc##bpc_##max[0]); \
		break
     46 
     47 
     48 void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp)
     49 {
     50 	int mode = MODE_SELECT(444, 422, 420);
     51 	int sel = table_hash(mode, bpc, max_min);
     52 	int table_size = 0;
     53 	int index;
     54 	const struct qp_entry *table = 0L;
     55 
     56 	// alias enum
     57 	enum { min = MM_MIN, max = MM_MAX };
     58 	switch (sel) {
     59 		TABLE_CASE(444,  8, max);
     60 		TABLE_CASE(444,  8, min);
     61 		TABLE_CASE(444, 10, max);
     62 		TABLE_CASE(444, 10, min);
     63 		TABLE_CASE(444, 12, max);
     64 		TABLE_CASE(444, 12, min);
     65 		TABLE_CASE(422,  8, max);
     66 		TABLE_CASE(422,  8, min);
     67 		TABLE_CASE(422, 10, max);
     68 		TABLE_CASE(422, 10, min);
     69 		TABLE_CASE(422, 12, max);
     70 		TABLE_CASE(422, 12, min);
     71 		TABLE_CASE(420,  8, max);
     72 		TABLE_CASE(420,  8, min);
     73 		TABLE_CASE(420, 10, max);
     74 		TABLE_CASE(420, 10, min);
     75 		TABLE_CASE(420, 12, max);
     76 		TABLE_CASE(420, 12, min);
     77 	}
     78 
     79 	if (table == 0)
     80 		return;
     81 
     82 	index = (bpp - table[0].bpp) * 2;
     83 
     84 	/* requested size is bigger than the table */
     85 	if (index >= table_size) {
     86 		dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
     87 		return;
     88 	}
     89 
     90 	memcpy(qps, table[index].qps, sizeof(qp_set));
     91 }
     92 
/*
 * dsc_roundf - round to the nearest integer, halves away from zero.
 *
 * Adds 0.5 (or subtracts it for negative input) and truncates through an
 * int, so the result is only meaningful while num +/- 0.5 fits in an int.
 * The rounded value is returned as a double.
 */
double dsc_roundf(double num)
{
	const double biased = (num < 0.0) ? (num - 0.5) : (num + 0.5);

	return (int)biased;
}
    102 
/*
 * dsc_ceil - smallest integer not less than num, returned as a double.
 *
 * Truncation through an int already rounds negative values up, so only
 * positive values with a fractional part need the extra step.  As with
 * any int cast, num must fit in an int.
 */
double dsc_ceil(double num)
{
	const int truncated = (int)num;

	if (num > 0 && (double)truncated != num)
		return (int)(num + 1);

	return truncated;
}
    112 
    113 void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
    114 {
    115 	int   *p = ofs;
    116 
    117 	if (mode == CM_444 || mode == CM_RGB) {
    118 		*p++ = (bpp <=  6) ? (0) : ((((bpp >=  8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
    119 		*p++ = (bpp <=  6) ? (-2) : ((((bpp >=  8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
    120 		*p++ = (bpp <=  6) ? (-2) : ((((bpp >=  8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
    121 		*p++ = (bpp <=  6) ? (-4) : ((((bpp >=  8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
    122 		*p++ = (bpp <=  6) ? (-6) : ((((bpp >=  8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
    123 		*p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0))));
    124 		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0))));
    125 		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0))));
    126 		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0))));
    127 		*p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0))));
    128 		*p++ = -10;
    129 		*p++ = (bpp <=  6) ? (-12) : ((bpp >=  8) ? (-10) : (-12 + dsc_roundf((bpp -  6) * (2 / 2.0))));
    130 		*p++ = -12;
    131 		*p++ = -12;
    132 		*p++ = -12;
    133 	} else if (mode == CM_422) {
    134 		*p++ = (bpp <=  8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp -  8) * (8 / 2.0))));
    135 		*p++ = (bpp <=  8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp -  8) * (8 / 2.0))));
    136 		*p++ = (bpp <=  8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    137 		*p++ = (bpp <=  8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    138 		*p++ = (bpp <=  8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    139 		*p++ = (bpp <=  8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    140 		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    141 		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp -  8) * (4 / 2.0))));
    142 		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp -  8) * (2 / 2.0))));
    143 		*p++ = (bpp <=  8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp -  8) * (2 / 2.0))));
    144 		*p++ = -10;
    145 		*p++ = (bpp <=  6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp -  6) * (2.0 / 1))));
    146 		*p++ = -12;
    147 		*p++ = -12;
    148 		*p++ = -12;
    149 	} else {
    150 		*p++ = (bpp <=  6) ? (2) : ((bpp >=  8) ? (10) : (2 + dsc_roundf((bpp -  6) * (8 / 2.0))));
    151 		*p++ = (bpp <=  6) ? (0) : ((bpp >=  8) ? (8) : (0 + dsc_roundf((bpp -  6) * (8 / 2.0))));
    152 		*p++ = (bpp <=  6) ? (0) : ((bpp >=  8) ? (6) : (0 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    153 		*p++ = (bpp <=  6) ? (-2) : ((bpp >=  8) ? (4) : (-2 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    154 		*p++ = (bpp <=  6) ? (-4) : ((bpp >=  8) ? (2) : (-4 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    155 		*p++ = (bpp <=  6) ? (-6) : ((bpp >=  8) ? (0) : (-6 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    156 		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-2) : (-8 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    157 		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-4) : (-8 + dsc_roundf((bpp -  6) * (4 / 2.0))));
    158 		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-6) : (-8 + dsc_roundf((bpp -  6) * (2 / 2.0))));
    159 		*p++ = (bpp <=  6) ? (-10) : ((bpp >=  8) ? (-8) : (-10 + dsc_roundf((bpp -  6) * (2 / 2.0))));
    160 		*p++ = -10;
    161 		*p++ = (bpp <=  4) ? (-12) : ((bpp >=  5) ? (-10) : (-12 + dsc_roundf((bpp -  4) * (2 / 1.0))));
    162 		*p++ = -12;
    163 		*p++ = -12;
    164 		*p++ = -12;
    165 	}
    166 }
    167 
/*
 * median3 - return the middle value of three integers.
 *
 * Orders a and b first, then places c relative to that pair: below the
 * pair the median is the smaller of a and b, above it the larger,
 * otherwise c itself.  Used as a clamp, median3(lo, hi, x) with lo <= hi
 * limits x to the range [lo, hi].
 */
int median3(int a, int b, int c)
{
	const int lo = (a < b) ? a : b;
	const int hi = (a < b) ? b : a;

	if (c < lo)
		return lo;
	if (c > hi)
		return hi;

	return c;
}
    179 
    180 void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version)
    181 {
    182 	float bpp_group;
    183 	float initial_xmit_delay_factor;
    184 	int padding_pixels;
    185 	int i;
    186 
    187 	rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
    188 	rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
    189 
    190 	bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);
    191 
    192 	switch (cm) {
    193 	case CM_420:
    194 		rc->initial_fullness_offset = (bpp >=  6) ? (2048) : ((bpp <=  4) ? (6144) : ((((bpp >  4) && (bpp <=  5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp -  5) * (3584)))));
    195 		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
    196 		rc->second_line_bpg_offset  = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
    197 		break;
    198 	case CM_422:
    199 		rc->initial_fullness_offset = (bpp >=  8) ? (2048) : ((bpp <=  7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
    200 		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
    201 		rc->second_line_bpg_offset  = 0;
    202 		break;
    203 	case CM_444:
    204 	case CM_RGB:
    205 		rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <=  8) ? (6144) : ((((bpp >  8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
    206 		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
    207 		rc->second_line_bpg_offset  = 0;
    208 		break;
    209 	}
    210 
    211 	initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
    212 	rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);
    213 
    214 	if (cm == CM_422 || cm == CM_420)
    215 		slice_width /= 2;
    216 
    217 	padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
    218 	if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
    219 		if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
    220 			rc->initial_xmit_delay++;
    221 	}
    222 
    223 	rc->flatness_min_qp     = ((bpc == BPC_8) ?  (3) : ((bpc == BPC_10) ? (7)  : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
    224 	rc->flatness_max_qp     = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
    225 	rc->flatness_det_thresh = 2 << (bpc - 8);
    226 
    227 	get_qp_set(rc->qp_min, cm, bpc, MM_MIN, bpp);
    228 	get_qp_set(rc->qp_max, cm, bpc, MM_MAX, bpp);
    229 	if (cm == CM_444 && minor_version == 1) {
    230 		for (i = 0; i < QP_SET_SIZE; ++i) {
    231 			rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
    232 			rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
    233 		}
    234 	}
    235 	get_ofs_set(rc->ofs, cm, bpp);
    236 
    237 	/* fixed parameters */
    238 	rc->rc_model_size    = 8192;
    239 	rc->rc_edge_factor   = 6;
    240 	rc->rc_tgt_offset_hi = 3;
    241 	rc->rc_tgt_offset_lo = 3;
    242 
    243 	rc->rc_buf_thresh[0] = 896;
    244 	rc->rc_buf_thresh[1] = 1792;
    245 	rc->rc_buf_thresh[2] = 2688;
    246 	rc->rc_buf_thresh[3] = 3584;
    247 	rc->rc_buf_thresh[4] = 4480;
    248 	rc->rc_buf_thresh[5] = 5376;
    249 	rc->rc_buf_thresh[6] = 6272;
    250 	rc->rc_buf_thresh[7] = 6720;
    251 	rc->rc_buf_thresh[8] = 7168;
    252 	rc->rc_buf_thresh[9] = 7616;
    253 	rc->rc_buf_thresh[10] = 7744;
    254 	rc->rc_buf_thresh[11] = 7872;
    255 	rc->rc_buf_thresh[12] = 8000;
    256 	rc->rc_buf_thresh[13] = 8064;
    257 }
    258 
    259