1/* 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25/** 26 * This file contains helpers for writing commands to commands streams. 27 */ 28 29#ifndef SI_BUILD_PM4_H 30#define SI_BUILD_PM4_H 31 32#include "si_pipe.h" 33#include "sid.h" 34 35#if 0 36#include "ac_shadowed_regs.h" 37#define SI_CHECK_SHADOWED_REGS(reg_offset, count) ac_check_shadowed_regs(GFX10, CHIP_NAVI14, reg_offset, count) 38#else 39#define SI_CHECK_SHADOWED_REGS(reg_offset, count) 40#endif 41 42#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \ 43 unsigned __cs_num = __cs->current.cdw; \ 44 UNUSED unsigned __cs_num_initial = __cs_num; \ 45 uint32_t *__cs_buf = __cs->current.buf 46 47#define radeon_begin_again(cs) do { \ 48 assert(__cs == NULL); \ 49 __cs = (cs); \ 50 __cs_num = __cs->current.cdw; \ 51 __cs_num_initial = __cs_num; \ 52 __cs_buf = __cs->current.buf; \ 53} while (0) 54 55#define radeon_end() do { \ 56 __cs->current.cdw = __cs_num; \ 57 assert(__cs->current.cdw <= __cs->current.max_dw); \ 58 __cs = NULL; \ 59} while (0) 60 61#define radeon_emit(value) __cs_buf[__cs_num++] = (value) 62#define radeon_packets_added() (__cs_num != __cs_num_initial) 63 64#define radeon_end_update_context_roll(sctx) do { \ 65 radeon_end(); \ 66 if (radeon_packets_added()) \ 67 (sctx)->context_roll = true; \ 68} while (0) 69 70#define radeon_emit_array(values, num) do { \ 71 unsigned __n = (num); \ 72 memcpy(__cs_buf + __cs_num, (values), __n * 4); \ 73 __cs_num += __n; \ 74} while (0) 75 76#define radeon_set_config_reg_seq(reg, num) do { \ 77 SI_CHECK_SHADOWED_REGS(reg, num); \ 78 assert((reg) < SI_CONTEXT_REG_OFFSET); \ 79 radeon_emit(PKT3(PKT3_SET_CONFIG_REG, num, 0)); \ 80 radeon_emit(((reg) - SI_CONFIG_REG_OFFSET) >> 2); \ 81} while (0) 82 83#define radeon_set_config_reg(reg, value) do { \ 84 radeon_set_config_reg_seq(reg, 1); \ 85 radeon_emit(value); \ 86} while (0) 87 88#define radeon_set_context_reg_seq(reg, num) do { \ 89 SI_CHECK_SHADOWED_REGS(reg, num); \ 90 assert((reg) >= SI_CONTEXT_REG_OFFSET); \ 91 radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \ 92 radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \ 93} while (0) 94 95#define radeon_set_context_reg(reg, value) do { \ 96 radeon_set_context_reg_seq(reg, 1); \ 97 radeon_emit(value); \ 98} while (0) 99 100#define radeon_set_context_reg_seq_array(reg, num, values) do { \ 101 radeon_set_context_reg_seq(reg, num); \ 102 radeon_emit_array(values, num); \ 103} while (0) 104 105#define radeon_set_context_reg_idx(reg, idx, value) do { \ 106 SI_CHECK_SHADOWED_REGS(reg, 1); \ 107 assert((reg) >= SI_CONTEXT_REG_OFFSET); \ 108 radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \ 109 radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2 | ((idx) << 28)); \ 110 radeon_emit(value); \ 111} while (0) 112 113#define radeon_set_sh_reg_seq(reg, num) do { \ 114 SI_CHECK_SHADOWED_REGS(reg, num); \ 115 assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \ 116 radeon_emit(PKT3(PKT3_SET_SH_REG, num, 0)); \ 117 radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \ 118} while (0) 119 120#define radeon_set_sh_reg(reg, value) do { \ 121 radeon_set_sh_reg_seq(reg, 1); \ 122 radeon_emit(value); \ 123} while (0) 124 125#define radeon_set_uconfig_reg_seq(reg, num, perfctr) do { \ 126 SI_CHECK_SHADOWED_REGS(reg, num); \ 127 assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \ 128 radeon_emit(PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \ 129 radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \ 130} while (0) 131 132#define radeon_set_uconfig_reg(reg, value) do { \ 133 radeon_set_uconfig_reg_seq(reg, 1, false); \ 134 radeon_emit(value); \ 135} while (0) 136 137#define radeon_set_uconfig_reg_perfctr(reg, value) do { \ 138 radeon_set_uconfig_reg_seq(reg, 1, true); \ 139 radeon_emit(value); \ 140} while (0) 141 142#define radeon_set_uconfig_reg_idx(screen, chip_class, reg, idx, value) do { \ 143 SI_CHECK_SHADOWED_REGS(reg, 1); \ 144 assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \ 145 assert((idx) != 0); \ 146 unsigned __opcode = PKT3_SET_UCONFIG_REG_INDEX; \ 147 if ((chip_class) < GFX9 || \ 148 ((chip_class) == GFX9 && (screen)->info.me_fw_version < 26)) \ 149 __opcode = PKT3_SET_UCONFIG_REG; \ 150 radeon_emit(PKT3(__opcode, 1, 0)); \ 151 radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2 | ((idx) << 28)); \ 152 radeon_emit(value); \ 153} while (0) 154 155/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */ 156#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \ 157 unsigned __value = val; \ 158 if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \ 159 sctx->tracked_regs.reg_value[reg] != __value) { \ 160 radeon_set_context_reg(offset, __value); \ 161 sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \ 162 sctx->tracked_regs.reg_value[reg] = __value; \ 163 } \ 164} while (0) 165 166/** 167 * Set 2 consecutive registers if any registers value is different. 168 * @param offset starting register offset 169 * @param val1 is written to first register 170 * @param val2 is written to second register 171 */ 172#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \ 173 unsigned __value1 = (val1), __value2 = (val2); \ 174 if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \ 175 sctx->tracked_regs.reg_value[reg] != __value1 || \ 176 sctx->tracked_regs.reg_value[(reg) + 1] != __value2) { \ 177 radeon_set_context_reg_seq(offset, 2); \ 178 radeon_emit(__value1); \ 179 radeon_emit(__value2); \ 180 sctx->tracked_regs.reg_value[reg] = __value1; \ 181 sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \ 182 sctx->tracked_regs.reg_saved |= 0x3ull << (reg); \ 183 } \ 184} while (0) 185 186/** 187 * Set 3 consecutive registers if any registers value is different. 188 */ 189#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \ 190 unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \ 191 if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \ 192 sctx->tracked_regs.reg_value[reg] != __value1 || \ 193 sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \ 194 sctx->tracked_regs.reg_value[(reg) + 2] != __value3) { \ 195 radeon_set_context_reg_seq(offset, 3); \ 196 radeon_emit(__value1); \ 197 radeon_emit(__value2); \ 198 radeon_emit(__value3); \ 199 sctx->tracked_regs.reg_value[reg] = __value1; \ 200 sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \ 201 sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \ 202 sctx->tracked_regs.reg_saved |= 0x7ull << (reg); \ 203 } \ 204} while (0) 205 206/** 207 * Set 4 consecutive registers if any registers value is different. 208 */ 209#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \ 210 unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \ 211 if (((sctx->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \ 212 sctx->tracked_regs.reg_value[reg] != __value1 || \ 213 sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \ 214 sctx->tracked_regs.reg_value[(reg) + 2] != __value3 || \ 215 sctx->tracked_regs.reg_value[(reg) + 3] != __value4) { \ 216 radeon_set_context_reg_seq(offset, 4); \ 217 radeon_emit(__value1); \ 218 radeon_emit(__value2); \ 219 radeon_emit(__value3); \ 220 radeon_emit(__value4); \ 221 sctx->tracked_regs.reg_value[reg] = __value1; \ 222 sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \ 223 sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \ 224 sctx->tracked_regs.reg_value[(reg) + 3] = __value4; \ 225 sctx->tracked_regs.reg_saved |= 0xfull << (reg); \ 226 } \ 227} while (0) 228 229/** 230 * Set consecutive registers if any registers value is different. 231 */ 232#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \ 233 if (memcmp(value, saved_val, sizeof(uint32_t) * (num))) { \ 234 radeon_set_context_reg_seq(offset, num); \ 235 radeon_emit_array(value, num); \ 236 memcpy(saved_val, value, sizeof(uint32_t) * (num)); \ 237 } \ 238} while (0) 239 240#define radeon_opt_set_sh_reg(sctx, offset, reg, val) do { \ 241 unsigned __value = val; \ 242 if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \ 243 sctx->tracked_regs.reg_value[reg] != __value) { \ 244 radeon_set_sh_reg(offset, __value); \ 245 sctx->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \ 246 sctx->tracked_regs.reg_value[reg] = __value; \ 247 } \ 248} while (0) 249 250#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \ 251 unsigned __value = val; \ 252 if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \ 253 sctx->tracked_regs.reg_value[reg] != __value) { \ 254 radeon_set_uconfig_reg(offset, __value); \ 255 sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \ 256 sctx->tracked_regs.reg_value[reg] = __value; \ 257 } \ 258} while (0) 259 260#define radeon_set_privileged_config_reg(reg, value) do { \ 261 assert((reg) < CIK_UCONFIG_REG_OFFSET); \ 262 radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \ 263 radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \ 264 COPY_DATA_DST_SEL(COPY_DATA_PERF)); \ 265 radeon_emit(value); \ 266 radeon_emit(0); /* unused */ \ 267 radeon_emit((reg) >> 2); \ 268 radeon_emit(0); /* unused */ \ 269} while (0) 270 271#define radeon_emit_32bit_pointer(sscreen, va) do { \ 272 radeon_emit(va); \ 273 assert((va) == 0 || ((va) >> 32) == sscreen->info.address32_hi); \ 274} while (0) 275 276#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \ 277 unsigned sh_offset = (sh_base) + (desc)->shader_userdata_offset; \ 278 radeon_set_sh_reg_seq(sh_offset, 1); \ 279 radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \ 280} while (0) 281 282/* This should be evaluated at compile time if all parameters are constants. */ 283static ALWAYS_INLINE unsigned 284si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess, 285 enum si_has_gs has_gs, enum si_has_ngg ngg, 286 enum pipe_shader_type shader) 287{ 288 switch (shader) { 289 case PIPE_SHADER_VERTEX: 290 /* VS can be bound as VS, ES, or LS. */ 291 if (has_tess) { 292 if (chip_class >= GFX10) { 293 return R_00B430_SPI_SHADER_USER_DATA_HS_0; 294 } else if (chip_class == GFX9) { 295 return R_00B430_SPI_SHADER_USER_DATA_LS_0; 296 } else { 297 return R_00B530_SPI_SHADER_USER_DATA_LS_0; 298 } 299 } else if (chip_class >= GFX10) { 300 if (ngg || has_gs) { 301 return R_00B230_SPI_SHADER_USER_DATA_GS_0; 302 } else { 303 return R_00B130_SPI_SHADER_USER_DATA_VS_0; 304 } 305 } else if (has_gs) { 306 return R_00B330_SPI_SHADER_USER_DATA_ES_0; 307 } else { 308 return R_00B130_SPI_SHADER_USER_DATA_VS_0; 309 } 310 311 case PIPE_SHADER_TESS_CTRL: 312 if (chip_class == GFX9) { 313 return R_00B430_SPI_SHADER_USER_DATA_LS_0; 314 } else { 315 return R_00B430_SPI_SHADER_USER_DATA_HS_0; 316 } 317 318 case PIPE_SHADER_TESS_EVAL: 319 /* TES can be bound as ES, VS, or not bound. */ 320 if (has_tess) { 321 if (chip_class >= GFX10) { 322 if (ngg || has_gs) { 323 return R_00B230_SPI_SHADER_USER_DATA_GS_0; 324 } else { 325 return R_00B130_SPI_SHADER_USER_DATA_VS_0; 326 } 327 } else if (has_gs) { 328 return R_00B330_SPI_SHADER_USER_DATA_ES_0; 329 } else { 330 return R_00B130_SPI_SHADER_USER_DATA_VS_0; 331 } 332 } else { 333 return 0; 334 } 335 336 case PIPE_SHADER_GEOMETRY: 337 if (chip_class == GFX9) { 338 return R_00B330_SPI_SHADER_USER_DATA_ES_0; 339 } else { 340 return R_00B230_SPI_SHADER_USER_DATA_GS_0; 341 } 342 343 default: 344 assert(0); 345 return 0; 346 } 347} 348 349#endif 350