1/*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
 * This file contains helpers for writing commands to command streams.
27 */
28
29#ifndef SI_BUILD_PM4_H
30#define SI_BUILD_PM4_H
31
32#include "si_pipe.h"
33#include "sid.h"
34
/* Optional debugging hook used by the register-setting macros in this file.
 * Change the condition to 1 to make them call ac_check_shadowed_regs() (with
 * GFX10 / CHIP_NAVI14 hard-coded); otherwise the check expands to nothing.
 */
#if 0
#include "ac_shadowed_regs.h"
#define SI_CHECK_SHADOWED_REGS(reg_offset, count) \
   ac_check_shadowed_regs(GFX10, CHIP_NAVI14, reg_offset, count)
#else
#define SI_CHECK_SHADOWED_REGS(reg_offset, count)
#endif
41
/* Open a command-emission scope on "cs". Declares the locals that the other
 * radeon_* macros operate on: __cs (the command buffer), __cs_buf (its dword
 * storage), __cs_num (the write cursor, starting at current.cdw) and
 * __cs_num_initial (the cursor value at entry). Expands to declarations
 * without the trailing semicolon, which the caller supplies.
 */
#define radeon_begin(cs) \
   struct radeon_cmdbuf *__cs = (cs); \
   uint32_t *__cs_buf = __cs->current.buf; \
   unsigned __cs_num = __cs->current.cdw; \
   UNUSED unsigned __cs_num_initial = __cs_num
46
/* Re-open emission on "cs" in a scope where the radeon_begin() locals already
 * exist and __cs is NULL (asserted), i.e. after radeon_end(). Reloads the
 * buffer pointer and resets both the cursor and its initial value from
 * current.cdw instead of declaring new locals.
 */
#define radeon_begin_again(cs) \
   do { \
      assert(__cs == NULL); \
      __cs = (cs); \
      __cs_buf = __cs->current.buf; \
      __cs_num_initial = __cs_num = __cs->current.cdw; \
   } while (0)
54
/* Close the emission scope: store the write cursor back into current.cdw,
 * assert that it does not exceed current.max_dw, and clear __cs so that
 * radeon_begin_again() can be used afterwards.
 */
#define radeon_end() \
   do { \
      __cs->current.cdw = __cs_num; \
      assert(__cs->current.max_dw >= __cs->current.cdw); \
      __cs = NULL; \
   } while (0)
60
/* Append one dword to the open command stream and advance the write cursor.
 * The expansion is fully parenthesized so it remains a single expression in
 * any context (for example as an operand of ?:).
 */
#define radeon_emit(value) (__cs_buf[__cs_num++] = (value))
/* Non-zero if the write cursor has moved since the emission scope was opened. */
#define radeon_packets_added() (__cs_num_initial != __cs_num)
63
/* Close the emission scope like radeon_end() and, if anything was emitted in
 * it, set sctx->context_roll.
 */
#define radeon_end_update_context_roll(sctx) \
   do { \
      radeon_end(); \
      if (radeon_packets_added()) { \
         (sctx)->context_roll = true; \
      } \
   } while (0)
69
/* Copy "num" dwords from "values" into the open command stream and advance
 * the write cursor. "num" is evaluated once. The local counter has a
 * macro-specific name so that a caller expression mentioning "__n" is not
 * captured by it, and the byte count is computed in size_t from the element
 * size of the stream rather than from a literal 4.
 */
#define radeon_emit_array(values, num) do { \
   unsigned __emit_array_num = (num); \
   memcpy(__cs_buf + __cs_num, (values), \
          (size_t)__emit_array_num * sizeof(*__cs_buf)); \
   __cs_num += __emit_array_num; \
} while (0)
75
/* Emit the PKT3_SET_CONFIG_REG header for "num" consecutive registers
 * starting at byte offset "reg": one packet dword followed by the dword
 * offset relative to SI_CONFIG_REG_OFFSET. Register values are emitted
 * separately by the caller. The assertion checks both ends of the range so
 * that an offset below SI_CONFIG_REG_OFFSET cannot silently wrap in the
 * subtraction.
 */
#define radeon_set_config_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_CONFIG_REG_OFFSET && (reg) < SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONFIG_REG, num, 0)); \
   radeon_emit(((reg) - SI_CONFIG_REG_OFFSET) >> 2); \
} while (0)
82
/* Write "value" to the single config register at byte offset "reg". */
#define radeon_set_config_reg(reg, value) \
   do { \
      radeon_set_config_reg_seq(reg, 1); \
      radeon_emit(value); \
   } while (0)
87
/* Emit the PKT3_SET_CONTEXT_REG header for "num" consecutive registers
 * starting at byte offset "reg": one packet dword followed by the dword
 * offset relative to SI_CONTEXT_REG_OFFSET. Register values are emitted
 * separately by the caller.
 */
#define radeon_set_context_reg_seq(reg, num) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, num); \
      assert(SI_CONTEXT_REG_OFFSET <= (reg)); \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \
      radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
   } while (0)
94
/* Write "value" to the single context register at byte offset "reg". */
#define radeon_set_context_reg(reg, value) \
   do { \
      radeon_set_context_reg_seq(reg, 1); \
      radeon_emit(value); \
   } while (0)
99
/* Write "num" consecutive context registers starting at byte offset "reg",
 * taking the values from the array "values".
 */
#define radeon_set_context_reg_seq_array(reg, num, values) \
   do { \
      radeon_set_context_reg_seq(reg, num); \
      radeon_emit_array(values, num); \
   } while (0)
104
/* Write "value" to a single context register, with "idx" ORed into bits
 * 31:28 of the register-offset dword.
 */
#define radeon_set_context_reg_idx(reg, idx, value) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, 1); \
      assert(SI_CONTEXT_REG_OFFSET <= (reg)); \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
      radeon_emit((((reg) - SI_CONTEXT_REG_OFFSET) >> 2) | ((idx) << 28)); \
      radeon_emit(value); \
   } while (0)
112
/* Emit the PKT3_SET_SH_REG header for "num" consecutive registers starting
 * at byte offset "reg", which must lie in [SI_SH_REG_OFFSET, SI_SH_REG_END).
 * Register values are emitted separately by the caller.
 */
#define radeon_set_sh_reg_seq(reg, num) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, num); \
      assert(SI_SH_REG_OFFSET <= (reg) && (reg) < SI_SH_REG_END); \
      radeon_emit(PKT3(PKT3_SET_SH_REG, num, 0)); \
      radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
   } while (0)
119
/* Write "value" to the single SH register at byte offset "reg". */
#define radeon_set_sh_reg(reg, value) \
   do { \
      radeon_set_sh_reg_seq(reg, 1); \
      radeon_emit(value); \
   } while (0)
124
/* Emit the PKT3_SET_UCONFIG_REG header for "num" consecutive registers
 * starting at byte offset "reg", which must lie in
 * [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END). "perfctr" is passed through
 * as the last argument of PKT3(). Register values are emitted separately by
 * the caller.
 */
#define radeon_set_uconfig_reg_seq(reg, num, perfctr) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, num); \
      assert(CIK_UCONFIG_REG_OFFSET <= (reg) && (reg) < CIK_UCONFIG_REG_END); \
      radeon_emit(PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \
      radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
   } while (0)
131
/* Write "value" to the single uconfig register at byte offset "reg", with the
 * perfctr argument of the header set to false.
 */
#define radeon_set_uconfig_reg(reg, value) \
   do { \
      radeon_set_uconfig_reg_seq(reg, 1, false); \
      radeon_emit(value); \
   } while (0)
136
/* Write "value" to the single uconfig register at byte offset "reg", with the
 * perfctr argument of the header set to true.
 */
#define radeon_set_uconfig_reg_perfctr(reg, value) \
   do { \
      radeon_set_uconfig_reg_seq(reg, 1, true); \
      radeon_emit(value); \
   } while (0)
141
/* Write "value" to a single uconfig register with a non-zero "idx" ORed into
 * bits 31:28 of the register-offset dword. PKT3_SET_UCONFIG_REG_INDEX is
 * used, except that PKT3_SET_UCONFIG_REG is used when chip_class is below
 * GFX9, or is GFX9 with screen->info.me_fw_version < 26.
 */
#define radeon_set_uconfig_reg_idx(screen, chip_class, reg, idx, value) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, 1); \
      assert(CIK_UCONFIG_REG_OFFSET <= (reg) && (reg) < CIK_UCONFIG_REG_END); \
      assert((idx) != 0); \
      const unsigned __opcode = \
         ((chip_class) < GFX9 || \
          ((chip_class) == GFX9 && (screen)->info.me_fw_version < 26)) \
            ? PKT3_SET_UCONFIG_REG \
            : PKT3_SET_UCONFIG_REG_INDEX; \
      radeon_emit(PKT3(__opcode, 1, 0)); \
      radeon_emit((((reg) - CIK_UCONFIG_REG_OFFSET) >> 2) | ((idx) << 28)); \
      radeon_emit(value); \
   } while (0)
154
/**
 * Emit PKT3_SET_CONTEXT_REG only if nothing has been tracked for the register
 * yet or its tracked value differs from the new one; then update the tracking
 * state.
 * @param sctx          context whose tracked_regs state is read and updated
 * @param offset        register byte offset passed to radeon_set_context_reg
 * @param reg           index into tracked_regs.reg_value and bit index in
 *                      tracked_regs.reg_saved
 * @param val           value to write
 */
#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_context_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
165
/**
 * Set 2 consecutive registers if either value differs from its tracked value
 * or either register has not been tracked yet.
 * @param sctx          context whose tracked_regs state is read and updated
 * @param offset        starting register offset
 * @param reg           tracking index of the first register
 * @param val1          is written to first register
 * @param val2          is written to second register
 */
#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \
   unsigned __value1 = (val1), __value2 = (val2); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2) { \
      radeon_set_context_reg_seq(offset, 2); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_saved |= 0x3ull << (reg); \
   } \
} while (0)
185
/**
 * Set 3 consecutive registers if any value differs from its tracked value or
 * any of the registers has not been tracked yet.
 * @param sctx          context whose tracked_regs state is read and updated
 * @param offset        starting register offset
 * @param reg           tracking index of the first register
 * @param val1          is written to first register
 * @param val2          is written to second register
 * @param val3          is written to third register
 */
#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       (sctx)->tracked_regs.reg_value[(reg) + 2] != __value3) { \
      radeon_set_context_reg_seq(offset, 3); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_value[(reg) + 2] = __value3; \
      (sctx)->tracked_regs.reg_saved |= 0x7ull << (reg); \
   } \
} while (0)
205
/**
 * Set 4 consecutive registers if any value differs from its tracked value or
 * any of the registers has not been tracked yet.
 * @param sctx          context whose tracked_regs state is read and updated
 * @param offset        starting register offset
 * @param reg           tracking index of the first register
 * @param val1          is written to first register
 * @param val2          is written to second register
 * @param val3          is written to third register
 * @param val4          is written to fourth register
 */
#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       (sctx)->tracked_regs.reg_value[(reg) + 2] != __value3 || \
       (sctx)->tracked_regs.reg_value[(reg) + 3] != __value4) { \
      radeon_set_context_reg_seq(offset, 4); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      radeon_emit(__value4); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_value[(reg) + 2] = __value3; \
      (sctx)->tracked_regs.reg_value[(reg) + 3] = __value4; \
      (sctx)->tracked_regs.reg_saved |= 0xfull << (reg); \
   } \
} while (0)
228
/**
 * Set "num" consecutive registers if any value differs from the saved copy,
 * then refresh the saved copy. Each macro argument is evaluated at most once.
 * @param sctx          unused by the expansion
 * @param offset        starting register offset
 * @param value         array of "num" dwords to write
 * @param saved_val     array of "num" dwords holding the last written values
 * @param num           number of registers
 */
#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \
   const void *__regn_new = (value); \
   void *__regn_saved = (saved_val); \
   const unsigned __regn_num = (num); \
   const size_t __regn_bytes = sizeof(uint32_t) * __regn_num; \
   if (memcmp(__regn_new, __regn_saved, __regn_bytes)) { \
      radeon_set_context_reg_seq(offset, __regn_num); \
      radeon_emit_array(__regn_new, __regn_num); \
      memcpy(__regn_saved, __regn_new, __regn_bytes); \
   } \
} while (0)
239
/**
 * Emit a single SH register write only if nothing has been tracked for the
 * register yet or its tracked value differs from the new one; then update the
 * tracking state.
 * @param sctx          context whose tracked_regs state is read and updated
 * @param offset        register byte offset passed to radeon_set_sh_reg
 * @param reg           index into tracked_regs.reg_value and bit index in
 *                      tracked_regs.reg_saved
 * @param val           value to write
 */
#define radeon_opt_set_sh_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_sh_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
249
/**
 * Emit a single uconfig register write only if nothing has been tracked for
 * the register yet or its tracked value differs from the new one; then update
 * the tracking state.
 * @param sctx          context whose tracked_regs state is read and updated
 * @param offset        register byte offset passed to radeon_set_uconfig_reg
 * @param reg           index into tracked_regs.reg_value and bit index in
 *                      tracked_regs.reg_saved
 * @param val           value to write
 */
#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_uconfig_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
259
/* Write "value" to a register below CIK_UCONFIG_REG_OFFSET by emitting a
 * six-dword PKT3_COPY_DATA packet with an immediate source (COPY_DATA_IMM)
 * and COPY_DATA_PERF as the destination select.
 */
#define radeon_set_privileged_config_reg(reg, value) \
   do { \
      assert(CIK_UCONFIG_REG_OFFSET > (reg)); \
      radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
      radeon_emit(COPY_DATA_DST_SEL(COPY_DATA_PERF) | \
                  COPY_DATA_SRC_SEL(COPY_DATA_IMM)); \
      radeon_emit(value);      /* immediate source value */ \
      radeon_emit(0);          /* unused */ \
      radeon_emit((reg) >> 2); /* register offset in dwords */ \
      radeon_emit(0);          /* unused */ \
   } while (0)
270
/* Emit the low 32 bits of the 64-bit address "va" and assert that "va" is
 * either 0 or has sscreen->info.address32_hi as its upper 32 bits.
 * "va" is evaluated exactly once, whether or not assertions are compiled in.
 */
#define radeon_emit_32bit_pointer(sscreen, va) do { \
   const uint64_t __va64 = (va); \
   radeon_emit((uint32_t)__va64); \
   assert(__va64 == 0 || (__va64 >> 32) == (sscreen)->info.address32_hi); \
} while (0)
275
/* Write the 32-bit pointer for "desc" to one SH register located at
 * sh_base + desc->shader_userdata_offset. The address comes from
 * desc->gpu_address and is checked against sctx->screen by
 * radeon_emit_32bit_pointer(). The local offset has a macro-specific name so
 * that a caller variable called "sh_offset" passed as sh_base is not captured.
 */
#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \
   unsigned __sh_offset = (sh_base) + (desc)->shader_userdata_offset; \
   radeon_set_sh_reg_seq(__sh_offset, 1); \
   radeon_emit_32bit_pointer((sctx)->screen, (desc)->gpu_address); \
} while (0)
281
282/* This should be evaluated at compile time if all parameters are constants. */
283static ALWAYS_INLINE unsigned
284si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,
285                      enum si_has_gs has_gs, enum si_has_ngg ngg,
286                      enum pipe_shader_type shader)
287{
288   switch (shader) {
289   case PIPE_SHADER_VERTEX:
290      /* VS can be bound as VS, ES, or LS. */
291      if (has_tess) {
292         if (chip_class >= GFX10) {
293            return R_00B430_SPI_SHADER_USER_DATA_HS_0;
294         } else if (chip_class == GFX9) {
295            return R_00B430_SPI_SHADER_USER_DATA_LS_0;
296         } else {
297            return R_00B530_SPI_SHADER_USER_DATA_LS_0;
298         }
299      } else if (chip_class >= GFX10) {
300         if (ngg || has_gs) {
301            return R_00B230_SPI_SHADER_USER_DATA_GS_0;
302         } else {
303            return R_00B130_SPI_SHADER_USER_DATA_VS_0;
304         }
305      } else if (has_gs) {
306         return R_00B330_SPI_SHADER_USER_DATA_ES_0;
307      } else {
308         return R_00B130_SPI_SHADER_USER_DATA_VS_0;
309      }
310
311   case PIPE_SHADER_TESS_CTRL:
312      if (chip_class == GFX9) {
313         return R_00B430_SPI_SHADER_USER_DATA_LS_0;
314      } else {
315         return R_00B430_SPI_SHADER_USER_DATA_HS_0;
316      }
317
318   case PIPE_SHADER_TESS_EVAL:
319      /* TES can be bound as ES, VS, or not bound. */
320      if (has_tess) {
321         if (chip_class >= GFX10) {
322            if (ngg || has_gs) {
323               return R_00B230_SPI_SHADER_USER_DATA_GS_0;
324            } else {
325               return R_00B130_SPI_SHADER_USER_DATA_VS_0;
326            }
327         } else if (has_gs) {
328            return R_00B330_SPI_SHADER_USER_DATA_ES_0;
329         } else {
330            return R_00B130_SPI_SHADER_USER_DATA_VS_0;
331         }
332      } else {
333         return 0;
334      }
335
336   case PIPE_SHADER_GEOMETRY:
337      if (chip_class == GFX9) {
338         return R_00B330_SPI_SHADER_USER_DATA_ES_0;
339      } else {
340         return R_00B230_SPI_SHADER_USER_DATA_GS_0;
341      }
342
343   default:
344      assert(0);
345      return 0;
346   }
347}
348
349#endif
350