/*
 * Cayman shaders
 *
 * Copyright (C) 2011  Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * Shader macros
 */

#ifndef __SHADER_H__
#define __SHADER_H__

#include <stdint.h>

#include "radeon.h"

/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */


/*
 * CF (control-flow) instruction field wrappers.
 *
 * Except for I_COUNT, every macro below expands to its argument unchanged.
 * The short tag above each one corresponds to the parameter name used by
 * the CF_*DWORD* packing macros later in this file.
 */
// addr
#define ADDR(x)  (x)
// jumptable
#define JUMPTABLE_SEL(x) (x)
// pc
#define POP_COUNT(x)      (x)
// const
#define CF_CONST(x)       (x)
// cond
#define COND(x)        (x)		// SQ_COND_*
// count: the encoded value is (count - 1); a count of 0 also encodes as 0
#define I_COUNT(x)        ((x) ? ((x) - 1) : 0)
// vpm
#define VALID_PIXEL_MODE(x) (x)
// cf inst
#define CF_INST(x)        (x)		// SQ_CF_INST_*
// wqm
#define WHOLE_QUAD_MODE(x)  (x)
// barrier
#define BARRIER(x)          (x)
// kb0
#define KCACHE_BANK0(x)          (x)
// kb1
#define KCACHE_BANK1(x)          (x)
// km0/1
#define KCACHE_MODE0(x)          (x)
#define KCACHE_MODE1(x)          (x)	// SQ_CF_KCACHE_*
// kcache_addr0/1
#define KCACHE_ADDR0(x)          (x)
#define KCACHE_ADDR1(x)          (x)

#define ALT_CONST(x)            (x)

#define ARRAY_BASE(x)        (x)
/*
 * Export targets and export/alloc field wrappers.
 *
 * The CF_PIXEL_* and CF_POS* constants are plain integers; note that
 * CF_COMPUTED_Z and CF_POS1 are both 61.
 */
// export pixel
#define CF_PIXEL_MRT0         0
#define CF_PIXEL_MRT1         1
#define CF_PIXEL_MRT2         2
#define CF_PIXEL_MRT3         3
#define CF_PIXEL_MRT4         4
#define CF_PIXEL_MRT5         5
#define CF_PIXEL_MRT6         6
#define CF_PIXEL_MRT7         7
// computed Z
#define CF_COMPUTED_Z         61
// export pos
#define CF_POS0               60
#define CF_POS1               61
#define CF_POS2               62
#define CF_POS3               63
// export param
// 0...31
#define TYPE(x)              (x)	// SQ_EXPORT_*
#define RW_GPR(x)            (x)
#define RW_REL(x)            (x)
#define ABSOLUTE                  0
#define RELATIVE                  1
#define INDEX_GPR(x)            (x)
/*
 * Both sizes are encoded as (value - 1), with 0 left at 0.  The argument is
 * fully parenthesized so that compound expressions such as (a | b) are
 * evaluated as a whole before the subtraction.
 */
#define ELEM_SIZE(x)            ((x) ? ((x) - 1) : 0)
#define BURST_COUNT(x)          ((x) ? ((x) - 1) : 0)
#define MARK(x)         (x)

// swiz: per-component source selectors; each expands to its argument
#define SRC_SEL_X(x)    (x)		// SQ_SEL_* each
#define SRC_SEL_Y(x)    (x)
#define SRC_SEL_Z(x)    (x)
#define SRC_SEL_W(x)    (x)

/*
 * CF instruction word packers.
 *
 * Each macro ORs its arguments together at fixed bit offsets and hands the
 * 32-bit result to cpu_to_le32() (not defined in this header; presumably
 * provided by radeon.h).  Arguments are not masked, so a value wider than
 * its field spills into the neighbouring field.
 *
 * Every operand is converted to uint32_t before it is shifted: several
 * fields reach bit 31, and left-shifting a signed int into the sign bit is
 * undefined behaviour in C.
 */
#define CF_DWORD0(addr, jmptbl) \
    cpu_to_le32(((uint32_t)(addr) | ((uint32_t)(jmptbl) << 24)))
#define CF_DWORD1(pc, cf_const, cond, count, vpm, cf_inst, b) \
    cpu_to_le32((((uint32_t)(pc) << 0) | \
		 ((uint32_t)(cf_const) << 3) | \
		 ((uint32_t)(cond) << 8) | \
		 ((uint32_t)(count) << 10) | \
		 ((uint32_t)(vpm) << 20) | \
		 ((uint32_t)(cf_inst) << 22) | \
		 ((uint32_t)(b) << 31)))

#define CF_ALU_DWORD0(addr, kb0, kb1, km0) \
    cpu_to_le32((((uint32_t)(addr) << 0) | \
		 ((uint32_t)(kb0) << 22) | \
		 ((uint32_t)(kb1) << 26) | \
		 ((uint32_t)(km0) << 30)))
#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
    cpu_to_le32((((uint32_t)(km1) << 0) | \
		 ((uint32_t)(kcache_addr0) << 2) | \
		 ((uint32_t)(kcache_addr1) << 10) | \
		 ((uint32_t)(count) << 18) | \
		 ((uint32_t)(alt_const) << 25) | \
		 ((uint32_t)(cf_inst) << 26) | \
		 ((uint32_t)(wqm) << 30) | \
		 ((uint32_t)(b) << 31)))

#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
    cpu_to_le32((((uint32_t)(array_base) << 0) | \
		 ((uint32_t)(type) << 13) | \
		 ((uint32_t)(rw_gpr) << 15) | \
		 ((uint32_t)(rr) << 22) | \
		 ((uint32_t)(index_gpr) << 23) | \
		 ((uint32_t)(es) << 30)))
#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, cf_inst, m, b) \
    cpu_to_le32((((uint32_t)(sel_x) << 0) | \
		 ((uint32_t)(sel_y) << 3) | \
		 ((uint32_t)(sel_z) << 6) | \
		 ((uint32_t)(sel_w) << 9) | \
		 ((uint32_t)(bc) << 16) | \
		 ((uint32_t)(vpm) << 20) | \
		 ((uint32_t)(cf_inst) << 22) | \
		 ((uint32_t)(m) << 30) | \
		 ((uint32_t)(b) << 31)))

/*
 * ALU clause instruction field wrappers.
 *
 * All function-like macros here expand to their argument unchanged; the
 * ALU_SRC_*_BASE and ELEM_* names are plain integer constants.
 */
#define SRC0_SEL(x)        (x)
#define SRC1_SEL(x)        (x)
#define SRC2_SEL(x)        (x)
// src[0-2]_sel
//   0-127 GPR
// 128-159 kcache constants bank 0
// 160-191 kcache constants bank 1
// 192-255 inline const values
// 256-287 kcache constants bank 2
// 288-319 kcache constants bank 3
// 219-255 special SQ_ALU_SRC_* (0, 1, etc.)
// 448+    src param space, starting at ALU_SRC_PARAM_BASE
//         (this comment used to read "488-520", which disagrees with the
//         define below; TODO confirm the upper bound against the ISA docs)
#define ALU_SRC_GPR_BASE        0
#define ALU_SRC_KCACHE0_BASE  128
#define ALU_SRC_KCACHE1_BASE  160
#define ALU_SRC_INLINE_K_BASE 192
#define ALU_SRC_KCACHE2_BASE  256
#define ALU_SRC_KCACHE3_BASE  288
#define ALU_SRC_PARAM_BASE    448

#define SRC0_REL(x)        (x)
#define SRC1_REL(x)        (x)
#define SRC2_REL(x)        (x)
// elem
#define SRC0_ELEM(x)        (x)
#define SRC1_ELEM(x)        (x)
#define SRC2_ELEM(x)        (x)
#define ELEM_X        0
#define ELEM_Y        1
#define ELEM_Z        2
#define ELEM_W        3
// neg
#define SRC0_NEG(x)        (x)
#define SRC1_NEG(x)        (x)
#define SRC2_NEG(x)        (x)
// im
#define INDEX_MODE(x)    (x)		// SQ_INDEX_*
// ps
#define PRED_SEL(x)      (x)		// SQ_PRED_SEL_*
// last
#define LAST(x)          (x)
// abs
#define SRC0_ABS(x)       (x)
#define SRC1_ABS(x)       (x)
// uem
#define UPDATE_EXECUTE_MASK(x) (x)
// up
#define UPDATE_PRED(x)      (x)
// wm
#define WRITE_MASK(x)   (x)
// omod
#define OMOD(x)        (x)		// SQ_ALU_OMOD_*
// alu inst
#define ALU_INST(x)        (x)		// SQ_ALU_INST_*
// bs
#define BANK_SWIZZLE(x)        (x)	// SQ_ALU_VEC_*
#define DST_GPR(x)        (x)
#define DST_REL(x)        (x)
#define DST_ELEM(x)       (x)
#define CLAMP(x)          (x)

/*
 * ALU instruction word packers.
 *
 * ALU_DWORD0 holds the two source operands plus index mode, predicate
 * select and the "last" bit; ALU_DWORD1_OP2 / ALU_DWORD1_OP3 are the two
 * alternative layouts for the second word.  Arguments are ORed in at fixed
 * bit offsets without masking and the result goes through cpu_to_le32().
 *
 * Operands are converted to uint32_t before shifting so that fields that
 * reach bit 31 (last, clamp, de) do not shift a signed int into its sign
 * bit, which is undefined behaviour in C.
 */
#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
    cpu_to_le32((((uint32_t)(src0_sel) << 0) | \
		 ((uint32_t)(s0r) << 9) | \
		 ((uint32_t)(s0e) << 10) | \
		 ((uint32_t)(s0n) << 12) | \
		 ((uint32_t)(src1_sel) << 13) | \
		 ((uint32_t)(s1r) << 22) | \
		 ((uint32_t)(s1e) << 23) | \
		 ((uint32_t)(s1n) << 25) | \
		 ((uint32_t)(im) << 26) | \
		 ((uint32_t)(ps) << 29) | \
		 ((uint32_t)(last) << 31)))

#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
    cpu_to_le32((((uint32_t)(s0a) << 0) | \
		 ((uint32_t)(s1a) << 1) | \
		 ((uint32_t)(uem) << 2) | \
		 ((uint32_t)(up) << 3) | \
		 ((uint32_t)(wm) << 4) | \
		 ((uint32_t)(omod) << 5) | \
		 ((uint32_t)(alu_inst) << 7) | \
		 ((uint32_t)(bs) << 18) | \
		 ((uint32_t)(dst_gpr) << 21) | \
		 ((uint32_t)(dr) << 28) | \
		 ((uint32_t)(de) << 29) | \
		 ((uint32_t)(clamp) << 31)))

#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
    cpu_to_le32((((uint32_t)(src2_sel) << 0) | \
		 ((uint32_t)(s2r) << 9) | \
		 ((uint32_t)(s2e) << 10) | \
		 ((uint32_t)(s2n) << 12) | \
		 ((uint32_t)(alu_inst) << 13) | \
		 ((uint32_t)(bs) << 18) | \
		 ((uint32_t)(dst_gpr) << 21) | \
		 ((uint32_t)(dr) << 28) | \
		 ((uint32_t)(de) << 29) | \
		 ((uint32_t)(clamp) << 31)))

/*
 * VTX (vertex fetch) clause instruction field wrappers.
 *
 * Function-like macros expand to their argument unchanged; the SRF_MODE_*
 * names are plain integer constants.
 */
// vtx insts
#define VTX_INST(x)        (x)		// SQ_VTX_INST_*

// fetch type
#define FETCH_TYPE(x)        (x)	// SQ_VTX_FETCH_*

#define FETCH_WHOLE_QUAD(x)        (x)
#define BUFFER_ID(x)        (x)
#define SRC_GPR(x)          (x)
#define SRC_REL(x)          (x)

#define STRUCTURED_READ(x)    (x)
#define LDS_REQ(x)            (x)
#define COALESCED_READ(x)     (x)

#define DST_SEL_X(x)          (x)
#define DST_SEL_Y(x)          (x)
#define DST_SEL_Z(x)          (x)
#define DST_SEL_W(x)          (x)
#define USE_CONST_FIELDS(x)   (x)
#define DATA_FORMAT(x)        (x)
// num format
#define NUM_FORMAT_ALL(x)     (x)	// SQ_NUM_FORMAT_*
// format comp
#define FORMAT_COMP_ALL(x)     (x)	// SQ_FORMAT_COMP_*
// sma
#define SRF_MODE_ALL(x)     (x)
#define SRF_MODE_ZERO_CLAMP_MINUS_ONE      0
#define SRF_MODE_NO_ZERO                   1
#define OFFSET(x)     (x)
// endian swap
#define ENDIAN_SWAP(x)     (x)		// SQ_ENDIAN_*
#define CONST_BUF_NO_STRIDE(x)     (x)
#define BUFFER_INDEX_MODE(x) (x)

/*
 * VTX fetch instruction word packers.
 *
 * Arguments are ORed in at fixed bit offsets without masking and the result
 * goes through cpu_to_le32().  VTX_DWORD_PAD is an all-zero word.
 *
 * Operands are converted to uint32_t before shifting so that fields that
 * reach the top bits (cr, sma, fca, nfa, ldsr) do not shift a signed int
 * into its sign bit, which is undefined behaviour in C.
 */
#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, ssy, str, ldsr, cr) \
    cpu_to_le32((((uint32_t)(vtx_inst) << 0) | \
		 ((uint32_t)(ft) << 5) | \
		 ((uint32_t)(fwq) << 7) | \
		 ((uint32_t)(buffer_id) << 8) | \
		 ((uint32_t)(src_gpr) << 16) | \
		 ((uint32_t)(sr) << 23) | \
		 ((uint32_t)(ssx) << 24) | \
		 ((uint32_t)(ssy) << 26) | \
		 ((uint32_t)(str) << 28) | \
		 ((uint32_t)(ldsr) << 30) | \
		 ((uint32_t)(cr) << 31)))
#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
    cpu_to_le32((((uint32_t)(dst_gpr) << 0) | \
		 ((uint32_t)(dr) << 7) | \
		 ((uint32_t)(dsx) << 9) | \
		 ((uint32_t)(dsy) << 12) | \
		 ((uint32_t)(dsz) << 15) | \
		 ((uint32_t)(dsw) << 18) | \
		 ((uint32_t)(ucf) << 21) | \
		 ((uint32_t)(data_format) << 22) | \
		 ((uint32_t)(nfa) << 28) | \
		 ((uint32_t)(fca) << 30) | \
		 ((uint32_t)(sma) << 31)))
#define VTX_DWORD2(offset, es, cbns, alt_const, bim) \
    cpu_to_le32((((uint32_t)(offset) << 0) | \
		 ((uint32_t)(es) << 16) | \
		 ((uint32_t)(cbns) << 18) | \
		 ((uint32_t)(alt_const) << 20) | \
		 ((uint32_t)(bim) << 21)))
#define VTX_DWORD_PAD cpu_to_le32(0x00000000)

/*
 * TEX (texture fetch) clause instruction field wrappers.
 *
 * Apart from OFFSET_X/Y/Z, function-like macros expand to their argument
 * unchanged; TEX_UNNORMALIZED / TEX_NORMALIZED are plain integer constants.
 */
// tex insts
#define TEX_INST(x)     (x)		// SQ_TEX_INST_*
#define INST_MOD(x)     (x)
// Same expansion as the FETCH_WHOLE_QUAD in the VTX section of this file;
// an identical redefinition is permitted by the preprocessor.
#define FETCH_WHOLE_QUAD(x)     (x)
#define RESOURCE_ID(x)          (x)
#define RESOURCE_INDEX_MODE(x)          (x)
#define SAMPLER_INDEX_MODE(x)          (x)

#define LOD_BIAS(x)     (x)
// ct
#define COORD_TYPE_X(x)     (x)
#define COORD_TYPE_Y(x)     (x)
#define COORD_TYPE_Z(x)     (x)
#define COORD_TYPE_W(x)     (x)
#define TEX_UNNORMALIZED                0
#define TEX_NORMALIZED                  1
/*
 * Texel offsets: 4:1-bit 2's-complement fixed point, [-8.0..7.5].
 * The value is doubled BEFORE the conversion to int so the half-step bit
 * survives (7.5 -> 15); converting first would truncate 7.5 to 7 and lose
 * it.  Integer arguments give the same result either way.
 */
#define OFFSET_X(x) (((int)((x) * 2)) & 0x1f)
#define OFFSET_Y(x) (((int)((x) * 2)) & 0x1f)
#define OFFSET_Z(x) (((int)((x) * 2)) & 0x1f)
#define SAMPLER_ID(x)     (x)

/*
 * TEX fetch instruction word packers.
 *
 * Arguments are ORed in at fixed bit offsets without masking and the result
 * goes through cpu_to_le32().  TEX_DWORD_PAD is an all-zero word.
 *
 * Operands are converted to uint32_t before shifting so that fields that
 * reach the top bits (ctw at bit 31, ssw at bit 29, ...) do not shift a
 * signed int into its sign bit, which is undefined behaviour in C.
 */
#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \
    cpu_to_le32((((uint32_t)(tex_inst) << 0) | \
		 ((uint32_t)(im) << 5) | \
		 ((uint32_t)(fwq) << 7) | \
		 ((uint32_t)(resource_id) << 8) | \
		 ((uint32_t)(src_gpr) << 16) | \
		 ((uint32_t)(sr) << 23) | \
		 ((uint32_t)(ac) << 24) | \
		 ((uint32_t)(rim) << 25) | \
		 ((uint32_t)(sim) << 27)))
#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
    cpu_to_le32((((uint32_t)(dst_gpr) << 0) | \
		 ((uint32_t)(dr) << 7) | \
		 ((uint32_t)(dsx) << 9) | \
		 ((uint32_t)(dsy) << 12) | \
		 ((uint32_t)(dsz) << 15) | \
		 ((uint32_t)(dsw) << 18) | \
		 ((uint32_t)(lod_bias) << 21) | \
		 ((uint32_t)(ctx) << 28) | \
		 ((uint32_t)(cty) << 29) | \
		 ((uint32_t)(ctz) << 30) | \
		 ((uint32_t)(ctw) << 31)))
#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
    cpu_to_le32((((uint32_t)(offset_x) << 0) | \
		 ((uint32_t)(offset_y) << 5) | \
		 ((uint32_t)(offset_z) << 10) | \
		 ((uint32_t)(sampler_id) << 15) | \
		 ((uint32_t)(ssx) << 20) | \
		 ((uint32_t)(ssy) << 23) | \
		 ((uint32_t)(ssz) << 26) | \
		 ((uint32_t)(ssw) << 29)))
#define TEX_DWORD_PAD cpu_to_le32(0x00000000)

#endif /* __SHADER_H__ */