1921a55d8Smrg/* 2921a55d8Smrg * Evergreen shaders 3921a55d8Smrg * 4921a55d8Smrg * Copyright (C) 2010 Advanced Micro Devices, Inc. 5921a55d8Smrg * 6921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7921a55d8Smrg * copy of this software and associated documentation files (the "Software"), 8921a55d8Smrg * to deal in the Software without restriction, including without limitation 9921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the 11921a55d8Smrg * Software is furnished to do so, subject to the following conditions: 12921a55d8Smrg * 13921a55d8Smrg * The above copyright notice and this permission notice shall be included 14921a55d8Smrg * in all copies or substantial portions of the Software. 15921a55d8Smrg * 16921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17921a55d8Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19921a55d8Smrg * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 20921a55d8Smrg * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21921a55d8Smrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22921a55d8Smrg */ 23921a55d8Smrg 24921a55d8Smrg/* 25921a55d8Smrg * Shader macros 26921a55d8Smrg */ 27921a55d8Smrg 28921a55d8Smrg#ifndef __SHADER_H__ 29921a55d8Smrg#define __SHADER_H__ 30921a55d8Smrg 31921a55d8Smrg#include "radeon.h" 32921a55d8Smrg 33921a55d8Smrg/* Oder of instructions: All CF, All ALU, All Tex/Vtx fetches */ 34921a55d8Smrg 35921a55d8Smrg 36921a55d8Smrg// CF insts 37921a55d8Smrg// addr 38921a55d8Smrg#define ADDR(x) (x) 39921a55d8Smrg// jumptable 40921a55d8Smrg#define JUMPTABLE_SEL(x) (x) 41921a55d8Smrg// pc 42921a55d8Smrg#define POP_COUNT(x) (x) 43921a55d8Smrg// const 44921a55d8Smrg#define CF_CONST(x) (x) 45921a55d8Smrg// cond 46921a55d8Smrg#define COND(x) (x) // SQ_COND_* 47921a55d8Smrg// count 48921a55d8Smrg#define I_COUNT(x) ((x) ? ((x) - 1) : 0) 49921a55d8Smrg// vpm 50921a55d8Smrg#define VALID_PIXEL_MODE(x) (x) 51921a55d8Smrg// eop 52921a55d8Smrg#define END_OF_PROGRAM(x) (x) 53921a55d8Smrg// cf inst 54921a55d8Smrg#define CF_INST(x) (x) // SQ_CF_INST_* 55921a55d8Smrg// wqm 56921a55d8Smrg#define WHOLE_QUAD_MODE(x) (x) 57921a55d8Smrg// barrier 58921a55d8Smrg#define BARRIER(x) (x) 59921a55d8Smrg//kb0 60921a55d8Smrg#define KCACHE_BANK0(x) (x) 61921a55d8Smrg//kb1 62921a55d8Smrg#define KCACHE_BANK1(x) (x) 63921a55d8Smrg// km0/1 64921a55d8Smrg#define KCACHE_MODE0(x) (x) 65921a55d8Smrg#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_* 66921a55d8Smrg// 67921a55d8Smrg#define KCACHE_ADDR0(x) (x) 68921a55d8Smrg#define KCACHE_ADDR1(x) (x) 69921a55d8Smrg 70921a55d8Smrg#define ALT_CONST(x) (x) 71921a55d8Smrg 72921a55d8Smrg#define ARRAY_BASE(x) (x) 73921a55d8Smrg// export pixel 74921a55d8Smrg#define CF_PIXEL_MRT0 0 75921a55d8Smrg#define CF_PIXEL_MRT1 1 76921a55d8Smrg#define CF_PIXEL_MRT2 2 77921a55d8Smrg#define CF_PIXEL_MRT3 3 78921a55d8Smrg#define CF_PIXEL_MRT4 4 79921a55d8Smrg#define CF_PIXEL_MRT5 5 80921a55d8Smrg#define CF_PIXEL_MRT6 6 81921a55d8Smrg#define CF_PIXEL_MRT7 7 82921a55d8Smrg// computed Z 83921a55d8Smrg#define CF_COMPUTED_Z 61 84921a55d8Smrg// export 
// position export targets
#define CF_POS0                 60
#define CF_POS1                 61
#define CF_POS2                 62
#define CF_POS3                 63
// export param
// 0...31
#define TYPE(x)                 (x)     // SQ_EXPORT_*
#define RW_GPR(x)               (x)
#define RW_REL(x)               (x)
#define ABSOLUTE                0
#define RELATIVE                1
#define INDEX_GPR(x)            (x)
// Both of these are encoded as (value - 1), with 0 encoded as 0.  The
// argument is fully parenthesised so that expression arguments such as
// ELEM_SIZE(n & 3) evaluate as intended.
#define ELEM_SIZE(x)            ((x) ? ((x) - 1) : 0)
#define BURST_COUNT(x)          ((x) ? ((x) - 1) : 0)
#define MARK(x)                 (x)

// swiz
#define SRC_SEL_X(x)            (x)     // SQ_SEL_* each
#define SRC_SEL_Y(x)            (x)
#define SRC_SEL_Z(x)            (x)
#define SRC_SEL_W(x)            (x)

/*
 * CF word packers.
 *
 * Every field is widened to uint32_t before it is shifted: the arguments are
 * normally plain int constants, and shifting a signed int so that a set bit
 * lands in (or past) bit 31 is undefined behaviour in C.  The packed word is
 * then passed through cpu_to_le32().
 */

/* CF word 0: addr at bit 0, jmptbl at bit 24. */
#define CF_DWORD0(addr, jmptbl) \
    cpu_to_le32(((uint32_t)(addr) | ((uint32_t)(jmptbl) << 24)))

/*
 * CF word 1: pc@0, cf_const@3, cond@8, count@10, vpm@20, eop@21,
 * cf_inst@22, wqm@30, b@31.
 */
#define CF_DWORD1(pc, cf_const, cond, count, vpm, eop, cf_inst, wqm, b) \
    cpu_to_le32((((uint32_t)(pc) << 0) | ((uint32_t)(cf_const) << 3) | \
                 ((uint32_t)(cond) << 8) | ((uint32_t)(count) << 10) | \
                 ((uint32_t)(vpm) << 20) | ((uint32_t)(eop) << 21) | \
                 ((uint32_t)(cf_inst) << 22) | ((uint32_t)(wqm) << 30) | \
                 ((uint32_t)(b) << 31)))

/* CF ALU word 0: addr@0, kb0@22, kb1@26, km0@30. */
#define CF_ALU_DWORD0(addr, kb0, kb1, km0) \
    cpu_to_le32((((uint32_t)(addr) << 0) | ((uint32_t)(kb0) << 22) | \
                 ((uint32_t)(kb1) << 26) | ((uint32_t)(km0) << 30)))

/*
 * CF ALU word 1: km1@0, kcache_addr0@2, kcache_addr1@10, count@18,
 * alt_const@25, cf_inst@26, wqm@30, b@31.
 */
#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
    cpu_to_le32((((uint32_t)(km1) << 0) | ((uint32_t)(kcache_addr0) << 2) | \
                 ((uint32_t)(kcache_addr1) << 10) | ((uint32_t)(count) << 18) | \
                 ((uint32_t)(alt_const) << 25) | ((uint32_t)(cf_inst) << 26) | \
                 ((uint32_t)(wqm) << 30) | ((uint32_t)(b) << 31)))

/*
 * CF alloc/import/export word 0: array_base@0, type@13, rw_gpr@15, rr@22,
 * index_gpr@23, es@30.
 */
#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
    cpu_to_le32((((uint32_t)(array_base) << 0) | ((uint32_t)(type) << 13) | \
                 ((uint32_t)(rw_gpr) << 15) | ((uint32_t)(rr) << 22) | \
                 ((uint32_t)(index_gpr) << 23) | ((uint32_t)(es) << 30)))

/*
 * CF alloc/import/export word 1 (swizzle form): sel_x@0, sel_y@3, sel_z@6,
 * sel_w@9, bc@16, vpm@20, eop@21, cf_inst@22, m@30, b@31.
 */
#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, eop, cf_inst, m, b) \
    cpu_to_le32((((uint32_t)(sel_x) << 0) | ((uint32_t)(sel_y) << 3) | \
                 ((uint32_t)(sel_z) << 6) | ((uint32_t)(sel_w) << 9) | \
                 ((uint32_t)(bc) << 16) | ((uint32_t)(vpm) << 20) | \
                 ((uint32_t)(eop) << 21) | ((uint32_t)(cf_inst) << 22) | \
                 ((uint32_t)(m) << 30) | ((uint32_t)(b) << 31)))

// ALU clause insts
#define SRC0_SEL(x)             (x)
#define SRC1_SEL(x)             (x)
#define SRC2_SEL(x)             (x)
// src[0-2]_sel
//   0-127 GPR
// 128-159 kcache constants bank 0
// 160-191 kcache constants bank 1
// 192-255 inline const values
// 256-287 kcache constants bank 2
// 288-319 kcache constants bank 3
// 219-255 special SQ_ALU_SRC_* (0, 1, etc.)
// 448-... src param space (starts at ALU_SRC_PARAM_BASE).
//         NOTE(review): this list used to read "488-520", which disagrees
//         with the base value below -- confirm the range against the ISA doc.
#define ALU_SRC_GPR_BASE        0
#define ALU_SRC_KCACHE0_BASE    128
#define ALU_SRC_KCACHE1_BASE    160
#define ALU_SRC_INLINE_K_BASE   192
#define ALU_SRC_KCACHE2_BASE    256
#define ALU_SRC_KCACHE3_BASE    288
#define ALU_SRC_PARAM_BASE      448

#define SRC0_REL(x)             (x)
#define SRC1_REL(x)             (x)
#define SRC2_REL(x)             (x)
// elem
#define SRC0_ELEM(x)            (x)
#define SRC1_ELEM(x)            (x)
#define SRC2_ELEM(x)            (x)
#define ELEM_X                  0
#define ELEM_Y                  1
#define ELEM_Z                  2
#define ELEM_W                  3
// neg
#define SRC0_NEG(x)             (x)
#define SRC1_NEG(x)             (x)
#define SRC2_NEG(x)             (x)
// im
#define INDEX_MODE(x)           (x)     // SQ_INDEX_*
// ps
#define PRED_SEL(x)             (x)     // SQ_PRED_SEL_*
// last
#define LAST(x)                 (x)
// abs
#define SRC0_ABS(x)             (x)
#define SRC1_ABS(x)             (x)
// uem
#define UPDATE_EXECUTE_MASK(x)  (x)
// up
#define UPDATE_PRED(x)          (x)
// wm
#define WRITE_MASK(x)           (x)
// omod
#define OMOD(x)                 (x)     // SQ_ALU_OMOD_*
// alu inst
#define ALU_INST(x)             (x)     // SQ_ALU_INST_*
// bs
#define BANK_SWIZZLE(x)         (x)     // SQ_ALU_VEC_*
#define DST_GPR(x)              (x)
#define DST_REL(x)              (x)
#define DST_ELEM(x)             (x)
#define CLAMP(x)                (x)

/*
 * ALU word packers.
 *
 * Every field is widened to uint32_t before it is shifted: the arguments are
 * normally plain int constants, and shifting a signed int so that a set bit
 * lands in bit 31 (as `last` and `clamp` do) is undefined behaviour in C.
 * The packed word is then passed through cpu_to_le32().
 */

/*
 * ALU word 0: src0_sel@0, s0r@9, s0e@10, s0n@12, src1_sel@13, s1r@22,
 * s1e@23, s1n@25, im@26, ps@29, last@31.
 */
#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
    cpu_to_le32((((uint32_t)(src0_sel) << 0) | ((uint32_t)(s0r) << 9) | \
                 ((uint32_t)(s0e) << 10) | ((uint32_t)(s0n) << 12) | \
                 ((uint32_t)(src1_sel) << 13) | ((uint32_t)(s1r) << 22) | \
                 ((uint32_t)(s1e) << 23) | ((uint32_t)(s1n) << 25) | \
                 ((uint32_t)(im) << 26) | ((uint32_t)(ps) << 29) | \
                 ((uint32_t)(last) << 31)))

/*
 * ALU word 1, two-operand form: s0a@0, s1a@1, uem@2, up@3, wm@4, omod@5,
 * alu_inst@7, bs@18, dst_gpr@21, dr@28, de@29, clamp@31.
 */
#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
    cpu_to_le32((((uint32_t)(s0a) << 0) | ((uint32_t)(s1a) << 1) | \
                 ((uint32_t)(uem) << 2) | ((uint32_t)(up) << 3) | \
                 ((uint32_t)(wm) << 4) | ((uint32_t)(omod) << 5) | \
                 ((uint32_t)(alu_inst) << 7) | ((uint32_t)(bs) << 18) | \
                 ((uint32_t)(dst_gpr) << 21) | ((uint32_t)(dr) << 28) | \
                 ((uint32_t)(de) << 29) | ((uint32_t)(clamp) << 31)))

/*
 * ALU word 1, three-operand form: src2_sel@0, s2r@9, s2e@10, s2n@12,
 * alu_inst@13, bs@18, dst_gpr@21, dr@28, de@29, clamp@31.
 */
#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
    cpu_to_le32((((uint32_t)(src2_sel) << 0) | ((uint32_t)(s2r) << 9) | \
                 ((uint32_t)(s2e) << 10) | ((uint32_t)(s2n) << 12) | \
                 ((uint32_t)(alu_inst) << 13) | ((uint32_t)(bs) << 18) | \
                 ((uint32_t)(dst_gpr) << 21) | ((uint32_t)(dr) << 28) | \
                 ((uint32_t)(de) << 29) | ((uint32_t)(clamp) << 31)))

// VTX clause insts
// vtx insts
#define VTX_INST(x)             (x)     // SQ_VTX_INST_*

// fetch type
#define FETCH_TYPE(x)           (x)     // SQ_VTX_FETCH_*

#define FETCH_WHOLE_QUAD(x) (x)
#define BUFFER_ID(x)            (x)
#define SRC_GPR(x)              (x)
#define SRC_REL(x)              (x)
// encoded as (count - 1); a count of 0 is encoded as 0
#define MEGA_FETCH_COUNT(x)     ((x) ? ((x) - 1) : 0)

#define DST_SEL_X(x)            (x)
#define DST_SEL_Y(x)            (x)
#define DST_SEL_Z(x)            (x)
#define DST_SEL_W(x)            (x)
#define USE_CONST_FIELDS(x)     (x)
#define DATA_FORMAT(x)          (x)
// num format
#define NUM_FORMAT_ALL(x)       (x)     // SQ_NUM_FORMAT_*
// format comp
#define FORMAT_COMP_ALL(x)      (x)     // SQ_FORMAT_COMP_*
// sma
#define SRF_MODE_ALL(x)         (x)
#define SRF_MODE_ZERO_CLAMP_MINUS_ONE   0
#define SRF_MODE_NO_ZERO                1
#define OFFSET(x)               (x)
// endian swap
#define ENDIAN_SWAP(x)          (x)     // SQ_ENDIAN_*
#define CONST_BUF_NO_STRIDE(x)  (x)
// mf
#define MEGA_FETCH(x)           (x)
#define BUFFER_INDEX_MODE(x)    (x)

/*
 * VTX fetch word packers.
 *
 * Every field is widened to uint32_t before it is shifted: the arguments are
 * normally plain int constants, and shifting a signed int so that a set bit
 * lands in (or past) bit 31 is undefined behaviour in C.  `mfc` sits at
 * bit 26, so an encoded count of 32 or more already reaches bit 31.
 * The packed word is then passed through cpu_to_le32().
 */

/*
 * VTX word 0: vtx_inst@0, ft@5, fwq@7, buffer_id@8, src_gpr@16, sr@23,
 * ssx@24, mfc@26.
 */
#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
    cpu_to_le32((((uint32_t)(vtx_inst) << 0) | ((uint32_t)(ft) << 5) | \
                 ((uint32_t)(fwq) << 7) | ((uint32_t)(buffer_id) << 8) | \
                 ((uint32_t)(src_gpr) << 16) | ((uint32_t)(sr) << 23) | \
                 ((uint32_t)(ssx) << 24) | ((uint32_t)(mfc) << 26)))

/*
 * VTX word 1 (GPR destination form): dst_gpr@0, dr@7, dsx@9, dsy@12,
 * dsz@15, dsw@18, ucf@21, data_format@22, nfa@28, fca@30, sma@31.
 */
#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
    cpu_to_le32((((uint32_t)(dst_gpr) << 0) | ((uint32_t)(dr) << 7) | \
                 ((uint32_t)(dsx) << 9) | ((uint32_t)(dsy) << 12) | \
                 ((uint32_t)(dsz) << 15) | ((uint32_t)(dsw) << 18) | \
                 ((uint32_t)(ucf) << 21) | ((uint32_t)(data_format) << 22) | \
                 ((uint32_t)(nfa) << 28) | ((uint32_t)(fca) << 30) | \
                 ((uint32_t)(sma) << 31)))

/* VTX word 2: offset@0, es@16, cbns@18, mf@19, alt_const@20, bim@21. */
#define VTX_DWORD2(offset, es, cbns, mf, alt_const, bim) \
    cpu_to_le32((((uint32_t)(offset) << 0) | ((uint32_t)(es) << 16) | \
                 ((uint32_t)(cbns) << 18) | ((uint32_t)(mf) << 19) | \
                 ((uint32_t)(alt_const) << 20) | ((uint32_t)(bim) << 21)))

/* All-zero padding word that follows the three VTX words. */
#define VTX_DWORD_PAD cpu_to_le32(0x00000000)

247921a55d8Smrg// TEX clause insts 248921a55d8Smrg// tex insts 249921a55d8Smrg#define TEX_INST(x) (x) // SQ_TEX_INST_* 250921a55d8Smrg#define INST_MOD(x) (x) 251921a55d8Smrg#define FETCH_WHOLE_QUAD(x) (x) 252921a55d8Smrg#define RESOURCE_ID(x) (x) 253921a55d8Smrg#define RESOURCE_INDEX_MODE(x) (x) 254921a55d8Smrg#define SAMPLER_INDEX_MODE(x) (x) 255921a55d8Smrg 256921a55d8Smrg#define LOD_BIAS(x) (x) 257921a55d8Smrg//ct 258921a55d8Smrg#define COORD_TYPE_X(x) (x) 259921a55d8Smrg#define COORD_TYPE_Y(x) (x) 260921a55d8Smrg#define COORD_TYPE_Z(x) (x) 261921a55d8Smrg#define COORD_TYPE_W(x) (x) 262921a55d8Smrg#define TEX_UNNORMALIZED 0 263921a55d8Smrg#define TEX_NORMALIZED 1 264921a55d8Smrg#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */ 265921a55d8Smrg#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f) 266921a55d8Smrg#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f) 267921a55d8Smrg#define SAMPLER_ID(x) (x) 268921a55d8Smrg 269921a55d8Smrg#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \ 270b13dfe66Smrg cpu_to_le32((((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ 271b13dfe66Smrg ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27))) 272921a55d8Smrg#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \ 273b13dfe66Smrg cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ 274b13dfe66Smrg ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))) 275921a55d8Smrg#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \ 276b13dfe66Smrg cpu_to_le32((((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ 277b13dfe66Smrg ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))) 278b13dfe66Smrg#define TEX_DWORD_PAD cpu_to_le32(0x00000000) 279921a55d8Smrg 280921a55d8Smrgextern int 
evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs); 281921a55d8Smrgextern int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps); 282921a55d8Smrg 283921a55d8Smrgextern int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs); 284921a55d8Smrgextern int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps); 285921a55d8Smrg 286921a55d8Smrgextern int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader); 287921a55d8Smrgextern int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader); 288921a55d8Smrg 289921a55d8Smrgextern int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs); 290921a55d8Smrgextern int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps); 291921a55d8Smrg 292921a55d8Smrg#endif 293