1/* 2 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#ifndef IR3_COMPILER_H_ 28#define IR3_COMPILER_H_ 29 30#include "util/disk_cache.h" 31#include "util/log.h" 32 33#include "freedreno_dev_info.h" 34 35#include "ir3.h" 36 37struct ir3_ra_reg_set; 38struct ir3_shader; 39 40struct ir3_compiler { 41 struct fd_device *dev; 42 const struct fd_dev_id *dev_id; 43 uint8_t gen; 44 uint32_t shader_count; 45 46 struct disk_cache *disk_cache; 47 48 /* If true, UBO accesses are assumed to be bounds-checked as defined by 49 * VK_EXT_robustness2 and optimizations may have to be more conservative. 50 */ 51 bool robust_ubo_access; 52 53 /* 54 * Configuration options for things that are handled differently on 55 * different generations: 56 */ 57 58 /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate 59 * so we need to use ldlv.u32 to load the varying directly: 60 */ 61 bool flat_bypass; 62 63 /* on a3xx, we need to add one to # of array levels: 64 */ 65 bool levels_add_one; 66 67 /* on a3xx, we need to scale up integer coords for isaml based 68 * on LoD: 69 */ 70 bool unminify_coords; 71 72 /* on a3xx do txf_ms w/ isaml and scaled coords: */ 73 bool txf_ms_with_isaml; 74 75 /* on a4xx, for array textures we need to add 0.5 to the array 76 * index coordinate: 77 */ 78 bool array_index_add_half; 79 80 /* on a6xx, rewrite samgp to sequence of samgq0-3 in vertex shaders: 81 */ 82 bool samgq_workaround; 83 84 /* on a650, vertex shader <-> tess control io uses LDL/STL */ 85 bool tess_use_shared; 86 87 /* The maximum number of constants, in vec4's, across the entire graphics 88 * pipeline. 89 */ 90 uint16_t max_const_pipeline; 91 92 /* The maximum number of constants, in vec4's, for VS+HS+DS+GS. */ 93 uint16_t max_const_geom; 94 95 /* The maximum number of constants, in vec4's, for FS. */ 96 uint16_t max_const_frag; 97 98 /* A "safe" max constlen that can be applied to each shader in the 99 * pipeline which we guarantee will never exceed any combined limits. 100 */ 101 uint16_t max_const_safe; 102 103 /* The maximum number of constants, in vec4's, for compute shaders. */ 104 uint16_t max_const_compute; 105 106 /* Number of instructions that the shader's base address and length 107 * (instrlen divides instruction count by this) must be aligned to. 108 */ 109 uint32_t instr_align; 110 111 /* on a3xx, the unit of indirect const load is higher than later gens (in 112 * vec4 units): 113 */ 114 uint32_t const_upload_unit; 115 116 /* The base number of threads per wave. Some stages may be able to double 117 * this. 118 */ 119 uint32_t threadsize_base; 120 121 /* On at least a6xx, waves are always launched in pairs. In calculations 122 * about occupancy, we pretend that each wave pair is actually one wave, 123 * which simplifies many of the calculations, but means we have to 124 * multiply threadsize_base by this number. 125 */ 126 uint32_t wave_granularity; 127 128 /* The maximum number of simultaneous waves per core. */ 129 uint32_t max_waves; 130 131 /* This is theoretical maximum number of vec4 registers that one wave of 132 * the base threadsize could use. To get the actual size of the register 133 * file in bytes one would need to compute: 134 * 135 * reg_size_vec4 * threadsize_base * wave_granularity * 16 (bytes per vec4) 136 * 137 * However this number is more often what we actually need. For example, a 138 * max_reg more than half of this will result in a doubled threadsize 139 * being impossible (because double-sized waves take up twice as many 140 * registers). Also, the formula for the occupancy given a particular 141 * register footprint is simpler. 142 * 143 * It is in vec4 units because the register file is allocated 144 * with vec4 granularity, so it's in the same units as max_reg. 145 */ 146 uint32_t reg_size_vec4; 147 148 /* The size of local memory in bytes */ 149 uint32_t local_mem_size; 150 151 /* The number of total branch stack entries, divided by wave_granularity. */ 152 uint32_t branchstack_size; 153 154 /* Whether clip+cull distances are supported */ 155 bool has_clip_cull; 156 157 /* Whether private memory is supported */ 158 bool has_pvtmem; 159 160 /* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */ 161 bool storage_16bit; 162}; 163 164void ir3_compiler_destroy(struct ir3_compiler *compiler); 165struct ir3_compiler *ir3_compiler_create(struct fd_device *dev, 166 const struct fd_dev_id *dev_id, 167 bool robust_ubo_access); 168 169void ir3_disk_cache_init(struct ir3_compiler *compiler); 170void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler, 171 struct ir3_shader *shader); 172bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler, 173 struct ir3_shader_variant *v); 174void ir3_disk_cache_store(struct ir3_compiler *compiler, 175 struct ir3_shader_variant *v); 176 177int ir3_compile_shader_nir(struct ir3_compiler *compiler, 178 struct ir3_shader_variant *so); 179 180/* gpu pointer size in units of 32bit registers/slots */ 181static inline unsigned 182ir3_pointer_size(struct ir3_compiler *compiler) 183{ 184 return fd_dev_64b(compiler->dev_id) ? 2 : 1; 185} 186 187enum ir3_shader_debug { 188 IR3_DBG_SHADER_VS = BITFIELD_BIT(0), 189 IR3_DBG_SHADER_TCS = BITFIELD_BIT(1), 190 IR3_DBG_SHADER_TES = BITFIELD_BIT(2), 191 IR3_DBG_SHADER_GS = BITFIELD_BIT(3), 192 IR3_DBG_SHADER_FS = BITFIELD_BIT(4), 193 IR3_DBG_SHADER_CS = BITFIELD_BIT(5), 194 IR3_DBG_DISASM = BITFIELD_BIT(6), 195 IR3_DBG_OPTMSGS = BITFIELD_BIT(7), 196 IR3_DBG_FORCES2EN = BITFIELD_BIT(8), 197 IR3_DBG_NOUBOOPT = BITFIELD_BIT(9), 198 IR3_DBG_NOFP16 = BITFIELD_BIT(10), 199 IR3_DBG_NOCACHE = BITFIELD_BIT(11), 200 IR3_DBG_SPILLALL = BITFIELD_BIT(12), 201 202 /* DEBUG-only options: */ 203 IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20), 204 IR3_DBG_RAMSGS = BITFIELD_BIT(21), 205 206 /* Only used for the disk-caching logic: */ 207 IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30), 208}; 209 210extern enum ir3_shader_debug ir3_shader_debug; 211extern const char *ir3_shader_override_path; 212 213static inline bool 214shader_debug_enabled(gl_shader_stage type) 215{ 216 if (ir3_shader_debug & IR3_DBG_DISASM) 217 return true; 218 219 switch (type) { 220 case MESA_SHADER_VERTEX: 221 return !!(ir3_shader_debug & IR3_DBG_SHADER_VS); 222 case MESA_SHADER_TESS_CTRL: 223 return !!(ir3_shader_debug & IR3_DBG_SHADER_TCS); 224 case MESA_SHADER_TESS_EVAL: 225 return !!(ir3_shader_debug & IR3_DBG_SHADER_TES); 226 case MESA_SHADER_GEOMETRY: 227 return !!(ir3_shader_debug & IR3_DBG_SHADER_GS); 228 case MESA_SHADER_FRAGMENT: 229 return !!(ir3_shader_debug & IR3_DBG_SHADER_FS); 230 case MESA_SHADER_COMPUTE: 231 return !!(ir3_shader_debug & IR3_DBG_SHADER_CS); 232 default: 233 debug_assert(0); 234 return false; 235 } 236} 237 238static inline void 239ir3_debug_print(struct ir3 *ir, const char *when) 240{ 241 if (ir3_shader_debug & IR3_DBG_OPTMSGS) { 242 mesa_logi("%s:", when); 243 ir3_print(ir); 244 } 245} 246 247#endif /* IR3_COMPILER_H_ */ 248