/*
 * Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include "util/ralloc.h" 28 29#include "freedreno_dev_info.h" 30 31#include "ir3_compiler.h" 32 33static const struct debug_named_value shader_debug_options[] = { 34 /* clang-format off */ 35 {"vs", IR3_DBG_SHADER_VS, "Print shader disasm for vertex shaders"}, 36 {"tcs", IR3_DBG_SHADER_TCS, "Print shader disasm for tess ctrl shaders"}, 37 {"tes", IR3_DBG_SHADER_TES, "Print shader disasm for tess eval shaders"}, 38 {"gs", IR3_DBG_SHADER_GS, "Print shader disasm for geometry shaders"}, 39 {"fs", IR3_DBG_SHADER_FS, "Print shader disasm for fragment shaders"}, 40 {"cs", IR3_DBG_SHADER_CS, "Print shader disasm for compute shaders"}, 41 {"disasm", IR3_DBG_DISASM, "Dump NIR and adreno shader disassembly"}, 42 {"optmsgs", IR3_DBG_OPTMSGS, "Enable optimizer debug messages"}, 43 {"forces2en", IR3_DBG_FORCES2EN, "Force s2en mode for tex sampler instructions"}, 44 {"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"}, 45 {"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"}, 46 {"nocache", IR3_DBG_NOCACHE, "Disable shader cache"}, 47 {"spillall", IR3_DBG_SPILLALL, "Spill as much as possible to test the spiller"}, 48#ifdef DEBUG 49 /* DEBUG-only options: */ 50 {"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"}, 51 {"ramsgs", IR3_DBG_RAMSGS, "Enable register-allocation debug messages"}, 52#endif 53 DEBUG_NAMED_VALUE_END 54 /* clang-format on */ 55}; 56 57DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG", 58 shader_debug_options, 0) 59DEBUG_GET_ONCE_OPTION(ir3_shader_override_path, "IR3_SHADER_OVERRIDE_PATH", 60 NULL) 61 62enum ir3_shader_debug ir3_shader_debug = 0; 63const char *ir3_shader_override_path = NULL; 64 65void 66ir3_compiler_destroy(struct ir3_compiler *compiler) 67{ 68 disk_cache_destroy(compiler->disk_cache); 69 ralloc_free(compiler); 70} 71 72struct ir3_compiler * 73ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, 74 bool 
robust_ubo_access) 75{ 76 struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler); 77 78 ir3_shader_debug = debug_get_option_ir3_shader_debug(); 79 ir3_shader_override_path = 80 !__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL; 81 82 if (ir3_shader_override_path) { 83 ir3_shader_debug |= IR3_DBG_NOCACHE; 84 } 85 86 compiler->dev = dev; 87 compiler->dev_id = dev_id; 88 compiler->gen = fd_dev_gen(dev_id); 89 compiler->robust_ubo_access = robust_ubo_access; 90 91 /* All known GPU's have 32k local memory (aka shared) */ 92 compiler->local_mem_size = 32 * 1024; 93 /* TODO see if older GPU's were different here */ 94 compiler->branchstack_size = 64; 95 compiler->wave_granularity = 2; 96 compiler->max_waves = 16; 97 98 if (compiler->gen >= 6) { 99 compiler->samgq_workaround = true; 100 /* a6xx split the pipeline state into geometry and fragment state, in 101 * order to let the VS run ahead of the FS. As a result there are now 102 * separate const files for the the fragment shader and everything 103 * else, and separate limits. There seems to be a shared limit, but 104 * it's higher than the vert or frag limits. 105 * 106 * TODO: The shared limit seems to be different on different on 107 * different models. 108 */ 109 compiler->max_const_pipeline = 640; 110 compiler->max_const_frag = 512; 111 compiler->max_const_geom = 512; 112 compiler->max_const_safe = 128; 113 114 /* Compute shaders don't share a const file with the FS. Instead they 115 * have their own file, which is smaller than the FS one. 116 * 117 * TODO: is this true on earlier gen's? 
118 */ 119 compiler->max_const_compute = 256; 120 121 /* TODO: implement clip+cull distances on earlier gen's */ 122 compiler->has_clip_cull = true; 123 124 /* TODO: implement private memory on earlier gen's */ 125 compiler->has_pvtmem = true; 126 127 compiler->tess_use_shared = 128 fd_dev_info(compiler->dev_id)->a6xx.tess_use_shared; 129 130 compiler->storage_16bit = 131 fd_dev_info(compiler->dev_id)->a6xx.storage_16bit; 132 } else { 133 compiler->max_const_pipeline = 512; 134 compiler->max_const_geom = 512; 135 compiler->max_const_frag = 512; 136 compiler->max_const_compute = 512; 137 138 /* Note: this will have to change if/when we support tess+GS on 139 * earlier gen's. 140 */ 141 compiler->max_const_safe = 256; 142 } 143 144 if (compiler->gen >= 6) { 145 compiler->reg_size_vec4 = 146 fd_dev_info(compiler->dev_id)->a6xx.reg_size_vec4; 147 } else if (compiler->gen >= 4) { 148 /* On a4xx-a5xx, using r24.x and above requires using the smallest 149 * threadsize. 150 */ 151 compiler->reg_size_vec4 = 48; 152 } else { 153 /* TODO: confirm this */ 154 compiler->reg_size_vec4 = 96; 155 } 156 157 if (compiler->gen >= 6) { 158 compiler->threadsize_base = 64; 159 } else if (compiler->gen >= 4) { 160 /* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan 161 * 1.1 subgroupSize which is 32. 
162 */ 163 compiler->threadsize_base = 32; 164 } else { 165 compiler->threadsize_base = 8; 166 } 167 168 if (compiler->gen >= 4) { 169 /* need special handling for "flat" */ 170 compiler->flat_bypass = true; 171 compiler->levels_add_one = false; 172 compiler->unminify_coords = false; 173 compiler->txf_ms_with_isaml = false; 174 compiler->array_index_add_half = true; 175 compiler->instr_align = 16; 176 compiler->const_upload_unit = 4; 177 } else { 178 /* no special handling for "flat" */ 179 compiler->flat_bypass = false; 180 compiler->levels_add_one = true; 181 compiler->unminify_coords = true; 182 compiler->txf_ms_with_isaml = true; 183 compiler->array_index_add_half = false; 184 compiler->instr_align = 4; 185 compiler->const_upload_unit = 8; 186 } 187 188 ir3_disk_cache_init(compiler); 189 190 return compiler; 191} 192