1/* 2 * Copyright © 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file brw_vec4_gs.c 26 * 27 * State atom for client-programmable geometry shaders, and support code. 28 */ 29 30#include "brw_gs.h" 31#include "brw_context.h" 32#include "brw_state.h" 33#include "brw_ff_gs.h" 34#include "compiler/brw_nir.h" 35#include "brw_program.h" 36#include "compiler/glsl/ir_uniform.h" 37 38static void 39assign_gs_binding_table_offsets(const struct intel_device_info *devinfo, 40 const struct gl_program *prog, 41 struct brw_gs_prog_data *prog_data) 42{ 43 /* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform 44 * feedback surfaces. 45 */ 46 uint32_t reserved = devinfo->ver == 6 ? BRW_MAX_SOL_BINDINGS : 0; 47 48 brw_assign_common_binding_table_offsets(devinfo, prog, 49 &prog_data->base.base, reserved); 50} 51 52static void 53brw_gfx6_xfb_setup(const struct gl_transform_feedback_info *linked_xfb_info, 54 struct brw_gs_prog_data *gs_prog_data) 55{ 56 static const unsigned swizzle_for_offset[4] = { 57 BRW_SWIZZLE4(0, 1, 2, 3), 58 BRW_SWIZZLE4(1, 2, 3, 3), 59 BRW_SWIZZLE4(2, 3, 3, 3), 60 BRW_SWIZZLE4(3, 3, 3, 3) 61 }; 62 63 int i; 64 65 /* Make sure that the VUE slots won't overflow the unsigned chars in 66 * prog_data->transform_feedback_bindings[]. 67 */ 68 STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); 69 70 /* Make sure that we don't need more binding table entries than we've 71 * set aside for use in transform feedback. (We shouldn't, since we 72 * set aside enough binding table entries to have one per component). 73 */ 74 assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); 75 76 gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; 77 for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) { 78 gs_prog_data->transform_feedback_bindings[i] = 79 linked_xfb_info->Outputs[i].OutputRegister; 80 gs_prog_data->transform_feedback_swizzles[i] = 81 swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; 82 } 83} 84static bool 85brw_codegen_gs_prog(struct brw_context *brw, 86 struct brw_program *gp, 87 struct brw_gs_prog_key *key) 88{ 89 struct brw_compiler *compiler = brw->screen->compiler; 90 const struct intel_device_info *devinfo = &brw->screen->devinfo; 91 struct brw_stage_state *stage_state = &brw->gs.base; 92 struct brw_gs_prog_data prog_data; 93 bool start_busy = false; 94 double start_time = 0; 95 96 memset(&prog_data, 0, sizeof(prog_data)); 97 98 void *mem_ctx = ralloc_context(NULL); 99 100 nir_shader *nir = nir_shader_clone(mem_ctx, gp->program.nir); 101 102 assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data); 103 104 brw_nir_setup_glsl_uniforms(mem_ctx, nir, &gp->program, 105 &prog_data.base.base, 106 compiler->scalar_stage[MESA_SHADER_GEOMETRY]); 107 if (brw->can_push_ubos) { 108 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, 109 prog_data.base.base.ubo_ranges); 110 } 111 112 uint64_t outputs_written = nir->info.outputs_written; 113 114 brw_compute_vue_map(devinfo, 115 &prog_data.base.vue_map, outputs_written, 116 gp->program.info.separate_shader, 1); 117 118 if (devinfo->ver == 6) 119 brw_gfx6_xfb_setup(gp->program.sh.LinkedTransformFeedback, 120 &prog_data); 121 122 int st_index = -1; 123 if (INTEL_DEBUG(DEBUG_SHADER_TIME)) 124 st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true); 125 126 if (unlikely(brw->perf_debug)) { 127 start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo); 128 start_time = get_time(); 129 } 130 131 char *error_str; 132 const unsigned *program = 133 brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key, 134 &prog_data, nir, st_index, 135 NULL, &error_str); 136 if (program == NULL) { 137 ralloc_strcat(&gp->program.sh.data->InfoLog, error_str); 138 _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str); 139 140 ralloc_free(mem_ctx); 141 return false; 142 } 143 144 if (unlikely(brw->perf_debug)) { 145 if (gp->compiled_once) { 146 brw_debug_recompile(brw, MESA_SHADER_GEOMETRY, gp->program.Id, 147 &key->base); 148 } 149 if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { 150 perf_debug("GS compile took %.03f ms and stalled the GPU\n", 151 (get_time() - start_time) * 1000); 152 } 153 gp->compiled_once = true; 154 } 155 156 /* Scratch space is used for register spilling */ 157 brw_alloc_stage_scratch(brw, stage_state, 158 prog_data.base.base.total_scratch); 159 160 /* The param and pull_param arrays will be freed by the shader cache. */ 161 ralloc_steal(NULL, prog_data.base.base.param); 162 ralloc_steal(NULL, prog_data.base.base.pull_param); 163 brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, 164 key, sizeof(*key), 165 program, prog_data.base.base.program_size, 166 &prog_data, sizeof(prog_data), 167 &stage_state->prog_offset, &brw->gs.base.prog_data); 168 ralloc_free(mem_ctx); 169 170 return true; 171} 172 173static bool 174brw_gs_state_dirty(const struct brw_context *brw) 175{ 176 return brw_state_dirty(brw, 177 _NEW_TEXTURE, 178 BRW_NEW_GEOMETRY_PROGRAM | 179 BRW_NEW_TRANSFORM_FEEDBACK); 180} 181 182void 183brw_gs_populate_key(struct brw_context *brw, 184 struct brw_gs_prog_key *key) 185{ 186 struct gl_context *ctx = &brw->ctx; 187 struct brw_program *gp = 188 (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; 189 190 memset(key, 0, sizeof(*key)); 191 192 brw_populate_base_prog_key(ctx, gp, &key->base); 193} 194 195void 196brw_upload_gs_prog(struct brw_context *brw) 197{ 198 struct brw_stage_state *stage_state = &brw->gs.base; 199 struct brw_gs_prog_key key; 200 /* BRW_NEW_GEOMETRY_PROGRAM */ 201 struct brw_program *gp = 202 (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; 203 204 if (!brw_gs_state_dirty(brw)) 205 return; 206 207 brw_gs_populate_key(brw, &key); 208 209 if (brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, &key, sizeof(key), 210 &stage_state->prog_offset, &brw->gs.base.prog_data, 211 true)) 212 return; 213 214 if (brw_disk_cache_upload_program(brw, MESA_SHADER_GEOMETRY)) 215 return; 216 217 gp = (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; 218 gp->id = key.base.program_string_id; 219 220 ASSERTED bool success = brw_codegen_gs_prog(brw, gp, &key); 221 assert(success); 222} 223 224void 225brw_gs_populate_default_key(const struct brw_compiler *compiler, 226 struct brw_gs_prog_key *key, 227 struct gl_program *prog) 228{ 229 const struct intel_device_info *devinfo = compiler->devinfo; 230 231 memset(key, 0, sizeof(*key)); 232 233 brw_populate_default_base_prog_key(devinfo, brw_program(prog), 234 &key->base); 235} 236 237bool 238brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog) 239{ 240 struct brw_context *brw = brw_context(ctx); 241 struct brw_gs_prog_key key; 242 uint32_t old_prog_offset = brw->gs.base.prog_offset; 243 struct brw_stage_prog_data *old_prog_data = brw->gs.base.prog_data; 244 bool success; 245 246 struct brw_program *bgp = brw_program(prog); 247 248 brw_gs_populate_default_key(brw->screen->compiler, &key, prog); 249 250 success = brw_codegen_gs_prog(brw, bgp, &key); 251 252 brw->gs.base.prog_offset = old_prog_offset; 253 brw->gs.base.prog_data = old_prog_data; 254 255 return success; 256} 257