1/* 2 * Copyright (c) 2012-2015 Etnaviv Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sub license, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the 12 * next paragraph) shall be included in all copies or substantial portions 13 * of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Wladimir J. van der Laan <laanwj@gmail.com> 25 */ 26 27#include "etnaviv_shader.h" 28 29#include "etnaviv_compiler.h" 30#include "etnaviv_context.h" 31#include "etnaviv_debug.h" 32#include "etnaviv_disasm.h" 33#include "etnaviv_disk_cache.h" 34#include "etnaviv_screen.h" 35#include "etnaviv_util.h" 36 37#include "tgsi/tgsi_parse.h" 38#include "nir/tgsi_to_nir.h" 39#include "util/u_atomic.h" 40#include "util/u_math.h" 41#include "util/u_memory.h" 42 43/* Upload shader code to bo, if not already done */ 44static bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shader_variant *v) 45{ 46 if (v->bo) 47 return true; 48 v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, DRM_ETNA_GEM_CACHE_WC); 49 if (!v->bo) 50 return false; 51 52 void *buf = etna_bo_map(v->bo); 53 etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE); 54 memcpy(buf, v->code, v->code_size*4); 55 etna_bo_cpu_fini(v->bo); 56 DBG("Uploaded %s of %u words to bo %p", v->stage == MESA_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo); 57 return true; 58} 59 60extern const char *tgsi_swizzle_names[]; 61void 62etna_dump_shader(const struct etna_shader_variant *shader) 63{ 64 if (shader->stage == MESA_SHADER_VERTEX) 65 printf("VERT\n"); 66 else 67 printf("FRAG\n"); 68 69 etna_disasm(shader->code, shader->code_size, PRINT_RAW); 70 71 printf("num loops: %i\n", shader->num_loops); 72 printf("num temps: %i\n", shader->num_temps); 73 printf("immediates:\n"); 74 for (int idx = 0; idx < shader->uniforms.count; ++idx) { 75 printf(" [%i].%s = %f (0x%08x) (%d)\n", 76 idx / 4, 77 tgsi_swizzle_names[idx % 4], 78 *((float *)&shader->uniforms.data[idx]), 79 shader->uniforms.data[idx], 80 shader->uniforms.contents[idx]); 81 } 82 printf("inputs:\n"); 83 for (int idx = 0; idx < shader->infile.num_reg; ++idx) { 84 printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg, 85 (shader->stage == MESA_SHADER_VERTEX) ? 86 gl_vert_attrib_name(shader->infile.reg[idx].slot) : 87 gl_varying_slot_name_for_stage(shader->infile.reg[idx].slot, shader->stage), 88 shader->infile.reg[idx].num_components); 89 } 90 printf("outputs:\n"); 91 for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { 92 printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg, 93 (shader->stage == MESA_SHADER_VERTEX) ? 94 gl_varying_slot_name_for_stage(shader->outfile.reg[idx].slot, shader->stage) : 95 gl_frag_result_name(shader->outfile.reg[idx].slot), 96 shader->outfile.reg[idx].num_components); 97 } 98 printf("special:\n"); 99 if (shader->stage == MESA_SHADER_VERTEX) { 100 printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg); 101 printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg); 102 printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing); 103 } else { 104 printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg); 105 printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg); 106 } 107 printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8); 108} 109 110/* Link vs and fs together: fill in shader_state from vs and fs 111 * as this function is called every time a new fs or vs is bound, the goal is to 112 * do little processing as possible here, and to precompute as much as possible in 113 * the vs/fs shader_object. 114 * 115 * XXX we could cache the link result for a certain set of VS/PS; usually a pair 116 * of VS and PS will be used together anyway. 117 */ 118static bool 119etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs, 120 struct etna_shader_variant *vs, struct etna_shader_variant *fs) 121{ 122 struct etna_shader_link_info link = { }; 123 bool failed; 124 125 assert(vs->stage == MESA_SHADER_VERTEX); 126 assert(fs->stage == MESA_SHADER_FRAGMENT); 127 128#ifdef DEBUG 129 if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) { 130 etna_dump_shader(vs); 131 etna_dump_shader(fs); 132 } 133#endif 134 135 if (DBG_ENABLED(ETNA_DBG_NIR)) 136 failed = etna_link_shader_nir(&link, vs, fs); 137 else 138 failed = etna_link_shader(&link, vs, fs); 139 140 if (failed) { 141 /* linking failed: some fs inputs do not have corresponding 142 * vs outputs */ 143 assert(0); 144 145 return false; 146 } 147 148 if (DBG_ENABLED(ETNA_DBG_LINKER_MSGS)) { 149 debug_printf("link result:\n"); 150 debug_printf(" vs -> fs comps use pa_attr\n"); 151 152 for (int idx = 0; idx < link.num_varyings; ++idx) 153 debug_printf(" t%-2u -> t%-2u %-5.*s %u,%u,%u,%u 0x%08x\n", 154 link.varyings[idx].reg, idx + 1, 155 link.varyings[idx].num_components, "xyzw", 156 link.varyings[idx].use[0], link.varyings[idx].use[1], 157 link.varyings[idx].use[2], link.varyings[idx].use[3], 158 link.varyings[idx].pa_attributes); 159 } 160 161 /* set last_varying_2x flag if the last varying has 1 or 2 components */ 162 bool last_varying_2x = false; 163 if (link.num_varyings > 0 && link.varyings[link.num_varyings - 1].num_components <= 2) 164 last_varying_2x = true; 165 166 cs->RA_CONTROL = VIVS_RA_CONTROL_UNK0 | 167 COND(last_varying_2x, VIVS_RA_CONTROL_LAST_VARYING_2X); 168 169 cs->PA_ATTRIBUTE_ELEMENT_COUNT = VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(link.num_varyings); 170 for (int idx = 0; idx < link.num_varyings; ++idx) 171 cs->PA_SHADER_ATTRIBUTES[idx] = link.varyings[idx].pa_attributes; 172 173 cs->VS_END_PC = vs->code_size / 4; 174 cs->VS_OUTPUT_COUNT = 1 + link.num_varyings; /* position + varyings */ 175 176 /* vs outputs (varyings) */ 177 DEFINE_ETNA_BITARRAY(vs_output, 16, 8) = {0}; 178 int varid = 0; 179 etna_bitarray_set(vs_output, 8, varid++, vs->vs_pos_out_reg); 180 for (int idx = 0; idx < link.num_varyings; ++idx) 181 etna_bitarray_set(vs_output, 8, varid++, link.varyings[idx].reg); 182 if (vs->vs_pointsize_out_reg >= 0) 183 etna_bitarray_set(vs_output, 8, varid++, vs->vs_pointsize_out_reg); /* pointsize is last */ 184 185 for (int idx = 0; idx < ARRAY_SIZE(cs->VS_OUTPUT); ++idx) 186 cs->VS_OUTPUT[idx] = vs_output[idx]; 187 188 if (vs->vs_pointsize_out_reg != -1) { 189 /* vertex shader outputs point coordinate, provide extra output and make 190 * sure PA config is 191 * not masked */ 192 cs->PA_CONFIG = ~0; 193 cs->VS_OUTPUT_COUNT_PSIZE = cs->VS_OUTPUT_COUNT + 1; 194 } else { 195 /* vertex shader does not output point coordinate, make sure thate 196 * POINT_SIZE_ENABLE is masked 197 * and no extra output is given */ 198 cs->PA_CONFIG = ~VIVS_PA_CONFIG_POINT_SIZE_ENABLE; 199 cs->VS_OUTPUT_COUNT_PSIZE = cs->VS_OUTPUT_COUNT; 200 } 201 202 /* if fragment shader doesn't read pointcoord, disable it */ 203 if (link.pcoord_varying_comp_ofs == -1) 204 cs->PA_CONFIG &= ~VIVS_PA_CONFIG_POINT_SPRITE_ENABLE; 205 206 cs->VS_LOAD_BALANCING = vs->vs_load_balancing; 207 cs->VS_START_PC = 0; 208 209 cs->PS_END_PC = fs->code_size / 4; 210 cs->PS_OUTPUT_REG = fs->ps_color_out_reg; 211 cs->PS_INPUT_COUNT = 212 VIVS_PS_INPUT_COUNT_COUNT(link.num_varyings + 1) | /* Number of inputs plus position */ 213 VIVS_PS_INPUT_COUNT_UNK8(fs->input_count_unk8); 214 cs->PS_TEMP_REGISTER_CONTROL = 215 VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(MAX2(fs->num_temps, link.num_varyings + 1)); 216 cs->PS_START_PC = 0; 217 218 /* Precompute PS_INPUT_COUNT and TEMP_REGISTER_CONTROL in the case of MSAA 219 * mode, avoids some fumbling in sync_context. */ 220 cs->PS_INPUT_COUNT_MSAA = 221 VIVS_PS_INPUT_COUNT_COUNT(link.num_varyings + 2) | /* MSAA adds another input */ 222 VIVS_PS_INPUT_COUNT_UNK8(fs->input_count_unk8); 223 cs->PS_TEMP_REGISTER_CONTROL_MSAA = 224 VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(MAX2(fs->num_temps, link.num_varyings + 2)); 225 226 uint32_t total_components = 0; 227 DEFINE_ETNA_BITARRAY(num_components, ETNA_NUM_VARYINGS, 4) = {0}; 228 DEFINE_ETNA_BITARRAY(component_use, 4 * ETNA_NUM_VARYINGS, 2) = {0}; 229 for (int idx = 0; idx < link.num_varyings; ++idx) { 230 const struct etna_varying *varying = &link.varyings[idx]; 231 232 etna_bitarray_set(num_components, 4, idx, varying->num_components); 233 for (int comp = 0; comp < varying->num_components; ++comp) { 234 etna_bitarray_set(component_use, 2, total_components, varying->use[comp]); 235 total_components += 1; 236 } 237 } 238 239 cs->GL_VARYING_TOTAL_COMPONENTS = 240 VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM(align(total_components, 2)); 241 cs->GL_VARYING_NUM_COMPONENTS[0] = num_components[0]; 242 cs->GL_VARYING_NUM_COMPONENTS[1] = num_components[1]; 243 cs->GL_VARYING_COMPONENT_USE[0] = component_use[0]; 244 cs->GL_VARYING_COMPONENT_USE[1] = component_use[1]; 245 246 cs->GL_HALTI5_SH_SPECIALS = 247 0x7f7f0000 | /* unknown bits, probably other PS inputs */ 248 /* pointsize is last (see above) */ 249 VIVS_GL_HALTI5_SH_SPECIALS_VS_PSIZE_OUT((vs->vs_pointsize_out_reg != -1) ? 250 cs->VS_OUTPUT_COUNT * 4 : 0x00) | 251 VIVS_GL_HALTI5_SH_SPECIALS_PS_PCOORD_IN((link.pcoord_varying_comp_ofs != -1) ? 252 link.pcoord_varying_comp_ofs : 0x7f); 253 254 cs->writes_z = fs->ps_depth_out_reg >= 0; 255 cs->uses_discard = fs->uses_discard; 256 257 /* reference instruction memory */ 258 cs->vs_inst_mem_size = vs->code_size; 259 cs->VS_INST_MEM = vs->code; 260 261 cs->ps_inst_mem_size = fs->code_size; 262 cs->PS_INST_MEM = fs->code; 263 264 if (vs->needs_icache || fs->needs_icache) { 265 /* If either of the shaders needs ICACHE, we use it for both. It is 266 * either switched on or off for the entire shader processor. 267 */ 268 if (!etna_icache_upload_shader(ctx, vs) || 269 !etna_icache_upload_shader(ctx, fs)) { 270 assert(0); 271 return false; 272 } 273 274 cs->VS_INST_ADDR.bo = vs->bo; 275 cs->VS_INST_ADDR.offset = 0; 276 cs->VS_INST_ADDR.flags = ETNA_RELOC_READ; 277 cs->PS_INST_ADDR.bo = fs->bo; 278 cs->PS_INST_ADDR.offset = 0; 279 cs->PS_INST_ADDR.flags = ETNA_RELOC_READ; 280 } else { 281 /* clear relocs */ 282 memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR)); 283 memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR)); 284 } 285 286 return true; 287} 288 289bool 290etna_shader_link(struct etna_context *ctx) 291{ 292 if (!ctx->shader.vs || !ctx->shader.fs) 293 return false; 294 295 /* re-link vs and fs if needed */ 296 return etna_link_shaders(ctx, &ctx->shader_state, ctx->shader.vs, ctx->shader.fs); 297} 298 299void 300etna_destroy_shader(struct etna_shader_variant *shader) 301{ 302 assert(shader); 303 304 FREE(shader->code); 305 FREE(shader->uniforms.data); 306 FREE(shader->uniforms.contents); 307 FREE(shader); 308} 309 310static bool 311etna_shader_update_vs_inputs(struct compiled_shader_state *cs, 312 const struct etna_shader_variant *vs, 313 const struct compiled_vertex_elements_state *ves) 314{ 315 unsigned num_temps, cur_temp, num_vs_inputs; 316 317 if (!vs) 318 return false; 319 320 /* Number of vertex elements determines number of VS inputs. Otherwise, 321 * the GPU crashes. Allocate any unused vertex elements to VS temporary 322 * registers. */ 323 num_vs_inputs = MAX2(ves->num_elements, vs->infile.num_reg); 324 if (num_vs_inputs != ves->num_elements) { 325 BUG("Number of elements %u does not match the number of VS inputs %zu", 326 ves->num_elements, vs->infile.num_reg); 327 return false; 328 } 329 330 cur_temp = vs->num_temps; 331 num_temps = num_vs_inputs - vs->infile.num_reg + cur_temp; 332 333 cs->VS_INPUT_COUNT = VIVS_VS_INPUT_COUNT_COUNT(num_vs_inputs) | 334 VIVS_VS_INPUT_COUNT_UNK8(vs->input_count_unk8); 335 cs->VS_TEMP_REGISTER_CONTROL = 336 VIVS_VS_TEMP_REGISTER_CONTROL_NUM_TEMPS(num_temps); 337 338 /* vs inputs (attributes) */ 339 DEFINE_ETNA_BITARRAY(vs_input, 16, 8) = {0}; 340 for (int idx = 0; idx < num_vs_inputs; ++idx) { 341 if (idx < vs->infile.num_reg) 342 etna_bitarray_set(vs_input, 8, idx, vs->infile.reg[idx].reg); 343 else 344 etna_bitarray_set(vs_input, 8, idx, cur_temp++); 345 } 346 347 if (vs->vs_id_in_reg >= 0) { 348 cs->VS_INPUT_COUNT = VIVS_VS_INPUT_COUNT_COUNT(num_vs_inputs + 1) | 349 VIVS_VS_INPUT_COUNT_UNK8(vs->input_count_unk8) | 350 VIVS_VS_INPUT_COUNT_ID_ENABLE; 351 352 etna_bitarray_set(vs_input, 8, num_vs_inputs, vs->vs_id_in_reg); 353 354 cs->FE_HALTI5_ID_CONFIG = 355 VIVS_FE_HALTI5_ID_CONFIG_VERTEX_ID_ENABLE | 356 VIVS_FE_HALTI5_ID_CONFIG_INSTANCE_ID_ENABLE | 357 VIVS_FE_HALTI5_ID_CONFIG_VERTEX_ID_REG(vs->vs_id_in_reg * 4) | 358 VIVS_FE_HALTI5_ID_CONFIG_INSTANCE_ID_REG(vs->vs_id_in_reg * 4 + 1); 359 } 360 361 for (int idx = 0; idx < ARRAY_SIZE(cs->VS_INPUT); ++idx) 362 cs->VS_INPUT[idx] = vs_input[idx]; 363 364 return true; 365} 366 367static inline const char * 368etna_shader_stage(struct etna_shader_variant *shader) 369{ 370 switch (shader->stage) { 371 case MESA_SHADER_VERTEX: return "VERT"; 372 case MESA_SHADER_FRAGMENT: return "FRAG"; 373 case MESA_SHADER_COMPUTE: return "CL"; 374 default: 375 unreachable("invalid type"); 376 return NULL; 377 } 378} 379 380static void 381dump_shader_info(struct etna_shader_variant *v, struct pipe_debug_callback *debug) 382{ 383 if (!unlikely(etna_mesa_debug & ETNA_DBG_SHADERDB)) 384 return; 385 386 pipe_debug_message(debug, SHADER_INFO, 387 "%s shader: %u instructions, %u temps, " 388 "%u immediates, %u loops", 389 etna_shader_stage(v), 390 v->code_size, 391 v->num_temps, 392 v->uniforms.count, 393 v->num_loops); 394} 395 396bool 397etna_shader_update_vertex(struct etna_context *ctx) 398{ 399 return etna_shader_update_vs_inputs(&ctx->shader_state, ctx->shader.vs, 400 ctx->vertex_elements); 401} 402 403static struct etna_shader_variant * 404create_variant(struct etna_shader *shader, struct etna_shader_key key) 405{ 406 struct etna_shader_variant *v = CALLOC_STRUCT(etna_shader_variant); 407 int ret; 408 409 if (!v) 410 return NULL; 411 412 v->shader = shader; 413 v->key = key; 414 v->id = ++shader->variant_count; 415 416 if (etna_disk_cache_retrieve(shader->compiler, v)) 417 return v; 418 419 ret = etna_compile_shader(v); 420 if (!ret) { 421 debug_error("compile failed!"); 422 goto fail; 423 } 424 425 etna_disk_cache_store(shader->compiler, v); 426 427 return v; 428 429fail: 430 FREE(v); 431 return NULL; 432} 433 434struct etna_shader_variant * 435etna_shader_variant(struct etna_shader *shader, struct etna_shader_key key, 436 struct pipe_debug_callback *debug) 437{ 438 struct etna_shader_variant *v; 439 440 for (v = shader->variants; v; v = v->next) 441 if (etna_shader_key_equal(&key, &v->key)) 442 return v; 443 444 /* compile new variant if it doesn't exist already */ 445 v = create_variant(shader, key); 446 if (v) { 447 v->next = shader->variants; 448 shader->variants = v; 449 dump_shader_info(v, debug); 450 } 451 452 return v; 453} 454 455static void * 456etna_create_shader_state(struct pipe_context *pctx, 457 const struct pipe_shader_state *pss) 458{ 459 struct etna_context *ctx = etna_context(pctx); 460 struct etna_screen *screen = ctx->screen; 461 struct etna_compiler *compiler = screen->compiler; 462 struct etna_shader *shader = CALLOC_STRUCT(etna_shader); 463 464 if (!shader) 465 return NULL; 466 467 shader->id = p_atomic_inc_return(&compiler->shader_count); 468 shader->specs = &screen->specs; 469 shader->compiler = screen->compiler; 470 471 if (DBG_ENABLED(ETNA_DBG_NIR)) 472 shader->nir = (pss->type == PIPE_SHADER_IR_NIR) ? pss->ir.nir : 473 tgsi_to_nir(pss->tokens, pctx->screen, false); 474 else 475 shader->tokens = tgsi_dup_tokens(pss->tokens); 476 477 etna_disk_cache_init_shader_key(compiler, shader); 478 479 if (etna_mesa_debug & ETNA_DBG_SHADERDB) { 480 /* if shader-db run, create a standard variant immediately 481 * (as otherwise nothing will trigger the shader to be 482 * actually compiled). 483 */ 484 struct etna_shader_key key = {}; 485 etna_shader_variant(shader, key, &ctx->debug); 486 } 487 488 return shader; 489} 490 491static void 492etna_delete_shader_state(struct pipe_context *pctx, void *ss) 493{ 494 struct etna_shader *shader = ss; 495 struct etna_shader_variant *v, *t; 496 497 v = shader->variants; 498 while (v) { 499 t = v; 500 v = v->next; 501 if (t->bo) 502 etna_bo_del(t->bo); 503 504 etna_destroy_shader(t); 505 } 506 507 tgsi_free_tokens(shader->tokens); 508 ralloc_free(shader->nir); 509 FREE(shader); 510} 511 512static void 513etna_bind_fs_state(struct pipe_context *pctx, void *hwcso) 514{ 515 struct etna_context *ctx = etna_context(pctx); 516 517 ctx->shader.bind_fs = hwcso; 518 ctx->dirty |= ETNA_DIRTY_SHADER; 519} 520 521static void 522etna_bind_vs_state(struct pipe_context *pctx, void *hwcso) 523{ 524 struct etna_context *ctx = etna_context(pctx); 525 526 ctx->shader.bind_vs = hwcso; 527 ctx->dirty |= ETNA_DIRTY_SHADER; 528} 529 530void 531etna_shader_init(struct pipe_context *pctx) 532{ 533 pctx->create_fs_state = etna_create_shader_state; 534 pctx->bind_fs_state = etna_bind_fs_state; 535 pctx->delete_fs_state = etna_delete_shader_state; 536 pctx->create_vs_state = etna_create_shader_state; 537 pctx->bind_vs_state = etna_bind_vs_state; 538 pctx->delete_vs_state = etna_delete_shader_state; 539} 540