1/* 2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include "util/u_string.h" 28#include "util/u_memory.h" 29#include "util/u_format.h" 30 31#include "drm/freedreno_drmif.h" 32 33#include "ir3_shader.h" 34#include "ir3_compiler.h" 35#include "ir3_nir.h" 36 37int 38ir3_glsl_type_size(const struct glsl_type *type, bool bindless) 39{ 40 return glsl_count_attribute_slots(type, false); 41} 42 43static void 44delete_variant(struct ir3_shader_variant *v) 45{ 46 if (v->ir) 47 ir3_destroy(v->ir); 48 if (v->bo) 49 fd_bo_del(v->bo); 50 if (v->immediates) 51 free(v->immediates); 52 free(v); 53} 54 55/* for vertex shader, the inputs are loaded into registers before the shader 56 * is executed, so max_regs from the shader instructions might not properly 57 * reflect the # of registers actually used, especially in case passthrough 58 * varyings. 59 * 60 * Likewise, for fragment shader, we can have some regs which are passed 61 * input values but never touched by the resulting shader (ie. as result 62 * of dead code elimination or simply because we don't know how to turn 63 * the reg off. 64 */ 65static void 66fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id) 67{ 68 unsigned i; 69 70 for (i = 0; i < v->inputs_count; i++) { 71 /* skip frag inputs fetch via bary.f since their reg's are 72 * not written by gpu before shader starts (and in fact the 73 * regid's might not even be valid) 74 */ 75 if (v->inputs[i].bary) 76 continue; 77 78 /* ignore high regs that are global to all threads in a warp 79 * (they exist by default) (a5xx+) 80 */ 81 if (v->inputs[i].regid >= regid(48,0)) 82 continue; 83 84 if (v->inputs[i].compmask) { 85 unsigned n = util_last_bit(v->inputs[i].compmask) - 1; 86 int32_t regid = v->inputs[i].regid + n; 87 if (v->inputs[i].half) { 88 if (gpu_id < 500) { 89 v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2); 90 } else { 91 v->info.max_reg = MAX2(v->info.max_reg, regid >> 3); 92 } 93 } else { 94 v->info.max_reg = MAX2(v->info.max_reg, regid >> 2); 95 } 96 } 97 } 98 99 for (i = 0; i < v->outputs_count; i++) { 100 int32_t regid = v->outputs[i].regid + 3; 101 if (v->outputs[i].half) { 102 if (gpu_id < 500) { 103 v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2); 104 } else { 105 v->info.max_reg = MAX2(v->info.max_reg, regid >> 3); 106 } 107 } else { 108 v->info.max_reg = MAX2(v->info.max_reg, regid >> 2); 109 } 110 } 111} 112 113/* wrapper for ir3_assemble() which does some info fixup based on 114 * shader state. Non-static since used by ir3_cmdline too. 115 */ 116void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id) 117{ 118 void *bin; 119 120 bin = ir3_assemble(v->ir, &v->info, gpu_id); 121 if (!bin) 122 return NULL; 123 124 if (gpu_id >= 400) { 125 v->instrlen = v->info.sizedwords / (2 * 16); 126 } else { 127 v->instrlen = v->info.sizedwords / (2 * 4); 128 } 129 130 /* NOTE: if relative addressing is used, we set constlen in 131 * the compiler (to worst-case value) since we don't know in 132 * the assembler what the max addr reg value can be: 133 */ 134 v->constlen = MAX2(v->constlen, v->info.max_const + 1); 135 136 fixup_regfootprint(v, gpu_id); 137 138 return bin; 139} 140 141static void 142assemble_variant(struct ir3_shader_variant *v) 143{ 144 struct ir3_compiler *compiler = v->shader->compiler; 145 struct shader_info *info = &v->shader->nir->info; 146 uint32_t gpu_id = compiler->gpu_id; 147 uint32_t sz, *bin; 148 149 bin = ir3_shader_assemble(v, gpu_id); 150 sz = v->info.sizedwords * 4; 151 152 v->bo = fd_bo_new(compiler->dev, sz, 153 DRM_FREEDRENO_GEM_CACHE_WCOMBINE | 154 DRM_FREEDRENO_GEM_TYPE_KMEM, 155 "%s:%s", ir3_shader_stage(v->shader), info->name); 156 157 memcpy(fd_bo_map(v->bo), bin, sz); 158 159 if (ir3_shader_debug & IR3_DBG_DISASM) { 160 struct ir3_shader_key key = v->key; 161 printf("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, 162 v->binning_pass, key.color_two_side, key.half_precision); 163 ir3_shader_disasm(v, bin, stdout); 164 } 165 166 if (shader_debug_enabled(v->shader->type)) { 167 fprintf(stderr, "Native code for unnamed %s shader %s:\n", 168 _mesa_shader_stage_to_string(v->shader->type), 169 v->shader->nir->info.name); 170 if (v->shader->type == MESA_SHADER_FRAGMENT) 171 fprintf(stderr, "SIMD0\n"); 172 ir3_shader_disasm(v, bin, stderr); 173 } 174 175 free(bin); 176 177 /* no need to keep the ir around beyond this point: */ 178 ir3_destroy(v->ir); 179 v->ir = NULL; 180} 181 182static struct ir3_shader_variant * 183create_variant(struct ir3_shader *shader, struct ir3_shader_key *key, 184 bool binning_pass) 185{ 186 struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant); 187 int ret; 188 189 if (!v) 190 return NULL; 191 192 v->id = ++shader->variant_count; 193 v->shader = shader; 194 v->binning_pass = binning_pass; 195 v->key = *key; 196 v->type = shader->type; 197 198 ret = ir3_compile_shader_nir(shader->compiler, v); 199 if (ret) { 200 debug_error("compile failed!"); 201 goto fail; 202 } 203 204 assemble_variant(v); 205 if (!v->bo) { 206 debug_error("assemble failed!"); 207 goto fail; 208 } 209 210 return v; 211 212fail: 213 delete_variant(v); 214 return NULL; 215} 216 217static inline struct ir3_shader_variant * 218shader_variant(struct ir3_shader *shader, struct ir3_shader_key *key, 219 bool *created) 220{ 221 struct ir3_shader_variant *v; 222 223 *created = false; 224 225 for (v = shader->variants; v; v = v->next) 226 if (ir3_shader_key_equal(key, &v->key)) 227 return v; 228 229 /* compile new variant if it doesn't exist already: */ 230 v = create_variant(shader, key, false); 231 if (v) { 232 v->next = shader->variants; 233 shader->variants = v; 234 *created = true; 235 } 236 237 return v; 238} 239 240struct ir3_shader_variant * 241ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key, 242 bool binning_pass, bool *created) 243{ 244 struct ir3_shader_variant *v = 245 shader_variant(shader, key, created); 246 247 if (v && binning_pass) { 248 if (!v->binning) 249 v->binning = create_variant(shader, key, true); 250 return v->binning; 251 } 252 253 return v; 254} 255 256void 257ir3_shader_destroy(struct ir3_shader *shader) 258{ 259 struct ir3_shader_variant *v, *t; 260 for (v = shader->variants; v; ) { 261 t = v; 262 v = v->next; 263 delete_variant(t); 264 } 265 ralloc_free(shader->nir); 266 free(shader); 267} 268 269struct ir3_shader * 270ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir) 271{ 272 struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); 273 274 shader->compiler = compiler; 275 shader->id = ++shader->compiler->shader_count; 276 shader->type = nir->info.stage; 277 278 NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 279 (nir_lower_io_options)0); 280 281 if (nir->info.stage == MESA_SHADER_FRAGMENT) { 282 /* NOTE: lower load_barycentric_at_sample first, since it 283 * produces load_barycentric_at_offset: 284 */ 285 NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample); 286 NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset); 287 288 NIR_PASS_V(nir, ir3_nir_move_varying_inputs); 289 } 290 291 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 292 293 /* do first pass optimization, ignoring the key: */ 294 shader->nir = ir3_optimize_nir(shader, nir, NULL); 295 if (ir3_shader_debug & IR3_DBG_DISASM) { 296 printf("dump nir%d: type=%d", shader->id, shader->type); 297 nir_print_shader(shader->nir, stdout); 298 } 299 300 return shader; 301} 302 303static void dump_reg(FILE *out, const char *name, uint32_t r) 304{ 305 if (r != regid(63,0)) 306 fprintf(out, "; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]); 307} 308 309static void dump_output(FILE *out, struct ir3_shader_variant *so, 310 unsigned slot, const char *name) 311{ 312 uint32_t regid; 313 regid = ir3_find_output_regid(so, slot); 314 dump_reg(out, name, regid); 315} 316 317void 318ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) 319{ 320 struct ir3 *ir = so->ir; 321 struct ir3_register *reg; 322 const char *type = ir3_shader_stage(so->shader); 323 uint8_t regid; 324 unsigned i; 325 326 for (i = 0; i < ir->ninputs; i++) { 327 if (!ir->inputs[i]) { 328 fprintf(out, "; in%d unused\n", i); 329 continue; 330 } 331 reg = ir->inputs[i]->regs[0]; 332 regid = reg->num; 333 fprintf(out, "@in(%sr%d.%c)\tin%d\n", 334 (reg->flags & IR3_REG_HALF) ? "h" : "", 335 (regid >> 2), "xyzw"[regid & 0x3], i); 336 } 337 338 for (i = 0; i < ir->noutputs; i++) { 339 if (!ir->outputs[i]) { 340 fprintf(out, "; out%d unused\n", i); 341 continue; 342 } 343 /* kill shows up as a virtual output.. skip it! */ 344 if (is_kill(ir->outputs[i])) 345 continue; 346 reg = ir->outputs[i]->regs[0]; 347 regid = reg->num; 348 fprintf(out, "@out(%sr%d.%c)\tout%d\n", 349 (reg->flags & IR3_REG_HALF) ? "h" : "", 350 (regid >> 2), "xyzw"[regid & 0x3], i); 351 } 352 353 for (i = 0; i < so->immediates_count; i++) { 354 fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i); 355 fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", 356 so->immediates[i].val[0], 357 so->immediates[i].val[1], 358 so->immediates[i].val[2], 359 so->immediates[i].val[3]); 360 } 361 362 disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id); 363 364 switch (so->type) { 365 case MESA_SHADER_VERTEX: 366 fprintf(out, "; %s: outputs:", type); 367 for (i = 0; i < so->outputs_count; i++) { 368 uint8_t regid = so->outputs[i].regid; 369 fprintf(out, " r%d.%c (%s)", 370 (regid >> 2), "xyzw"[regid & 0x3], 371 gl_varying_slot_name(so->outputs[i].slot)); 372 } 373 fprintf(out, "\n"); 374 fprintf(out, "; %s: inputs:", type); 375 for (i = 0; i < so->inputs_count; i++) { 376 uint8_t regid = so->inputs[i].regid; 377 fprintf(out, " r%d.%c (cm=%x,il=%u,b=%u)", 378 (regid >> 2), "xyzw"[regid & 0x3], 379 so->inputs[i].compmask, 380 so->inputs[i].inloc, 381 so->inputs[i].bary); 382 } 383 fprintf(out, "\n"); 384 break; 385 case MESA_SHADER_FRAGMENT: 386 fprintf(out, "; %s: outputs:", type); 387 for (i = 0; i < so->outputs_count; i++) { 388 uint8_t regid = so->outputs[i].regid; 389 fprintf(out, " r%d.%c (%s)", 390 (regid >> 2), "xyzw"[regid & 0x3], 391 gl_frag_result_name(so->outputs[i].slot)); 392 } 393 fprintf(out, "\n"); 394 fprintf(out, "; %s: inputs:", type); 395 for (i = 0; i < so->inputs_count; i++) { 396 uint8_t regid = so->inputs[i].regid; 397 fprintf(out, " r%d.%c (%s,cm=%x,il=%u,b=%u)", 398 (regid >> 2), "xyzw"[regid & 0x3], 399 gl_varying_slot_name(so->inputs[i].slot), 400 so->inputs[i].compmask, 401 so->inputs[i].inloc, 402 so->inputs[i].bary); 403 } 404 fprintf(out, "\n"); 405 break; 406 default: 407 /* TODO */ 408 break; 409 } 410 411 /* print generic shader info: */ 412 fprintf(out, "; %s prog %d/%d: %u instructions, %d half, %d full\n", 413 type, so->shader->id, so->id, 414 so->info.instrs_count, 415 so->info.max_half_reg + 1, 416 so->info.max_reg + 1); 417 418 fprintf(out, "; %d const, %u constlen\n", 419 so->info.max_const + 1, 420 so->constlen); 421 422 fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy); 423 424 fprintf(out, "; max_sun=%u\n", ir->max_sun); 425 426 /* print shader type specific info: */ 427 switch (so->type) { 428 case MESA_SHADER_VERTEX: 429 dump_output(out, so, VARYING_SLOT_POS, "pos"); 430 dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); 431 break; 432 case MESA_SHADER_FRAGMENT: 433 dump_reg(out, "pos (ij_pixel)", 434 ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PIXEL)); 435 dump_reg(out, "pos (ij_centroid)", 436 ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_CENTROID)); 437 dump_reg(out, "pos (ij_size)", 438 ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_SIZE)); 439 dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); 440 if (so->color0_mrt) { 441 dump_output(out, so, FRAG_RESULT_COLOR, "color"); 442 } else { 443 dump_output(out, so, FRAG_RESULT_DATA0, "data0"); 444 dump_output(out, so, FRAG_RESULT_DATA1, "data1"); 445 dump_output(out, so, FRAG_RESULT_DATA2, "data2"); 446 dump_output(out, so, FRAG_RESULT_DATA3, "data3"); 447 dump_output(out, so, FRAG_RESULT_DATA4, "data4"); 448 dump_output(out, so, FRAG_RESULT_DATA5, "data5"); 449 dump_output(out, so, FRAG_RESULT_DATA6, "data6"); 450 dump_output(out, so, FRAG_RESULT_DATA7, "data7"); 451 } 452 /* these two are hard-coded since we don't know how to 453 * program them to anything but all 0's... 454 */ 455 if (so->frag_coord) 456 fprintf(out, "; fragcoord: r0.x\n"); 457 if (so->frag_face) 458 fprintf(out, "; fragface: hr0.x\n"); 459 break; 460 default: 461 /* TODO */ 462 break; 463 } 464 465 fprintf(out, "\n"); 466} 467 468uint64_t 469ir3_shader_outputs(const struct ir3_shader *so) 470{ 471 return so->nir->info.outputs_written; 472} 473