/* prog_to_nir.c revision b8e80941 */
1/* 2 * Copyright © 2015 Intel Corporation 3 * Copyright © 2014-2015 Broadcom 4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 * IN THE SOFTWARE. 24 */ 25 26#include "compiler/nir/nir.h" 27#include "compiler/nir/nir_builder.h" 28#include "compiler/glsl/list.h" 29#include "main/imports.h" 30#include "main/mtypes.h" 31#include "util/ralloc.h" 32 33#include "prog_to_nir.h" 34#include "prog_instruction.h" 35#include "prog_parameter.h" 36#include "prog_print.h" 37#include "program.h" 38 39/** 40 * \file prog_to_nir.c 41 * 42 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily 43 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function 44 * vertex processing. Full GLSL support should use glsl_to_nir instead. 
45 */ 46 47struct ptn_compile { 48 const struct gl_program *prog; 49 nir_builder build; 50 bool error; 51 52 nir_variable *parameters; 53 nir_variable *input_vars[VARYING_SLOT_MAX]; 54 nir_variable *output_vars[VARYING_SLOT_MAX]; 55 nir_variable *sysval_vars[SYSTEM_VALUE_MAX]; 56 nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */ 57 nir_register **output_regs; 58 nir_register **temp_regs; 59 60 nir_register *addr_reg; 61}; 62 63#define SWIZ(X, Y, Z, W) \ 64 (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W } 65#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true) 66 67static nir_ssa_def * 68ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest) 69{ 70 nir_builder *b = &c->build; 71 72 nir_alu_src src; 73 memset(&src, 0, sizeof(src)); 74 75 if (dest->dest.is_ssa) 76 src.src = nir_src_for_ssa(&dest->dest.ssa); 77 else { 78 assert(!dest->dest.reg.indirect); 79 src.src = nir_src_for_reg(dest->dest.reg.reg); 80 src.src.reg.base_offset = dest->dest.reg.base_offset; 81 } 82 83 for (int i = 0; i < 4; i++) 84 src.swizzle[i] = i; 85 86 return nir_fmov_alu(b, src, 4); 87} 88 89static nir_alu_dest 90ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst) 91{ 92 nir_alu_dest dest; 93 94 memset(&dest, 0, sizeof(dest)); 95 96 switch (prog_dst->File) { 97 case PROGRAM_TEMPORARY: 98 dest.dest.reg.reg = c->temp_regs[prog_dst->Index]; 99 break; 100 case PROGRAM_OUTPUT: 101 dest.dest.reg.reg = c->output_regs[prog_dst->Index]; 102 break; 103 case PROGRAM_ADDRESS: 104 assert(prog_dst->Index == 0); 105 dest.dest.reg.reg = c->addr_reg; 106 break; 107 case PROGRAM_UNDEFINED: 108 break; 109 } 110 111 dest.write_mask = prog_dst->WriteMask; 112 dest.saturate = false; 113 114 assert(!prog_dst->RelAddr); 115 116 return dest; 117} 118 119static nir_ssa_def * 120ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) 121{ 122 nir_builder *b = &c->build; 123 nir_alu_src src; 124 125 
memset(&src, 0, sizeof(src)); 126 127 switch (prog_src->File) { 128 case PROGRAM_UNDEFINED: 129 return nir_imm_float(b, 0.0); 130 case PROGRAM_TEMPORARY: 131 assert(!prog_src->RelAddr && prog_src->Index >= 0); 132 src.src.reg.reg = c->temp_regs[prog_src->Index]; 133 break; 134 case PROGRAM_INPUT: { 135 /* ARB_vertex_program doesn't allow relative addressing on vertex 136 * attributes; ARB_fragment_program has no relative addressing at all. 137 */ 138 assert(!prog_src->RelAddr); 139 140 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX); 141 142 nir_variable *var = c->input_vars[prog_src->Index]; 143 src.src = nir_src_for_ssa(nir_load_var(b, var)); 144 break; 145 } 146 case PROGRAM_SYSTEM_VALUE: { 147 assert(!prog_src->RelAddr); 148 149 assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX); 150 151 nir_variable *var = c->sysval_vars[prog_src->Index]; 152 src.src = nir_src_for_ssa(nir_load_var(b, var)); 153 break; 154 } 155 case PROGRAM_STATE_VAR: 156 case PROGRAM_CONSTANT: { 157 /* We actually want to look at the type in the Parameters list for this, 158 * because it lets us upload constant builtin uniforms as actual 159 * constants. 160 */ 161 struct gl_program_parameter_list *plist = c->prog->Parameters; 162 gl_register_file file = prog_src->RelAddr ? 
prog_src->File : 163 plist->Parameters[prog_src->Index].Type; 164 165 switch (file) { 166 case PROGRAM_CONSTANT: 167 if ((c->prog->arb.IndirectRegisterFiles & 168 (1 << PROGRAM_CONSTANT)) == 0) { 169 unsigned pvo = plist->ParameterValueOffset[prog_src->Index]; 170 float *v = (float *) plist->ParameterValues + pvo; 171 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3])); 172 break; 173 } 174 /* FALLTHROUGH */ 175 case PROGRAM_STATE_VAR: { 176 assert(c->parameters != NULL); 177 178 nir_deref_instr *deref = nir_build_deref_var(b, c->parameters); 179 180 nir_ssa_def *index = nir_imm_int(b, prog_src->Index); 181 if (prog_src->RelAddr) 182 index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg)); 183 deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0)); 184 185 src.src = nir_src_for_ssa(nir_load_deref(b, deref)); 186 break; 187 } 188 default: 189 fprintf(stderr, "bad uniform src register file: %s (%d)\n", 190 _mesa_register_file_name(file), file); 191 abort(); 192 } 193 break; 194 } 195 default: 196 fprintf(stderr, "unknown src register file: %s (%d)\n", 197 _mesa_register_file_name(prog_src->File), prog_src->File); 198 abort(); 199 } 200 201 nir_ssa_def *def; 202 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) && 203 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) { 204 /* The simple non-SWZ case. */ 205 for (int i = 0; i < 4; i++) 206 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i); 207 208 def = nir_fmov_alu(b, src, 4); 209 210 if (prog_src->Negate) 211 def = nir_fneg(b, def); 212 } else { 213 /* The SWZ instruction allows per-component zero/one swizzles, and also 214 * per-component negation. 
215 */ 216 nir_ssa_def *chans[4]; 217 for (int i = 0; i < 4; i++) { 218 int swizzle = GET_SWZ(prog_src->Swizzle, i); 219 if (swizzle == SWIZZLE_ZERO) { 220 chans[i] = nir_imm_float(b, 0.0); 221 } else if (swizzle == SWIZZLE_ONE) { 222 chans[i] = nir_imm_float(b, 1.0); 223 } else { 224 assert(swizzle != SWIZZLE_NIL); 225 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov); 226 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL); 227 mov->dest.write_mask = 0x1; 228 mov->src[0] = src; 229 mov->src[0].swizzle[0] = swizzle; 230 nir_builder_instr_insert(b, &mov->instr); 231 232 chans[i] = &mov->dest.dest.ssa; 233 } 234 235 if (prog_src->Negate & (1 << i)) 236 chans[i] = nir_fneg(b, chans[i]); 237 } 238 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]); 239 } 240 241 return def; 242} 243 244static void 245ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) 246{ 247 unsigned num_srcs = nir_op_infos[op].num_inputs; 248 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); 249 unsigned i; 250 251 for (i = 0; i < num_srcs; i++) 252 instr->src[i].src = nir_src_for_ssa(src[i]); 253 254 instr->dest = dest; 255 nir_builder_instr_insert(b, &instr->instr); 256} 257 258static void 259ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest, 260 nir_ssa_def *def, unsigned write_mask) 261{ 262 if (!(dest.write_mask & write_mask)) 263 return; 264 265 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov); 266 if (!mov) 267 return; 268 269 mov->dest = dest; 270 mov->dest.write_mask &= write_mask; 271 mov->src[0].src = nir_src_for_ssa(def); 272 for (unsigned i = def->num_components; i < 4; i++) 273 mov->src[0].swizzle[i] = def->num_components - 1; 274 nir_builder_instr_insert(b, &mov->instr); 275} 276 277static void 278ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def) 279{ 280 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW); 281} 282 283static void 284ptn_arl(nir_builder *b, nir_alu_dest dest, 
nir_ssa_def **src) 285{ 286 ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0]))); 287} 288 289/* EXP - Approximate Exponential Base 2 290 * dst.x = 2^{\lfloor src.x\rfloor} 291 * dst.y = src.x - \lfloor src.x\rfloor 292 * dst.z = 2^{src.x} 293 * dst.w = 1.0 294 */ 295static void 296ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 297{ 298 nir_ssa_def *srcx = ptn_channel(b, src[0], X); 299 300 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X); 301 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y); 302 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z); 303 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W); 304} 305 306/* LOG - Approximate Logarithm Base 2 307 * dst.x = \lfloor\log_2{|src.x|}\rfloor 308 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}} 309 * dst.z = \log_2{|src.x|} 310 * dst.w = 1.0 311 */ 312static void 313ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 314{ 315 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X)); 316 nir_ssa_def *log2 = nir_flog2(b, abs_srcx); 317 nir_ssa_def *floor_log2 = nir_ffloor(b, log2); 318 319 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X); 320 ptn_move_dest_masked(b, dest, 321 nir_fmul(b, abs_srcx, 322 nir_fexp2(b, nir_fneg(b, floor_log2))), 323 WRITEMASK_Y); 324 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z); 325 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W); 326} 327 328/* DST - Distance Vector 329 * dst.x = 1.0 330 * dst.y = src0.y \times src1.y 331 * dst.z = src0.z 332 * dst.w = src1.w 333 */ 334static void 335ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 336{ 337 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X); 338 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y); 339 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z); 340 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), 
WRITEMASK_W); 341} 342 343/* LIT - Light Coefficients 344 * dst.x = 1.0 345 * dst.y = max(src.x, 0.0) 346 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 347 * dst.w = 1.0 348 */ 349static void 350ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 351{ 352 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW); 353 354 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X), 355 nir_imm_float(b, 0.0)), WRITEMASK_Y); 356 357 if (dest.write_mask & WRITEMASK_Z) { 358 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y); 359 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W), 360 nir_imm_float(b, 128.0)), 361 nir_imm_float(b, -128.0)); 362 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)), 363 wclamp); 364 365 nir_ssa_def *z; 366 if (b->shader->options->native_integers) { 367 z = nir_bcsel(b, 368 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)), 369 nir_imm_float(b, 0.0), 370 pow); 371 } else { 372 z = nir_fcsel(b, 373 nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)), 374 nir_imm_float(b, 0.0), 375 pow); 376 } 377 378 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z); 379 } 380} 381 382/* SCS - Sine Cosine 383 * dst.x = \cos{src.x} 384 * dst.y = \sin{src.x} 385 * dst.z = 0.0 386 * dst.w = 1.0 387 */ 388static void 389ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 390{ 391 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)), 392 WRITEMASK_X); 393 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)), 394 WRITEMASK_Y); 395 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z); 396 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W); 397} 398 399/** 400 * Emit SLT. For platforms with integers, prefer b2f(flt(...)). 
401 */ 402static void 403ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 404{ 405 if (b->shader->options->native_integers) { 406 ptn_move_dest(b, dest, nir_b2f32(b, nir_flt(b, src[0], src[1]))); 407 } else { 408 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1])); 409 } 410} 411 412/** 413 * Emit SGE. For platforms with integers, prefer b2f(fge(...)). 414 */ 415static void 416ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 417{ 418 if (b->shader->options->native_integers) { 419 ptn_move_dest(b, dest, nir_b2f32(b, nir_fge(b, src[0], src[1]))); 420 } else { 421 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1])); 422 } 423} 424 425static void 426ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 427{ 428 ptn_move_dest_masked(b, dest, 429 nir_fsub(b, 430 nir_fmul(b, 431 nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true), 432 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)), 433 nir_fmul(b, 434 nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true), 435 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))), 436 WRITEMASK_XYZ); 437 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W); 438} 439 440static void 441ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 442{ 443 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1])); 444} 445 446static void 447ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 448{ 449 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1])); 450} 451 452static void 453ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 454{ 455 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1])); 456} 457 458static void 459ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 460{ 461 ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1])); 462} 463 464static void 465ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 466{ 467 if (b->shader->options->native_integers) { 468 ptn_move_dest(b, dest, nir_bcsel(b, 469 nir_flt(b, src[0], nir_imm_float(b, 0.0)), 470 src[1], src[2])); 
471 } else { 472 ptn_move_dest(b, dest, nir_fcsel(b, 473 nir_slt(b, src[0], nir_imm_float(b, 0.0)), 474 src[1], src[2])); 475 } 476} 477 478static void 479ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) 480{ 481 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0])); 482} 483 484static void 485ptn_kil(nir_builder *b, nir_ssa_def **src) 486{ 487 nir_ssa_def *cmp = b->shader->options->native_integers ? 488 nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) : 489 nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0)); 490 491 nir_intrinsic_instr *discard = 492 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if); 493 discard->src[0] = nir_src_for_ssa(cmp); 494 nir_builder_instr_insert(b, &discard->instr); 495} 496 497static void 498ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src, 499 struct prog_instruction *prog_inst) 500{ 501 nir_builder *b = &c->build; 502 nir_tex_instr *instr; 503 nir_texop op; 504 unsigned num_srcs; 505 506 switch (prog_inst->Opcode) { 507 case OPCODE_TEX: 508 op = nir_texop_tex; 509 num_srcs = 1; 510 break; 511 case OPCODE_TXB: 512 op = nir_texop_txb; 513 num_srcs = 2; 514 break; 515 case OPCODE_TXD: 516 op = nir_texop_txd; 517 num_srcs = 3; 518 break; 519 case OPCODE_TXL: 520 op = nir_texop_txl; 521 num_srcs = 2; 522 break; 523 case OPCODE_TXP: 524 op = nir_texop_tex; 525 num_srcs = 2; 526 break; 527 default: 528 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode); 529 abort(); 530 } 531 532 /* Deref sources */ 533 num_srcs += 2; 534 535 if (prog_inst->TexShadow) 536 num_srcs++; 537 538 instr = nir_tex_instr_create(b->shader, num_srcs); 539 instr->op = op; 540 instr->dest_type = nir_type_float; 541 instr->is_shadow = prog_inst->TexShadow; 542 543 switch (prog_inst->TexSrcTarget) { 544 case TEXTURE_1D_INDEX: 545 instr->sampler_dim = GLSL_SAMPLER_DIM_1D; 546 break; 547 case TEXTURE_2D_INDEX: 548 instr->sampler_dim = GLSL_SAMPLER_DIM_2D; 549 break; 550 case 
TEXTURE_3D_INDEX: 551 instr->sampler_dim = GLSL_SAMPLER_DIM_3D; 552 break; 553 case TEXTURE_CUBE_INDEX: 554 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE; 555 break; 556 case TEXTURE_RECT_INDEX: 557 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT; 558 break; 559 default: 560 fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget); 561 abort(); 562 } 563 564 switch (instr->sampler_dim) { 565 case GLSL_SAMPLER_DIM_1D: 566 case GLSL_SAMPLER_DIM_BUF: 567 instr->coord_components = 1; 568 break; 569 case GLSL_SAMPLER_DIM_2D: 570 case GLSL_SAMPLER_DIM_RECT: 571 case GLSL_SAMPLER_DIM_EXTERNAL: 572 case GLSL_SAMPLER_DIM_MS: 573 instr->coord_components = 2; 574 break; 575 case GLSL_SAMPLER_DIM_3D: 576 case GLSL_SAMPLER_DIM_CUBE: 577 instr->coord_components = 3; 578 break; 579 case GLSL_SAMPLER_DIM_SUBPASS: 580 case GLSL_SAMPLER_DIM_SUBPASS_MS: 581 unreachable("can't reach"); 582 } 583 584 nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit]; 585 if (!var) { 586 const struct glsl_type *type = 587 glsl_sampler_type(instr->sampler_dim, false, false, GLSL_TYPE_FLOAT); 588 var = nir_variable_create(b->shader, nir_var_uniform, type, "sampler"); 589 var->data.binding = prog_inst->TexSrcUnit; 590 var->data.explicit_binding = true; 591 c->sampler_vars[prog_inst->TexSrcUnit] = var; 592 } 593 594 nir_deref_instr *deref = nir_build_deref_var(b, var); 595 596 unsigned src_number = 0; 597 598 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa); 599 instr->src[src_number].src_type = nir_tex_src_texture_deref; 600 src_number++; 601 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa); 602 instr->src[src_number].src_type = nir_tex_src_sampler_deref; 603 src_number++; 604 605 instr->src[src_number].src = 606 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W), 607 instr->coord_components, true)); 608 instr->src[src_number].src_type = nir_tex_src_coord; 609 src_number++; 610 611 if (prog_inst->Opcode == OPCODE_TXP) { 612 instr->src[src_number].src = 
nir_src_for_ssa(ptn_channel(b, src[0], W)); 613 instr->src[src_number].src_type = nir_tex_src_projector; 614 src_number++; 615 } 616 617 if (prog_inst->Opcode == OPCODE_TXB) { 618 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W)); 619 instr->src[src_number].src_type = nir_tex_src_bias; 620 src_number++; 621 } 622 623 if (prog_inst->Opcode == OPCODE_TXL) { 624 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W)); 625 instr->src[src_number].src_type = nir_tex_src_lod; 626 src_number++; 627 } 628 629 if (instr->is_shadow) { 630 if (instr->coord_components < 3) 631 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z)); 632 else 633 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W)); 634 635 instr->src[src_number].src_type = nir_tex_src_comparator; 636 src_number++; 637 } 638 639 assert(src_number == num_srcs); 640 641 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL); 642 nir_builder_instr_insert(b, &instr->instr); 643 644 /* Resolve the writemask on the texture op. 
*/ 645 ptn_move_dest(b, dest, &instr->dest.ssa); 646} 647 648static const nir_op op_trans[MAX_OPCODE] = { 649 [OPCODE_NOP] = 0, 650 [OPCODE_ABS] = nir_op_fabs, 651 [OPCODE_ADD] = nir_op_fadd, 652 [OPCODE_ARL] = 0, 653 [OPCODE_CMP] = 0, 654 [OPCODE_COS] = 0, 655 [OPCODE_DDX] = nir_op_fddx, 656 [OPCODE_DDY] = nir_op_fddy, 657 [OPCODE_DP2] = 0, 658 [OPCODE_DP3] = 0, 659 [OPCODE_DP4] = 0, 660 [OPCODE_DPH] = 0, 661 [OPCODE_DST] = 0, 662 [OPCODE_END] = 0, 663 [OPCODE_EX2] = 0, 664 [OPCODE_EXP] = 0, 665 [OPCODE_FLR] = nir_op_ffloor, 666 [OPCODE_FRC] = nir_op_ffract, 667 [OPCODE_LG2] = 0, 668 [OPCODE_LIT] = 0, 669 [OPCODE_LOG] = 0, 670 [OPCODE_LRP] = 0, 671 [OPCODE_MAD] = 0, 672 [OPCODE_MAX] = nir_op_fmax, 673 [OPCODE_MIN] = nir_op_fmin, 674 [OPCODE_MOV] = nir_op_fmov, 675 [OPCODE_MUL] = nir_op_fmul, 676 [OPCODE_POW] = 0, 677 [OPCODE_RCP] = 0, 678 679 [OPCODE_RSQ] = 0, 680 [OPCODE_SCS] = 0, 681 [OPCODE_SGE] = 0, 682 [OPCODE_SIN] = 0, 683 [OPCODE_SLT] = 0, 684 [OPCODE_SSG] = nir_op_fsign, 685 [OPCODE_SUB] = nir_op_fsub, 686 [OPCODE_SWZ] = 0, 687 [OPCODE_TEX] = 0, 688 [OPCODE_TRUNC] = nir_op_ftrunc, 689 [OPCODE_TXB] = 0, 690 [OPCODE_TXD] = 0, 691 [OPCODE_TXL] = 0, 692 [OPCODE_TXP] = 0, 693 [OPCODE_XPD] = 0, 694}; 695 696static void 697ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst) 698{ 699 nir_builder *b = &c->build; 700 unsigned i; 701 const unsigned op = prog_inst->Opcode; 702 703 if (op == OPCODE_END) 704 return; 705 706 nir_ssa_def *src[3]; 707 for (i = 0; i < 3; i++) { 708 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]); 709 } 710 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg); 711 if (c->error) 712 return; 713 714 switch (op) { 715 case OPCODE_RSQ: 716 ptn_move_dest(b, dest, 717 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X)))); 718 break; 719 720 case OPCODE_RCP: 721 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X))); 722 break; 723 724 case OPCODE_EX2: 725 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, 
src[0], X))); 726 break; 727 728 case OPCODE_LG2: 729 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X))); 730 break; 731 732 case OPCODE_POW: 733 ptn_move_dest(b, dest, nir_fpow(b, 734 ptn_channel(b, src[0], X), 735 ptn_channel(b, src[1], X))); 736 break; 737 738 case OPCODE_COS: 739 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X))); 740 break; 741 742 case OPCODE_SIN: 743 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X))); 744 break; 745 746 case OPCODE_ARL: 747 ptn_arl(b, dest, src); 748 break; 749 750 case OPCODE_EXP: 751 ptn_exp(b, dest, src); 752 break; 753 754 case OPCODE_LOG: 755 ptn_log(b, dest, src); 756 break; 757 758 case OPCODE_LRP: 759 ptn_lrp(b, dest, src); 760 break; 761 762 case OPCODE_MAD: 763 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2])); 764 break; 765 766 case OPCODE_DST: 767 ptn_dst(b, dest, src); 768 break; 769 770 case OPCODE_LIT: 771 ptn_lit(b, dest, src); 772 break; 773 774 case OPCODE_XPD: 775 ptn_xpd(b, dest, src); 776 break; 777 778 case OPCODE_DP2: 779 ptn_dp2(b, dest, src); 780 break; 781 782 case OPCODE_DP3: 783 ptn_dp3(b, dest, src); 784 break; 785 786 case OPCODE_DP4: 787 ptn_dp4(b, dest, src); 788 break; 789 790 case OPCODE_DPH: 791 ptn_dph(b, dest, src); 792 break; 793 794 case OPCODE_KIL: 795 ptn_kil(b, src); 796 break; 797 798 case OPCODE_CMP: 799 ptn_cmp(b, dest, src); 800 break; 801 802 case OPCODE_SCS: 803 ptn_scs(b, dest, src); 804 break; 805 806 case OPCODE_SLT: 807 ptn_slt(b, dest, src); 808 break; 809 810 case OPCODE_SGE: 811 ptn_sge(b, dest, src); 812 break; 813 814 case OPCODE_TEX: 815 case OPCODE_TXB: 816 case OPCODE_TXD: 817 case OPCODE_TXL: 818 case OPCODE_TXP: 819 ptn_tex(c, dest, src, prog_inst); 820 break; 821 822 case OPCODE_SWZ: 823 /* Extended swizzles were already handled in ptn_get_src(). 
*/ 824 ptn_alu(b, nir_op_fmov, dest, src); 825 break; 826 827 case OPCODE_NOP: 828 break; 829 830 default: 831 if (op_trans[op] != 0) { 832 ptn_alu(b, op_trans[op], dest, src); 833 } else { 834 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op)); 835 abort(); 836 } 837 break; 838 } 839 840 if (prog_inst->Saturate) { 841 assert(prog_inst->Saturate); 842 assert(!dest.dest.is_ssa); 843 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest))); 844 } 845} 846 847/** 848 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output 849 * variables at the end of the shader. 850 * 851 * We don't generate these incrementally as the PROGRAM_OUTPUT values are 852 * written, because there's no output load intrinsic, which means we couldn't 853 * handle writemasks. 854 */ 855static void 856ptn_add_output_stores(struct ptn_compile *c) 857{ 858 nir_builder *b = &c->build; 859 860 nir_foreach_variable(var, &b->shader->outputs) { 861 nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]); 862 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 863 var->data.location == FRAG_RESULT_DEPTH) { 864 /* result.depth has this strange convention of being the .z component of 865 * a vec4 with undefined .xyw components. We resolve it to a scalar, to 866 * match GLSL's gl_FragDepth and the expectations of most backends. 867 */ 868 src = nir_channel(b, src, 2); 869 } 870 if (c->prog->Target == GL_VERTEX_PROGRAM_ARB && 871 var->data.location == VARYING_SLOT_FOGC) { 872 /* result.fogcoord is a single component value */ 873 src = nir_channel(b, src, 0); 874 } 875 unsigned num_components = glsl_get_vector_elements(var->type); 876 nir_store_var(b, var, src, (1 << num_components) - 1); 877 } 878} 879 880static void 881setup_registers_and_variables(struct ptn_compile *c) 882{ 883 nir_builder *b = &c->build; 884 struct nir_shader *shader = b->shader; 885 886 /* Create input variables. 
*/ 887 uint64_t inputs_read = c->prog->info.inputs_read; 888 while (inputs_read) { 889 const int i = u_bit_scan64(&inputs_read); 890 891 nir_variable *var = 892 nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(), 893 ralloc_asprintf(shader, "in_%d", i)); 894 var->data.location = i; 895 var->data.index = 0; 896 897 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 898 if (i == VARYING_SLOT_FOGC) { 899 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual 900 * input variable a float, and create a local containing the 901 * full vec4 value. 902 */ 903 var->type = glsl_float_type(); 904 905 nir_variable *fullvar = 906 nir_local_variable_create(b->impl, glsl_vec4_type(), 907 "fogcoord_tmp"); 908 909 nir_store_var(b, fullvar, 910 nir_vec4(b, nir_load_var(b, var), 911 nir_imm_float(b, 0.0), 912 nir_imm_float(b, 0.0), 913 nir_imm_float(b, 1.0)), 914 WRITEMASK_XYZW); 915 916 /* We inserted the real input into the list so the driver has real 917 * inputs, but we set c->input_vars[i] to the temporary so we use 918 * the splatted value. 919 */ 920 c->input_vars[i] = fullvar; 921 continue; 922 } 923 } 924 925 c->input_vars[i] = var; 926 } 927 928 /* Create system value variables */ 929 uint64_t system_values_read = c->prog->info.system_values_read; 930 while (system_values_read) { 931 const int i = u_bit_scan64(&system_values_read); 932 933 nir_variable *var = 934 nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(), 935 ralloc_asprintf(shader, "sv_%d", i)); 936 var->data.location = i; 937 var->data.index = 0; 938 939 c->sysval_vars[i] = var; 940 } 941 942 /* Create output registers and variables. 
*/ 943 int max_outputs = util_last_bit(c->prog->info.outputs_written); 944 c->output_regs = rzalloc_array(c, nir_register *, max_outputs); 945 946 uint64_t outputs_written = c->prog->info.outputs_written; 947 while (outputs_written) { 948 const int i = u_bit_scan64(&outputs_written); 949 950 /* Since we can't load from outputs in the IR, we make temporaries 951 * for the outputs and emit stores to the real outputs at the end of 952 * the shader. 953 */ 954 nir_register *reg = nir_local_reg_create(b->impl); 955 reg->num_components = 4; 956 957 nir_variable *var = rzalloc(shader, nir_variable); 958 if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) || 959 (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC)) 960 var->type = glsl_float_type(); 961 else 962 var->type = glsl_vec4_type(); 963 var->data.mode = nir_var_shader_out; 964 var->name = ralloc_asprintf(var, "out_%d", i); 965 966 var->data.location = i; 967 var->data.index = 0; 968 969 c->output_regs[i] = reg; 970 971 exec_list_push_tail(&shader->outputs, &var->node); 972 c->output_vars[i] = var; 973 } 974 975 /* Create temporary registers. */ 976 c->temp_regs = rzalloc_array(c, nir_register *, 977 c->prog->arb.NumTemporaries); 978 979 nir_register *reg; 980 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) { 981 reg = nir_local_reg_create(b->impl); 982 if (!reg) { 983 c->error = true; 984 return; 985 } 986 reg->num_components = 4; 987 c->temp_regs[i] = reg; 988 } 989 990 /* Create the address register (for ARB_vertex_program). 
*/ 991 reg = nir_local_reg_create(b->impl); 992 if (!reg) { 993 c->error = true; 994 return; 995 } 996 reg->num_components = 1; 997 c->addr_reg = reg; 998} 999 1000struct nir_shader * 1001prog_to_nir(const struct gl_program *prog, 1002 const nir_shader_compiler_options *options) 1003{ 1004 struct ptn_compile *c; 1005 struct nir_shader *s; 1006 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target); 1007 1008 c = rzalloc(NULL, struct ptn_compile); 1009 if (!c) 1010 return NULL; 1011 c->prog = prog; 1012 1013 nir_builder_init_simple_shader(&c->build, NULL, stage, options); 1014 1015 /* Copy the shader_info from the gl_program */ 1016 c->build.shader->info = prog->info; 1017 1018 s = c->build.shader; 1019 1020 if (prog->Parameters->NumParameters > 0) { 1021 const struct glsl_type *type = 1022 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0); 1023 c->parameters = 1024 nir_variable_create(s, nir_var_uniform, type, 1025 prog->Parameters->Parameters[0].Name); 1026 } 1027 1028 setup_registers_and_variables(c); 1029 if (unlikely(c->error)) 1030 goto fail; 1031 1032 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) { 1033 ptn_emit_instruction(c, &prog->arb.Instructions[i]); 1034 1035 if (unlikely(c->error)) 1036 break; 1037 } 1038 1039 ptn_add_output_stores(c); 1040 1041 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id); 1042 s->info.num_textures = util_last_bit(prog->SamplersUsed); 1043 s->info.num_ubos = 0; 1044 s->info.num_abos = 0; 1045 s->info.num_ssbos = 0; 1046 s->info.num_images = 0; 1047 s->info.uses_texture_gather = false; 1048 s->info.clip_distance_array_size = 0; 1049 s->info.cull_distance_array_size = 0; 1050 s->info.separate_shader = false; 1051 1052fail: 1053 if (c->error) { 1054 ralloc_free(s); 1055 s = NULL; 1056 } 1057 ralloc_free(c); 1058 return s; 1059} 1060