1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_cfg.h" 25#include "brw_eu.h" 26#include "brw_fs.h" 27#include "brw_nir.h" 28#include "brw_vec4_tes.h" 29#include "dev/intel_debug.h" 30#include "main/uniforms.h" 31#include "util/macros.h" 32 33enum brw_reg_type 34brw_type_for_base_type(const struct glsl_type *type) 35{ 36 switch (type->base_type) { 37 case GLSL_TYPE_FLOAT16: 38 return BRW_REGISTER_TYPE_HF; 39 case GLSL_TYPE_FLOAT: 40 return BRW_REGISTER_TYPE_F; 41 case GLSL_TYPE_INT: 42 case GLSL_TYPE_BOOL: 43 case GLSL_TYPE_SUBROUTINE: 44 return BRW_REGISTER_TYPE_D; 45 case GLSL_TYPE_INT16: 46 return BRW_REGISTER_TYPE_W; 47 case GLSL_TYPE_INT8: 48 return BRW_REGISTER_TYPE_B; 49 case GLSL_TYPE_UINT: 50 return BRW_REGISTER_TYPE_UD; 51 case GLSL_TYPE_UINT16: 52 return BRW_REGISTER_TYPE_UW; 53 case GLSL_TYPE_UINT8: 54 return BRW_REGISTER_TYPE_UB; 55 case GLSL_TYPE_ARRAY: 56 return brw_type_for_base_type(type->fields.array); 57 case GLSL_TYPE_STRUCT: 58 case GLSL_TYPE_INTERFACE: 59 case GLSL_TYPE_SAMPLER: 60 case GLSL_TYPE_ATOMIC_UINT: 61 /* These should be overridden with the type of the member when 62 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 63 * way to trip up if we don't. 64 */ 65 return BRW_REGISTER_TYPE_UD; 66 case GLSL_TYPE_IMAGE: 67 return BRW_REGISTER_TYPE_UD; 68 case GLSL_TYPE_DOUBLE: 69 return BRW_REGISTER_TYPE_DF; 70 case GLSL_TYPE_UINT64: 71 return BRW_REGISTER_TYPE_UQ; 72 case GLSL_TYPE_INT64: 73 return BRW_REGISTER_TYPE_Q; 74 case GLSL_TYPE_VOID: 75 case GLSL_TYPE_ERROR: 76 case GLSL_TYPE_FUNCTION: 77 unreachable("not reached"); 78 } 79 80 return BRW_REGISTER_TYPE_F; 81} 82 83enum brw_conditional_mod 84brw_conditional_for_comparison(unsigned int op) 85{ 86 switch (op) { 87 case ir_binop_less: 88 return BRW_CONDITIONAL_L; 89 case ir_binop_gequal: 90 return BRW_CONDITIONAL_GE; 91 case ir_binop_equal: 92 case ir_binop_all_equal: /* same as equal for scalars */ 93 return BRW_CONDITIONAL_Z; 94 case ir_binop_nequal: 95 case ir_binop_any_nequal: /* same as nequal for scalars */ 96 return BRW_CONDITIONAL_NZ; 97 default: 98 unreachable("not reached: bad operation for comparison"); 99 } 100} 101 102uint32_t 103brw_math_function(enum opcode op) 104{ 105 switch (op) { 106 case SHADER_OPCODE_RCP: 107 return BRW_MATH_FUNCTION_INV; 108 case SHADER_OPCODE_RSQ: 109 return BRW_MATH_FUNCTION_RSQ; 110 case SHADER_OPCODE_SQRT: 111 return BRW_MATH_FUNCTION_SQRT; 112 case SHADER_OPCODE_EXP2: 113 return BRW_MATH_FUNCTION_EXP; 114 case SHADER_OPCODE_LOG2: 115 return BRW_MATH_FUNCTION_LOG; 116 case SHADER_OPCODE_POW: 117 return BRW_MATH_FUNCTION_POW; 118 case SHADER_OPCODE_SIN: 119 return BRW_MATH_FUNCTION_SIN; 120 case SHADER_OPCODE_COS: 121 return BRW_MATH_FUNCTION_COS; 122 case SHADER_OPCODE_INT_QUOTIENT: 123 return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; 124 case SHADER_OPCODE_INT_REMAINDER: 125 return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; 126 default: 127 unreachable("not reached: unknown math function"); 128 } 129} 130 131bool 132brw_texture_offset(const nir_tex_instr *tex, unsigned src, 133 uint32_t *offset_bits_out) 134{ 135 if (!nir_src_is_const(tex->src[src].src)) 136 return false; 137 138 const unsigned num_components = nir_tex_instr_src_size(tex, src); 139 140 /* Combine all three offsets into a single unsigned dword: 141 * 142 * bits 11:8 - U Offset (X component) 143 * bits 7:4 - V Offset (Y component) 144 * bits 3:0 - R Offset (Z component) 145 */ 146 uint32_t offset_bits = 0; 147 for (unsigned i = 0; i < num_components; i++) { 148 int offset = nir_src_comp_as_int(tex->src[src].src, i); 149 150 /* offset out of bounds; caller will handle it. */ 151 if (offset > 7 || offset < -8) 152 return false; 153 154 const unsigned shift = 4 * (2 - i); 155 offset_bits |= (offset << shift) & (0xF << shift); 156 } 157 158 *offset_bits_out = offset_bits; 159 160 return true; 161} 162 163const char * 164brw_instruction_name(const struct intel_device_info *devinfo, enum opcode op) 165{ 166 switch (op) { 167 case 0 ... NUM_BRW_OPCODES - 1: 168 /* The DO instruction doesn't exist on Gfx6+, but we use it to mark the 169 * start of a loop in the IR. 170 */ 171 if (devinfo->ver >= 6 && op == BRW_OPCODE_DO) 172 return "do"; 173 174 /* The following conversion opcodes doesn't exist on Gfx8+, but we use 175 * then to mark that we want to do the conversion. 176 */ 177 if (devinfo->ver > 7 && op == BRW_OPCODE_F32TO16) 178 return "f32to16"; 179 180 if (devinfo->ver > 7 && op == BRW_OPCODE_F16TO32) 181 return "f16to32"; 182 183 assert(brw_opcode_desc(devinfo, op)->name); 184 return brw_opcode_desc(devinfo, op)->name; 185 case FS_OPCODE_FB_WRITE: 186 return "fb_write"; 187 case FS_OPCODE_FB_WRITE_LOGICAL: 188 return "fb_write_logical"; 189 case FS_OPCODE_REP_FB_WRITE: 190 return "rep_fb_write"; 191 case FS_OPCODE_FB_READ: 192 return "fb_read"; 193 case FS_OPCODE_FB_READ_LOGICAL: 194 return "fb_read_logical"; 195 196 case SHADER_OPCODE_RCP: 197 return "rcp"; 198 case SHADER_OPCODE_RSQ: 199 return "rsq"; 200 case SHADER_OPCODE_SQRT: 201 return "sqrt"; 202 case SHADER_OPCODE_EXP2: 203 return "exp2"; 204 case SHADER_OPCODE_LOG2: 205 return "log2"; 206 case SHADER_OPCODE_POW: 207 return "pow"; 208 case SHADER_OPCODE_INT_QUOTIENT: 209 return "int_quot"; 210 case SHADER_OPCODE_INT_REMAINDER: 211 return "int_rem"; 212 case SHADER_OPCODE_SIN: 213 return "sin"; 214 case SHADER_OPCODE_COS: 215 return "cos"; 216 217 case SHADER_OPCODE_SEND: 218 return "send"; 219 220 case SHADER_OPCODE_UNDEF: 221 return "undef"; 222 223 case SHADER_OPCODE_TEX: 224 return "tex"; 225 case SHADER_OPCODE_TEX_LOGICAL: 226 return "tex_logical"; 227 case SHADER_OPCODE_TXD: 228 return "txd"; 229 case SHADER_OPCODE_TXD_LOGICAL: 230 return "txd_logical"; 231 case SHADER_OPCODE_TXF: 232 return "txf"; 233 case SHADER_OPCODE_TXF_LOGICAL: 234 return "txf_logical"; 235 case SHADER_OPCODE_TXF_LZ: 236 return "txf_lz"; 237 case SHADER_OPCODE_TXL: 238 return "txl"; 239 case SHADER_OPCODE_TXL_LOGICAL: 240 return "txl_logical"; 241 case SHADER_OPCODE_TXL_LZ: 242 return "txl_lz"; 243 case SHADER_OPCODE_TXS: 244 return "txs"; 245 case SHADER_OPCODE_TXS_LOGICAL: 246 return "txs_logical"; 247 case FS_OPCODE_TXB: 248 return "txb"; 249 case FS_OPCODE_TXB_LOGICAL: 250 return "txb_logical"; 251 case SHADER_OPCODE_TXF_CMS: 252 return "txf_cms"; 253 case SHADER_OPCODE_TXF_CMS_LOGICAL: 254 return "txf_cms_logical"; 255 case SHADER_OPCODE_TXF_CMS_W: 256 return "txf_cms_w"; 257 case SHADER_OPCODE_TXF_CMS_W_LOGICAL: 258 return "txf_cms_w_logical"; 259 case SHADER_OPCODE_TXF_UMS: 260 return "txf_ums"; 261 case SHADER_OPCODE_TXF_UMS_LOGICAL: 262 return "txf_ums_logical"; 263 case SHADER_OPCODE_TXF_MCS: 264 return "txf_mcs"; 265 case SHADER_OPCODE_TXF_MCS_LOGICAL: 266 return "txf_mcs_logical"; 267 case SHADER_OPCODE_LOD: 268 return "lod"; 269 case SHADER_OPCODE_LOD_LOGICAL: 270 return "lod_logical"; 271 case SHADER_OPCODE_TG4: 272 return "tg4"; 273 case SHADER_OPCODE_TG4_LOGICAL: 274 return "tg4_logical"; 275 case SHADER_OPCODE_TG4_OFFSET: 276 return "tg4_offset"; 277 case SHADER_OPCODE_TG4_OFFSET_LOGICAL: 278 return "tg4_offset_logical"; 279 case SHADER_OPCODE_SAMPLEINFO: 280 return "sampleinfo"; 281 case SHADER_OPCODE_SAMPLEINFO_LOGICAL: 282 return "sampleinfo_logical"; 283 284 case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: 285 return "image_size_logical"; 286 287 case SHADER_OPCODE_SHADER_TIME_ADD: 288 return "shader_time_add"; 289 290 case VEC4_OPCODE_UNTYPED_ATOMIC: 291 return "untyped_atomic"; 292 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 293 return "untyped_atomic_logical"; 294 case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: 295 return "untyped_atomic_float_logical"; 296 case VEC4_OPCODE_UNTYPED_SURFACE_READ: 297 return "untyped_surface_read"; 298 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 299 return "untyped_surface_read_logical"; 300 case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 301 return "untyped_surface_write"; 302 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 303 return "untyped_surface_write_logical"; 304 case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL: 305 return "oword_block_read_logical"; 306 case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: 307 return "unaligned_oword_block_read_logical"; 308 case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: 309 return "oword_block_write_logical"; 310 case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: 311 return "a64_untyped_read_logical"; 312 case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: 313 return "a64_oword_block_read_logical"; 314 case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: 315 return "a64_unaligned_oword_block_read_logical"; 316 case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: 317 return "a64_oword_block_write_logical"; 318 case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: 319 return "a64_untyped_write_logical"; 320 case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: 321 return "a64_byte_scattered_read_logical"; 322 case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: 323 return "a64_byte_scattered_write_logical"; 324 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: 325 return "a64_untyped_atomic_logical"; 326 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: 327 return "a64_untyped_atomic_int16_logical"; 328 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: 329 return "a64_untyped_atomic_int64_logical"; 330 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: 331 return "a64_untyped_atomic_float16_logical"; 332 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: 333 return "a64_untyped_atomic_float32_logical"; 334 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: 335 return "a64_untyped_atomic_float64_logical"; 336 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 337 return "typed_atomic_logical"; 338 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 339 return "typed_surface_read_logical"; 340 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 341 return "typed_surface_write_logical"; 342 case SHADER_OPCODE_MEMORY_FENCE: 343 return "memory_fence"; 344 case FS_OPCODE_SCHEDULING_FENCE: 345 return "scheduling_fence"; 346 case SHADER_OPCODE_INTERLOCK: 347 /* For an interlock we actually issue a memory fence via sendc. */ 348 return "interlock"; 349 350 case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 351 return "byte_scattered_read_logical"; 352 case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 353 return "byte_scattered_write_logical"; 354 case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: 355 return "dword_scattered_read_logical"; 356 case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: 357 return "dword_scattered_write_logical"; 358 359 case SHADER_OPCODE_LOAD_PAYLOAD: 360 return "load_payload"; 361 case FS_OPCODE_PACK: 362 return "pack"; 363 364 case SHADER_OPCODE_GFX4_SCRATCH_READ: 365 return "gfx4_scratch_read"; 366 case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 367 return "gfx4_scratch_write"; 368 case SHADER_OPCODE_GFX7_SCRATCH_READ: 369 return "gfx7_scratch_read"; 370 case SHADER_OPCODE_SCRATCH_HEADER: 371 return "scratch_header"; 372 case SHADER_OPCODE_URB_WRITE_SIMD8: 373 return "gfx8_urb_write_simd8"; 374 case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: 375 return "gfx8_urb_write_simd8_per_slot"; 376 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: 377 return "gfx8_urb_write_simd8_masked"; 378 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: 379 return "gfx8_urb_write_simd8_masked_per_slot"; 380 case SHADER_OPCODE_URB_READ_SIMD8: 381 return "urb_read_simd8"; 382 case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: 383 return "urb_read_simd8_per_slot"; 384 385 case SHADER_OPCODE_FIND_LIVE_CHANNEL: 386 return "find_live_channel"; 387 case FS_OPCODE_LOAD_LIVE_CHANNELS: 388 return "load_live_channels"; 389 390 case SHADER_OPCODE_BROADCAST: 391 return "broadcast"; 392 case SHADER_OPCODE_SHUFFLE: 393 return "shuffle"; 394 case SHADER_OPCODE_SEL_EXEC: 395 return "sel_exec"; 396 case SHADER_OPCODE_QUAD_SWIZZLE: 397 return "quad_swizzle"; 398 case SHADER_OPCODE_CLUSTER_BROADCAST: 399 return "cluster_broadcast"; 400 401 case SHADER_OPCODE_GET_BUFFER_SIZE: 402 return "get_buffer_size"; 403 404 case VEC4_OPCODE_MOV_BYTES: 405 return "mov_bytes"; 406 case VEC4_OPCODE_PACK_BYTES: 407 return "pack_bytes"; 408 case VEC4_OPCODE_UNPACK_UNIFORM: 409 return "unpack_uniform"; 410 case VEC4_OPCODE_DOUBLE_TO_F32: 411 return "double_to_f32"; 412 case VEC4_OPCODE_DOUBLE_TO_D32: 413 return "double_to_d32"; 414 case VEC4_OPCODE_DOUBLE_TO_U32: 415 return "double_to_u32"; 416 case VEC4_OPCODE_TO_DOUBLE: 417 return "single_to_double"; 418 case VEC4_OPCODE_PICK_LOW_32BIT: 419 return "pick_low_32bit"; 420 case VEC4_OPCODE_PICK_HIGH_32BIT: 421 return "pick_high_32bit"; 422 case VEC4_OPCODE_SET_LOW_32BIT: 423 return "set_low_32bit"; 424 case VEC4_OPCODE_SET_HIGH_32BIT: 425 return "set_high_32bit"; 426 case VEC4_OPCODE_MOV_FOR_SCRATCH: 427 return "mov_for_scratch"; 428 case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: 429 return "zero_oob_push_regs"; 430 431 case FS_OPCODE_DDX_COARSE: 432 return "ddx_coarse"; 433 case FS_OPCODE_DDX_FINE: 434 return "ddx_fine"; 435 case FS_OPCODE_DDY_COARSE: 436 return "ddy_coarse"; 437 case FS_OPCODE_DDY_FINE: 438 return "ddy_fine"; 439 440 case FS_OPCODE_LINTERP: 441 return "linterp"; 442 443 case FS_OPCODE_PIXEL_X: 444 return "pixel_x"; 445 case FS_OPCODE_PIXEL_Y: 446 return "pixel_y"; 447 448 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: 449 return "uniform_pull_const"; 450 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7: 451 return "uniform_pull_const_gfx7"; 452 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: 453 return "varying_pull_const_gfx4"; 454 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: 455 return "varying_pull_const_logical"; 456 457 case FS_OPCODE_SET_SAMPLE_ID: 458 return "set_sample_id"; 459 460 case FS_OPCODE_PACK_HALF_2x16_SPLIT: 461 return "pack_half_2x16_split"; 462 463 case SHADER_OPCODE_HALT_TARGET: 464 return "halt_target"; 465 466 case FS_OPCODE_INTERPOLATE_AT_SAMPLE: 467 return "interp_sample"; 468 case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: 469 return "interp_shared_offset"; 470 case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: 471 return "interp_per_slot_offset"; 472 473 case VS_OPCODE_URB_WRITE: 474 return "vs_urb_write"; 475 case VS_OPCODE_PULL_CONSTANT_LOAD: 476 return "pull_constant_load"; 477 case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7: 478 return "pull_constant_load_gfx7"; 479 480 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: 481 return "unpack_flags_simd4x2"; 482 483 case GS_OPCODE_URB_WRITE: 484 return "gs_urb_write"; 485 case GS_OPCODE_URB_WRITE_ALLOCATE: 486 return "gs_urb_write_allocate"; 487 case GS_OPCODE_THREAD_END: 488 return "gs_thread_end"; 489 case GS_OPCODE_SET_WRITE_OFFSET: 490 return "set_write_offset"; 491 case GS_OPCODE_SET_VERTEX_COUNT: 492 return "set_vertex_count"; 493 case GS_OPCODE_SET_DWORD_2: 494 return "set_dword_2"; 495 case GS_OPCODE_PREPARE_CHANNEL_MASKS: 496 return "prepare_channel_masks"; 497 case GS_OPCODE_SET_CHANNEL_MASKS: 498 return "set_channel_masks"; 499 case GS_OPCODE_GET_INSTANCE_ID: 500 return "get_instance_id"; 501 case GS_OPCODE_FF_SYNC: 502 return "ff_sync"; 503 case GS_OPCODE_SET_PRIMITIVE_ID: 504 return "set_primitive_id"; 505 case GS_OPCODE_SVB_WRITE: 506 return "gs_svb_write"; 507 case GS_OPCODE_SVB_SET_DST_INDEX: 508 return "gs_svb_set_dst_index"; 509 case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: 510 return "gs_ff_sync_set_primitives"; 511 case CS_OPCODE_CS_TERMINATE: 512 return "cs_terminate"; 513 case SHADER_OPCODE_BARRIER: 514 return "barrier"; 515 case SHADER_OPCODE_MULH: 516 return "mulh"; 517 case SHADER_OPCODE_ISUB_SAT: 518 return "isub_sat"; 519 case SHADER_OPCODE_USUB_SAT: 520 return "usub_sat"; 521 case SHADER_OPCODE_MOV_INDIRECT: 522 return "mov_indirect"; 523 case SHADER_OPCODE_MOV_RELOC_IMM: 524 return "mov_reloc_imm"; 525 526 case VEC4_OPCODE_URB_READ: 527 return "urb_read"; 528 case TCS_OPCODE_GET_INSTANCE_ID: 529 return "tcs_get_instance_id"; 530 case TCS_OPCODE_URB_WRITE: 531 return "tcs_urb_write"; 532 case TCS_OPCODE_SET_INPUT_URB_OFFSETS: 533 return "tcs_set_input_urb_offsets"; 534 case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 535 return "tcs_set_output_urb_offsets"; 536 case TCS_OPCODE_GET_PRIMITIVE_ID: 537 return "tcs_get_primitive_id"; 538 case TCS_OPCODE_CREATE_BARRIER_HEADER: 539 return "tcs_create_barrier_header"; 540 case TCS_OPCODE_SRC0_010_IS_ZERO: 541 return "tcs_src0<0,1,0>_is_zero"; 542 case TCS_OPCODE_RELEASE_INPUT: 543 return "tcs_release_input"; 544 case TCS_OPCODE_THREAD_END: 545 return "tcs_thread_end"; 546 case TES_OPCODE_CREATE_INPUT_READ_HEADER: 547 return "tes_create_input_read_header"; 548 case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: 549 return "tes_add_indirect_urb_offset"; 550 case TES_OPCODE_GET_PRIMITIVE_ID: 551 return "tes_get_primitive_id"; 552 553 case RT_OPCODE_TRACE_RAY_LOGICAL: 554 return "rt_trace_ray_logical"; 555 556 case SHADER_OPCODE_RND_MODE: 557 return "rnd_mode"; 558 case SHADER_OPCODE_FLOAT_CONTROL_MODE: 559 return "float_control_mode"; 560 case SHADER_OPCODE_GET_DSS_ID: 561 return "get_dss_id"; 562 case SHADER_OPCODE_BTD_SPAWN_LOGICAL: 563 return "btd_spawn_logical"; 564 case SHADER_OPCODE_BTD_RETIRE_LOGICAL: 565 return "btd_retire_logical"; 566 } 567 568 unreachable("not reached"); 569} 570 571bool 572brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) 573{ 574 union { 575 unsigned ud; 576 int d; 577 float f; 578 double df; 579 } imm, sat_imm = { 0 }; 580 581 const unsigned size = type_sz(type); 582 583 /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise 584 * irrelevant, so just check the size of the type and copy from/to an 585 * appropriately sized field. 586 */ 587 if (size < 8) 588 imm.ud = reg->ud; 589 else 590 imm.df = reg->df; 591 592 switch (type) { 593 case BRW_REGISTER_TYPE_UD: 594 case BRW_REGISTER_TYPE_D: 595 case BRW_REGISTER_TYPE_UW: 596 case BRW_REGISTER_TYPE_W: 597 case BRW_REGISTER_TYPE_UQ: 598 case BRW_REGISTER_TYPE_Q: 599 /* Nothing to do. */ 600 return false; 601 case BRW_REGISTER_TYPE_F: 602 sat_imm.f = SATURATE(imm.f); 603 break; 604 case BRW_REGISTER_TYPE_DF: 605 sat_imm.df = SATURATE(imm.df); 606 break; 607 case BRW_REGISTER_TYPE_UB: 608 case BRW_REGISTER_TYPE_B: 609 unreachable("no UB/B immediates"); 610 case BRW_REGISTER_TYPE_V: 611 case BRW_REGISTER_TYPE_UV: 612 case BRW_REGISTER_TYPE_VF: 613 unreachable("unimplemented: saturate vector immediate"); 614 case BRW_REGISTER_TYPE_HF: 615 unreachable("unimplemented: saturate HF immediate"); 616 case BRW_REGISTER_TYPE_NF: 617 unreachable("no NF immediates"); 618 } 619 620 if (size < 8) { 621 if (imm.ud != sat_imm.ud) { 622 reg->ud = sat_imm.ud; 623 return true; 624 } 625 } else { 626 if (imm.df != sat_imm.df) { 627 reg->df = sat_imm.df; 628 return true; 629 } 630 } 631 return false; 632} 633 634bool 635brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) 636{ 637 switch (type) { 638 case BRW_REGISTER_TYPE_D: 639 case BRW_REGISTER_TYPE_UD: 640 reg->d = -reg->d; 641 return true; 642 case BRW_REGISTER_TYPE_W: 643 case BRW_REGISTER_TYPE_UW: { 644 uint16_t value = -(int16_t)reg->ud; 645 reg->ud = value | (uint32_t)value << 16; 646 return true; 647 } 648 case BRW_REGISTER_TYPE_F: 649 reg->f = -reg->f; 650 return true; 651 case BRW_REGISTER_TYPE_VF: 652 reg->ud ^= 0x80808080; 653 return true; 654 case BRW_REGISTER_TYPE_DF: 655 reg->df = -reg->df; 656 return true; 657 case BRW_REGISTER_TYPE_UQ: 658 case BRW_REGISTER_TYPE_Q: 659 reg->d64 = -reg->d64; 660 return true; 661 case BRW_REGISTER_TYPE_UB: 662 case BRW_REGISTER_TYPE_B: 663 unreachable("no UB/B immediates"); 664 case BRW_REGISTER_TYPE_UV: 665 case BRW_REGISTER_TYPE_V: 666 assert(!"unimplemented: negate UV/V immediate"); 667 case BRW_REGISTER_TYPE_HF: 668 reg->ud ^= 0x80008000; 669 return true; 670 case BRW_REGISTER_TYPE_NF: 671 unreachable("no NF immediates"); 672 } 673 674 return false; 675} 676 677bool 678brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) 679{ 680 switch (type) { 681 case BRW_REGISTER_TYPE_D: 682 reg->d = abs(reg->d); 683 return true; 684 case BRW_REGISTER_TYPE_W: { 685 uint16_t value = abs((int16_t)reg->ud); 686 reg->ud = value | (uint32_t)value << 16; 687 return true; 688 } 689 case BRW_REGISTER_TYPE_F: 690 reg->f = fabsf(reg->f); 691 return true; 692 case BRW_REGISTER_TYPE_DF: 693 reg->df = fabs(reg->df); 694 return true; 695 case BRW_REGISTER_TYPE_VF: 696 reg->ud &= ~0x80808080; 697 return true; 698 case BRW_REGISTER_TYPE_Q: 699 reg->d64 = imaxabs(reg->d64); 700 return true; 701 case BRW_REGISTER_TYPE_UB: 702 case BRW_REGISTER_TYPE_B: 703 unreachable("no UB/B immediates"); 704 case BRW_REGISTER_TYPE_UQ: 705 case BRW_REGISTER_TYPE_UD: 706 case BRW_REGISTER_TYPE_UW: 707 case BRW_REGISTER_TYPE_UV: 708 /* Presumably the absolute value modifier on an unsigned source is a 709 * nop, but it would be nice to confirm. 710 */ 711 assert(!"unimplemented: abs unsigned immediate"); 712 case BRW_REGISTER_TYPE_V: 713 assert(!"unimplemented: abs V immediate"); 714 case BRW_REGISTER_TYPE_HF: 715 reg->ud &= ~0x80008000; 716 return true; 717 case BRW_REGISTER_TYPE_NF: 718 unreachable("no NF immediates"); 719 } 720 721 return false; 722} 723 724backend_shader::backend_shader(const struct brw_compiler *compiler, 725 void *log_data, 726 void *mem_ctx, 727 const nir_shader *shader, 728 struct brw_stage_prog_data *stage_prog_data, 729 bool debug_enabled) 730 : compiler(compiler), 731 log_data(log_data), 732 devinfo(compiler->devinfo), 733 nir(shader), 734 stage_prog_data(stage_prog_data), 735 mem_ctx(mem_ctx), 736 cfg(NULL), idom_analysis(this), 737 stage(shader->info.stage), 738 debug_enabled(debug_enabled) 739{ 740 stage_name = _mesa_shader_stage_to_string(stage); 741 stage_abbrev = _mesa_shader_stage_to_abbrev(stage); 742} 743 744backend_shader::~backend_shader() 745{ 746} 747 748bool 749backend_reg::equals(const backend_reg &r) const 750{ 751 return brw_regs_equal(this, &r) && offset == r.offset; 752} 753 754bool 755backend_reg::negative_equals(const backend_reg &r) const 756{ 757 return brw_regs_negative_equal(this, &r) && offset == r.offset; 758} 759 760bool 761backend_reg::is_zero() const 762{ 763 if (file != IMM) 764 return false; 765 766 assert(type_sz(type) > 1); 767 768 switch (type) { 769 case BRW_REGISTER_TYPE_HF: 770 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 771 return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000; 772 case BRW_REGISTER_TYPE_F: 773 return f == 0; 774 case BRW_REGISTER_TYPE_DF: 775 return df == 0; 776 case BRW_REGISTER_TYPE_W: 777 case BRW_REGISTER_TYPE_UW: 778 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 779 return (d & 0xffff) == 0; 780 case BRW_REGISTER_TYPE_D: 781 case BRW_REGISTER_TYPE_UD: 782 return d == 0; 783 case BRW_REGISTER_TYPE_UQ: 784 case BRW_REGISTER_TYPE_Q: 785 return u64 == 0; 786 default: 787 return false; 788 } 789} 790 791bool 792backend_reg::is_one() const 793{ 794 if (file != IMM) 795 return false; 796 797 assert(type_sz(type) > 1); 798 799 switch (type) { 800 case BRW_REGISTER_TYPE_HF: 801 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 802 return (d & 0xffff) == 0x3c00; 803 case BRW_REGISTER_TYPE_F: 804 return f == 1.0f; 805 case BRW_REGISTER_TYPE_DF: 806 return df == 1.0; 807 case BRW_REGISTER_TYPE_W: 808 case BRW_REGISTER_TYPE_UW: 809 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 810 return (d & 0xffff) == 1; 811 case BRW_REGISTER_TYPE_D: 812 case BRW_REGISTER_TYPE_UD: 813 return d == 1; 814 case BRW_REGISTER_TYPE_UQ: 815 case BRW_REGISTER_TYPE_Q: 816 return u64 == 1; 817 default: 818 return false; 819 } 820} 821 822bool 823backend_reg::is_negative_one() const 824{ 825 if (file != IMM) 826 return false; 827 828 assert(type_sz(type) > 1); 829 830 switch (type) { 831 case BRW_REGISTER_TYPE_HF: 832 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 833 return (d & 0xffff) == 0xbc00; 834 case BRW_REGISTER_TYPE_F: 835 return f == -1.0; 836 case BRW_REGISTER_TYPE_DF: 837 return df == -1.0; 838 case BRW_REGISTER_TYPE_W: 839 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 840 return (d & 0xffff) == 0xffff; 841 case BRW_REGISTER_TYPE_D: 842 return d == -1; 843 case BRW_REGISTER_TYPE_Q: 844 return d64 == -1; 845 default: 846 return false; 847 } 848} 849 850bool 851backend_reg::is_null() const 852{ 853 return file == ARF && nr == BRW_ARF_NULL; 854} 855 856 857bool 858backend_reg::is_accumulator() const 859{ 860 return file == ARF && nr == BRW_ARF_ACCUMULATOR; 861} 862 863bool 864backend_instruction::is_commutative() const 865{ 866 switch (opcode) { 867 case BRW_OPCODE_AND: 868 case BRW_OPCODE_OR: 869 case BRW_OPCODE_XOR: 870 case BRW_OPCODE_ADD: 871 case BRW_OPCODE_ADD3: 872 case BRW_OPCODE_MUL: 873 case SHADER_OPCODE_MULH: 874 return true; 875 case BRW_OPCODE_SEL: 876 /* MIN and MAX are commutative. */ 877 if (conditional_mod == BRW_CONDITIONAL_GE || 878 conditional_mod == BRW_CONDITIONAL_L) { 879 return true; 880 } 881 FALLTHROUGH; 882 default: 883 return false; 884 } 885} 886 887bool 888backend_instruction::is_3src(const struct intel_device_info *devinfo) const 889{ 890 return ::is_3src(devinfo, opcode); 891} 892 893bool 894backend_instruction::is_tex() const 895{ 896 return (opcode == SHADER_OPCODE_TEX || 897 opcode == FS_OPCODE_TXB || 898 opcode == SHADER_OPCODE_TXD || 899 opcode == SHADER_OPCODE_TXF || 900 opcode == SHADER_OPCODE_TXF_LZ || 901 opcode == SHADER_OPCODE_TXF_CMS || 902 opcode == SHADER_OPCODE_TXF_CMS_W || 903 opcode == SHADER_OPCODE_TXF_UMS || 904 opcode == SHADER_OPCODE_TXF_MCS || 905 opcode == SHADER_OPCODE_TXL || 906 opcode == SHADER_OPCODE_TXL_LZ || 907 opcode == SHADER_OPCODE_TXS || 908 opcode == SHADER_OPCODE_LOD || 909 opcode == SHADER_OPCODE_TG4 || 910 opcode == SHADER_OPCODE_TG4_OFFSET || 911 opcode == SHADER_OPCODE_SAMPLEINFO); 912} 913 914bool 915backend_instruction::is_math() const 916{ 917 return (opcode == SHADER_OPCODE_RCP || 918 opcode == SHADER_OPCODE_RSQ || 919 opcode == SHADER_OPCODE_SQRT || 920 opcode == SHADER_OPCODE_EXP2 || 921 opcode == SHADER_OPCODE_LOG2 || 922 opcode == SHADER_OPCODE_SIN || 923 opcode == SHADER_OPCODE_COS || 924 opcode == SHADER_OPCODE_INT_QUOTIENT || 925 opcode == SHADER_OPCODE_INT_REMAINDER || 926 opcode == SHADER_OPCODE_POW); 927} 928 929bool 930backend_instruction::is_control_flow() const 931{ 932 switch (opcode) { 933 case BRW_OPCODE_DO: 934 case BRW_OPCODE_WHILE: 935 case BRW_OPCODE_IF: 936 case BRW_OPCODE_ELSE: 937 case BRW_OPCODE_ENDIF: 938 case BRW_OPCODE_BREAK: 939 case BRW_OPCODE_CONTINUE: 940 return true; 941 default: 942 return false; 943 } 944} 945 946bool 947backend_instruction::uses_indirect_addressing() const 948{ 949 switch (opcode) { 950 case SHADER_OPCODE_BROADCAST: 951 case SHADER_OPCODE_CLUSTER_BROADCAST: 952 case SHADER_OPCODE_MOV_INDIRECT: 953 return true; 954 default: 955 return false; 956 } 957} 958 959bool 960backend_instruction::can_do_source_mods() const 961{ 962 switch (opcode) { 963 case BRW_OPCODE_ADDC: 964 case BRW_OPCODE_BFE: 965 case BRW_OPCODE_BFI1: 966 case BRW_OPCODE_BFI2: 967 case BRW_OPCODE_BFREV: 968 case BRW_OPCODE_CBIT: 969 case BRW_OPCODE_FBH: 970 case BRW_OPCODE_FBL: 971 case BRW_OPCODE_ROL: 972 case BRW_OPCODE_ROR: 973 case BRW_OPCODE_SUBB: 974 case BRW_OPCODE_DP4A: 975 case SHADER_OPCODE_BROADCAST: 976 case SHADER_OPCODE_CLUSTER_BROADCAST: 977 case SHADER_OPCODE_MOV_INDIRECT: 978 case SHADER_OPCODE_SHUFFLE: 979 case SHADER_OPCODE_INT_QUOTIENT: 980 case SHADER_OPCODE_INT_REMAINDER: 981 return false; 982 default: 983 return true; 984 } 985} 986 987bool 988backend_instruction::can_do_saturate() const 989{ 990 switch (opcode) { 991 case BRW_OPCODE_ADD: 992 case BRW_OPCODE_ADD3: 993 case BRW_OPCODE_ASR: 994 case BRW_OPCODE_AVG: 995 case BRW_OPCODE_CSEL: 996 case BRW_OPCODE_DP2: 997 case BRW_OPCODE_DP3: 998 case BRW_OPCODE_DP4: 999 case BRW_OPCODE_DPH: 1000 case BRW_OPCODE_DP4A: 1001 case BRW_OPCODE_F16TO32: 1002 case BRW_OPCODE_F32TO16: 1003 case BRW_OPCODE_LINE: 1004 case BRW_OPCODE_LRP: 1005 case BRW_OPCODE_MAC: 1006 case BRW_OPCODE_MAD: 1007 case BRW_OPCODE_MATH: 1008 case BRW_OPCODE_MOV: 1009 case BRW_OPCODE_MUL: 1010 case SHADER_OPCODE_MULH: 1011 case BRW_OPCODE_PLN: 1012 case BRW_OPCODE_RNDD: 1013 case BRW_OPCODE_RNDE: 1014 case BRW_OPCODE_RNDU: 1015 case BRW_OPCODE_RNDZ: 1016 case BRW_OPCODE_SEL: 1017 case BRW_OPCODE_SHL: 1018 case BRW_OPCODE_SHR: 1019 case FS_OPCODE_LINTERP: 1020 case SHADER_OPCODE_COS: 1021 case SHADER_OPCODE_EXP2: 1022 case SHADER_OPCODE_LOG2: 1023 case SHADER_OPCODE_POW: 1024 case SHADER_OPCODE_RCP: 1025 case SHADER_OPCODE_RSQ: 1026 case SHADER_OPCODE_SIN: 1027 case SHADER_OPCODE_SQRT: 1028 return true; 1029 default: 1030 return false; 1031 } 1032} 1033 1034bool 1035backend_instruction::can_do_cmod() const 1036{ 1037 switch (opcode) { 1038 case BRW_OPCODE_ADD: 1039 case BRW_OPCODE_ADD3: 1040 case BRW_OPCODE_ADDC: 1041 case BRW_OPCODE_AND: 1042 case BRW_OPCODE_ASR: 1043 case BRW_OPCODE_AVG: 1044 case BRW_OPCODE_CMP: 1045 case BRW_OPCODE_CMPN: 1046 case BRW_OPCODE_DP2: 1047 case BRW_OPCODE_DP3: 1048 case BRW_OPCODE_DP4: 1049 case BRW_OPCODE_DPH: 1050 case BRW_OPCODE_F16TO32: 1051 case BRW_OPCODE_F32TO16: 1052 case BRW_OPCODE_FRC: 1053 case BRW_OPCODE_LINE: 1054 case BRW_OPCODE_LRP: 1055 case BRW_OPCODE_LZD: 1056 case BRW_OPCODE_MAC: 1057 case BRW_OPCODE_MACH: 1058 case BRW_OPCODE_MAD: 1059 case BRW_OPCODE_MOV: 1060 case BRW_OPCODE_MUL: 1061 case BRW_OPCODE_NOT: 1062 case BRW_OPCODE_OR: 1063 case BRW_OPCODE_PLN: 1064 case BRW_OPCODE_RNDD: 1065 case BRW_OPCODE_RNDE: 1066 case BRW_OPCODE_RNDU: 1067 case BRW_OPCODE_RNDZ: 1068 case BRW_OPCODE_SAD2: 1069 case BRW_OPCODE_SADA2: 1070 case BRW_OPCODE_SHL: 1071 case BRW_OPCODE_SHR: 1072 case BRW_OPCODE_SUBB: 1073 case BRW_OPCODE_XOR: 1074 case FS_OPCODE_LINTERP: 1075 return true; 1076 default: 1077 return false; 1078 } 1079} 1080 1081bool 1082backend_instruction::reads_accumulator_implicitly() const 1083{ 1084 switch (opcode) { 1085 case BRW_OPCODE_MAC: 1086 case BRW_OPCODE_MACH: 1087 case BRW_OPCODE_SADA2: 1088 return true; 1089 default: 1090 return false; 1091 } 1092} 1093 1094bool 1095backend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const 1096{ 1097 return writes_accumulator || 1098 (devinfo->ver < 6 && 1099 ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || 1100 (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) || 1101 (opcode == FS_OPCODE_LINTERP && 1102 (!devinfo->has_pln || devinfo->ver <= 6)) || 1103 (eot && devinfo->ver >= 12); /* See Wa_14010017096. */ 1104} 1105 1106bool 1107backend_instruction::has_side_effects() const 1108{ 1109 switch (opcode) { 1110 case SHADER_OPCODE_SEND: 1111 return send_has_side_effects; 1112 1113 case BRW_OPCODE_SYNC: 1114 case VEC4_OPCODE_UNTYPED_ATOMIC: 1115 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 1116 case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: 1117 case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 1118 case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 1119 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 1120 case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: 1121 case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: 1122 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: 1123 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: 1124 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: 1125 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: 1126 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: 1127 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: 1128 case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 1129 case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: 1130 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 1131 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 1132 case SHADER_OPCODE_MEMORY_FENCE: 1133 case SHADER_OPCODE_INTERLOCK: 1134 case SHADER_OPCODE_URB_WRITE_SIMD8: 1135 case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: 1136 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: 1137 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: 1138 case FS_OPCODE_FB_WRITE: 1139 case FS_OPCODE_FB_WRITE_LOGICAL: 1140 case FS_OPCODE_REP_FB_WRITE: 1141 case SHADER_OPCODE_BARRIER: 1142 case TCS_OPCODE_URB_WRITE: 1143 case TCS_OPCODE_RELEASE_INPUT: 1144 case SHADER_OPCODE_RND_MODE: 1145 case SHADER_OPCODE_FLOAT_CONTROL_MODE: 1146 case FS_OPCODE_SCHEDULING_FENCE: 1147 case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: 1148 case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: 1149 case SHADER_OPCODE_BTD_SPAWN_LOGICAL: 1150 case SHADER_OPCODE_BTD_RETIRE_LOGICAL: 1151 case RT_OPCODE_TRACE_RAY_LOGICAL: 1152 case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: 1153 return true; 1154 default: 1155 return eot; 1156 } 1157} 1158 1159bool 1160backend_instruction::is_volatile() const 1161{ 1162 switch (opcode) { 1163 case SHADER_OPCODE_SEND: 1164 return send_is_volatile; 1165 1166 case VEC4_OPCODE_UNTYPED_SURFACE_READ: 1167 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 1168 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 1169 case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 1170 case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: 1171 case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: 1172 case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: 1173 case SHADER_OPCODE_URB_READ_SIMD8: 1174 case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: 1175 case VEC4_OPCODE_URB_READ: 1176 return true; 1177 default: 1178 return false; 1179 } 1180} 1181 1182#ifndef NDEBUG 1183static bool 1184inst_is_in_block(const bblock_t *block, const backend_instruction *inst) 1185{ 1186 foreach_inst_in_block (backend_instruction, i, block) { 1187 if (inst == i) 1188 return true; 1189 } 1190 return false; 1191} 1192#endif 1193 1194static void 1195adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) 1196{ 1197 for (bblock_t *block_iter = start_block->next(); 1198 block_iter; 1199 block_iter = block_iter->next()) { 1200 block_iter->start_ip += ip_adjustment; 1201 block_iter->end_ip += ip_adjustment; 1202 } 1203} 1204 1205void 1206backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) 1207{ 1208 assert(this != inst); 1209 assert(block->end_ip_delta == 0); 1210 1211 if (!this->is_head_sentinel()) 1212 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1213 1214 block->end_ip++; 1215 1216 adjust_later_block_ips(block, 1); 1217 1218 exec_node::insert_after(inst); 1219} 1220 1221void 1222backend_instruction::insert_before(bblock_t *block, backend_instruction *inst) 1223{ 1224 assert(this != inst); 1225 assert(block->end_ip_delta == 0); 1226 1227 if (!this->is_tail_sentinel()) 1228 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1229 1230 block->end_ip++; 1231 1232 adjust_later_block_ips(block, 1); 1233 1234 exec_node::insert_before(inst); 1235} 1236 1237void 1238backend_instruction::insert_before(bblock_t *block, exec_list *list) 1239{ 1240 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1241 assert(block->end_ip_delta == 0); 1242 1243 unsigned num_inst = list->length(); 1244 1245 block->end_ip += num_inst; 1246 1247 adjust_later_block_ips(block, num_inst); 1248 1249 exec_node::insert_before(list); 1250} 1251 1252void 1253backend_instruction::remove(bblock_t *block, bool defer_later_block_ip_updates) 1254{ 1255 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1256 1257 if (defer_later_block_ip_updates) { 1258 block->end_ip_delta--; 1259 } else { 1260 assert(block->end_ip_delta == 0); 1261 adjust_later_block_ips(block, -1); 1262 } 1263 1264 if (block->start_ip == block->end_ip) { 1265 if (block->end_ip_delta != 0) { 1266 adjust_later_block_ips(block, block->end_ip_delta); 1267 block->end_ip_delta = 0; 1268 } 1269 1270 block->cfg->remove_block(block); 1271 } else { 1272 block->end_ip--; 1273 } 1274 1275 exec_node::remove(); 1276} 1277 1278void 1279backend_shader::dump_instructions() const 1280{ 1281 dump_instructions(NULL); 1282} 1283 1284void 1285backend_shader::dump_instructions(const char *name) const 1286{ 1287 FILE *file = stderr; 1288 if (name && geteuid() != 0) { 1289 file = fopen(name, "w"); 1290 if (!file) 1291 file = stderr; 1292 } 1293 1294 if (cfg) { 1295 int ip = 0; 1296 foreach_block_and_inst(block, backend_instruction, inst, cfg) { 1297 if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) 1298 fprintf(file, "%4d: ", ip++); 1299 dump_instruction(inst, file); 1300 } 1301 } else { 1302 int ip = 0; 1303 foreach_in_list(backend_instruction, inst, &instructions) { 1304 if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) 1305 fprintf(file, "%4d: ", ip++); 1306 dump_instruction(inst, file); 1307 } 1308 } 1309 1310 if (file != stderr) { 1311 fclose(file); 1312 } 1313} 1314 1315void 1316backend_shader::calculate_cfg() 1317{ 1318 if (this->cfg) 1319 return; 1320 cfg = new(mem_ctx) cfg_t(this, &this->instructions); 1321} 1322 1323void 1324backend_shader::invalidate_analysis(brw::analysis_dependency_class c) 1325{ 1326 idom_analysis.invalidate(c); 1327} 1328 1329extern "C" const unsigned * 1330brw_compile_tes(const struct brw_compiler *compiler, 1331 void *log_data, 1332 void *mem_ctx, 1333 const struct brw_tes_prog_key *key, 1334 const struct brw_vue_map *input_vue_map, 1335 struct brw_tes_prog_data *prog_data, 1336 nir_shader *nir, 1337 int shader_time_index, 1338 struct brw_compile_stats *stats, 1339 char **error_str) 1340{ 1341 const struct intel_device_info *devinfo = compiler->devinfo; 1342 const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; 1343 const bool debug_enabled = INTEL_DEBUG(DEBUG_TES); 1344 const unsigned *assembly; 1345 1346 prog_data->base.base.stage = MESA_SHADER_TESS_EVAL; 1347 1348 nir->info.inputs_read = key->inputs_read; 1349 nir->info.patch_inputs_read = key->patch_inputs_read; 1350 1351 brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); 1352 brw_nir_lower_tes_inputs(nir, input_vue_map); 1353 brw_nir_lower_vue_outputs(nir); 1354 brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, 1355 key->base.robust_buffer_access); 1356 1357 brw_compute_vue_map(devinfo, &prog_data->base.vue_map, 1358 nir->info.outputs_written, 1359 nir->info.separate_shader, 1); 1360 1361 unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; 1362 1363 assert(output_size_bytes >= 1); 1364 if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) { 1365 if (error_str) 1366 *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); 1367 return NULL; 1368 } 1369 1370 prog_data->base.clip_distance_mask = 1371 ((1 << nir->info.clip_distance_array_size) - 1); 1372 prog_data->base.cull_distance_mask = 1373 ((1 << nir->info.cull_distance_array_size) - 1) << 1374 nir->info.clip_distance_array_size; 1375 1376 /* URB entry sizes are stored as a multiple of 64 bytes. */ 1377 prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; 1378 1379 prog_data->base.urb_read_length = 0; 1380 1381 STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); 1382 STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL == 1383 TESS_SPACING_FRACTIONAL_ODD - 1); 1384 STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL == 1385 TESS_SPACING_FRACTIONAL_EVEN - 1); 1386 1387 prog_data->partitioning = 1388 (enum brw_tess_partitioning) (nir->info.tess.spacing - 1); 1389 1390 switch (nir->info.tess.primitive_mode) { 1391 case GL_QUADS: 1392 prog_data->domain = BRW_TESS_DOMAIN_QUAD; 1393 break; 1394 case GL_TRIANGLES: 1395 prog_data->domain = BRW_TESS_DOMAIN_TRI; 1396 break; 1397 case GL_ISOLINES: 1398 prog_data->domain = BRW_TESS_DOMAIN_ISOLINE; 1399 break; 1400 default: 1401 unreachable("invalid domain shader primitive mode"); 1402 } 1403 1404 if (nir->info.tess.point_mode) { 1405 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; 1406 } else if (nir->info.tess.primitive_mode == GL_ISOLINES) { 1407 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; 1408 } else { 1409 /* Hardware winding order is backwards from OpenGL */ 1410 prog_data->output_topology = 1411 nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW 1412 : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; 1413 } 1414 1415 if (unlikely(debug_enabled)) { 1416 fprintf(stderr, "TES Input "); 1417 brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL); 1418 fprintf(stderr, "TES Output "); 1419 brw_print_vue_map(stderr, &prog_data->base.vue_map, 1420 MESA_SHADER_TESS_EVAL); 1421 } 1422 1423 if (is_scalar) { 1424 fs_visitor v(compiler, log_data, mem_ctx, &key->base, 1425 &prog_data->base.base, nir, 8, 1426 shader_time_index, debug_enabled); 1427 if (!v.run_tes()) { 1428 if (error_str) 1429 *error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1430 return NULL; 1431 } 1432 1433 prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; 1434 prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; 1435 1436 fs_generator g(compiler, log_data, mem_ctx, 1437 &prog_data->base.base, false, MESA_SHADER_TESS_EVAL); 1438 if (unlikely(debug_enabled)) { 1439 g.enable_debug(ralloc_asprintf(mem_ctx, 1440 "%s tessellation evaluation shader %s", 1441 nir->info.label ? nir->info.label 1442 : "unnamed", 1443 nir->info.name)); 1444 } 1445 1446 g.generate_code(v.cfg, 8, v.shader_stats, 1447 v.performance_analysis.require(), stats); 1448 1449 g.add_const_data(nir->constant_data, nir->constant_data_size); 1450 1451 assembly = g.get_assembly(); 1452 } else { 1453 brw::vec4_tes_visitor v(compiler, log_data, key, prog_data, 1454 nir, mem_ctx, shader_time_index, debug_enabled); 1455 if (!v.run()) { 1456 if (error_str) 1457 *error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1458 return NULL; 1459 } 1460 1461 if (unlikely(debug_enabled)) 1462 v.dump_instructions(); 1463 1464 assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, 1465 &prog_data->base, v.cfg, 1466 v.performance_analysis.require(), 1467 stats, debug_enabled); 1468 } 1469 1470 return assembly; 1471} 1472