1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2010 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "brw_cfg.h" 25b8e80941Smrg#include "brw_eu.h" 26b8e80941Smrg#include "brw_fs.h" 27b8e80941Smrg#include "brw_nir.h" 28b8e80941Smrg#include "brw_vec4_tes.h" 29b8e80941Smrg#include "dev/gen_debug.h" 30b8e80941Smrg#include "main/uniforms.h" 31b8e80941Smrg#include "util/macros.h" 32b8e80941Smrg 33b8e80941Smrgenum brw_reg_type 34b8e80941Smrgbrw_type_for_base_type(const struct glsl_type *type) 35b8e80941Smrg{ 36b8e80941Smrg switch (type->base_type) { 37b8e80941Smrg case GLSL_TYPE_FLOAT16: 38b8e80941Smrg return BRW_REGISTER_TYPE_HF; 39b8e80941Smrg case GLSL_TYPE_FLOAT: 40b8e80941Smrg return BRW_REGISTER_TYPE_F; 41b8e80941Smrg case GLSL_TYPE_INT: 42b8e80941Smrg case GLSL_TYPE_BOOL: 43b8e80941Smrg case GLSL_TYPE_SUBROUTINE: 44b8e80941Smrg return BRW_REGISTER_TYPE_D; 45b8e80941Smrg case GLSL_TYPE_INT16: 46b8e80941Smrg return BRW_REGISTER_TYPE_W; 47b8e80941Smrg case GLSL_TYPE_INT8: 48b8e80941Smrg return BRW_REGISTER_TYPE_B; 49b8e80941Smrg case GLSL_TYPE_UINT: 50b8e80941Smrg return BRW_REGISTER_TYPE_UD; 51b8e80941Smrg case GLSL_TYPE_UINT16: 52b8e80941Smrg return BRW_REGISTER_TYPE_UW; 53b8e80941Smrg case GLSL_TYPE_UINT8: 54b8e80941Smrg return BRW_REGISTER_TYPE_UB; 55b8e80941Smrg case GLSL_TYPE_ARRAY: 56b8e80941Smrg return brw_type_for_base_type(type->fields.array); 57b8e80941Smrg case GLSL_TYPE_STRUCT: 58b8e80941Smrg case GLSL_TYPE_INTERFACE: 59b8e80941Smrg case GLSL_TYPE_SAMPLER: 60b8e80941Smrg case GLSL_TYPE_ATOMIC_UINT: 61b8e80941Smrg /* These should be overridden with the type of the member when 62b8e80941Smrg * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 63b8e80941Smrg * way to trip up if we don't. 64b8e80941Smrg */ 65b8e80941Smrg return BRW_REGISTER_TYPE_UD; 66b8e80941Smrg case GLSL_TYPE_IMAGE: 67b8e80941Smrg return BRW_REGISTER_TYPE_UD; 68b8e80941Smrg case GLSL_TYPE_DOUBLE: 69b8e80941Smrg return BRW_REGISTER_TYPE_DF; 70b8e80941Smrg case GLSL_TYPE_UINT64: 71b8e80941Smrg return BRW_REGISTER_TYPE_UQ; 72b8e80941Smrg case GLSL_TYPE_INT64: 73b8e80941Smrg return BRW_REGISTER_TYPE_Q; 74b8e80941Smrg case GLSL_TYPE_VOID: 75b8e80941Smrg case GLSL_TYPE_ERROR: 76b8e80941Smrg case GLSL_TYPE_FUNCTION: 77b8e80941Smrg unreachable("not reached"); 78b8e80941Smrg } 79b8e80941Smrg 80b8e80941Smrg return BRW_REGISTER_TYPE_F; 81b8e80941Smrg} 82b8e80941Smrg 83b8e80941Smrgenum brw_conditional_mod 84b8e80941Smrgbrw_conditional_for_comparison(unsigned int op) 85b8e80941Smrg{ 86b8e80941Smrg switch (op) { 87b8e80941Smrg case ir_binop_less: 88b8e80941Smrg return BRW_CONDITIONAL_L; 89b8e80941Smrg case ir_binop_gequal: 90b8e80941Smrg return BRW_CONDITIONAL_GE; 91b8e80941Smrg case ir_binop_equal: 92b8e80941Smrg case ir_binop_all_equal: /* same as equal for scalars */ 93b8e80941Smrg return BRW_CONDITIONAL_Z; 94b8e80941Smrg case ir_binop_nequal: 95b8e80941Smrg case ir_binop_any_nequal: /* same as nequal for scalars */ 96b8e80941Smrg return BRW_CONDITIONAL_NZ; 97b8e80941Smrg default: 98b8e80941Smrg unreachable("not reached: bad operation for comparison"); 99b8e80941Smrg } 100b8e80941Smrg} 101b8e80941Smrg 102b8e80941Smrguint32_t 103b8e80941Smrgbrw_math_function(enum opcode op) 104b8e80941Smrg{ 105b8e80941Smrg switch (op) { 106b8e80941Smrg case SHADER_OPCODE_RCP: 107b8e80941Smrg return BRW_MATH_FUNCTION_INV; 108b8e80941Smrg case SHADER_OPCODE_RSQ: 109b8e80941Smrg return BRW_MATH_FUNCTION_RSQ; 110b8e80941Smrg case SHADER_OPCODE_SQRT: 111b8e80941Smrg return BRW_MATH_FUNCTION_SQRT; 112b8e80941Smrg case SHADER_OPCODE_EXP2: 113b8e80941Smrg return BRW_MATH_FUNCTION_EXP; 114b8e80941Smrg case SHADER_OPCODE_LOG2: 115b8e80941Smrg return BRW_MATH_FUNCTION_LOG; 116b8e80941Smrg case SHADER_OPCODE_POW: 117b8e80941Smrg return BRW_MATH_FUNCTION_POW; 118b8e80941Smrg case SHADER_OPCODE_SIN: 119b8e80941Smrg return BRW_MATH_FUNCTION_SIN; 120b8e80941Smrg case SHADER_OPCODE_COS: 121b8e80941Smrg return BRW_MATH_FUNCTION_COS; 122b8e80941Smrg case SHADER_OPCODE_INT_QUOTIENT: 123b8e80941Smrg return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; 124b8e80941Smrg case SHADER_OPCODE_INT_REMAINDER: 125b8e80941Smrg return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; 126b8e80941Smrg default: 127b8e80941Smrg unreachable("not reached: unknown math function"); 128b8e80941Smrg } 129b8e80941Smrg} 130b8e80941Smrg 131b8e80941Smrgbool 132b8e80941Smrgbrw_texture_offset(const nir_tex_instr *tex, unsigned src, 133b8e80941Smrg uint32_t *offset_bits_out) 134b8e80941Smrg{ 135b8e80941Smrg if (!nir_src_is_const(tex->src[src].src)) 136b8e80941Smrg return false; 137b8e80941Smrg 138b8e80941Smrg const unsigned num_components = nir_tex_instr_src_size(tex, src); 139b8e80941Smrg 140b8e80941Smrg /* Combine all three offsets into a single unsigned dword: 141b8e80941Smrg * 142b8e80941Smrg * bits 11:8 - U Offset (X component) 143b8e80941Smrg * bits 7:4 - V Offset (Y component) 144b8e80941Smrg * bits 3:0 - R Offset (Z component) 145b8e80941Smrg */ 146b8e80941Smrg uint32_t offset_bits = 0; 147b8e80941Smrg for (unsigned i = 0; i < num_components; i++) { 148b8e80941Smrg int offset = nir_src_comp_as_int(tex->src[src].src, i); 149b8e80941Smrg 150b8e80941Smrg /* offset out of bounds; caller will handle it. */ 151b8e80941Smrg if (offset > 7 || offset < -8) 152b8e80941Smrg return false; 153b8e80941Smrg 154b8e80941Smrg const unsigned shift = 4 * (2 - i); 155b8e80941Smrg offset_bits |= (offset << shift) & (0xF << shift); 156b8e80941Smrg } 157b8e80941Smrg 158b8e80941Smrg *offset_bits_out = offset_bits; 159b8e80941Smrg 160b8e80941Smrg return true; 161b8e80941Smrg} 162b8e80941Smrg 163b8e80941Smrgconst char * 164b8e80941Smrgbrw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) 165b8e80941Smrg{ 166b8e80941Smrg switch (op) { 167b8e80941Smrg case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP: 168b8e80941Smrg /* The DO instruction doesn't exist on Gen6+, but we use it to mark the 169b8e80941Smrg * start of a loop in the IR. 170b8e80941Smrg */ 171b8e80941Smrg if (devinfo->gen >= 6 && op == BRW_OPCODE_DO) 172b8e80941Smrg return "do"; 173b8e80941Smrg 174b8e80941Smrg /* The following conversion opcodes doesn't exist on Gen8+, but we use 175b8e80941Smrg * then to mark that we want to do the conversion. 176b8e80941Smrg */ 177b8e80941Smrg if (devinfo->gen > 7 && op == BRW_OPCODE_F32TO16) 178b8e80941Smrg return "f32to16"; 179b8e80941Smrg 180b8e80941Smrg if (devinfo->gen > 7 && op == BRW_OPCODE_F16TO32) 181b8e80941Smrg return "f16to32"; 182b8e80941Smrg 183b8e80941Smrg assert(brw_opcode_desc(devinfo, op)->name); 184b8e80941Smrg return brw_opcode_desc(devinfo, op)->name; 185b8e80941Smrg case FS_OPCODE_FB_WRITE: 186b8e80941Smrg return "fb_write"; 187b8e80941Smrg case FS_OPCODE_FB_WRITE_LOGICAL: 188b8e80941Smrg return "fb_write_logical"; 189b8e80941Smrg case FS_OPCODE_REP_FB_WRITE: 190b8e80941Smrg return "rep_fb_write"; 191b8e80941Smrg case FS_OPCODE_FB_READ: 192b8e80941Smrg return "fb_read"; 193b8e80941Smrg case FS_OPCODE_FB_READ_LOGICAL: 194b8e80941Smrg return "fb_read_logical"; 195b8e80941Smrg 196b8e80941Smrg case SHADER_OPCODE_RCP: 197b8e80941Smrg return "rcp"; 198b8e80941Smrg case SHADER_OPCODE_RSQ: 199b8e80941Smrg return "rsq"; 200b8e80941Smrg case SHADER_OPCODE_SQRT: 201b8e80941Smrg return "sqrt"; 202b8e80941Smrg case SHADER_OPCODE_EXP2: 203b8e80941Smrg return "exp2"; 204b8e80941Smrg case SHADER_OPCODE_LOG2: 205b8e80941Smrg return "log2"; 206b8e80941Smrg case SHADER_OPCODE_POW: 207b8e80941Smrg return "pow"; 208b8e80941Smrg case SHADER_OPCODE_INT_QUOTIENT: 209b8e80941Smrg return "int_quot"; 210b8e80941Smrg case SHADER_OPCODE_INT_REMAINDER: 211b8e80941Smrg return "int_rem"; 212b8e80941Smrg case SHADER_OPCODE_SIN: 213b8e80941Smrg return "sin"; 214b8e80941Smrg case SHADER_OPCODE_COS: 215b8e80941Smrg return "cos"; 216b8e80941Smrg 217b8e80941Smrg case SHADER_OPCODE_SEND: 218b8e80941Smrg return "send"; 219b8e80941Smrg 220b8e80941Smrg case SHADER_OPCODE_TEX: 221b8e80941Smrg return "tex"; 222b8e80941Smrg case SHADER_OPCODE_TEX_LOGICAL: 223b8e80941Smrg return "tex_logical"; 224b8e80941Smrg case SHADER_OPCODE_TXD: 225b8e80941Smrg return "txd"; 226b8e80941Smrg case SHADER_OPCODE_TXD_LOGICAL: 227b8e80941Smrg return "txd_logical"; 228b8e80941Smrg case SHADER_OPCODE_TXF: 229b8e80941Smrg return "txf"; 230b8e80941Smrg case SHADER_OPCODE_TXF_LOGICAL: 231b8e80941Smrg return "txf_logical"; 232b8e80941Smrg case SHADER_OPCODE_TXF_LZ: 233b8e80941Smrg return "txf_lz"; 234b8e80941Smrg case SHADER_OPCODE_TXL: 235b8e80941Smrg return "txl"; 236b8e80941Smrg case SHADER_OPCODE_TXL_LOGICAL: 237b8e80941Smrg return "txl_logical"; 238b8e80941Smrg case SHADER_OPCODE_TXL_LZ: 239b8e80941Smrg return "txl_lz"; 240b8e80941Smrg case SHADER_OPCODE_TXS: 241b8e80941Smrg return "txs"; 242b8e80941Smrg case SHADER_OPCODE_TXS_LOGICAL: 243b8e80941Smrg return "txs_logical"; 244b8e80941Smrg case FS_OPCODE_TXB: 245b8e80941Smrg return "txb"; 246b8e80941Smrg case FS_OPCODE_TXB_LOGICAL: 247b8e80941Smrg return "txb_logical"; 248b8e80941Smrg case SHADER_OPCODE_TXF_CMS: 249b8e80941Smrg return "txf_cms"; 250b8e80941Smrg case SHADER_OPCODE_TXF_CMS_LOGICAL: 251b8e80941Smrg return "txf_cms_logical"; 252b8e80941Smrg case SHADER_OPCODE_TXF_CMS_W: 253b8e80941Smrg return "txf_cms_w"; 254b8e80941Smrg case SHADER_OPCODE_TXF_CMS_W_LOGICAL: 255b8e80941Smrg return "txf_cms_w_logical"; 256b8e80941Smrg case SHADER_OPCODE_TXF_UMS: 257b8e80941Smrg return "txf_ums"; 258b8e80941Smrg case SHADER_OPCODE_TXF_UMS_LOGICAL: 259b8e80941Smrg return "txf_ums_logical"; 260b8e80941Smrg case SHADER_OPCODE_TXF_MCS: 261b8e80941Smrg return "txf_mcs"; 262b8e80941Smrg case SHADER_OPCODE_TXF_MCS_LOGICAL: 263b8e80941Smrg return "txf_mcs_logical"; 264b8e80941Smrg case SHADER_OPCODE_LOD: 265b8e80941Smrg return "lod"; 266b8e80941Smrg case SHADER_OPCODE_LOD_LOGICAL: 267b8e80941Smrg return "lod_logical"; 268b8e80941Smrg case SHADER_OPCODE_TG4: 269b8e80941Smrg return "tg4"; 270b8e80941Smrg case SHADER_OPCODE_TG4_LOGICAL: 271b8e80941Smrg return "tg4_logical"; 272b8e80941Smrg case SHADER_OPCODE_TG4_OFFSET: 273b8e80941Smrg return "tg4_offset"; 274b8e80941Smrg case SHADER_OPCODE_TG4_OFFSET_LOGICAL: 275b8e80941Smrg return "tg4_offset_logical"; 276b8e80941Smrg case SHADER_OPCODE_SAMPLEINFO: 277b8e80941Smrg return "sampleinfo"; 278b8e80941Smrg case SHADER_OPCODE_SAMPLEINFO_LOGICAL: 279b8e80941Smrg return "sampleinfo_logical"; 280b8e80941Smrg 281b8e80941Smrg case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: 282b8e80941Smrg return "image_size_logical"; 283b8e80941Smrg 284b8e80941Smrg case SHADER_OPCODE_SHADER_TIME_ADD: 285b8e80941Smrg return "shader_time_add"; 286b8e80941Smrg 287b8e80941Smrg case VEC4_OPCODE_UNTYPED_ATOMIC: 288b8e80941Smrg return "untyped_atomic"; 289b8e80941Smrg case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 290b8e80941Smrg return "untyped_atomic_logical"; 291b8e80941Smrg case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: 292b8e80941Smrg return "untyped_atomic_float_logical"; 293b8e80941Smrg case VEC4_OPCODE_UNTYPED_SURFACE_READ: 294b8e80941Smrg return "untyped_surface_read"; 295b8e80941Smrg case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 296b8e80941Smrg return "untyped_surface_read_logical"; 297b8e80941Smrg case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 298b8e80941Smrg return "untyped_surface_write"; 299b8e80941Smrg case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 300b8e80941Smrg return "untyped_surface_write_logical"; 301b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: 302b8e80941Smrg return "a64_untyped_read_logical"; 303b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: 304b8e80941Smrg return "a64_untyped_write_logical"; 305b8e80941Smrg case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: 306b8e80941Smrg return "a64_byte_scattered_read_logical"; 307b8e80941Smrg case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: 308b8e80941Smrg return "a64_byte_scattered_write_logical"; 309b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: 310b8e80941Smrg return "a64_untyped_atomic_logical"; 311b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: 312b8e80941Smrg return "a64_untyped_atomic_int64_logical"; 313b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: 314b8e80941Smrg return "a64_untyped_atomic_float_logical"; 315b8e80941Smrg case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 316b8e80941Smrg return "typed_atomic_logical"; 317b8e80941Smrg case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 318b8e80941Smrg return "typed_surface_read_logical"; 319b8e80941Smrg case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 320b8e80941Smrg return "typed_surface_write_logical"; 321b8e80941Smrg case SHADER_OPCODE_MEMORY_FENCE: 322b8e80941Smrg return "memory_fence"; 323b8e80941Smrg case SHADER_OPCODE_INTERLOCK: 324b8e80941Smrg /* For an interlock we actually issue a memory fence via sendc. */ 325b8e80941Smrg return "interlock"; 326b8e80941Smrg 327b8e80941Smrg case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 328b8e80941Smrg return "byte_scattered_read_logical"; 329b8e80941Smrg case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 330b8e80941Smrg return "byte_scattered_write_logical"; 331b8e80941Smrg 332b8e80941Smrg case SHADER_OPCODE_LOAD_PAYLOAD: 333b8e80941Smrg return "load_payload"; 334b8e80941Smrg case FS_OPCODE_PACK: 335b8e80941Smrg return "pack"; 336b8e80941Smrg 337b8e80941Smrg case SHADER_OPCODE_GEN4_SCRATCH_READ: 338b8e80941Smrg return "gen4_scratch_read"; 339b8e80941Smrg case SHADER_OPCODE_GEN4_SCRATCH_WRITE: 340b8e80941Smrg return "gen4_scratch_write"; 341b8e80941Smrg case SHADER_OPCODE_GEN7_SCRATCH_READ: 342b8e80941Smrg return "gen7_scratch_read"; 343b8e80941Smrg case SHADER_OPCODE_URB_WRITE_SIMD8: 344b8e80941Smrg return "gen8_urb_write_simd8"; 345b8e80941Smrg case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: 346b8e80941Smrg return "gen8_urb_write_simd8_per_slot"; 347b8e80941Smrg case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: 348b8e80941Smrg return "gen8_urb_write_simd8_masked"; 349b8e80941Smrg case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: 350b8e80941Smrg return "gen8_urb_write_simd8_masked_per_slot"; 351b8e80941Smrg case SHADER_OPCODE_URB_READ_SIMD8: 352b8e80941Smrg return "urb_read_simd8"; 353b8e80941Smrg case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: 354b8e80941Smrg return "urb_read_simd8_per_slot"; 355b8e80941Smrg 356b8e80941Smrg case SHADER_OPCODE_FIND_LIVE_CHANNEL: 357b8e80941Smrg return "find_live_channel"; 358b8e80941Smrg case SHADER_OPCODE_BROADCAST: 359b8e80941Smrg return "broadcast"; 360b8e80941Smrg case SHADER_OPCODE_SHUFFLE: 361b8e80941Smrg return "shuffle"; 362b8e80941Smrg case SHADER_OPCODE_SEL_EXEC: 363b8e80941Smrg return "sel_exec"; 364b8e80941Smrg case SHADER_OPCODE_QUAD_SWIZZLE: 365b8e80941Smrg return "quad_swizzle"; 366b8e80941Smrg case SHADER_OPCODE_CLUSTER_BROADCAST: 367b8e80941Smrg return "cluster_broadcast"; 368b8e80941Smrg 369b8e80941Smrg case SHADER_OPCODE_GET_BUFFER_SIZE: 370b8e80941Smrg return "get_buffer_size"; 371b8e80941Smrg 372b8e80941Smrg case VEC4_OPCODE_MOV_BYTES: 373b8e80941Smrg return "mov_bytes"; 374b8e80941Smrg case VEC4_OPCODE_PACK_BYTES: 375b8e80941Smrg return "pack_bytes"; 376b8e80941Smrg case VEC4_OPCODE_UNPACK_UNIFORM: 377b8e80941Smrg return "unpack_uniform"; 378b8e80941Smrg case VEC4_OPCODE_DOUBLE_TO_F32: 379b8e80941Smrg return "double_to_f32"; 380b8e80941Smrg case VEC4_OPCODE_DOUBLE_TO_D32: 381b8e80941Smrg return "double_to_d32"; 382b8e80941Smrg case VEC4_OPCODE_DOUBLE_TO_U32: 383b8e80941Smrg return "double_to_u32"; 384b8e80941Smrg case VEC4_OPCODE_TO_DOUBLE: 385b8e80941Smrg return "single_to_double"; 386b8e80941Smrg case VEC4_OPCODE_PICK_LOW_32BIT: 387b8e80941Smrg return "pick_low_32bit"; 388b8e80941Smrg case VEC4_OPCODE_PICK_HIGH_32BIT: 389b8e80941Smrg return "pick_high_32bit"; 390b8e80941Smrg case VEC4_OPCODE_SET_LOW_32BIT: 391b8e80941Smrg return "set_low_32bit"; 392b8e80941Smrg case VEC4_OPCODE_SET_HIGH_32BIT: 393b8e80941Smrg return "set_high_32bit"; 394b8e80941Smrg 395b8e80941Smrg case FS_OPCODE_DDX_COARSE: 396b8e80941Smrg return "ddx_coarse"; 397b8e80941Smrg case FS_OPCODE_DDX_FINE: 398b8e80941Smrg return "ddx_fine"; 399b8e80941Smrg case FS_OPCODE_DDY_COARSE: 400b8e80941Smrg return "ddy_coarse"; 401b8e80941Smrg case FS_OPCODE_DDY_FINE: 402b8e80941Smrg return "ddy_fine"; 403b8e80941Smrg 404b8e80941Smrg case FS_OPCODE_LINTERP: 405b8e80941Smrg return "linterp"; 406b8e80941Smrg 407b8e80941Smrg case FS_OPCODE_PIXEL_X: 408b8e80941Smrg return "pixel_x"; 409b8e80941Smrg case FS_OPCODE_PIXEL_Y: 410b8e80941Smrg return "pixel_y"; 411b8e80941Smrg 412b8e80941Smrg case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: 413b8e80941Smrg return "uniform_pull_const"; 414b8e80941Smrg case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: 415b8e80941Smrg return "uniform_pull_const_gen7"; 416b8e80941Smrg case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: 417b8e80941Smrg return "varying_pull_const_gen4"; 418b8e80941Smrg case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: 419b8e80941Smrg return "varying_pull_const_logical"; 420b8e80941Smrg 421b8e80941Smrg case FS_OPCODE_DISCARD_JUMP: 422b8e80941Smrg return "discard_jump"; 423b8e80941Smrg 424b8e80941Smrg case FS_OPCODE_SET_SAMPLE_ID: 425b8e80941Smrg return "set_sample_id"; 426b8e80941Smrg 427b8e80941Smrg case FS_OPCODE_PACK_HALF_2x16_SPLIT: 428b8e80941Smrg return "pack_half_2x16_split"; 429b8e80941Smrg 430b8e80941Smrg case FS_OPCODE_PLACEHOLDER_HALT: 431b8e80941Smrg return "placeholder_halt"; 432b8e80941Smrg 433b8e80941Smrg case FS_OPCODE_INTERPOLATE_AT_SAMPLE: 434b8e80941Smrg return "interp_sample"; 435b8e80941Smrg case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: 436b8e80941Smrg return "interp_shared_offset"; 437b8e80941Smrg case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: 438b8e80941Smrg return "interp_per_slot_offset"; 439b8e80941Smrg 440b8e80941Smrg case VS_OPCODE_URB_WRITE: 441b8e80941Smrg return "vs_urb_write"; 442b8e80941Smrg case VS_OPCODE_PULL_CONSTANT_LOAD: 443b8e80941Smrg return "pull_constant_load"; 444b8e80941Smrg case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: 445b8e80941Smrg return "pull_constant_load_gen7"; 446b8e80941Smrg 447b8e80941Smrg case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: 448b8e80941Smrg return "set_simd4x2_header_gen9"; 449b8e80941Smrg 450b8e80941Smrg case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: 451b8e80941Smrg return "unpack_flags_simd4x2"; 452b8e80941Smrg 453b8e80941Smrg case GS_OPCODE_URB_WRITE: 454b8e80941Smrg return "gs_urb_write"; 455b8e80941Smrg case GS_OPCODE_URB_WRITE_ALLOCATE: 456b8e80941Smrg return "gs_urb_write_allocate"; 457b8e80941Smrg case GS_OPCODE_THREAD_END: 458b8e80941Smrg return "gs_thread_end"; 459b8e80941Smrg case GS_OPCODE_SET_WRITE_OFFSET: 460b8e80941Smrg return "set_write_offset"; 461b8e80941Smrg case GS_OPCODE_SET_VERTEX_COUNT: 462b8e80941Smrg return "set_vertex_count"; 463b8e80941Smrg case GS_OPCODE_SET_DWORD_2: 464b8e80941Smrg return "set_dword_2"; 465b8e80941Smrg case GS_OPCODE_PREPARE_CHANNEL_MASKS: 466b8e80941Smrg return "prepare_channel_masks"; 467b8e80941Smrg case GS_OPCODE_SET_CHANNEL_MASKS: 468b8e80941Smrg return "set_channel_masks"; 469b8e80941Smrg case GS_OPCODE_GET_INSTANCE_ID: 470b8e80941Smrg return "get_instance_id"; 471b8e80941Smrg case GS_OPCODE_FF_SYNC: 472b8e80941Smrg return "ff_sync"; 473b8e80941Smrg case GS_OPCODE_SET_PRIMITIVE_ID: 474b8e80941Smrg return "set_primitive_id"; 475b8e80941Smrg case GS_OPCODE_SVB_WRITE: 476b8e80941Smrg return "gs_svb_write"; 477b8e80941Smrg case GS_OPCODE_SVB_SET_DST_INDEX: 478b8e80941Smrg return "gs_svb_set_dst_index"; 479b8e80941Smrg case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: 480b8e80941Smrg return "gs_ff_sync_set_primitives"; 481b8e80941Smrg case CS_OPCODE_CS_TERMINATE: 482b8e80941Smrg return "cs_terminate"; 483b8e80941Smrg case SHADER_OPCODE_BARRIER: 484b8e80941Smrg return "barrier"; 485b8e80941Smrg case SHADER_OPCODE_MULH: 486b8e80941Smrg return "mulh"; 487b8e80941Smrg case SHADER_OPCODE_MOV_INDIRECT: 488b8e80941Smrg return "mov_indirect"; 489b8e80941Smrg 490b8e80941Smrg case VEC4_OPCODE_URB_READ: 491b8e80941Smrg return "urb_read"; 492b8e80941Smrg case TCS_OPCODE_GET_INSTANCE_ID: 493b8e80941Smrg return "tcs_get_instance_id"; 494b8e80941Smrg case TCS_OPCODE_URB_WRITE: 495b8e80941Smrg return "tcs_urb_write"; 496b8e80941Smrg case TCS_OPCODE_SET_INPUT_URB_OFFSETS: 497b8e80941Smrg return "tcs_set_input_urb_offsets"; 498b8e80941Smrg case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 499b8e80941Smrg return "tcs_set_output_urb_offsets"; 500b8e80941Smrg case TCS_OPCODE_GET_PRIMITIVE_ID: 501b8e80941Smrg return "tcs_get_primitive_id"; 502b8e80941Smrg case TCS_OPCODE_CREATE_BARRIER_HEADER: 503b8e80941Smrg return "tcs_create_barrier_header"; 504b8e80941Smrg case TCS_OPCODE_SRC0_010_IS_ZERO: 505b8e80941Smrg return "tcs_src0<0,1,0>_is_zero"; 506b8e80941Smrg case TCS_OPCODE_RELEASE_INPUT: 507b8e80941Smrg return "tcs_release_input"; 508b8e80941Smrg case TCS_OPCODE_THREAD_END: 509b8e80941Smrg return "tcs_thread_end"; 510b8e80941Smrg case TES_OPCODE_CREATE_INPUT_READ_HEADER: 511b8e80941Smrg return "tes_create_input_read_header"; 512b8e80941Smrg case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: 513b8e80941Smrg return "tes_add_indirect_urb_offset"; 514b8e80941Smrg case TES_OPCODE_GET_PRIMITIVE_ID: 515b8e80941Smrg return "tes_get_primitive_id"; 516b8e80941Smrg 517b8e80941Smrg case SHADER_OPCODE_RND_MODE: 518b8e80941Smrg return "rnd_mode"; 519b8e80941Smrg } 520b8e80941Smrg 521b8e80941Smrg unreachable("not reached"); 522b8e80941Smrg} 523b8e80941Smrg 524b8e80941Smrgbool 525b8e80941Smrgbrw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) 526b8e80941Smrg{ 527b8e80941Smrg union { 528b8e80941Smrg unsigned ud; 529b8e80941Smrg int d; 530b8e80941Smrg float f; 531b8e80941Smrg double df; 532b8e80941Smrg } imm, sat_imm = { 0 }; 533b8e80941Smrg 534b8e80941Smrg const unsigned size = type_sz(type); 535b8e80941Smrg 536b8e80941Smrg /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise 537b8e80941Smrg * irrelevant, so just check the size of the type and copy from/to an 538b8e80941Smrg * appropriately sized field. 539b8e80941Smrg */ 540b8e80941Smrg if (size < 8) 541b8e80941Smrg imm.ud = reg->ud; 542b8e80941Smrg else 543b8e80941Smrg imm.df = reg->df; 544b8e80941Smrg 545b8e80941Smrg switch (type) { 546b8e80941Smrg case BRW_REGISTER_TYPE_UD: 547b8e80941Smrg case BRW_REGISTER_TYPE_D: 548b8e80941Smrg case BRW_REGISTER_TYPE_UW: 549b8e80941Smrg case BRW_REGISTER_TYPE_W: 550b8e80941Smrg case BRW_REGISTER_TYPE_UQ: 551b8e80941Smrg case BRW_REGISTER_TYPE_Q: 552b8e80941Smrg /* Nothing to do. */ 553b8e80941Smrg return false; 554b8e80941Smrg case BRW_REGISTER_TYPE_F: 555b8e80941Smrg sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f); 556b8e80941Smrg break; 557b8e80941Smrg case BRW_REGISTER_TYPE_DF: 558b8e80941Smrg sat_imm.df = CLAMP(imm.df, 0.0, 1.0); 559b8e80941Smrg break; 560b8e80941Smrg case BRW_REGISTER_TYPE_UB: 561b8e80941Smrg case BRW_REGISTER_TYPE_B: 562b8e80941Smrg unreachable("no UB/B immediates"); 563b8e80941Smrg case BRW_REGISTER_TYPE_V: 564b8e80941Smrg case BRW_REGISTER_TYPE_UV: 565b8e80941Smrg case BRW_REGISTER_TYPE_VF: 566b8e80941Smrg unreachable("unimplemented: saturate vector immediate"); 567b8e80941Smrg case BRW_REGISTER_TYPE_HF: 568b8e80941Smrg unreachable("unimplemented: saturate HF immediate"); 569b8e80941Smrg case BRW_REGISTER_TYPE_NF: 570b8e80941Smrg unreachable("no NF immediates"); 571b8e80941Smrg } 572b8e80941Smrg 573b8e80941Smrg if (size < 8) { 574b8e80941Smrg if (imm.ud != sat_imm.ud) { 575b8e80941Smrg reg->ud = sat_imm.ud; 576b8e80941Smrg return true; 577b8e80941Smrg } 578b8e80941Smrg } else { 579b8e80941Smrg if (imm.df != sat_imm.df) { 580b8e80941Smrg reg->df = sat_imm.df; 581b8e80941Smrg return true; 582b8e80941Smrg } 583b8e80941Smrg } 584b8e80941Smrg return false; 585b8e80941Smrg} 586b8e80941Smrg 587b8e80941Smrgbool 588b8e80941Smrgbrw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) 589b8e80941Smrg{ 590b8e80941Smrg switch (type) { 591b8e80941Smrg case BRW_REGISTER_TYPE_D: 592b8e80941Smrg case BRW_REGISTER_TYPE_UD: 593b8e80941Smrg reg->d = -reg->d; 594b8e80941Smrg return true; 595b8e80941Smrg case BRW_REGISTER_TYPE_W: 596b8e80941Smrg case BRW_REGISTER_TYPE_UW: { 597b8e80941Smrg uint16_t value = -(int16_t)reg->ud; 598b8e80941Smrg reg->ud = value | (uint32_t)value << 16; 599b8e80941Smrg return true; 600b8e80941Smrg } 601b8e80941Smrg case BRW_REGISTER_TYPE_F: 602b8e80941Smrg reg->f = -reg->f; 603b8e80941Smrg return true; 604b8e80941Smrg case BRW_REGISTER_TYPE_VF: 605b8e80941Smrg reg->ud ^= 0x80808080; 606b8e80941Smrg return true; 607b8e80941Smrg case BRW_REGISTER_TYPE_DF: 608b8e80941Smrg reg->df = -reg->df; 609b8e80941Smrg return true; 610b8e80941Smrg case BRW_REGISTER_TYPE_UQ: 611b8e80941Smrg case BRW_REGISTER_TYPE_Q: 612b8e80941Smrg reg->d64 = -reg->d64; 613b8e80941Smrg return true; 614b8e80941Smrg case BRW_REGISTER_TYPE_UB: 615b8e80941Smrg case BRW_REGISTER_TYPE_B: 616b8e80941Smrg unreachable("no UB/B immediates"); 617b8e80941Smrg case BRW_REGISTER_TYPE_UV: 618b8e80941Smrg case BRW_REGISTER_TYPE_V: 619b8e80941Smrg assert(!"unimplemented: negate UV/V immediate"); 620b8e80941Smrg case BRW_REGISTER_TYPE_HF: 621b8e80941Smrg reg->ud ^= 0x80008000; 622b8e80941Smrg return true; 623b8e80941Smrg case BRW_REGISTER_TYPE_NF: 624b8e80941Smrg unreachable("no NF immediates"); 625b8e80941Smrg } 626b8e80941Smrg 627b8e80941Smrg return false; 628b8e80941Smrg} 629b8e80941Smrg 630b8e80941Smrgbool 631b8e80941Smrgbrw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) 632b8e80941Smrg{ 633b8e80941Smrg switch (type) { 634b8e80941Smrg case BRW_REGISTER_TYPE_D: 635b8e80941Smrg reg->d = abs(reg->d); 636b8e80941Smrg return true; 637b8e80941Smrg case BRW_REGISTER_TYPE_W: { 638b8e80941Smrg uint16_t value = abs((int16_t)reg->ud); 639b8e80941Smrg reg->ud = value | (uint32_t)value << 16; 640b8e80941Smrg return true; 641b8e80941Smrg } 642b8e80941Smrg case BRW_REGISTER_TYPE_F: 643b8e80941Smrg reg->f = fabsf(reg->f); 644b8e80941Smrg return true; 645b8e80941Smrg case BRW_REGISTER_TYPE_DF: 646b8e80941Smrg reg->df = fabs(reg->df); 647b8e80941Smrg return true; 648b8e80941Smrg case BRW_REGISTER_TYPE_VF: 649b8e80941Smrg reg->ud &= ~0x80808080; 650b8e80941Smrg return true; 651b8e80941Smrg case BRW_REGISTER_TYPE_Q: 652b8e80941Smrg reg->d64 = imaxabs(reg->d64); 653b8e80941Smrg return true; 654b8e80941Smrg case BRW_REGISTER_TYPE_UB: 655b8e80941Smrg case BRW_REGISTER_TYPE_B: 656b8e80941Smrg unreachable("no UB/B immediates"); 657b8e80941Smrg case BRW_REGISTER_TYPE_UQ: 658b8e80941Smrg case BRW_REGISTER_TYPE_UD: 659b8e80941Smrg case BRW_REGISTER_TYPE_UW: 660b8e80941Smrg case BRW_REGISTER_TYPE_UV: 661b8e80941Smrg /* Presumably the absolute value modifier on an unsigned source is a 662b8e80941Smrg * nop, but it would be nice to confirm. 663b8e80941Smrg */ 664b8e80941Smrg assert(!"unimplemented: abs unsigned immediate"); 665b8e80941Smrg case BRW_REGISTER_TYPE_V: 666b8e80941Smrg assert(!"unimplemented: abs V immediate"); 667b8e80941Smrg case BRW_REGISTER_TYPE_HF: 668b8e80941Smrg reg->ud &= ~0x80008000; 669b8e80941Smrg return true; 670b8e80941Smrg case BRW_REGISTER_TYPE_NF: 671b8e80941Smrg unreachable("no NF immediates"); 672b8e80941Smrg } 673b8e80941Smrg 674b8e80941Smrg return false; 675b8e80941Smrg} 676b8e80941Smrg 677b8e80941Smrgbackend_shader::backend_shader(const struct brw_compiler *compiler, 678b8e80941Smrg void *log_data, 679b8e80941Smrg void *mem_ctx, 680b8e80941Smrg const nir_shader *shader, 681b8e80941Smrg struct brw_stage_prog_data *stage_prog_data) 682b8e80941Smrg : compiler(compiler), 683b8e80941Smrg log_data(log_data), 684b8e80941Smrg devinfo(compiler->devinfo), 685b8e80941Smrg nir(shader), 686b8e80941Smrg stage_prog_data(stage_prog_data), 687b8e80941Smrg mem_ctx(mem_ctx), 688b8e80941Smrg cfg(NULL), 689b8e80941Smrg stage(shader->info.stage) 690b8e80941Smrg{ 691b8e80941Smrg debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); 692b8e80941Smrg stage_name = _mesa_shader_stage_to_string(stage); 693b8e80941Smrg stage_abbrev = _mesa_shader_stage_to_abbrev(stage); 694b8e80941Smrg} 695b8e80941Smrg 696b8e80941Smrgbackend_shader::~backend_shader() 697b8e80941Smrg{ 698b8e80941Smrg} 699b8e80941Smrg 700b8e80941Smrgbool 701b8e80941Smrgbackend_reg::equals(const backend_reg &r) const 702b8e80941Smrg{ 703b8e80941Smrg return brw_regs_equal(this, &r) && offset == r.offset; 704b8e80941Smrg} 705b8e80941Smrg 706b8e80941Smrgbool 707b8e80941Smrgbackend_reg::negative_equals(const backend_reg &r) const 708b8e80941Smrg{ 709b8e80941Smrg return brw_regs_negative_equal(this, &r) && offset == r.offset; 710b8e80941Smrg} 711b8e80941Smrg 712b8e80941Smrgbool 713b8e80941Smrgbackend_reg::is_zero() const 714b8e80941Smrg{ 715b8e80941Smrg if (file != IMM) 716b8e80941Smrg return false; 717b8e80941Smrg 718b8e80941Smrg assert(type_sz(type) > 1); 719b8e80941Smrg 720b8e80941Smrg switch (type) { 721b8e80941Smrg case BRW_REGISTER_TYPE_HF: 722b8e80941Smrg assert((d & 0xffff) == ((d >> 16) & 0xffff)); 723b8e80941Smrg return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000; 724b8e80941Smrg case BRW_REGISTER_TYPE_F: 725b8e80941Smrg return f == 0; 726b8e80941Smrg case BRW_REGISTER_TYPE_DF: 727b8e80941Smrg return df == 0; 728b8e80941Smrg case BRW_REGISTER_TYPE_W: 729b8e80941Smrg case BRW_REGISTER_TYPE_UW: 730b8e80941Smrg assert((d & 0xffff) == ((d >> 16) & 0xffff)); 731b8e80941Smrg return (d & 0xffff) == 0; 732b8e80941Smrg case BRW_REGISTER_TYPE_D: 733b8e80941Smrg case BRW_REGISTER_TYPE_UD: 734b8e80941Smrg return d == 0; 735b8e80941Smrg case BRW_REGISTER_TYPE_UQ: 736b8e80941Smrg case BRW_REGISTER_TYPE_Q: 737b8e80941Smrg return u64 == 0; 738b8e80941Smrg default: 739b8e80941Smrg return false; 740b8e80941Smrg } 741b8e80941Smrg} 742b8e80941Smrg 743b8e80941Smrgbool 744b8e80941Smrgbackend_reg::is_one() const 745b8e80941Smrg{ 746b8e80941Smrg if (file != IMM) 747b8e80941Smrg return false; 748b8e80941Smrg 749b8e80941Smrg assert(type_sz(type) > 1); 750b8e80941Smrg 751b8e80941Smrg switch (type) { 752b8e80941Smrg case BRW_REGISTER_TYPE_HF: 753b8e80941Smrg assert((d & 0xffff) == ((d >> 16) & 0xffff)); 754b8e80941Smrg return (d & 0xffff) == 0x3c00; 755b8e80941Smrg case BRW_REGISTER_TYPE_F: 756b8e80941Smrg return f == 1.0f; 757b8e80941Smrg case BRW_REGISTER_TYPE_DF: 758b8e80941Smrg return df == 1.0; 759b8e80941Smrg case BRW_REGISTER_TYPE_W: 760b8e80941Smrg case BRW_REGISTER_TYPE_UW: 761b8e80941Smrg assert((d & 0xffff) == ((d >> 16) & 0xffff)); 762b8e80941Smrg return (d & 0xffff) == 1; 763b8e80941Smrg case BRW_REGISTER_TYPE_D: 764b8e80941Smrg case BRW_REGISTER_TYPE_UD: 765b8e80941Smrg return d == 1; 766b8e80941Smrg case BRW_REGISTER_TYPE_UQ: 767b8e80941Smrg case BRW_REGISTER_TYPE_Q: 768b8e80941Smrg return u64 == 1; 769b8e80941Smrg default: 770b8e80941Smrg return false; 771b8e80941Smrg } 772b8e80941Smrg} 773b8e80941Smrg 774b8e80941Smrgbool 775b8e80941Smrgbackend_reg::is_negative_one() const 776b8e80941Smrg{ 777b8e80941Smrg if (file != IMM) 778b8e80941Smrg return false; 779b8e80941Smrg 780b8e80941Smrg assert(type_sz(type) > 1); 781b8e80941Smrg 782b8e80941Smrg switch (type) { 783b8e80941Smrg case BRW_REGISTER_TYPE_HF: 784b8e80941Smrg assert((d & 0xffff) == ((d >> 16) & 0xffff)); 785b8e80941Smrg return (d & 0xffff) == 0xbc00; 786b8e80941Smrg case BRW_REGISTER_TYPE_F: 787b8e80941Smrg return f == -1.0; 788b8e80941Smrg case BRW_REGISTER_TYPE_DF: 789b8e80941Smrg return df == -1.0; 790b8e80941Smrg case BRW_REGISTER_TYPE_W: 791b8e80941Smrg assert((d & 0xffff) == ((d >> 16) & 0xffff)); 792b8e80941Smrg return (d & 0xffff) == 0xffff; 793b8e80941Smrg case BRW_REGISTER_TYPE_D: 794b8e80941Smrg return d == -1; 795b8e80941Smrg case BRW_REGISTER_TYPE_Q: 796b8e80941Smrg return d64 == -1; 797b8e80941Smrg default: 798b8e80941Smrg return false; 799b8e80941Smrg } 800b8e80941Smrg} 801b8e80941Smrg 802b8e80941Smrgbool 803b8e80941Smrgbackend_reg::is_null() const 804b8e80941Smrg{ 805b8e80941Smrg return file == ARF && nr == BRW_ARF_NULL; 806b8e80941Smrg} 807b8e80941Smrg 808b8e80941Smrg 809b8e80941Smrgbool 810b8e80941Smrgbackend_reg::is_accumulator() const 811b8e80941Smrg{ 812b8e80941Smrg return file == ARF && nr == BRW_ARF_ACCUMULATOR; 813b8e80941Smrg} 814b8e80941Smrg 815b8e80941Smrgbool 816b8e80941Smrgbackend_instruction::is_commutative() const 817b8e80941Smrg{ 818b8e80941Smrg switch (opcode) { 819b8e80941Smrg case BRW_OPCODE_AND: 820b8e80941Smrg case BRW_OPCODE_OR: 821b8e80941Smrg case BRW_OPCODE_XOR: 822b8e80941Smrg case BRW_OPCODE_ADD: 823b8e80941Smrg case BRW_OPCODE_MUL: 824b8e80941Smrg case SHADER_OPCODE_MULH: 825b8e80941Smrg return true; 826b8e80941Smrg case BRW_OPCODE_SEL: 827b8e80941Smrg /* MIN and MAX are commutative. */ 828b8e80941Smrg if (conditional_mod == BRW_CONDITIONAL_GE || 829b8e80941Smrg conditional_mod == BRW_CONDITIONAL_L) { 830b8e80941Smrg return true; 831b8e80941Smrg } 832b8e80941Smrg /* fallthrough */ 833b8e80941Smrg default: 834b8e80941Smrg return false; 835b8e80941Smrg } 836b8e80941Smrg} 837b8e80941Smrg 838b8e80941Smrgbool 839b8e80941Smrgbackend_instruction::is_3src(const struct gen_device_info *devinfo) const 840b8e80941Smrg{ 841b8e80941Smrg return ::is_3src(devinfo, opcode); 842b8e80941Smrg} 843b8e80941Smrg 844b8e80941Smrgbool 845b8e80941Smrgbackend_instruction::is_tex() const 846b8e80941Smrg{ 847b8e80941Smrg return (opcode == SHADER_OPCODE_TEX || 848b8e80941Smrg opcode == FS_OPCODE_TXB || 849b8e80941Smrg opcode == SHADER_OPCODE_TXD || 850b8e80941Smrg opcode == SHADER_OPCODE_TXF || 851b8e80941Smrg opcode == SHADER_OPCODE_TXF_LZ || 852b8e80941Smrg opcode == SHADER_OPCODE_TXF_CMS || 853b8e80941Smrg opcode == SHADER_OPCODE_TXF_CMS_W || 854b8e80941Smrg opcode == SHADER_OPCODE_TXF_UMS || 855b8e80941Smrg opcode == SHADER_OPCODE_TXF_MCS || 856b8e80941Smrg opcode == SHADER_OPCODE_TXL || 857b8e80941Smrg opcode == SHADER_OPCODE_TXL_LZ || 858b8e80941Smrg opcode == SHADER_OPCODE_TXS || 859b8e80941Smrg opcode == SHADER_OPCODE_LOD || 860b8e80941Smrg opcode == SHADER_OPCODE_TG4 || 861b8e80941Smrg opcode == SHADER_OPCODE_TG4_OFFSET || 862b8e80941Smrg opcode == SHADER_OPCODE_SAMPLEINFO); 863b8e80941Smrg} 864b8e80941Smrg 865b8e80941Smrgbool 866b8e80941Smrgbackend_instruction::is_math() const 867b8e80941Smrg{ 868b8e80941Smrg return (opcode == SHADER_OPCODE_RCP || 869b8e80941Smrg opcode == SHADER_OPCODE_RSQ || 870b8e80941Smrg opcode == SHADER_OPCODE_SQRT || 871b8e80941Smrg opcode == SHADER_OPCODE_EXP2 || 872b8e80941Smrg opcode == SHADER_OPCODE_LOG2 || 873b8e80941Smrg opcode == SHADER_OPCODE_SIN || 874b8e80941Smrg opcode == SHADER_OPCODE_COS || 875b8e80941Smrg opcode == SHADER_OPCODE_INT_QUOTIENT || 876b8e80941Smrg opcode == SHADER_OPCODE_INT_REMAINDER || 877b8e80941Smrg opcode == SHADER_OPCODE_POW); 878b8e80941Smrg} 879b8e80941Smrg 880b8e80941Smrgbool 881b8e80941Smrgbackend_instruction::is_control_flow() const 882b8e80941Smrg{ 883b8e80941Smrg switch (opcode) { 884b8e80941Smrg case BRW_OPCODE_DO: 885b8e80941Smrg case BRW_OPCODE_WHILE: 886b8e80941Smrg case BRW_OPCODE_IF: 887b8e80941Smrg case BRW_OPCODE_ELSE: 888b8e80941Smrg case BRW_OPCODE_ENDIF: 889b8e80941Smrg case BRW_OPCODE_BREAK: 890b8e80941Smrg case BRW_OPCODE_CONTINUE: 891b8e80941Smrg return true; 892b8e80941Smrg default: 893b8e80941Smrg return false; 894b8e80941Smrg } 895b8e80941Smrg} 896b8e80941Smrg 897b8e80941Smrgbool 898b8e80941Smrgbackend_instruction::can_do_source_mods() const 899b8e80941Smrg{ 900b8e80941Smrg switch (opcode) { 901b8e80941Smrg case BRW_OPCODE_ADDC: 902b8e80941Smrg case BRW_OPCODE_BFE: 903b8e80941Smrg case BRW_OPCODE_BFI1: 904b8e80941Smrg case BRW_OPCODE_BFI2: 905b8e80941Smrg case BRW_OPCODE_BFREV: 906b8e80941Smrg case BRW_OPCODE_CBIT: 907b8e80941Smrg case BRW_OPCODE_FBH: 908b8e80941Smrg case BRW_OPCODE_FBL: 909b8e80941Smrg case BRW_OPCODE_SUBB: 910b8e80941Smrg case SHADER_OPCODE_BROADCAST: 911b8e80941Smrg case SHADER_OPCODE_CLUSTER_BROADCAST: 912b8e80941Smrg case SHADER_OPCODE_MOV_INDIRECT: 913b8e80941Smrg return false; 914b8e80941Smrg default: 915b8e80941Smrg return true; 916b8e80941Smrg } 917b8e80941Smrg} 918b8e80941Smrg 919b8e80941Smrgbool 920b8e80941Smrgbackend_instruction::can_do_saturate() const 921b8e80941Smrg{ 922b8e80941Smrg switch (opcode) { 923b8e80941Smrg case BRW_OPCODE_ADD: 924b8e80941Smrg case BRW_OPCODE_ASR: 925b8e80941Smrg case BRW_OPCODE_AVG: 926b8e80941Smrg case BRW_OPCODE_DP2: 927b8e80941Smrg case BRW_OPCODE_DP3: 928b8e80941Smrg case BRW_OPCODE_DP4: 929b8e80941Smrg case BRW_OPCODE_DPH: 930b8e80941Smrg case BRW_OPCODE_F16TO32: 931b8e80941Smrg case BRW_OPCODE_F32TO16: 932b8e80941Smrg case BRW_OPCODE_LINE: 933b8e80941Smrg case BRW_OPCODE_LRP: 934b8e80941Smrg case BRW_OPCODE_MAC: 935b8e80941Smrg case BRW_OPCODE_MAD: 936b8e80941Smrg case BRW_OPCODE_MATH: 937b8e80941Smrg case BRW_OPCODE_MOV: 938b8e80941Smrg case BRW_OPCODE_MUL: 939b8e80941Smrg case SHADER_OPCODE_MULH: 940b8e80941Smrg case BRW_OPCODE_PLN: 941b8e80941Smrg case BRW_OPCODE_RNDD: 942b8e80941Smrg case BRW_OPCODE_RNDE: 943b8e80941Smrg case BRW_OPCODE_RNDU: 944b8e80941Smrg case BRW_OPCODE_RNDZ: 945b8e80941Smrg case BRW_OPCODE_SEL: 946b8e80941Smrg case BRW_OPCODE_SHL: 947b8e80941Smrg case BRW_OPCODE_SHR: 948b8e80941Smrg case FS_OPCODE_LINTERP: 949b8e80941Smrg case SHADER_OPCODE_COS: 950b8e80941Smrg case SHADER_OPCODE_EXP2: 951b8e80941Smrg case SHADER_OPCODE_LOG2: 952b8e80941Smrg case SHADER_OPCODE_POW: 953b8e80941Smrg case SHADER_OPCODE_RCP: 954b8e80941Smrg case SHADER_OPCODE_RSQ: 955b8e80941Smrg case SHADER_OPCODE_SIN: 956b8e80941Smrg case SHADER_OPCODE_SQRT: 957b8e80941Smrg return true; 958b8e80941Smrg default: 959b8e80941Smrg return false; 960b8e80941Smrg } 961b8e80941Smrg} 962b8e80941Smrg 963b8e80941Smrgbool 964b8e80941Smrgbackend_instruction::can_do_cmod() const 965b8e80941Smrg{ 966b8e80941Smrg switch (opcode) { 967b8e80941Smrg case BRW_OPCODE_ADD: 968b8e80941Smrg case BRW_OPCODE_ADDC: 969b8e80941Smrg case BRW_OPCODE_AND: 970b8e80941Smrg case BRW_OPCODE_ASR: 971b8e80941Smrg case BRW_OPCODE_AVG: 972b8e80941Smrg case BRW_OPCODE_CMP: 973b8e80941Smrg case BRW_OPCODE_CMPN: 974b8e80941Smrg case BRW_OPCODE_DP2: 975b8e80941Smrg case BRW_OPCODE_DP3: 976b8e80941Smrg case BRW_OPCODE_DP4: 977b8e80941Smrg case BRW_OPCODE_DPH: 978b8e80941Smrg case BRW_OPCODE_F16TO32: 979b8e80941Smrg case BRW_OPCODE_F32TO16: 980b8e80941Smrg case BRW_OPCODE_FRC: 981b8e80941Smrg case BRW_OPCODE_LINE: 982b8e80941Smrg case BRW_OPCODE_LRP: 983b8e80941Smrg case BRW_OPCODE_LZD: 984b8e80941Smrg case BRW_OPCODE_MAC: 985b8e80941Smrg case BRW_OPCODE_MACH: 986b8e80941Smrg case BRW_OPCODE_MAD: 987b8e80941Smrg case BRW_OPCODE_MOV: 988b8e80941Smrg case BRW_OPCODE_MUL: 989b8e80941Smrg case BRW_OPCODE_NOT: 990b8e80941Smrg case BRW_OPCODE_OR: 991b8e80941Smrg case BRW_OPCODE_PLN: 992b8e80941Smrg case BRW_OPCODE_RNDD: 993b8e80941Smrg case BRW_OPCODE_RNDE: 994b8e80941Smrg case BRW_OPCODE_RNDU: 995b8e80941Smrg case BRW_OPCODE_RNDZ: 996b8e80941Smrg case BRW_OPCODE_SAD2: 997b8e80941Smrg case BRW_OPCODE_SADA2: 998b8e80941Smrg case BRW_OPCODE_SHL: 999b8e80941Smrg case BRW_OPCODE_SHR: 1000b8e80941Smrg case BRW_OPCODE_SUBB: 1001b8e80941Smrg case BRW_OPCODE_XOR: 1002b8e80941Smrg case FS_OPCODE_LINTERP: 1003b8e80941Smrg return true; 1004b8e80941Smrg default: 1005b8e80941Smrg return false; 1006b8e80941Smrg } 1007b8e80941Smrg} 1008b8e80941Smrg 1009b8e80941Smrgbool 1010b8e80941Smrgbackend_instruction::reads_accumulator_implicitly() const 1011b8e80941Smrg{ 1012b8e80941Smrg switch (opcode) { 1013b8e80941Smrg case BRW_OPCODE_MAC: 1014b8e80941Smrg case BRW_OPCODE_MACH: 1015b8e80941Smrg case BRW_OPCODE_SADA2: 1016b8e80941Smrg return true; 1017b8e80941Smrg default: 1018b8e80941Smrg return false; 1019b8e80941Smrg } 1020b8e80941Smrg} 1021b8e80941Smrg 1022b8e80941Smrgbool 1023b8e80941Smrgbackend_instruction::writes_accumulator_implicitly(const struct gen_device_info *devinfo) const 1024b8e80941Smrg{ 1025b8e80941Smrg return writes_accumulator || 1026b8e80941Smrg (devinfo->gen < 6 && 1027b8e80941Smrg ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || 1028b8e80941Smrg (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) || 1029b8e80941Smrg (opcode == FS_OPCODE_LINTERP && 1030b8e80941Smrg (!devinfo->has_pln || devinfo->gen <= 6)); 1031b8e80941Smrg} 1032b8e80941Smrg 1033b8e80941Smrgbool 1034b8e80941Smrgbackend_instruction::has_side_effects() const 1035b8e80941Smrg{ 1036b8e80941Smrg switch (opcode) { 1037b8e80941Smrg case SHADER_OPCODE_SEND: 1038b8e80941Smrg return send_has_side_effects; 1039b8e80941Smrg 1040b8e80941Smrg case VEC4_OPCODE_UNTYPED_ATOMIC: 1041b8e80941Smrg case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 1042b8e80941Smrg case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: 1043b8e80941Smrg case SHADER_OPCODE_GEN4_SCRATCH_WRITE: 1044b8e80941Smrg case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 1045b8e80941Smrg case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 1046b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: 1047b8e80941Smrg case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: 1048b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: 1049b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: 1050b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: 1051b8e80941Smrg case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 1052b8e80941Smrg case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 1053b8e80941Smrg case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 1054b8e80941Smrg case SHADER_OPCODE_MEMORY_FENCE: 1055b8e80941Smrg case SHADER_OPCODE_INTERLOCK: 1056b8e80941Smrg case SHADER_OPCODE_URB_WRITE_SIMD8: 1057b8e80941Smrg case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: 1058b8e80941Smrg case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: 1059b8e80941Smrg case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: 1060b8e80941Smrg case FS_OPCODE_FB_WRITE: 1061b8e80941Smrg case FS_OPCODE_FB_WRITE_LOGICAL: 1062b8e80941Smrg case FS_OPCODE_REP_FB_WRITE: 1063b8e80941Smrg case SHADER_OPCODE_BARRIER: 1064b8e80941Smrg case TCS_OPCODE_URB_WRITE: 1065b8e80941Smrg case TCS_OPCODE_RELEASE_INPUT: 1066b8e80941Smrg case SHADER_OPCODE_RND_MODE: 1067b8e80941Smrg return true; 1068b8e80941Smrg default: 1069b8e80941Smrg return eot; 1070b8e80941Smrg } 1071b8e80941Smrg} 1072b8e80941Smrg 1073b8e80941Smrgbool 1074b8e80941Smrgbackend_instruction::is_volatile() const 1075b8e80941Smrg{ 1076b8e80941Smrg switch (opcode) { 1077b8e80941Smrg case SHADER_OPCODE_SEND: 1078b8e80941Smrg return send_is_volatile; 1079b8e80941Smrg 1080b8e80941Smrg case VEC4_OPCODE_UNTYPED_SURFACE_READ: 1081b8e80941Smrg case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 1082b8e80941Smrg case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 1083b8e80941Smrg case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 1084b8e80941Smrg case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: 1085b8e80941Smrg case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: 1086b8e80941Smrg case SHADER_OPCODE_URB_READ_SIMD8: 1087b8e80941Smrg case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: 1088b8e80941Smrg case VEC4_OPCODE_URB_READ: 1089b8e80941Smrg return true; 1090b8e80941Smrg default: 1091b8e80941Smrg return false; 1092b8e80941Smrg } 1093b8e80941Smrg} 1094b8e80941Smrg 1095b8e80941Smrg#ifndef NDEBUG 1096b8e80941Smrgstatic bool 1097b8e80941Smrginst_is_in_block(const bblock_t *block, const backend_instruction *inst) 1098b8e80941Smrg{ 1099b8e80941Smrg bool found = false; 1100b8e80941Smrg foreach_inst_in_block (backend_instruction, i, block) { 1101b8e80941Smrg if (inst == i) { 1102b8e80941Smrg found = true; 1103b8e80941Smrg } 1104b8e80941Smrg } 1105b8e80941Smrg return found; 1106b8e80941Smrg} 1107b8e80941Smrg#endif 1108b8e80941Smrg 1109b8e80941Smrgstatic void 1110b8e80941Smrgadjust_later_block_ips(bblock_t *start_block, int ip_adjustment) 1111b8e80941Smrg{ 1112b8e80941Smrg for (bblock_t *block_iter = start_block->next(); 1113b8e80941Smrg block_iter; 1114b8e80941Smrg block_iter = block_iter->next()) { 1115b8e80941Smrg block_iter->start_ip += ip_adjustment; 1116b8e80941Smrg block_iter->end_ip += ip_adjustment; 1117b8e80941Smrg } 1118b8e80941Smrg} 1119b8e80941Smrg 1120b8e80941Smrgvoid 1121b8e80941Smrgbackend_instruction::insert_after(bblock_t *block, backend_instruction *inst) 1122b8e80941Smrg{ 1123b8e80941Smrg assert(this != inst); 1124b8e80941Smrg 1125b8e80941Smrg if (!this->is_head_sentinel()) 1126b8e80941Smrg assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1127b8e80941Smrg 1128b8e80941Smrg block->end_ip++; 1129b8e80941Smrg 1130b8e80941Smrg adjust_later_block_ips(block, 1); 1131b8e80941Smrg 1132b8e80941Smrg exec_node::insert_after(inst); 1133b8e80941Smrg} 1134b8e80941Smrg 1135b8e80941Smrgvoid 1136b8e80941Smrgbackend_instruction::insert_before(bblock_t *block, backend_instruction *inst) 1137b8e80941Smrg{ 1138b8e80941Smrg assert(this != inst); 1139b8e80941Smrg 1140b8e80941Smrg if (!this->is_tail_sentinel()) 1141b8e80941Smrg assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1142b8e80941Smrg 1143b8e80941Smrg block->end_ip++; 1144b8e80941Smrg 1145b8e80941Smrg adjust_later_block_ips(block, 1); 1146b8e80941Smrg 1147b8e80941Smrg exec_node::insert_before(inst); 1148b8e80941Smrg} 1149b8e80941Smrg 1150b8e80941Smrgvoid 1151b8e80941Smrgbackend_instruction::insert_before(bblock_t *block, exec_list *list) 1152b8e80941Smrg{ 1153b8e80941Smrg assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1154b8e80941Smrg 1155b8e80941Smrg unsigned num_inst = list->length(); 1156b8e80941Smrg 1157b8e80941Smrg block->end_ip += num_inst; 1158b8e80941Smrg 1159b8e80941Smrg adjust_later_block_ips(block, num_inst); 1160b8e80941Smrg 1161b8e80941Smrg exec_node::insert_before(list); 1162b8e80941Smrg} 1163b8e80941Smrg 1164b8e80941Smrgvoid 1165b8e80941Smrgbackend_instruction::remove(bblock_t *block) 1166b8e80941Smrg{ 1167b8e80941Smrg assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1168b8e80941Smrg 1169b8e80941Smrg adjust_later_block_ips(block, -1); 1170b8e80941Smrg 1171b8e80941Smrg if (block->start_ip == block->end_ip) { 1172b8e80941Smrg block->cfg->remove_block(block); 1173b8e80941Smrg } else { 1174b8e80941Smrg block->end_ip--; 1175b8e80941Smrg } 1176b8e80941Smrg 1177b8e80941Smrg exec_node::remove(); 1178b8e80941Smrg} 1179b8e80941Smrg 1180b8e80941Smrgvoid 1181b8e80941Smrgbackend_shader::dump_instructions() 1182b8e80941Smrg{ 1183b8e80941Smrg dump_instructions(NULL); 1184b8e80941Smrg} 1185b8e80941Smrg 1186b8e80941Smrgvoid 1187b8e80941Smrgbackend_shader::dump_instructions(const char *name) 1188b8e80941Smrg{ 1189b8e80941Smrg FILE *file = stderr; 1190b8e80941Smrg if (name && geteuid() != 0) { 1191b8e80941Smrg file = fopen(name, "w"); 1192b8e80941Smrg if (!file) 1193b8e80941Smrg file = stderr; 1194b8e80941Smrg } 1195b8e80941Smrg 1196b8e80941Smrg if (cfg) { 1197b8e80941Smrg int ip = 0; 1198b8e80941Smrg foreach_block_and_inst(block, backend_instruction, inst, cfg) { 1199b8e80941Smrg if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) 1200b8e80941Smrg fprintf(file, "%4d: ", ip++); 1201b8e80941Smrg dump_instruction(inst, file); 1202b8e80941Smrg } 1203b8e80941Smrg } else { 1204b8e80941Smrg int ip = 0; 1205b8e80941Smrg foreach_in_list(backend_instruction, inst, &instructions) { 1206b8e80941Smrg if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) 1207b8e80941Smrg fprintf(file, "%4d: ", ip++); 1208b8e80941Smrg dump_instruction(inst, file); 1209b8e80941Smrg } 1210b8e80941Smrg } 1211b8e80941Smrg 1212b8e80941Smrg if (file != stderr) { 1213b8e80941Smrg fclose(file); 1214b8e80941Smrg } 1215b8e80941Smrg} 1216b8e80941Smrg 1217b8e80941Smrgvoid 1218b8e80941Smrgbackend_shader::calculate_cfg() 1219b8e80941Smrg{ 1220b8e80941Smrg if (this->cfg) 1221b8e80941Smrg return; 1222b8e80941Smrg cfg = new(mem_ctx) cfg_t(&this->instructions); 1223b8e80941Smrg} 1224b8e80941Smrg 1225b8e80941Smrgextern "C" const unsigned * 1226b8e80941Smrgbrw_compile_tes(const struct brw_compiler *compiler, 1227b8e80941Smrg void *log_data, 1228b8e80941Smrg void *mem_ctx, 1229b8e80941Smrg const struct brw_tes_prog_key *key, 1230b8e80941Smrg const struct brw_vue_map *input_vue_map, 1231b8e80941Smrg struct brw_tes_prog_data *prog_data, 1232b8e80941Smrg nir_shader *nir, 1233b8e80941Smrg struct gl_program *prog, 1234b8e80941Smrg int shader_time_index, 1235b8e80941Smrg char **error_str) 1236b8e80941Smrg{ 1237b8e80941Smrg const struct gen_device_info *devinfo = compiler->devinfo; 1238b8e80941Smrg const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; 1239b8e80941Smrg const unsigned *assembly; 1240b8e80941Smrg 1241b8e80941Smrg nir->info.inputs_read = key->inputs_read; 1242b8e80941Smrg nir->info.patch_inputs_read = key->patch_inputs_read; 1243b8e80941Smrg 1244b8e80941Smrg nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); 1245b8e80941Smrg brw_nir_lower_tes_inputs(nir, input_vue_map); 1246b8e80941Smrg brw_nir_lower_vue_outputs(nir); 1247b8e80941Smrg nir = brw_postprocess_nir(nir, compiler, is_scalar); 1248b8e80941Smrg 1249b8e80941Smrg brw_compute_vue_map(devinfo, &prog_data->base.vue_map, 1250b8e80941Smrg nir->info.outputs_written, 1251b8e80941Smrg nir->info.separate_shader); 1252b8e80941Smrg 1253b8e80941Smrg unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; 1254b8e80941Smrg 1255b8e80941Smrg assert(output_size_bytes >= 1); 1256b8e80941Smrg if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) { 1257b8e80941Smrg if (error_str) 1258b8e80941Smrg *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); 1259b8e80941Smrg return NULL; 1260b8e80941Smrg } 1261b8e80941Smrg 1262b8e80941Smrg prog_data->base.clip_distance_mask = 1263b8e80941Smrg ((1 << nir->info.clip_distance_array_size) - 1); 1264b8e80941Smrg prog_data->base.cull_distance_mask = 1265b8e80941Smrg ((1 << nir->info.cull_distance_array_size) - 1) << 1266b8e80941Smrg nir->info.clip_distance_array_size; 1267b8e80941Smrg 1268b8e80941Smrg /* URB entry sizes are stored as a multiple of 64 bytes. */ 1269b8e80941Smrg prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; 1270b8e80941Smrg 1271b8e80941Smrg /* On Cannonlake software shall not program an allocation size that 1272b8e80941Smrg * specifies a size that is a multiple of 3 64B (512-bit) cachelines. 1273b8e80941Smrg */ 1274b8e80941Smrg if (devinfo->gen == 10 && 1275b8e80941Smrg prog_data->base.urb_entry_size % 3 == 0) 1276b8e80941Smrg prog_data->base.urb_entry_size++; 1277b8e80941Smrg 1278b8e80941Smrg prog_data->base.urb_read_length = 0; 1279b8e80941Smrg 1280b8e80941Smrg STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); 1281b8e80941Smrg STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL == 1282b8e80941Smrg TESS_SPACING_FRACTIONAL_ODD - 1); 1283b8e80941Smrg STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL == 1284b8e80941Smrg TESS_SPACING_FRACTIONAL_EVEN - 1); 1285b8e80941Smrg 1286b8e80941Smrg prog_data->partitioning = 1287b8e80941Smrg (enum brw_tess_partitioning) (nir->info.tess.spacing - 1); 1288b8e80941Smrg 1289b8e80941Smrg switch (nir->info.tess.primitive_mode) { 1290b8e80941Smrg case GL_QUADS: 1291b8e80941Smrg prog_data->domain = BRW_TESS_DOMAIN_QUAD; 1292b8e80941Smrg break; 1293b8e80941Smrg case GL_TRIANGLES: 1294b8e80941Smrg prog_data->domain = BRW_TESS_DOMAIN_TRI; 1295b8e80941Smrg break; 1296b8e80941Smrg case GL_ISOLINES: 1297b8e80941Smrg prog_data->domain = BRW_TESS_DOMAIN_ISOLINE; 1298b8e80941Smrg break; 1299b8e80941Smrg default: 1300b8e80941Smrg unreachable("invalid domain shader primitive mode"); 1301b8e80941Smrg } 1302b8e80941Smrg 1303b8e80941Smrg if (nir->info.tess.point_mode) { 1304b8e80941Smrg prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; 1305b8e80941Smrg } else if (nir->info.tess.primitive_mode == GL_ISOLINES) { 1306b8e80941Smrg prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; 1307b8e80941Smrg } else { 1308b8e80941Smrg /* Hardware winding order is backwards from OpenGL */ 1309b8e80941Smrg prog_data->output_topology = 1310b8e80941Smrg nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW 1311b8e80941Smrg : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; 1312b8e80941Smrg } 1313b8e80941Smrg 1314b8e80941Smrg if (unlikely(INTEL_DEBUG & DEBUG_TES)) { 1315b8e80941Smrg fprintf(stderr, "TES Input "); 1316b8e80941Smrg brw_print_vue_map(stderr, input_vue_map); 1317b8e80941Smrg fprintf(stderr, "TES Output "); 1318b8e80941Smrg brw_print_vue_map(stderr, &prog_data->base.vue_map); 1319b8e80941Smrg } 1320b8e80941Smrg 1321b8e80941Smrg if (is_scalar) { 1322b8e80941Smrg fs_visitor v(compiler, log_data, mem_ctx, (void *) key, 1323b8e80941Smrg &prog_data->base.base, NULL, nir, 8, 1324b8e80941Smrg shader_time_index, input_vue_map); 1325b8e80941Smrg if (!v.run_tes()) { 1326b8e80941Smrg if (error_str) 1327b8e80941Smrg *error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1328b8e80941Smrg return NULL; 1329b8e80941Smrg } 1330b8e80941Smrg 1331b8e80941Smrg prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; 1332b8e80941Smrg prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; 1333b8e80941Smrg 1334b8e80941Smrg fs_generator g(compiler, log_data, mem_ctx, 1335b8e80941Smrg &prog_data->base.base, v.promoted_constants, false, 1336b8e80941Smrg MESA_SHADER_TESS_EVAL); 1337b8e80941Smrg if (unlikely(INTEL_DEBUG & DEBUG_TES)) { 1338b8e80941Smrg g.enable_debug(ralloc_asprintf(mem_ctx, 1339b8e80941Smrg "%s tessellation evaluation shader %s", 1340b8e80941Smrg nir->info.label ? nir->info.label 1341b8e80941Smrg : "unnamed", 1342b8e80941Smrg nir->info.name)); 1343b8e80941Smrg } 1344b8e80941Smrg 1345b8e80941Smrg g.generate_code(v.cfg, 8); 1346b8e80941Smrg 1347b8e80941Smrg assembly = g.get_assembly(); 1348b8e80941Smrg } else { 1349b8e80941Smrg brw::vec4_tes_visitor v(compiler, log_data, key, prog_data, 1350b8e80941Smrg nir, mem_ctx, shader_time_index); 1351b8e80941Smrg if (!v.run()) { 1352b8e80941Smrg if (error_str) 1353b8e80941Smrg *error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1354b8e80941Smrg return NULL; 1355b8e80941Smrg } 1356b8e80941Smrg 1357b8e80941Smrg if (unlikely(INTEL_DEBUG & DEBUG_TES)) 1358b8e80941Smrg v.dump_instructions(); 1359b8e80941Smrg 1360b8e80941Smrg assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, 1361b8e80941Smrg &prog_data->base, v.cfg); 1362b8e80941Smrg } 1363b8e80941Smrg 1364b8e80941Smrg return assembly; 1365b8e80941Smrg} 1366