101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015-2016 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "brw_compiler.h" 2501e04c3fSmrg#include "brw_shader.h" 2601e04c3fSmrg#include "brw_eu.h" 277ec681f3Smrg#include "dev/intel_debug.h" 2801e04c3fSmrg#include "compiler/nir/nir.h" 2901e04c3fSmrg#include "main/errors.h" 3001e04c3fSmrg#include "util/debug.h" 3101e04c3fSmrg 3201e04c3fSmrg#define COMMON_OPTIONS \ 3301e04c3fSmrg .lower_fdiv = true, \ 3401e04c3fSmrg .lower_scmp = true, \ 359f464c52Smaya .lower_flrp16 = true, \ 367ec681f3Smrg .lower_fmod = true, \ 3701e04c3fSmrg .lower_bitfield_extract = true, \ 3801e04c3fSmrg .lower_bitfield_insert = true, \ 3901e04c3fSmrg .lower_uadd_carry = true, \ 4001e04c3fSmrg .lower_usub_borrow = true, \ 4101e04c3fSmrg .lower_flrp64 = true, \ 429f464c52Smaya .lower_isign = true, \ 4301e04c3fSmrg .lower_ldexp = true, \ 4401e04c3fSmrg .lower_device_index_to_zero = true, \ 457ec681f3Smrg .vectorize_io = true, \ 4601e04c3fSmrg .use_interpolated_input_intrinsics = true, \ 477ec681f3Smrg .lower_insert_byte = true, \ 487ec681f3Smrg .lower_insert_word = true, \ 4901e04c3fSmrg .vertex_id_zero_based = true, \ 507ec681f3Smrg .lower_base_vertex = true, \ 517ec681f3Smrg .use_scoped_barrier = true, \ 527ec681f3Smrg .support_16bit_alu = true, \ 537ec681f3Smrg .lower_uniforms_to_ubo = true, \ 547ec681f3Smrg .has_txs = true 5501e04c3fSmrg 5601e04c3fSmrg#define COMMON_SCALAR_OPTIONS \ 577ec681f3Smrg .lower_to_scalar = true, \ 5801e04c3fSmrg .lower_pack_half_2x16 = true, \ 5901e04c3fSmrg .lower_pack_snorm_2x16 = true, \ 6001e04c3fSmrg .lower_pack_snorm_4x8 = true, \ 6101e04c3fSmrg .lower_pack_unorm_2x16 = true, \ 6201e04c3fSmrg .lower_pack_unorm_4x8 = true, \ 6301e04c3fSmrg .lower_unpack_half_2x16 = true, \ 6401e04c3fSmrg .lower_unpack_snorm_2x16 = true, \ 6501e04c3fSmrg .lower_unpack_snorm_4x8 = true, \ 6601e04c3fSmrg .lower_unpack_unorm_2x16 = true, \ 6701e04c3fSmrg .lower_unpack_unorm_4x8 = true, \ 687ec681f3Smrg .lower_usub_sat64 = true, \ 697ec681f3Smrg .lower_hadd64 = true, \ 707ec681f3Smrg .avoid_ternary_with_two_constants = true, \ 717ec681f3Smrg .has_pack_32_4x8 = true, \ 727ec681f3Smrg .max_unroll_iterations = 32, \ 737ec681f3Smrg .force_indirect_unrolling = nir_var_function_temp 7401e04c3fSmrg 7501e04c3fSmrgstatic const struct nir_shader_compiler_options scalar_nir_options = { 7601e04c3fSmrg COMMON_OPTIONS, 7701e04c3fSmrg COMMON_SCALAR_OPTIONS, 7801e04c3fSmrg}; 7901e04c3fSmrg 8001e04c3fSmrgstatic const struct nir_shader_compiler_options vector_nir_options = { 8101e04c3fSmrg COMMON_OPTIONS, 8201e04c3fSmrg 8301e04c3fSmrg /* In the vec4 backend, our dpN instruction replicates its result to all the 8401e04c3fSmrg * components of a vec4. We would like NIR to give us replicated fdot 8501e04c3fSmrg * instructions because it can optimize better for us. 8601e04c3fSmrg */ 8701e04c3fSmrg .fdot_replicates = true, 8801e04c3fSmrg 8901e04c3fSmrg .lower_pack_snorm_2x16 = true, 9001e04c3fSmrg .lower_pack_unorm_2x16 = true, 9101e04c3fSmrg .lower_unpack_snorm_2x16 = true, 9201e04c3fSmrg .lower_unpack_unorm_2x16 = true, 9301e04c3fSmrg .lower_extract_byte = true, 9401e04c3fSmrg .lower_extract_word = true, 957ec681f3Smrg .intel_vec4 = true, 9601e04c3fSmrg .max_unroll_iterations = 32, 9701e04c3fSmrg}; 9801e04c3fSmrg 9901e04c3fSmrgstruct brw_compiler * 1007ec681f3Smrgbrw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) 10101e04c3fSmrg{ 10201e04c3fSmrg struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); 10301e04c3fSmrg 10401e04c3fSmrg compiler->devinfo = devinfo; 10501e04c3fSmrg 10601e04c3fSmrg brw_fs_alloc_reg_sets(compiler); 10701e04c3fSmrg brw_vec4_alloc_reg_set(compiler); 10801e04c3fSmrg 10901e04c3fSmrg compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false); 11001e04c3fSmrg 1117ec681f3Smrg compiler->use_tcs_8_patch = 1127ec681f3Smrg devinfo->ver >= 12 || 1137ec681f3Smrg (devinfo->ver >= 9 && INTEL_DEBUG(DEBUG_TCS_EIGHT_PATCH)); 1147ec681f3Smrg 1157ec681f3Smrg /* Default to the sampler since that's what we've done since forever */ 1167ec681f3Smrg compiler->indirect_ubos_use_sampler = true; 1177ec681f3Smrg 1187ec681f3Smrg /* There is no vec4 mode on Gfx10+, and we don't use it at all on Gfx8+. */ 1197ec681f3Smrg for (int i = MESA_SHADER_VERTEX; i < MESA_ALL_SHADER_STAGES; i++) { 1207ec681f3Smrg compiler->scalar_stage[i] = devinfo->ver >= 8 || 1217ec681f3Smrg i == MESA_SHADER_FRAGMENT || i == MESA_SHADER_COMPUTE; 12201e04c3fSmrg } 12301e04c3fSmrg 1247ec681f3Smrg for (int i = MESA_SHADER_TASK; i < MESA_VULKAN_SHADER_STAGES; i++) 1257ec681f3Smrg compiler->scalar_stage[i] = true; 1267ec681f3Smrg 1279f464c52Smaya nir_lower_int64_options int64_options = 1289f464c52Smaya nir_lower_imul64 | 1299f464c52Smaya nir_lower_isign64 | 1309f464c52Smaya nir_lower_divmod64 | 1319f464c52Smaya nir_lower_imul_high64; 1329f464c52Smaya nir_lower_doubles_options fp64_options = 1339f464c52Smaya nir_lower_drcp | 1349f464c52Smaya nir_lower_dsqrt | 1359f464c52Smaya nir_lower_drsq | 1369f464c52Smaya nir_lower_dtrunc | 1379f464c52Smaya nir_lower_dfloor | 1389f464c52Smaya nir_lower_dceil | 1399f464c52Smaya nir_lower_dfract | 1409f464c52Smaya nir_lower_dround_even | 1417ec681f3Smrg nir_lower_dmod | 1427ec681f3Smrg nir_lower_dsub | 1437ec681f3Smrg nir_lower_ddiv; 1447ec681f3Smrg 1457ec681f3Smrg if (!devinfo->has_64bit_float || INTEL_DEBUG(DEBUG_SOFT64)) { 1467ec681f3Smrg int64_options |= (nir_lower_int64_options)~0; 1479f464c52Smaya fp64_options |= nir_lower_fp64_full_software; 1489f464c52Smaya } 1499f464c52Smaya 1509f464c52Smaya /* The Bspec's section tittled "Instruction_multiply[DevBDW+]" claims that 1517ec681f3Smrg * destination type can be Quadword and source type Doubleword for Gfx8 and 1527ec681f3Smrg * Gfx9. So, lower 64 bit multiply instruction on rest of the platforms. 1539f464c52Smaya */ 1547ec681f3Smrg if (devinfo->ver < 8 || devinfo->ver > 9) 1559f464c52Smaya int64_options |= nir_lower_imul_2x32_64; 1569f464c52Smaya 15701e04c3fSmrg /* We want the GLSL compiler to emit code that uses condition codes */ 1587ec681f3Smrg for (int i = 0; i < MESA_ALL_SHADER_STAGES; i++) { 15901e04c3fSmrg compiler->glsl_compiler_options[i].MaxUnrollIterations = 0; 16001e04c3fSmrg compiler->glsl_compiler_options[i].MaxIfDepth = 1617ec681f3Smrg devinfo->ver < 6 ? 16 : UINT_MAX; 16201e04c3fSmrg 1637ec681f3Smrg /* We handle this in NIR */ 1647ec681f3Smrg compiler->glsl_compiler_options[i].EmitNoIndirectInput = false; 1657ec681f3Smrg compiler->glsl_compiler_options[i].EmitNoIndirectOutput = false; 16601e04c3fSmrg compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; 1677ec681f3Smrg compiler->glsl_compiler_options[i].EmitNoIndirectTemp = false; 16801e04c3fSmrg 16901e04c3fSmrg bool is_scalar = compiler->scalar_stage[i]; 17001e04c3fSmrg compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar; 17101e04c3fSmrg 1729f464c52Smaya struct nir_shader_compiler_options *nir_options = 1739f464c52Smaya rzalloc(compiler, struct nir_shader_compiler_options); 17401e04c3fSmrg if (is_scalar) { 1759f464c52Smaya *nir_options = scalar_nir_options; 17601e04c3fSmrg } else { 1779f464c52Smaya *nir_options = vector_nir_options; 17801e04c3fSmrg } 17901e04c3fSmrg 1807ec681f3Smrg /* Prior to Gfx6, there are no three source operations, and Gfx11 loses 1817ec681f3Smrg * LRP. 1827ec681f3Smrg */ 1837ec681f3Smrg nir_options->lower_ffma16 = devinfo->ver < 6; 1847ec681f3Smrg nir_options->lower_ffma32 = devinfo->ver < 6; 1857ec681f3Smrg nir_options->lower_ffma64 = devinfo->ver < 6; 1867ec681f3Smrg nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11; 1877ec681f3Smrg nir_options->lower_fpow = devinfo->ver >= 12; 1887ec681f3Smrg 1897ec681f3Smrg nir_options->lower_rotate = devinfo->ver < 11; 1907ec681f3Smrg nir_options->lower_bitfield_reverse = devinfo->ver < 7; 1917ec681f3Smrg nir_options->has_iadd3 = devinfo->verx10 >= 125; 1929f464c52Smaya 1937ec681f3Smrg nir_options->has_dot_4x8 = devinfo->ver >= 12; 1947ec681f3Smrg nir_options->has_sudot_4x8 = devinfo->ver >= 12; 1959f464c52Smaya 1969f464c52Smaya nir_options->lower_int64_options = int64_options; 1979f464c52Smaya nir_options->lower_doubles_options = fp64_options; 1987ec681f3Smrg 1997ec681f3Smrg nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT; 2007ec681f3Smrg 2017ec681f3Smrg nir_options->force_indirect_unrolling |= 2027ec681f3Smrg brw_nir_no_indirect_mask(compiler, i); 2037ec681f3Smrg 2049f464c52Smaya compiler->glsl_compiler_options[i].NirOptions = nir_options; 2059f464c52Smaya 20601e04c3fSmrg compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true; 20701e04c3fSmrg } 20801e04c3fSmrg 20901e04c3fSmrg return compiler; 21001e04c3fSmrg} 21101e04c3fSmrg 21201e04c3fSmrgstatic void 21301e04c3fSmrginsert_u64_bit(uint64_t *val, bool add) 21401e04c3fSmrg{ 21501e04c3fSmrg *val = (*val << 1) | !!add; 21601e04c3fSmrg} 21701e04c3fSmrg 21801e04c3fSmrguint64_t 21901e04c3fSmrgbrw_get_compiler_config_value(const struct brw_compiler *compiler) 22001e04c3fSmrg{ 22101e04c3fSmrg uint64_t config = 0; 22201e04c3fSmrg insert_u64_bit(&config, compiler->precise_trig); 2237ec681f3Smrg if (compiler->devinfo->ver >= 8 && compiler->devinfo->ver < 10) { 22401e04c3fSmrg insert_u64_bit(&config, compiler->scalar_stage[MESA_SHADER_VERTEX]); 22501e04c3fSmrg insert_u64_bit(&config, compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); 22601e04c3fSmrg insert_u64_bit(&config, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); 22701e04c3fSmrg insert_u64_bit(&config, compiler->scalar_stage[MESA_SHADER_GEOMETRY]); 22801e04c3fSmrg } 22901e04c3fSmrg uint64_t mask = DEBUG_DISK_CACHE_MASK; 23001e04c3fSmrg while (mask != 0) { 23101e04c3fSmrg const uint64_t bit = 1ULL << (ffsll(mask) - 1); 2327ec681f3Smrg insert_u64_bit(&config, INTEL_DEBUG(bit)); 23301e04c3fSmrg mask &= ~bit; 23401e04c3fSmrg } 23501e04c3fSmrg return config; 23601e04c3fSmrg} 23701e04c3fSmrg 23801e04c3fSmrgunsigned 23901e04c3fSmrgbrw_prog_data_size(gl_shader_stage stage) 24001e04c3fSmrg{ 24101e04c3fSmrg static const size_t stage_sizes[] = { 2427ec681f3Smrg [MESA_SHADER_VERTEX] = sizeof(struct brw_vs_prog_data), 2437ec681f3Smrg [MESA_SHADER_TESS_CTRL] = sizeof(struct brw_tcs_prog_data), 2447ec681f3Smrg [MESA_SHADER_TESS_EVAL] = sizeof(struct brw_tes_prog_data), 2457ec681f3Smrg [MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_data), 2467ec681f3Smrg [MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_data), 2477ec681f3Smrg [MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_data), 2487ec681f3Smrg [MESA_SHADER_RAYGEN] = sizeof(struct brw_bs_prog_data), 2497ec681f3Smrg [MESA_SHADER_ANY_HIT] = sizeof(struct brw_bs_prog_data), 2507ec681f3Smrg [MESA_SHADER_CLOSEST_HIT] = sizeof(struct brw_bs_prog_data), 2517ec681f3Smrg [MESA_SHADER_MISS] = sizeof(struct brw_bs_prog_data), 2527ec681f3Smrg [MESA_SHADER_INTERSECTION] = sizeof(struct brw_bs_prog_data), 2537ec681f3Smrg [MESA_SHADER_CALLABLE] = sizeof(struct brw_bs_prog_data), 2547ec681f3Smrg [MESA_SHADER_KERNEL] = sizeof(struct brw_cs_prog_data), 25501e04c3fSmrg }; 25601e04c3fSmrg assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes)); 25701e04c3fSmrg return stage_sizes[stage]; 25801e04c3fSmrg} 25901e04c3fSmrg 26001e04c3fSmrgunsigned 26101e04c3fSmrgbrw_prog_key_size(gl_shader_stage stage) 26201e04c3fSmrg{ 26301e04c3fSmrg static const size_t stage_sizes[] = { 2647ec681f3Smrg [MESA_SHADER_VERTEX] = sizeof(struct brw_vs_prog_key), 2657ec681f3Smrg [MESA_SHADER_TESS_CTRL] = sizeof(struct brw_tcs_prog_key), 2667ec681f3Smrg [MESA_SHADER_TESS_EVAL] = sizeof(struct brw_tes_prog_key), 2677ec681f3Smrg [MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_key), 2687ec681f3Smrg [MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_key), 2697ec681f3Smrg [MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_key), 2707ec681f3Smrg [MESA_SHADER_RAYGEN] = sizeof(struct brw_bs_prog_key), 2717ec681f3Smrg [MESA_SHADER_ANY_HIT] = sizeof(struct brw_bs_prog_key), 2727ec681f3Smrg [MESA_SHADER_CLOSEST_HIT] = sizeof(struct brw_bs_prog_key), 2737ec681f3Smrg [MESA_SHADER_MISS] = sizeof(struct brw_bs_prog_key), 2747ec681f3Smrg [MESA_SHADER_INTERSECTION] = sizeof(struct brw_bs_prog_key), 2757ec681f3Smrg [MESA_SHADER_CALLABLE] = sizeof(struct brw_bs_prog_key), 2767ec681f3Smrg [MESA_SHADER_KERNEL] = sizeof(struct brw_cs_prog_key), 27701e04c3fSmrg }; 27801e04c3fSmrg assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes)); 27901e04c3fSmrg return stage_sizes[stage]; 28001e04c3fSmrg} 2817ec681f3Smrg 2827ec681f3Smrgvoid 2837ec681f3Smrgbrw_write_shader_relocs(const struct intel_device_info *devinfo, 2847ec681f3Smrg void *program, 2857ec681f3Smrg const struct brw_stage_prog_data *prog_data, 2867ec681f3Smrg struct brw_shader_reloc_value *values, 2877ec681f3Smrg unsigned num_values) 2887ec681f3Smrg{ 2897ec681f3Smrg for (unsigned i = 0; i < prog_data->num_relocs; i++) { 2907ec681f3Smrg assert(prog_data->relocs[i].offset % 8 == 0); 2917ec681f3Smrg void *dst = program + prog_data->relocs[i].offset; 2927ec681f3Smrg for (unsigned j = 0; j < num_values; j++) { 2937ec681f3Smrg if (prog_data->relocs[i].id == values[j].id) { 2947ec681f3Smrg uint32_t value = values[j].value + prog_data->relocs[i].delta; 2957ec681f3Smrg switch (prog_data->relocs[i].type) { 2967ec681f3Smrg case BRW_SHADER_RELOC_TYPE_U32: 2977ec681f3Smrg *(uint32_t *)dst = value; 2987ec681f3Smrg break; 2997ec681f3Smrg case BRW_SHADER_RELOC_TYPE_MOV_IMM: 3007ec681f3Smrg brw_update_reloc_imm(devinfo, dst, value); 3017ec681f3Smrg break; 3027ec681f3Smrg default: 3037ec681f3Smrg unreachable("Invalid relocation type"); 3047ec681f3Smrg } 3057ec681f3Smrg break; 3067ec681f3Smrg } 3077ec681f3Smrg } 3087ec681f3Smrg } 3097ec681f3Smrg} 310