17ec681f3Smrg/* 27ec681f3Smrg * Copyright © Microsoft Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "u_math.h" 257ec681f3Smrg#include "nir.h" 267ec681f3Smrg#include "glsl_types.h" 277ec681f3Smrg#include "nir_types.h" 287ec681f3Smrg#include "nir_builder.h" 297ec681f3Smrg 307ec681f3Smrg#include "clc_nir.h" 317ec681f3Smrg#include "clc_compiler.h" 327ec681f3Smrg#include "../compiler/dxil_nir.h" 337ec681f3Smrg 347ec681f3Smrgstatic bool 357ec681f3Smrglower_load_base_global_invocation_id(nir_builder *b, nir_intrinsic_instr *intr, 367ec681f3Smrg nir_variable *var) 377ec681f3Smrg{ 387ec681f3Smrg b->cursor = nir_after_instr(&intr->instr); 397ec681f3Smrg 407ec681f3Smrg nir_ssa_def *offset = 417ec681f3Smrg build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding), 427ec681f3Smrg nir_imm_int(b, 437ec681f3Smrg offsetof(struct clc_work_properties_data, 447ec681f3Smrg global_offset_x)), 457ec681f3Smrg nir_dest_num_components(intr->dest), 467ec681f3Smrg nir_dest_bit_size(intr->dest)); 477ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, offset); 487ec681f3Smrg nir_instr_remove(&intr->instr); 497ec681f3Smrg return true; 507ec681f3Smrg} 517ec681f3Smrg 527ec681f3Smrgstatic bool 537ec681f3Smrglower_load_work_dim(nir_builder *b, nir_intrinsic_instr *intr, 547ec681f3Smrg nir_variable *var) 557ec681f3Smrg{ 567ec681f3Smrg b->cursor = nir_after_instr(&intr->instr); 577ec681f3Smrg 587ec681f3Smrg nir_ssa_def *dim = 597ec681f3Smrg build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding), 607ec681f3Smrg nir_imm_int(b, 617ec681f3Smrg offsetof(struct clc_work_properties_data, 627ec681f3Smrg work_dim)), 637ec681f3Smrg nir_dest_num_components(intr->dest), 647ec681f3Smrg nir_dest_bit_size(intr->dest)); 657ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, dim); 667ec681f3Smrg nir_instr_remove(&intr->instr); 677ec681f3Smrg return true; 687ec681f3Smrg} 697ec681f3Smrg 707ec681f3Smrgstatic bool 717ec681f3Smrglower_load_local_group_size(nir_builder *b, nir_intrinsic_instr *intr) 727ec681f3Smrg{ 737ec681f3Smrg b->cursor = nir_after_instr(&intr->instr); 747ec681f3Smrg 757ec681f3Smrg nir_const_value v[3] = { 767ec681f3Smrg nir_const_value_for_int(b->shader->info.workgroup_size[0], 32), 777ec681f3Smrg nir_const_value_for_int(b->shader->info.workgroup_size[1], 32), 787ec681f3Smrg nir_const_value_for_int(b->shader->info.workgroup_size[2], 32) 797ec681f3Smrg }; 807ec681f3Smrg nir_ssa_def *size = nir_build_imm(b, 3, 32, v); 817ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, size); 827ec681f3Smrg nir_instr_remove(&intr->instr); 837ec681f3Smrg return true; 847ec681f3Smrg} 857ec681f3Smrg 867ec681f3Smrgstatic bool 877ec681f3Smrglower_load_num_workgroups(nir_builder *b, nir_intrinsic_instr *intr, 887ec681f3Smrg nir_variable *var) 897ec681f3Smrg{ 907ec681f3Smrg b->cursor = nir_after_instr(&intr->instr); 917ec681f3Smrg 927ec681f3Smrg nir_ssa_def *count = 937ec681f3Smrg build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding), 947ec681f3Smrg nir_imm_int(b, 957ec681f3Smrg offsetof(struct clc_work_properties_data, 967ec681f3Smrg group_count_total_x)), 977ec681f3Smrg nir_dest_num_components(intr->dest), 987ec681f3Smrg nir_dest_bit_size(intr->dest)); 997ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, count); 1007ec681f3Smrg nir_instr_remove(&intr->instr); 1017ec681f3Smrg return true; 1027ec681f3Smrg} 1037ec681f3Smrg 1047ec681f3Smrgstatic bool 1057ec681f3Smrglower_load_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intr, 1067ec681f3Smrg nir_variable *var) 1077ec681f3Smrg{ 1087ec681f3Smrg b->cursor = nir_after_instr(&intr->instr); 1097ec681f3Smrg 1107ec681f3Smrg nir_ssa_def *offset = 1117ec681f3Smrg build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding), 1127ec681f3Smrg nir_imm_int(b, 1137ec681f3Smrg offsetof(struct clc_work_properties_data, 1147ec681f3Smrg group_id_offset_x)), 1157ec681f3Smrg nir_dest_num_components(intr->dest), 1167ec681f3Smrg nir_dest_bit_size(intr->dest)); 1177ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, offset); 1187ec681f3Smrg nir_instr_remove(&intr->instr); 1197ec681f3Smrg return true; 1207ec681f3Smrg} 1217ec681f3Smrg 1227ec681f3Smrgbool 1237ec681f3Smrgclc_nir_lower_system_values(nir_shader *nir, nir_variable *var) 1247ec681f3Smrg{ 1257ec681f3Smrg bool progress = false; 1267ec681f3Smrg 1277ec681f3Smrg foreach_list_typed(nir_function, func, node, &nir->functions) { 1287ec681f3Smrg if (!func->is_entrypoint) 1297ec681f3Smrg continue; 1307ec681f3Smrg assert(func->impl); 1317ec681f3Smrg 1327ec681f3Smrg nir_builder b; 1337ec681f3Smrg nir_builder_init(&b, func->impl); 1347ec681f3Smrg 1357ec681f3Smrg nir_foreach_block(block, func->impl) { 1367ec681f3Smrg nir_foreach_instr_safe(instr, block) { 1377ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 1387ec681f3Smrg continue; 1397ec681f3Smrg 1407ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1417ec681f3Smrg 1427ec681f3Smrg switch (intr->intrinsic) { 1437ec681f3Smrg case nir_intrinsic_load_base_global_invocation_id: 1447ec681f3Smrg progress |= lower_load_base_global_invocation_id(&b, intr, var); 1457ec681f3Smrg break; 1467ec681f3Smrg case nir_intrinsic_load_work_dim: 1477ec681f3Smrg progress |= lower_load_work_dim(&b, intr, var); 1487ec681f3Smrg break; 1497ec681f3Smrg case nir_intrinsic_load_workgroup_size: 1507ec681f3Smrg lower_load_local_group_size(&b, intr); 1517ec681f3Smrg break; 1527ec681f3Smrg case nir_intrinsic_load_num_workgroups: 1537ec681f3Smrg lower_load_num_workgroups(&b, intr, var); 1547ec681f3Smrg break; 1557ec681f3Smrg case nir_intrinsic_load_base_workgroup_id: 1567ec681f3Smrg lower_load_base_workgroup_id(&b, intr, var); 1577ec681f3Smrg break; 1587ec681f3Smrg default: break; 1597ec681f3Smrg } 1607ec681f3Smrg } 1617ec681f3Smrg } 1627ec681f3Smrg } 1637ec681f3Smrg 1647ec681f3Smrg return progress; 1657ec681f3Smrg} 1667ec681f3Smrg 1677ec681f3Smrgstatic bool 1687ec681f3Smrglower_load_kernel_input(nir_builder *b, nir_intrinsic_instr *intr, 1697ec681f3Smrg nir_variable *var) 1707ec681f3Smrg{ 1717ec681f3Smrg b->cursor = nir_before_instr(&intr->instr); 1727ec681f3Smrg 1737ec681f3Smrg unsigned bit_size = nir_dest_bit_size(intr->dest); 1747ec681f3Smrg enum glsl_base_type base_type; 1757ec681f3Smrg 1767ec681f3Smrg switch (bit_size) { 1777ec681f3Smrg case 64: 1787ec681f3Smrg base_type = GLSL_TYPE_UINT64; 1797ec681f3Smrg break; 1807ec681f3Smrg case 32: 1817ec681f3Smrg base_type = GLSL_TYPE_UINT; 1827ec681f3Smrg break; 1837ec681f3Smrg case 16: 1847ec681f3Smrg base_type = GLSL_TYPE_UINT16; 1857ec681f3Smrg break; 1867ec681f3Smrg case 8: 1877ec681f3Smrg base_type = GLSL_TYPE_UINT8; 1887ec681f3Smrg break; 1897ec681f3Smrg } 1907ec681f3Smrg 1917ec681f3Smrg const struct glsl_type *type = 1927ec681f3Smrg glsl_vector_type(base_type, nir_dest_num_components(intr->dest)); 1937ec681f3Smrg nir_ssa_def *ptr = nir_vec2(b, nir_imm_int(b, var->data.binding), 1947ec681f3Smrg nir_u2u(b, intr->src[0].ssa, 32)); 1957ec681f3Smrg nir_deref_instr *deref = nir_build_deref_cast(b, ptr, nir_var_mem_ubo, type, 1967ec681f3Smrg bit_size / 8); 1977ec681f3Smrg deref->cast.align_mul = nir_intrinsic_align_mul(intr); 1987ec681f3Smrg deref->cast.align_offset = nir_intrinsic_align_offset(intr); 1997ec681f3Smrg 2007ec681f3Smrg nir_ssa_def *result = 2017ec681f3Smrg nir_load_deref(b, deref); 2027ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, result); 2037ec681f3Smrg nir_instr_remove(&intr->instr); 2047ec681f3Smrg return true; 2057ec681f3Smrg} 2067ec681f3Smrg 2077ec681f3Smrgbool 2087ec681f3Smrgclc_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var) 2097ec681f3Smrg{ 2107ec681f3Smrg bool progress = false; 2117ec681f3Smrg 2127ec681f3Smrg foreach_list_typed(nir_function, func, node, &nir->functions) { 2137ec681f3Smrg if (!func->is_entrypoint) 2147ec681f3Smrg continue; 2157ec681f3Smrg assert(func->impl); 2167ec681f3Smrg 2177ec681f3Smrg nir_builder b; 2187ec681f3Smrg nir_builder_init(&b, func->impl); 2197ec681f3Smrg 2207ec681f3Smrg nir_foreach_block(block, func->impl) { 2217ec681f3Smrg nir_foreach_instr_safe(instr, block) { 2227ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 2237ec681f3Smrg continue; 2247ec681f3Smrg 2257ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 2267ec681f3Smrg 2277ec681f3Smrg if (intr->intrinsic == nir_intrinsic_load_kernel_input) 2287ec681f3Smrg progress |= lower_load_kernel_input(&b, intr, var); 2297ec681f3Smrg } 2307ec681f3Smrg } 2317ec681f3Smrg } 2327ec681f3Smrg 2337ec681f3Smrg return progress; 2347ec681f3Smrg} 2357ec681f3Smrg 2367ec681f3Smrg 2377ec681f3Smrgstatic nir_variable * 2387ec681f3Smrgadd_printf_var(struct nir_shader *nir, unsigned uav_id) 2397ec681f3Smrg{ 2407ec681f3Smrg /* This size is arbitrary. Minimum required per spec is 1MB */ 2417ec681f3Smrg const unsigned max_printf_size = 1 * 1024 * 1024; 2427ec681f3Smrg const unsigned printf_array_size = max_printf_size / sizeof(unsigned); 2437ec681f3Smrg nir_variable *var = 2447ec681f3Smrg nir_variable_create(nir, nir_var_mem_ssbo, 2457ec681f3Smrg glsl_array_type(glsl_uint_type(), printf_array_size, sizeof(unsigned)), 2467ec681f3Smrg "printf"); 2477ec681f3Smrg var->data.binding = uav_id; 2487ec681f3Smrg return var; 2497ec681f3Smrg} 2507ec681f3Smrg 2517ec681f3Smrgbool 2527ec681f3Smrgclc_lower_printf_base(nir_shader *nir, unsigned uav_id) 2537ec681f3Smrg{ 2547ec681f3Smrg nir_variable *printf_var = NULL; 2557ec681f3Smrg nir_ssa_def *printf_deref = NULL; 2567ec681f3Smrg nir_foreach_function(func, nir) { 2577ec681f3Smrg nir_builder b; 2587ec681f3Smrg nir_builder_init(&b, func->impl); 2597ec681f3Smrg b.cursor = nir_before_instr(nir_block_first_instr(nir_start_block(func->impl))); 2607ec681f3Smrg bool progress = false; 2617ec681f3Smrg 2627ec681f3Smrg nir_foreach_block(block, func->impl) { 2637ec681f3Smrg nir_foreach_instr_safe(instr, block) { 2647ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 2657ec681f3Smrg continue; 2667ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2677ec681f3Smrg if (intrin->intrinsic != nir_intrinsic_load_printf_buffer_address) 2687ec681f3Smrg continue; 2697ec681f3Smrg 2707ec681f3Smrg if (!printf_var) { 2717ec681f3Smrg printf_var = add_printf_var(nir, uav_id); 2727ec681f3Smrg nir_deref_instr *deref = nir_build_deref_var(&b, printf_var); 2737ec681f3Smrg printf_deref = &deref->dest.ssa; 2747ec681f3Smrg } 2757ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, printf_deref); 2767ec681f3Smrg progress = true; 2777ec681f3Smrg } 2787ec681f3Smrg } 2797ec681f3Smrg 2807ec681f3Smrg if (progress) 2817ec681f3Smrg nir_metadata_preserve(func->impl, nir_metadata_loop_analysis | 2827ec681f3Smrg nir_metadata_block_index | 2837ec681f3Smrg nir_metadata_dominance); 2847ec681f3Smrg else 2857ec681f3Smrg nir_metadata_preserve(func->impl, nir_metadata_all); 2867ec681f3Smrg } 2877ec681f3Smrg 2887ec681f3Smrg return printf_var != NULL; 2897ec681f3Smrg} 2907ec681f3Smrg 2917ec681f3Smrgstatic nir_variable * 2927ec681f3Smrgfind_identical_const_sampler(nir_shader *nir, nir_variable *sampler) 2937ec681f3Smrg{ 2947ec681f3Smrg nir_foreach_variable_with_modes(uniform, nir, nir_var_uniform) { 2957ec681f3Smrg if (!glsl_type_is_sampler(uniform->type) || !uniform->data.sampler.is_inline_sampler) 2967ec681f3Smrg continue; 2977ec681f3Smrg if (uniform->data.sampler.addressing_mode == sampler->data.sampler.addressing_mode && 2987ec681f3Smrg uniform->data.sampler.normalized_coordinates == sampler->data.sampler.normalized_coordinates && 2997ec681f3Smrg uniform->data.sampler.filter_mode == sampler->data.sampler.filter_mode) 3007ec681f3Smrg return uniform; 3017ec681f3Smrg } 3027ec681f3Smrg unreachable("Should have at least found the input sampler"); 3037ec681f3Smrg} 3047ec681f3Smrg 3057ec681f3Smrgstatic bool 3067ec681f3Smrgclc_nir_dedupe_const_samplers_instr(nir_builder *b, 3077ec681f3Smrg nir_instr *instr, 3087ec681f3Smrg void *cb_data) 3097ec681f3Smrg{ 3107ec681f3Smrg nir_shader *nir = cb_data; 3117ec681f3Smrg if (instr->type != nir_instr_type_tex) 3127ec681f3Smrg return false; 3137ec681f3Smrg 3147ec681f3Smrg nir_tex_instr *tex = nir_instr_as_tex(instr); 3157ec681f3Smrg int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); 3167ec681f3Smrg if (sampler_idx == -1) 3177ec681f3Smrg return false; 3187ec681f3Smrg 3197ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_idx].src); 3207ec681f3Smrg nir_variable *sampler = nir_deref_instr_get_variable(deref); 3217ec681f3Smrg if (!sampler) 3227ec681f3Smrg return false; 3237ec681f3Smrg 3247ec681f3Smrg assert(sampler->data.mode == nir_var_uniform); 3257ec681f3Smrg 3267ec681f3Smrg if (!sampler->data.sampler.is_inline_sampler) 3277ec681f3Smrg return false; 3287ec681f3Smrg 3297ec681f3Smrg nir_variable *replacement = find_identical_const_sampler(nir, sampler); 3307ec681f3Smrg if (replacement == sampler) 3317ec681f3Smrg return false; 3327ec681f3Smrg 3337ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 3347ec681f3Smrg nir_deref_instr *replacement_deref = nir_build_deref_var(b, replacement); 3357ec681f3Smrg nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_idx].src, 3367ec681f3Smrg nir_src_for_ssa(&replacement_deref->dest.ssa)); 3377ec681f3Smrg nir_deref_instr_remove_if_unused(deref); 3387ec681f3Smrg 3397ec681f3Smrg return true; 3407ec681f3Smrg} 3417ec681f3Smrg 3427ec681f3Smrgbool 3437ec681f3Smrgclc_nir_dedupe_const_samplers(nir_shader *nir) 3447ec681f3Smrg{ 3457ec681f3Smrg return nir_shader_instructions_pass(nir, 3467ec681f3Smrg clc_nir_dedupe_const_samplers_instr, 3477ec681f3Smrg nir_metadata_block_index | 3487ec681f3Smrg nir_metadata_dominance, 3497ec681f3Smrg nir); 3507ec681f3Smrg} 351