/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "nir.h" 257ec681f3Smrg#include "nir_serialize.h" 267ec681f3Smrg#include "glsl_types.h" 277ec681f3Smrg#include "nir_types.h" 287ec681f3Smrg#include "clc_compiler.h" 297ec681f3Smrg#include "clc_helpers.h" 307ec681f3Smrg#include "clc_nir.h" 317ec681f3Smrg#include "../compiler/dxil_nir.h" 327ec681f3Smrg#include "../compiler/dxil_nir_lower_int_samplers.h" 337ec681f3Smrg#include "../compiler/nir_to_dxil.h" 347ec681f3Smrg 357ec681f3Smrg#include "util/u_debug.h" 367ec681f3Smrg#include <util/u_math.h> 377ec681f3Smrg#include "spirv/nir_spirv.h" 387ec681f3Smrg#include "nir_builder.h" 397ec681f3Smrg#include "nir_builtin_builder.h" 407ec681f3Smrg 417ec681f3Smrg#include "git_sha1.h" 427ec681f3Smrg 437ec681f3Smrgstruct clc_image_lower_context 447ec681f3Smrg{ 457ec681f3Smrg struct clc_dxil_metadata *metadata; 467ec681f3Smrg unsigned *num_srvs; 477ec681f3Smrg unsigned *num_uavs; 487ec681f3Smrg nir_deref_instr *deref; 497ec681f3Smrg unsigned num_buf_ids; 507ec681f3Smrg int metadata_index; 517ec681f3Smrg}; 527ec681f3Smrg 537ec681f3Smrgstatic int 547ec681f3Smrglower_image_deref_impl(nir_builder *b, struct clc_image_lower_context *context, 557ec681f3Smrg const struct glsl_type *new_var_type, 567ec681f3Smrg unsigned *num_bindings) 577ec681f3Smrg{ 587ec681f3Smrg nir_variable *in_var = nir_deref_instr_get_variable(context->deref); 597ec681f3Smrg nir_variable *uniform = nir_variable_create(b->shader, nir_var_uniform, new_var_type, NULL); 607ec681f3Smrg uniform->data.access = in_var->data.access; 617ec681f3Smrg uniform->data.binding = in_var->data.binding; 627ec681f3Smrg if (context->num_buf_ids > 0) { 637ec681f3Smrg // Need to assign a new binding 647ec681f3Smrg context->metadata->args[context->metadata_index]. 
657ec681f3Smrg image.buf_ids[context->num_buf_ids] = uniform->data.binding = (*num_bindings)++; 667ec681f3Smrg } 677ec681f3Smrg context->num_buf_ids++; 687ec681f3Smrg return uniform->data.binding; 697ec681f3Smrg} 707ec681f3Smrg 717ec681f3Smrgstatic int 727ec681f3Smrglower_read_only_image_deref(nir_builder *b, struct clc_image_lower_context *context, 737ec681f3Smrg nir_alu_type image_type) 747ec681f3Smrg{ 757ec681f3Smrg nir_variable *in_var = nir_deref_instr_get_variable(context->deref); 767ec681f3Smrg 777ec681f3Smrg // Non-writeable images should be converted to samplers, 787ec681f3Smrg // since they may have texture operations done on them 797ec681f3Smrg const struct glsl_type *new_var_type = 807ec681f3Smrg glsl_sampler_type(glsl_get_sampler_dim(in_var->type), 817ec681f3Smrg false, glsl_sampler_type_is_array(in_var->type), 827ec681f3Smrg nir_get_glsl_base_type_for_nir_type(image_type | 32)); 837ec681f3Smrg return lower_image_deref_impl(b, context, new_var_type, context->num_srvs); 847ec681f3Smrg} 857ec681f3Smrg 867ec681f3Smrgstatic int 877ec681f3Smrglower_read_write_image_deref(nir_builder *b, struct clc_image_lower_context *context, 887ec681f3Smrg nir_alu_type image_type) 897ec681f3Smrg{ 907ec681f3Smrg nir_variable *in_var = nir_deref_instr_get_variable(context->deref); 917ec681f3Smrg const struct glsl_type *new_var_type = 927ec681f3Smrg glsl_image_type(glsl_get_sampler_dim(in_var->type), 937ec681f3Smrg glsl_sampler_type_is_array(in_var->type), 947ec681f3Smrg nir_get_glsl_base_type_for_nir_type(image_type | 32)); 957ec681f3Smrg return lower_image_deref_impl(b, context, new_var_type, context->num_uavs); 967ec681f3Smrg} 977ec681f3Smrg 987ec681f3Smrgstatic void 997ec681f3Smrgclc_lower_input_image_deref(nir_builder *b, struct clc_image_lower_context *context) 1007ec681f3Smrg{ 1017ec681f3Smrg // The input variable here isn't actually an image, it's just the 1027ec681f3Smrg // image format data. 
1037ec681f3Smrg // 1047ec681f3Smrg // For every use of an image in a different way, we'll add an 1057ec681f3Smrg // appropriate uniform to match it. That can result in up to 1067ec681f3Smrg // 3 uniforms (float4, int4, uint4) for each image. Only one of these 1077ec681f3Smrg // formats will actually produce correct data, but a single kernel 1087ec681f3Smrg // could use runtime conditionals to potentially access any of them. 1097ec681f3Smrg // 1107ec681f3Smrg // If the image is used in a query that doesn't have a corresponding 1117ec681f3Smrg // DXIL intrinsic (CL image channel order or channel format), then 1127ec681f3Smrg // we'll add a kernel input for that data that'll be lowered by the 1137ec681f3Smrg // explicit IO pass later on. 1147ec681f3Smrg // 1157ec681f3Smrg // After all that, we can remove the image input variable and deref. 1167ec681f3Smrg 1177ec681f3Smrg enum image_uniform_type { 1187ec681f3Smrg FLOAT4, 1197ec681f3Smrg INT4, 1207ec681f3Smrg UINT4, 1217ec681f3Smrg IMAGE_UNIFORM_TYPE_COUNT 1227ec681f3Smrg }; 1237ec681f3Smrg 1247ec681f3Smrg int image_bindings[IMAGE_UNIFORM_TYPE_COUNT] = {-1, -1, -1}; 1257ec681f3Smrg nir_ssa_def *format_deref_dest = NULL, *order_deref_dest = NULL; 1267ec681f3Smrg 1277ec681f3Smrg nir_variable *in_var = nir_deref_instr_get_variable(context->deref); 1287ec681f3Smrg enum gl_access_qualifier access = in_var->data.access; 1297ec681f3Smrg 1307ec681f3Smrg context->metadata_index = 0; 1317ec681f3Smrg while (context->metadata->args[context->metadata_index].image.buf_ids[0] != in_var->data.binding) 1327ec681f3Smrg context->metadata_index++; 1337ec681f3Smrg 1347ec681f3Smrg context->num_buf_ids = 0; 1357ec681f3Smrg 1367ec681f3Smrg /* Do this in 2 passes: 1377ec681f3Smrg * 1. When encountering a strongly-typed access (load/store), replace the deref 1387ec681f3Smrg * with one that references an appropriately typed variable. 
When encountering 1397ec681f3Smrg * an untyped access (size query), if we have a strongly-typed variable already, 1407ec681f3Smrg * replace the deref to point to it. 1417ec681f3Smrg * 2. If there's any references left, they should all be untyped. If we found 1427ec681f3Smrg * a strongly-typed access later in the 1st pass, then just replace the reference. 1437ec681f3Smrg * If we didn't, e.g. the resource is only used for a size query, then pick an 1447ec681f3Smrg * arbitrary type for it. 1457ec681f3Smrg */ 1467ec681f3Smrg for (int pass = 0; pass < 2; ++pass) { 1477ec681f3Smrg nir_foreach_use_safe(src, &context->deref->dest.ssa) { 1487ec681f3Smrg enum image_uniform_type type; 1497ec681f3Smrg 1507ec681f3Smrg if (src->parent_instr->type == nir_instr_type_intrinsic) { 1517ec681f3Smrg nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(src->parent_instr); 1527ec681f3Smrg enum nir_alu_type dest_type; 1537ec681f3Smrg 1547ec681f3Smrg b->cursor = nir_before_instr(&intrinsic->instr); 1557ec681f3Smrg 1567ec681f3Smrg switch (intrinsic->intrinsic) { 1577ec681f3Smrg case nir_intrinsic_image_deref_load: 1587ec681f3Smrg case nir_intrinsic_image_deref_store: { 1597ec681f3Smrg dest_type = intrinsic->intrinsic == nir_intrinsic_image_deref_load ? 
1607ec681f3Smrg nir_intrinsic_dest_type(intrinsic) : nir_intrinsic_src_type(intrinsic); 1617ec681f3Smrg 1627ec681f3Smrg switch (nir_alu_type_get_base_type(dest_type)) { 1637ec681f3Smrg case nir_type_float: type = FLOAT4; break; 1647ec681f3Smrg case nir_type_int: type = INT4; break; 1657ec681f3Smrg case nir_type_uint: type = UINT4; break; 1667ec681f3Smrg default: unreachable("Unsupported image type for load."); 1677ec681f3Smrg } 1687ec681f3Smrg 1697ec681f3Smrg int image_binding = image_bindings[type]; 1707ec681f3Smrg if (image_binding < 0) { 1717ec681f3Smrg image_binding = image_bindings[type] = 1727ec681f3Smrg lower_read_write_image_deref(b, context, dest_type); 1737ec681f3Smrg } 1747ec681f3Smrg 1757ec681f3Smrg assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0); 1767ec681f3Smrg nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false); 1777ec681f3Smrg break; 1787ec681f3Smrg } 1797ec681f3Smrg 1807ec681f3Smrg case nir_intrinsic_image_deref_size: { 1817ec681f3Smrg int image_binding = -1; 1827ec681f3Smrg for (unsigned i = 0; i < IMAGE_UNIFORM_TYPE_COUNT; ++i) { 1837ec681f3Smrg if (image_bindings[i] >= 0) { 1847ec681f3Smrg image_binding = image_bindings[i]; 1857ec681f3Smrg break; 1867ec681f3Smrg } 1877ec681f3Smrg } 1887ec681f3Smrg if (image_binding < 0) { 1897ec681f3Smrg // Skip for now and come back to it 1907ec681f3Smrg if (pass == 0) 1917ec681f3Smrg break; 1927ec681f3Smrg 1937ec681f3Smrg type = FLOAT4; 1947ec681f3Smrg image_binding = image_bindings[type] = 1957ec681f3Smrg lower_read_write_image_deref(b, context, nir_type_float32); 1967ec681f3Smrg } 1977ec681f3Smrg 1987ec681f3Smrg assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0); 1997ec681f3Smrg nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false); 2007ec681f3Smrg break; 2017ec681f3Smrg } 2027ec681f3Smrg 2037ec681f3Smrg case nir_intrinsic_image_deref_format: 2047ec681f3Smrg case nir_intrinsic_image_deref_order: { 2057ec681f3Smrg nir_ssa_def **cached_deref = 
intrinsic->intrinsic == nir_intrinsic_image_deref_format ? 2067ec681f3Smrg &format_deref_dest : &order_deref_dest; 2077ec681f3Smrg if (!*cached_deref) { 2087ec681f3Smrg nir_variable *new_input = nir_variable_create(b->shader, nir_var_uniform, glsl_uint_type(), NULL); 2097ec681f3Smrg new_input->data.driver_location = in_var->data.driver_location; 2107ec681f3Smrg if (intrinsic->intrinsic == nir_intrinsic_image_deref_format) { 2117ec681f3Smrg /* Match cl_image_format { image_channel_order, image_channel_data_type }; */ 2127ec681f3Smrg new_input->data.driver_location += glsl_get_cl_size(new_input->type); 2137ec681f3Smrg } 2147ec681f3Smrg 2157ec681f3Smrg b->cursor = nir_after_instr(&context->deref->instr); 2167ec681f3Smrg *cached_deref = nir_load_var(b, new_input); 2177ec681f3Smrg } 2187ec681f3Smrg 2197ec681f3Smrg /* No actual intrinsic needed here, just reference the loaded variable */ 2207ec681f3Smrg nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, *cached_deref); 2217ec681f3Smrg nir_instr_remove(&intrinsic->instr); 2227ec681f3Smrg break; 2237ec681f3Smrg } 2247ec681f3Smrg 2257ec681f3Smrg default: 2267ec681f3Smrg unreachable("Unsupported image intrinsic"); 2277ec681f3Smrg } 2287ec681f3Smrg } else if (src->parent_instr->type == nir_instr_type_tex) { 2297ec681f3Smrg assert(in_var->data.access & ACCESS_NON_WRITEABLE); 2307ec681f3Smrg nir_tex_instr *tex = nir_instr_as_tex(src->parent_instr); 2317ec681f3Smrg 2327ec681f3Smrg switch (nir_alu_type_get_base_type(tex->dest_type)) { 2337ec681f3Smrg case nir_type_float: type = FLOAT4; break; 2347ec681f3Smrg case nir_type_int: type = INT4; break; 2357ec681f3Smrg case nir_type_uint: type = UINT4; break; 2367ec681f3Smrg default: unreachable("Unsupported image format for sample."); 2377ec681f3Smrg } 2387ec681f3Smrg 2397ec681f3Smrg int image_binding = image_bindings[type]; 2407ec681f3Smrg if (image_binding < 0) { 2417ec681f3Smrg image_binding = image_bindings[type] = 2427ec681f3Smrg lower_read_only_image_deref(b, context, 
tex->dest_type); 2437ec681f3Smrg } 2447ec681f3Smrg 2457ec681f3Smrg nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)); 2467ec681f3Smrg tex->texture_index = image_binding; 2477ec681f3Smrg } 2487ec681f3Smrg } 2497ec681f3Smrg } 2507ec681f3Smrg 2517ec681f3Smrg context->metadata->args[context->metadata_index].image.num_buf_ids = context->num_buf_ids; 2527ec681f3Smrg 2537ec681f3Smrg nir_instr_remove(&context->deref->instr); 2547ec681f3Smrg exec_node_remove(&in_var->node); 2557ec681f3Smrg} 2567ec681f3Smrg 2577ec681f3Smrgstatic void 2587ec681f3Smrgclc_lower_images(nir_shader *nir, struct clc_image_lower_context *context) 2597ec681f3Smrg{ 2607ec681f3Smrg nir_foreach_function(func, nir) { 2617ec681f3Smrg if (!func->is_entrypoint) 2627ec681f3Smrg continue; 2637ec681f3Smrg assert(func->impl); 2647ec681f3Smrg 2657ec681f3Smrg nir_builder b; 2667ec681f3Smrg nir_builder_init(&b, func->impl); 2677ec681f3Smrg 2687ec681f3Smrg nir_foreach_block(block, func->impl) { 2697ec681f3Smrg nir_foreach_instr_safe(instr, block) { 2707ec681f3Smrg if (instr->type == nir_instr_type_deref) { 2717ec681f3Smrg context->deref = nir_instr_as_deref(instr); 2727ec681f3Smrg 2737ec681f3Smrg if (glsl_type_is_image(context->deref->type)) { 2747ec681f3Smrg assert(context->deref->deref_type == nir_deref_type_var); 2757ec681f3Smrg clc_lower_input_image_deref(&b, context); 2767ec681f3Smrg } 2777ec681f3Smrg } 2787ec681f3Smrg } 2797ec681f3Smrg } 2807ec681f3Smrg } 2817ec681f3Smrg} 2827ec681f3Smrg 2837ec681f3Smrgstatic void 2847ec681f3Smrgclc_lower_64bit_semantics(nir_shader *nir) 2857ec681f3Smrg{ 2867ec681f3Smrg nir_foreach_function(func, nir) { 2877ec681f3Smrg nir_builder b; 2887ec681f3Smrg nir_builder_init(&b, func->impl); 2897ec681f3Smrg 2907ec681f3Smrg nir_foreach_block(block, func->impl) { 2917ec681f3Smrg nir_foreach_instr_safe(instr, block) { 2927ec681f3Smrg if (instr->type == nir_instr_type_intrinsic) { 2937ec681f3Smrg nir_intrinsic_instr *intrinsic = 
nir_instr_as_intrinsic(instr); 2947ec681f3Smrg switch (intrinsic->intrinsic) { 2957ec681f3Smrg case nir_intrinsic_load_global_invocation_id: 2967ec681f3Smrg case nir_intrinsic_load_global_invocation_id_zero_base: 2977ec681f3Smrg case nir_intrinsic_load_base_global_invocation_id: 2987ec681f3Smrg case nir_intrinsic_load_local_invocation_id: 2997ec681f3Smrg case nir_intrinsic_load_workgroup_id: 3007ec681f3Smrg case nir_intrinsic_load_workgroup_id_zero_base: 3017ec681f3Smrg case nir_intrinsic_load_base_workgroup_id: 3027ec681f3Smrg case nir_intrinsic_load_num_workgroups: 3037ec681f3Smrg break; 3047ec681f3Smrg default: 3057ec681f3Smrg continue; 3067ec681f3Smrg } 3077ec681f3Smrg 3087ec681f3Smrg if (nir_instr_ssa_def(instr)->bit_size != 64) 3097ec681f3Smrg continue; 3107ec681f3Smrg 3117ec681f3Smrg intrinsic->dest.ssa.bit_size = 32; 3127ec681f3Smrg b.cursor = nir_after_instr(instr); 3137ec681f3Smrg 3147ec681f3Smrg nir_ssa_def *i64 = nir_u2u64(&b, &intrinsic->dest.ssa); 3157ec681f3Smrg nir_ssa_def_rewrite_uses_after( 3167ec681f3Smrg &intrinsic->dest.ssa, 3177ec681f3Smrg i64, 3187ec681f3Smrg i64->parent_instr); 3197ec681f3Smrg } 3207ec681f3Smrg } 3217ec681f3Smrg } 3227ec681f3Smrg } 3237ec681f3Smrg} 3247ec681f3Smrg 3257ec681f3Smrgstatic void 3267ec681f3Smrgclc_lower_nonnormalized_samplers(nir_shader *nir, 3277ec681f3Smrg const dxil_wrap_sampler_state *states) 3287ec681f3Smrg{ 3297ec681f3Smrg nir_foreach_function(func, nir) { 3307ec681f3Smrg if (!func->is_entrypoint) 3317ec681f3Smrg continue; 3327ec681f3Smrg assert(func->impl); 3337ec681f3Smrg 3347ec681f3Smrg nir_builder b; 3357ec681f3Smrg nir_builder_init(&b, func->impl); 3367ec681f3Smrg 3377ec681f3Smrg nir_foreach_block(block, func->impl) { 3387ec681f3Smrg nir_foreach_instr_safe(instr, block) { 3397ec681f3Smrg if (instr->type != nir_instr_type_tex) 3407ec681f3Smrg continue; 3417ec681f3Smrg nir_tex_instr *tex = nir_instr_as_tex(instr); 3427ec681f3Smrg 3437ec681f3Smrg int sampler_src_idx = nir_tex_instr_src_index(tex, 
nir_tex_src_sampler_deref); 3447ec681f3Smrg if (sampler_src_idx == -1) 3457ec681f3Smrg continue; 3467ec681f3Smrg 3477ec681f3Smrg nir_src *sampler_src = &tex->src[sampler_src_idx].src; 3487ec681f3Smrg assert(sampler_src->is_ssa && sampler_src->ssa->parent_instr->type == nir_instr_type_deref); 3497ec681f3Smrg nir_variable *sampler = nir_deref_instr_get_variable( 3507ec681f3Smrg nir_instr_as_deref(sampler_src->ssa->parent_instr)); 3517ec681f3Smrg 3527ec681f3Smrg // If the sampler returns ints, we'll handle this in the int lowering pass 3537ec681f3Smrg if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) 3547ec681f3Smrg continue; 3557ec681f3Smrg 3567ec681f3Smrg // If sampler uses normalized coords, nothing to do 3577ec681f3Smrg if (!states[sampler->data.binding].is_nonnormalized_coords) 3587ec681f3Smrg continue; 3597ec681f3Smrg 3607ec681f3Smrg b.cursor = nir_before_instr(&tex->instr); 3617ec681f3Smrg 3627ec681f3Smrg int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); 3637ec681f3Smrg assert(coords_idx != -1); 3647ec681f3Smrg nir_ssa_def *coords = 3657ec681f3Smrg nir_ssa_for_src(&b, tex->src[coords_idx].src, tex->coord_components); 3667ec681f3Smrg 3677ec681f3Smrg nir_ssa_def *txs = nir_i2f32(&b, nir_get_texture_size(&b, tex)); 3687ec681f3Smrg 3697ec681f3Smrg // Normalize coords for tex 3707ec681f3Smrg nir_ssa_def *scale = nir_frcp(&b, txs); 3717ec681f3Smrg nir_ssa_def *comps[4]; 3727ec681f3Smrg for (unsigned i = 0; i < coords->num_components; ++i) { 3737ec681f3Smrg comps[i] = nir_channel(&b, coords, i); 3747ec681f3Smrg if (tex->is_array && i == coords->num_components - 1) { 3757ec681f3Smrg // Don't scale the array index, but do clamp it 3767ec681f3Smrg comps[i] = nir_fround_even(&b, comps[i]); 3777ec681f3Smrg comps[i] = nir_fmax(&b, comps[i], nir_imm_float(&b, 0.0f)); 3787ec681f3Smrg comps[i] = nir_fmin(&b, comps[i], nir_fsub(&b, nir_channel(&b, txs, i), nir_imm_float(&b, 1.0f))); 3797ec681f3Smrg break; 3807ec681f3Smrg } 3817ec681f3Smrg 
3827ec681f3Smrg // The CTS is pretty clear that this value has to be floored for nearest sampling 3837ec681f3Smrg // but must not be for linear sampling. 3847ec681f3Smrg if (!states[sampler->data.binding].is_linear_filtering) 3857ec681f3Smrg comps[i] = nir_fadd_imm(&b, nir_ffloor(&b, comps[i]), 0.5f); 3867ec681f3Smrg comps[i] = nir_fmul(&b, comps[i], nir_channel(&b, scale, i)); 3877ec681f3Smrg } 3887ec681f3Smrg nir_ssa_def *normalized_coords = nir_vec(&b, comps, coords->num_components); 3897ec681f3Smrg nir_instr_rewrite_src(&tex->instr, 3907ec681f3Smrg &tex->src[coords_idx].src, 3917ec681f3Smrg nir_src_for_ssa(normalized_coords)); 3927ec681f3Smrg } 3937ec681f3Smrg } 3947ec681f3Smrg } 3957ec681f3Smrg} 3967ec681f3Smrg 3977ec681f3Smrgstatic nir_variable * 3987ec681f3Smrgadd_kernel_inputs_var(struct clc_dxil_object *dxil, nir_shader *nir, 3997ec681f3Smrg unsigned *cbv_id) 4007ec681f3Smrg{ 4017ec681f3Smrg if (!dxil->kernel->num_args) 4027ec681f3Smrg return NULL; 4037ec681f3Smrg 4047ec681f3Smrg struct clc_dxil_metadata *metadata = &dxil->metadata; 4057ec681f3Smrg unsigned size = 0; 4067ec681f3Smrg 4077ec681f3Smrg nir_foreach_variable_with_modes(var, nir, nir_var_uniform) 4087ec681f3Smrg size = MAX2(size, 4097ec681f3Smrg var->data.driver_location + 4107ec681f3Smrg glsl_get_cl_size(var->type)); 4117ec681f3Smrg 4127ec681f3Smrg size = align(size, 4); 4137ec681f3Smrg 4147ec681f3Smrg const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), size / 4, 4); 4157ec681f3Smrg const struct glsl_struct_field field = { array_type, "arr" }; 4167ec681f3Smrg nir_variable *var = 4177ec681f3Smrg nir_variable_create(nir, nir_var_mem_ubo, 4187ec681f3Smrg glsl_struct_type(&field, 1, "kernel_inputs", false), 4197ec681f3Smrg "kernel_inputs"); 4207ec681f3Smrg var->data.binding = (*cbv_id)++; 4217ec681f3Smrg var->data.how_declared = nir_var_hidden; 4227ec681f3Smrg return var; 4237ec681f3Smrg} 4247ec681f3Smrg 4257ec681f3Smrgstatic nir_variable * 
4267ec681f3Smrgadd_work_properties_var(struct clc_dxil_object *dxil, 4277ec681f3Smrg struct nir_shader *nir, unsigned *cbv_id) 4287ec681f3Smrg{ 4297ec681f3Smrg struct clc_dxil_metadata *metadata = &dxil->metadata; 4307ec681f3Smrg const struct glsl_type *array_type = 4317ec681f3Smrg glsl_array_type(glsl_uint_type(), 4327ec681f3Smrg sizeof(struct clc_work_properties_data) / sizeof(unsigned), 4337ec681f3Smrg sizeof(unsigned)); 4347ec681f3Smrg const struct glsl_struct_field field = { array_type, "arr" }; 4357ec681f3Smrg nir_variable *var = 4367ec681f3Smrg nir_variable_create(nir, nir_var_mem_ubo, 4377ec681f3Smrg glsl_struct_type(&field, 1, "kernel_work_properties", false), 4387ec681f3Smrg "kernel_work_properies"); 4397ec681f3Smrg var->data.binding = (*cbv_id)++; 4407ec681f3Smrg var->data.how_declared = nir_var_hidden; 4417ec681f3Smrg return var; 4427ec681f3Smrg} 4437ec681f3Smrg 4447ec681f3Smrgstatic void 4457ec681f3Smrgclc_lower_constant_to_ssbo(nir_shader *nir, 4467ec681f3Smrg const struct clc_kernel_info *kerninfo, unsigned *uav_id) 4477ec681f3Smrg{ 4487ec681f3Smrg /* Update UBO vars and assign them a binding. */ 4497ec681f3Smrg nir_foreach_variable_with_modes(var, nir, nir_var_mem_constant) { 4507ec681f3Smrg var->data.mode = nir_var_mem_ssbo; 4517ec681f3Smrg var->data.binding = (*uav_id)++; 4527ec681f3Smrg } 4537ec681f3Smrg 4547ec681f3Smrg /* And finally patch all the derefs referincing the constant 4557ec681f3Smrg * variables/pointers. 
4567ec681f3Smrg */ 4577ec681f3Smrg nir_foreach_function(func, nir) { 4587ec681f3Smrg if (!func->is_entrypoint) 4597ec681f3Smrg continue; 4607ec681f3Smrg 4617ec681f3Smrg assert(func->impl); 4627ec681f3Smrg 4637ec681f3Smrg nir_builder b; 4647ec681f3Smrg nir_builder_init(&b, func->impl); 4657ec681f3Smrg 4667ec681f3Smrg nir_foreach_block(block, func->impl) { 4677ec681f3Smrg nir_foreach_instr(instr, block) { 4687ec681f3Smrg if (instr->type != nir_instr_type_deref) 4697ec681f3Smrg continue; 4707ec681f3Smrg 4717ec681f3Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 4727ec681f3Smrg 4737ec681f3Smrg if (deref->modes != nir_var_mem_constant) 4747ec681f3Smrg continue; 4757ec681f3Smrg 4767ec681f3Smrg deref->modes = nir_var_mem_ssbo; 4777ec681f3Smrg } 4787ec681f3Smrg } 4797ec681f3Smrg } 4807ec681f3Smrg} 4817ec681f3Smrg 4827ec681f3Smrgstatic void 4837ec681f3Smrgclc_lower_global_to_ssbo(nir_shader *nir) 4847ec681f3Smrg{ 4857ec681f3Smrg nir_foreach_function(func, nir) { 4867ec681f3Smrg if (!func->is_entrypoint) 4877ec681f3Smrg continue; 4887ec681f3Smrg 4897ec681f3Smrg assert(func->impl); 4907ec681f3Smrg 4917ec681f3Smrg nir_foreach_block(block, func->impl) { 4927ec681f3Smrg nir_foreach_instr(instr, block) { 4937ec681f3Smrg if (instr->type != nir_instr_type_deref) 4947ec681f3Smrg continue; 4957ec681f3Smrg 4967ec681f3Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 4977ec681f3Smrg 4987ec681f3Smrg if (deref->modes != nir_var_mem_global) 4997ec681f3Smrg continue; 5007ec681f3Smrg 5017ec681f3Smrg deref->modes = nir_var_mem_ssbo; 5027ec681f3Smrg } 5037ec681f3Smrg } 5047ec681f3Smrg } 5057ec681f3Smrg} 5067ec681f3Smrg 5077ec681f3Smrgstatic void 5087ec681f3Smrgcopy_const_initializer(const nir_constant *constant, const struct glsl_type *type, 5097ec681f3Smrg uint8_t *data) 5107ec681f3Smrg{ 5117ec681f3Smrg unsigned size = glsl_get_cl_size(type); 5127ec681f3Smrg 5137ec681f3Smrg if (glsl_type_is_array(type)) { 5147ec681f3Smrg const struct glsl_type *elm_type = 
glsl_get_array_element(type); 5157ec681f3Smrg unsigned step_size = glsl_get_explicit_stride(type); 5167ec681f3Smrg 5177ec681f3Smrg for (unsigned i = 0; i < constant->num_elements; i++) { 5187ec681f3Smrg copy_const_initializer(constant->elements[i], elm_type, 5197ec681f3Smrg data + (i * step_size)); 5207ec681f3Smrg } 5217ec681f3Smrg } else if (glsl_type_is_struct(type)) { 5227ec681f3Smrg for (unsigned i = 0; i < constant->num_elements; i++) { 5237ec681f3Smrg const struct glsl_type *elm_type = glsl_get_struct_field(type, i); 5247ec681f3Smrg int offset = glsl_get_struct_field_offset(type, i); 5257ec681f3Smrg copy_const_initializer(constant->elements[i], elm_type, data + offset); 5267ec681f3Smrg } 5277ec681f3Smrg } else { 5287ec681f3Smrg assert(glsl_type_is_vector_or_scalar(type)); 5297ec681f3Smrg 5307ec681f3Smrg for (unsigned i = 0; i < glsl_get_components(type); i++) { 5317ec681f3Smrg switch (glsl_get_bit_size(type)) { 5327ec681f3Smrg case 64: 5337ec681f3Smrg *((uint64_t *)data) = constant->values[i].u64; 5347ec681f3Smrg break; 5357ec681f3Smrg case 32: 5367ec681f3Smrg *((uint32_t *)data) = constant->values[i].u32; 5377ec681f3Smrg break; 5387ec681f3Smrg case 16: 5397ec681f3Smrg *((uint16_t *)data) = constant->values[i].u16; 5407ec681f3Smrg break; 5417ec681f3Smrg case 8: 5427ec681f3Smrg *((uint8_t *)data) = constant->values[i].u8; 5437ec681f3Smrg break; 5447ec681f3Smrg default: 5457ec681f3Smrg unreachable("Invalid base type"); 5467ec681f3Smrg } 5477ec681f3Smrg 5487ec681f3Smrg data += glsl_get_bit_size(type) / 8; 5497ec681f3Smrg } 5507ec681f3Smrg } 5517ec681f3Smrg} 5527ec681f3Smrg 5537ec681f3Smrgstatic const struct glsl_type * 5547ec681f3Smrgget_cast_type(unsigned bit_size) 5557ec681f3Smrg{ 5567ec681f3Smrg switch (bit_size) { 5577ec681f3Smrg case 64: 5587ec681f3Smrg return glsl_int64_t_type(); 5597ec681f3Smrg case 32: 5607ec681f3Smrg return glsl_int_type(); 5617ec681f3Smrg case 16: 5627ec681f3Smrg return glsl_int16_t_type(); 5637ec681f3Smrg case 8: 5647ec681f3Smrg 
return glsl_int8_t_type(); 5657ec681f3Smrg } 5667ec681f3Smrg unreachable("Invalid bit_size"); 5677ec681f3Smrg} 5687ec681f3Smrg 5697ec681f3Smrgstatic void 5707ec681f3Smrgsplit_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment) 5717ec681f3Smrg{ 5727ec681f3Smrg enum gl_access_qualifier access = nir_intrinsic_access(intrin); 5737ec681f3Smrg nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8]; 5747ec681f3Smrg unsigned comp_size = intrin->dest.ssa.bit_size / 8; 5757ec681f3Smrg unsigned num_comps = intrin->dest.ssa.num_components; 5767ec681f3Smrg 5777ec681f3Smrg b->cursor = nir_before_instr(&intrin->instr); 5787ec681f3Smrg 5797ec681f3Smrg nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]); 5807ec681f3Smrg 5817ec681f3Smrg const struct glsl_type *cast_type = get_cast_type(alignment * 8); 5827ec681f3Smrg nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment); 5837ec681f3Smrg 5847ec681f3Smrg unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment); 5857ec681f3Smrg for (unsigned i = 0; i < num_loads; ++i) { 5867ec681f3Smrg nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size)); 5877ec681f3Smrg srcs[i] = nir_load_deref_with_access(b, elem, access); 5887ec681f3Smrg } 5897ec681f3Smrg 5907ec681f3Smrg nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size); 5917ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest); 5927ec681f3Smrg nir_instr_remove(&intrin->instr); 5937ec681f3Smrg} 5947ec681f3Smrg 5957ec681f3Smrgstatic void 5967ec681f3Smrgsplit_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment) 5977ec681f3Smrg{ 5987ec681f3Smrg enum gl_access_qualifier access = nir_intrinsic_access(intrin); 5997ec681f3Smrg 6007ec681f3Smrg assert(intrin->src[1].is_ssa); 6017ec681f3Smrg nir_ssa_def *value = intrin->src[1].ssa; 
6027ec681f3Smrg unsigned comp_size = value->bit_size / 8; 6037ec681f3Smrg unsigned num_comps = value->num_components; 6047ec681f3Smrg 6057ec681f3Smrg b->cursor = nir_before_instr(&intrin->instr); 6067ec681f3Smrg 6077ec681f3Smrg nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]); 6087ec681f3Smrg 6097ec681f3Smrg const struct glsl_type *cast_type = get_cast_type(alignment * 8); 6107ec681f3Smrg nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment); 6117ec681f3Smrg 6127ec681f3Smrg unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment); 6137ec681f3Smrg for (unsigned i = 0; i < num_stores; ++i) { 6147ec681f3Smrg nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8); 6157ec681f3Smrg nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size)); 6167ec681f3Smrg nir_store_deref_with_access(b, elem, substore_val, ~0, access); 6177ec681f3Smrg } 6187ec681f3Smrg 6197ec681f3Smrg nir_instr_remove(&intrin->instr); 6207ec681f3Smrg} 6217ec681f3Smrg 6227ec681f3Smrgstatic bool 6237ec681f3Smrgsplit_unaligned_loads_stores(nir_shader *shader) 6247ec681f3Smrg{ 6257ec681f3Smrg bool progress = false; 6267ec681f3Smrg 6277ec681f3Smrg nir_foreach_function(function, shader) { 6287ec681f3Smrg if (!function->impl) 6297ec681f3Smrg continue; 6307ec681f3Smrg 6317ec681f3Smrg nir_builder b; 6327ec681f3Smrg nir_builder_init(&b, function->impl); 6337ec681f3Smrg 6347ec681f3Smrg nir_foreach_block(block, function->impl) { 6357ec681f3Smrg nir_foreach_instr_safe(instr, block) { 6367ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 6377ec681f3Smrg continue; 6387ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 6397ec681f3Smrg if (intrin->intrinsic != nir_intrinsic_load_deref && 6407ec681f3Smrg intrin->intrinsic != nir_intrinsic_store_deref) 6417ec681f3Smrg continue; 6427ec681f3Smrg nir_deref_instr *deref = 
nir_src_as_deref(intrin->src[0]); 6437ec681f3Smrg 6447ec681f3Smrg unsigned align_mul = 0, align_offset = 0; 6457ec681f3Smrg nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset); 6467ec681f3Smrg 6477ec681f3Smrg unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; 6487ec681f3Smrg 6497ec681f3Smrg /* We can load anything at 4-byte alignment, except for 6507ec681f3Smrg * UBOs (AKA CBs where the granularity is 16 bytes). 6517ec681f3Smrg */ 6527ec681f3Smrg if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4)) 6537ec681f3Smrg continue; 6547ec681f3Smrg 6557ec681f3Smrg nir_ssa_def *val; 6567ec681f3Smrg if (intrin->intrinsic == nir_intrinsic_load_deref) { 6577ec681f3Smrg assert(intrin->dest.is_ssa); 6587ec681f3Smrg val = &intrin->dest.ssa; 6597ec681f3Smrg } else { 6607ec681f3Smrg assert(intrin->src[1].is_ssa); 6617ec681f3Smrg val = intrin->src[1].ssa; 6627ec681f3Smrg } 6637ec681f3Smrg 6647ec681f3Smrg unsigned natural_alignment = 6657ec681f3Smrg val->bit_size / 8 * 6667ec681f3Smrg (val->num_components == 3 ? 
4 : val->num_components); 6677ec681f3Smrg 6687ec681f3Smrg if (alignment >= natural_alignment) 6697ec681f3Smrg continue; 6707ec681f3Smrg 6717ec681f3Smrg if (intrin->intrinsic == nir_intrinsic_load_deref) 6727ec681f3Smrg split_unaligned_load(&b, intrin, alignment); 6737ec681f3Smrg else 6747ec681f3Smrg split_unaligned_store(&b, intrin, alignment); 6757ec681f3Smrg progress = true; 6767ec681f3Smrg } 6777ec681f3Smrg } 6787ec681f3Smrg } 6797ec681f3Smrg 6807ec681f3Smrg return progress; 6817ec681f3Smrg} 6827ec681f3Smrg 6837ec681f3Smrgstatic enum pipe_tex_wrap 6847ec681f3Smrgwrap_from_cl_addressing(unsigned addressing_mode) 6857ec681f3Smrg{ 6867ec681f3Smrg switch (addressing_mode) 6877ec681f3Smrg { 6887ec681f3Smrg default: 6897ec681f3Smrg case SAMPLER_ADDRESSING_MODE_NONE: 6907ec681f3Smrg case SAMPLER_ADDRESSING_MODE_CLAMP: 6917ec681f3Smrg // Since OpenCL's only border color is 0's and D3D specs out-of-bounds loads to return 0, don't apply any wrap mode 6927ec681f3Smrg return (enum pipe_tex_wrap)-1; 6937ec681f3Smrg case SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: return PIPE_TEX_WRAP_CLAMP_TO_EDGE; 6947ec681f3Smrg case SAMPLER_ADDRESSING_MODE_REPEAT: return PIPE_TEX_WRAP_REPEAT; 6957ec681f3Smrg case SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED: return PIPE_TEX_WRAP_MIRROR_REPEAT; 6967ec681f3Smrg } 6977ec681f3Smrg} 6987ec681f3Smrg 6997ec681f3Smrgstatic bool shader_has_double(nir_shader *nir) 7007ec681f3Smrg{ 7017ec681f3Smrg bool progress = false; 7027ec681f3Smrg 7037ec681f3Smrg foreach_list_typed(nir_function, func, node, &nir->functions) { 7047ec681f3Smrg if (!func->is_entrypoint) 7057ec681f3Smrg continue; 7067ec681f3Smrg 7077ec681f3Smrg assert(func->impl); 7087ec681f3Smrg 7097ec681f3Smrg nir_foreach_block(block, func->impl) { 7107ec681f3Smrg nir_foreach_instr_safe(instr, block) { 7117ec681f3Smrg if (instr->type != nir_instr_type_alu) 7127ec681f3Smrg continue; 7137ec681f3Smrg 7147ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 7157ec681f3Smrg const nir_op_info *info = 
&nir_op_infos[alu->op]; 7167ec681f3Smrg 7177ec681f3Smrg if (info->output_type & nir_type_float && 7187ec681f3Smrg nir_dest_bit_size(alu->dest.dest) == 64) 7197ec681f3Smrg return true; 7207ec681f3Smrg } 7217ec681f3Smrg } 7227ec681f3Smrg } 7237ec681f3Smrg 7247ec681f3Smrg return false; 7257ec681f3Smrg} 7267ec681f3Smrg 7277ec681f3Smrgstatic bool 7287ec681f3Smrgscale_fdiv(nir_shader *nir) 7297ec681f3Smrg{ 7307ec681f3Smrg bool progress = false; 7317ec681f3Smrg nir_foreach_function(func, nir) { 7327ec681f3Smrg if (!func->impl) 7337ec681f3Smrg continue; 7347ec681f3Smrg nir_builder b; 7357ec681f3Smrg nir_builder_init(&b, func->impl); 7367ec681f3Smrg nir_foreach_block(block, func->impl) { 7377ec681f3Smrg nir_foreach_instr(instr, block) { 7387ec681f3Smrg if (instr->type != nir_instr_type_alu) 7397ec681f3Smrg continue; 7407ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 7417ec681f3Smrg if (alu->op != nir_op_fdiv || alu->src[0].src.ssa->bit_size != 32) 7427ec681f3Smrg continue; 7437ec681f3Smrg 7447ec681f3Smrg b.cursor = nir_before_instr(instr); 7457ec681f3Smrg nir_ssa_def *fabs = nir_fabs(&b, alu->src[1].src.ssa); 7467ec681f3Smrg nir_ssa_def *big = nir_flt(&b, nir_imm_int(&b, 0x7e800000), fabs); 7477ec681f3Smrg nir_ssa_def *small = nir_flt(&b, fabs, nir_imm_int(&b, 0x00800000)); 7487ec681f3Smrg 7497ec681f3Smrg nir_ssa_def *scaled_down_a = nir_fmul_imm(&b, alu->src[0].src.ssa, 0.25); 7507ec681f3Smrg nir_ssa_def *scaled_down_b = nir_fmul_imm(&b, alu->src[1].src.ssa, 0.25); 7517ec681f3Smrg nir_ssa_def *scaled_up_a = nir_fmul_imm(&b, alu->src[0].src.ssa, 16777216.0); 7527ec681f3Smrg nir_ssa_def *scaled_up_b = nir_fmul_imm(&b, alu->src[1].src.ssa, 16777216.0); 7537ec681f3Smrg 7547ec681f3Smrg nir_ssa_def *final_a = 7557ec681f3Smrg nir_bcsel(&b, big, scaled_down_a, 7567ec681f3Smrg (nir_bcsel(&b, small, scaled_up_a, alu->src[0].src.ssa))); 7577ec681f3Smrg nir_ssa_def *final_b = 7587ec681f3Smrg nir_bcsel(&b, big, scaled_down_b, 7597ec681f3Smrg (nir_bcsel(&b, small, 
scaled_up_b, alu->src[1].src.ssa))); 7607ec681f3Smrg 7617ec681f3Smrg nir_instr_rewrite_src(instr, &alu->src[0].src, nir_src_for_ssa(final_a)); 7627ec681f3Smrg nir_instr_rewrite_src(instr, &alu->src[1].src, nir_src_for_ssa(final_b)); 7637ec681f3Smrg progress = true; 7647ec681f3Smrg } 7657ec681f3Smrg } 7667ec681f3Smrg } 7677ec681f3Smrg return progress; 7687ec681f3Smrg} 7697ec681f3Smrg 7707ec681f3Smrgstruct clc_libclc * 7717ec681f3Smrgclc_libclc_new_dxil(const struct clc_logger *logger, 7727ec681f3Smrg const struct clc_libclc_dxil_options *options) 7737ec681f3Smrg{ 7747ec681f3Smrg struct clc_libclc_options clc_options = { 7757ec681f3Smrg .optimize = options->optimize, 7767ec681f3Smrg .nir_options = dxil_get_nir_compiler_options(), 7777ec681f3Smrg }; 7787ec681f3Smrg 7797ec681f3Smrg return clc_libclc_new(logger, &clc_options); 7807ec681f3Smrg} 7817ec681f3Smrg 7827ec681f3Smrgbool 7837ec681f3Smrgclc_spirv_to_dxil(struct clc_libclc *lib, 7847ec681f3Smrg const struct clc_binary *linked_spirv, 7857ec681f3Smrg const struct clc_parsed_spirv *parsed_data, 7867ec681f3Smrg const char *entrypoint, 7877ec681f3Smrg const struct clc_runtime_kernel_conf *conf, 7887ec681f3Smrg const struct clc_spirv_specialization_consts *consts, 7897ec681f3Smrg const struct clc_logger *logger, 7907ec681f3Smrg struct clc_dxil_object *out_dxil) 7917ec681f3Smrg{ 7927ec681f3Smrg struct nir_shader *nir; 7937ec681f3Smrg 7947ec681f3Smrg for (unsigned i = 0; i < parsed_data->num_kernels; i++) { 7957ec681f3Smrg if (!strcmp(parsed_data->kernels[i].name, entrypoint)) { 7967ec681f3Smrg out_dxil->kernel = &parsed_data->kernels[i]; 7977ec681f3Smrg break; 7987ec681f3Smrg } 7997ec681f3Smrg } 8007ec681f3Smrg 8017ec681f3Smrg if (!out_dxil->kernel) { 8027ec681f3Smrg clc_error(logger, "no '%s' kernel found", entrypoint); 8037ec681f3Smrg return false; 8047ec681f3Smrg } 8057ec681f3Smrg 8067ec681f3Smrg const struct spirv_to_nir_options spirv_options = { 8077ec681f3Smrg .environment = NIR_SPIRV_OPENCL, 8087ec681f3Smrg 
.clc_shader = clc_libclc_get_clc_shader(lib), 8097ec681f3Smrg .constant_addr_format = nir_address_format_32bit_index_offset_pack64, 8107ec681f3Smrg .global_addr_format = nir_address_format_32bit_index_offset_pack64, 8117ec681f3Smrg .shared_addr_format = nir_address_format_32bit_offset_as_64bit, 8127ec681f3Smrg .temp_addr_format = nir_address_format_32bit_offset_as_64bit, 8137ec681f3Smrg .float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32, 8147ec681f3Smrg .caps = { 8157ec681f3Smrg .address = true, 8167ec681f3Smrg .float64 = true, 8177ec681f3Smrg .int8 = true, 8187ec681f3Smrg .int16 = true, 8197ec681f3Smrg .int64 = true, 8207ec681f3Smrg .kernel = true, 8217ec681f3Smrg .kernel_image = true, 8227ec681f3Smrg .kernel_image_read_write = true, 8237ec681f3Smrg .literal_sampler = true, 8247ec681f3Smrg .printf = true, 8257ec681f3Smrg }, 8267ec681f3Smrg }; 8277ec681f3Smrg nir_shader_compiler_options nir_options = 8287ec681f3Smrg *dxil_get_nir_compiler_options(); 8297ec681f3Smrg 8307ec681f3Smrg if (conf && conf->lower_bit_size & 64) { 8317ec681f3Smrg nir_options.lower_pack_64_2x32_split = false; 8327ec681f3Smrg nir_options.lower_unpack_64_2x32_split = false; 8337ec681f3Smrg nir_options.lower_int64_options = ~0; 8347ec681f3Smrg } 8357ec681f3Smrg 8367ec681f3Smrg if (conf && conf->lower_bit_size & 16) 8377ec681f3Smrg nir_options.support_16bit_alu = true; 8387ec681f3Smrg 8397ec681f3Smrg glsl_type_singleton_init_or_ref(); 8407ec681f3Smrg 8417ec681f3Smrg nir = spirv_to_nir(linked_spirv->data, linked_spirv->size / 4, 8427ec681f3Smrg consts ? (struct nir_spirv_specialization *)consts->specializations : NULL, 8437ec681f3Smrg consts ? 
consts->num_specializations : 0, 8447ec681f3Smrg MESA_SHADER_KERNEL, entrypoint, 8457ec681f3Smrg &spirv_options, 8467ec681f3Smrg &nir_options); 8477ec681f3Smrg if (!nir) { 8487ec681f3Smrg clc_error(logger, "spirv_to_nir() failed"); 8497ec681f3Smrg goto err_free_dxil; 8507ec681f3Smrg } 8517ec681f3Smrg nir->info.workgroup_size_variable = true; 8527ec681f3Smrg 8537ec681f3Smrg NIR_PASS_V(nir, nir_lower_goto_ifs); 8547ec681f3Smrg NIR_PASS_V(nir, nir_opt_dead_cf); 8557ec681f3Smrg 8567ec681f3Smrg struct clc_dxil_metadata *metadata = &out_dxil->metadata; 8577ec681f3Smrg 8587ec681f3Smrg metadata->args = calloc(out_dxil->kernel->num_args, 8597ec681f3Smrg sizeof(*metadata->args)); 8607ec681f3Smrg if (!metadata->args) { 8617ec681f3Smrg clc_error(logger, "failed to allocate arg positions"); 8627ec681f3Smrg goto err_free_dxil; 8637ec681f3Smrg } 8647ec681f3Smrg 8657ec681f3Smrg { 8667ec681f3Smrg bool progress; 8677ec681f3Smrg do 8687ec681f3Smrg { 8697ec681f3Smrg progress = false; 8707ec681f3Smrg NIR_PASS(progress, nir, nir_copy_prop); 8717ec681f3Smrg NIR_PASS(progress, nir, nir_opt_copy_prop_vars); 8727ec681f3Smrg NIR_PASS(progress, nir, nir_opt_deref); 8737ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dce); 8747ec681f3Smrg NIR_PASS(progress, nir, nir_opt_undef); 8757ec681f3Smrg NIR_PASS(progress, nir, nir_opt_constant_folding); 8767ec681f3Smrg NIR_PASS(progress, nir, nir_opt_cse); 8777ec681f3Smrg NIR_PASS(progress, nir, nir_lower_vars_to_ssa); 8787ec681f3Smrg NIR_PASS(progress, nir, nir_opt_algebraic); 8797ec681f3Smrg } while (progress); 8807ec681f3Smrg } 8817ec681f3Smrg 8827ec681f3Smrg // Inline all functions first. 
8837ec681f3Smrg // according to the comment on nir_inline_functions 8847ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); 8857ec681f3Smrg NIR_PASS_V(nir, nir_lower_returns); 8867ec681f3Smrg NIR_PASS_V(nir, nir_lower_libclc, clc_libclc_get_clc_shader(lib)); 8877ec681f3Smrg NIR_PASS_V(nir, nir_inline_functions); 8887ec681f3Smrg 8897ec681f3Smrg // Pick off the single entrypoint that we want. 8907ec681f3Smrg foreach_list_typed_safe(nir_function, func, node, &nir->functions) { 8917ec681f3Smrg if (!func->is_entrypoint) 8927ec681f3Smrg exec_node_remove(&func->node); 8937ec681f3Smrg } 8947ec681f3Smrg assert(exec_list_length(&nir->functions) == 1); 8957ec681f3Smrg 8967ec681f3Smrg { 8977ec681f3Smrg bool progress; 8987ec681f3Smrg do 8997ec681f3Smrg { 9007ec681f3Smrg progress = false; 9017ec681f3Smrg NIR_PASS(progress, nir, nir_copy_prop); 9027ec681f3Smrg NIR_PASS(progress, nir, nir_opt_copy_prop_vars); 9037ec681f3Smrg NIR_PASS(progress, nir, nir_opt_deref); 9047ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dce); 9057ec681f3Smrg NIR_PASS(progress, nir, nir_opt_undef); 9067ec681f3Smrg NIR_PASS(progress, nir, nir_opt_constant_folding); 9077ec681f3Smrg NIR_PASS(progress, nir, nir_opt_cse); 9087ec681f3Smrg NIR_PASS(progress, nir, nir_split_var_copies); 9097ec681f3Smrg NIR_PASS(progress, nir, nir_lower_var_copies); 9107ec681f3Smrg NIR_PASS(progress, nir, nir_lower_vars_to_ssa); 9117ec681f3Smrg NIR_PASS(progress, nir, nir_opt_algebraic); 9127ec681f3Smrg NIR_PASS(progress, nir, nir_opt_if, true); 9137ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dead_cf); 9147ec681f3Smrg NIR_PASS(progress, nir, nir_opt_remove_phis); 9157ec681f3Smrg NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); 9167ec681f3Smrg NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform); 9177ec681f3Smrg } while (progress); 9187ec681f3Smrg } 9197ec681f3Smrg 9207ec681f3Smrg NIR_PASS_V(nir, scale_fdiv); 9217ec681f3Smrg 9227ec681f3Smrg 
dxil_wrap_sampler_state int_sampler_states[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { {{0}} }; 9237ec681f3Smrg unsigned sampler_id = 0; 9247ec681f3Smrg 9257ec681f3Smrg struct exec_list inline_samplers_list; 9267ec681f3Smrg exec_list_make_empty(&inline_samplers_list); 9277ec681f3Smrg 9287ec681f3Smrg // Move inline samplers to the end of the uniforms list 9297ec681f3Smrg nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) { 9307ec681f3Smrg if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) { 9317ec681f3Smrg exec_node_remove(&var->node); 9327ec681f3Smrg exec_list_push_tail(&inline_samplers_list, &var->node); 9337ec681f3Smrg } 9347ec681f3Smrg } 9357ec681f3Smrg exec_node_insert_list_after(exec_list_get_tail(&nir->variables), &inline_samplers_list); 9367ec681f3Smrg 9377ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_function_temp | nir_var_shader_temp)); 9387ec681f3Smrg 9397ec681f3Smrg // Lower memcpy 9407ec681f3Smrg NIR_PASS_V(nir, dxil_nir_lower_memcpy_deref); 9417ec681f3Smrg 9427ec681f3Smrg // Ensure the printf struct has explicit types, but we'll throw away the scratch size, because we haven't 9437ec681f3Smrg // necessarily removed all temp variables (e.g. 
the printf struct itself) at this point, so we'll rerun this later 9447ec681f3Smrg assert(nir->scratch_size == 0); 9457ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align); 9467ec681f3Smrg 9477ec681f3Smrg nir_lower_printf_options printf_options = { 9487ec681f3Smrg .treat_doubles_as_floats = true, 9497ec681f3Smrg .max_buffer_size = 1024 * 1024 9507ec681f3Smrg }; 9517ec681f3Smrg NIR_PASS_V(nir, nir_lower_printf, &printf_options); 9527ec681f3Smrg 9537ec681f3Smrg metadata->printf.info_count = nir->printf_info_count; 9547ec681f3Smrg metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct clc_printf_info)); 9557ec681f3Smrg for (unsigned i = 0; i < nir->printf_info_count; i++) { 9567ec681f3Smrg metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size); 9577ec681f3Smrg memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, nir->printf_info[i].string_size); 9587ec681f3Smrg metadata->printf.infos[i].num_args = nir->printf_info[i].num_args; 9597ec681f3Smrg metadata->printf.infos[i].arg_sizes = malloc(nir->printf_info[i].num_args * sizeof(unsigned)); 9607ec681f3Smrg memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned)); 9617ec681f3Smrg } 9627ec681f3Smrg 9637ec681f3Smrg // copy propagate to prepare for lower_explicit_io 9647ec681f3Smrg NIR_PASS_V(nir, nir_split_var_copies); 9657ec681f3Smrg NIR_PASS_V(nir, nir_opt_copy_prop_vars); 9667ec681f3Smrg NIR_PASS_V(nir, nir_lower_var_copies); 9677ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_ssa); 9687ec681f3Smrg NIR_PASS_V(nir, nir_lower_alu); 9697ec681f3Smrg NIR_PASS_V(nir, nir_opt_dce); 9707ec681f3Smrg NIR_PASS_V(nir, nir_opt_deref); 9717ec681f3Smrg 9727ec681f3Smrg // For uniforms (kernel inputs), run this before adjusting variable list via image/sampler lowering 9737ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, 
glsl_get_cl_type_size_align); 9747ec681f3Smrg 9757ec681f3Smrg // Calculate input offsets/metadata. 9767ec681f3Smrg unsigned uav_id = 0; 9777ec681f3Smrg nir_foreach_variable_with_modes(var, nir, nir_var_uniform) { 9787ec681f3Smrg int i = var->data.location; 9797ec681f3Smrg if (i < 0) 9807ec681f3Smrg continue; 9817ec681f3Smrg 9827ec681f3Smrg unsigned size = glsl_get_cl_size(var->type); 9837ec681f3Smrg 9847ec681f3Smrg metadata->args[i].offset = var->data.driver_location; 9857ec681f3Smrg metadata->args[i].size = size; 9867ec681f3Smrg metadata->kernel_inputs_buf_size = MAX2(metadata->kernel_inputs_buf_size, 9877ec681f3Smrg var->data.driver_location + size); 9887ec681f3Smrg if ((out_dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL || 9897ec681f3Smrg out_dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) && 9907ec681f3Smrg // Ignore images during this pass - global memory buffers need to have contiguous bindings 9917ec681f3Smrg !glsl_type_is_image(var->type)) { 9927ec681f3Smrg metadata->args[i].globconstptr.buf_id = uav_id++; 9937ec681f3Smrg } else if (glsl_type_is_sampler(var->type)) { 9947ec681f3Smrg unsigned address_mode = conf ? conf->args[i].sampler.addressing_mode : 0u; 9957ec681f3Smrg int_sampler_states[sampler_id].wrap[0] = 9967ec681f3Smrg int_sampler_states[sampler_id].wrap[1] = 9977ec681f3Smrg int_sampler_states[sampler_id].wrap[2] = wrap_from_cl_addressing(address_mode); 9987ec681f3Smrg int_sampler_states[sampler_id].is_nonnormalized_coords = 9997ec681f3Smrg conf ? !conf->args[i].sampler.normalized_coords : 0; 10007ec681f3Smrg int_sampler_states[sampler_id].is_linear_filtering = 10017ec681f3Smrg conf ? 
conf->args[i].sampler.linear_filtering : 0; 10027ec681f3Smrg metadata->args[i].sampler.sampler_id = var->data.binding = sampler_id++; 10037ec681f3Smrg } 10047ec681f3Smrg } 10057ec681f3Smrg 10067ec681f3Smrg unsigned num_global_inputs = uav_id; 10077ec681f3Smrg 10087ec681f3Smrg // Second pass over inputs to calculate image bindings 10097ec681f3Smrg unsigned srv_id = 0; 10107ec681f3Smrg nir_foreach_variable_with_modes(var, nir, nir_var_uniform) { 10117ec681f3Smrg int i = var->data.location; 10127ec681f3Smrg if (i < 0) 10137ec681f3Smrg continue; 10147ec681f3Smrg 10157ec681f3Smrg if (glsl_type_is_image(var->type)) { 10167ec681f3Smrg if (var->data.access == ACCESS_NON_WRITEABLE) { 10177ec681f3Smrg metadata->args[i].image.buf_ids[0] = srv_id++; 10187ec681f3Smrg } else { 10197ec681f3Smrg // Write or read-write are UAVs 10207ec681f3Smrg metadata->args[i].image.buf_ids[0] = uav_id++; 10217ec681f3Smrg } 10227ec681f3Smrg 10237ec681f3Smrg metadata->args[i].image.num_buf_ids = 1; 10247ec681f3Smrg var->data.binding = metadata->args[i].image.buf_ids[0]; 10257ec681f3Smrg } 10267ec681f3Smrg } 10277ec681f3Smrg 10287ec681f3Smrg // Before removing dead uniforms, dedupe constant samplers to make more dead uniforms 10297ec681f3Smrg NIR_PASS_V(nir, clc_nir_dedupe_const_samplers); 10307ec681f3Smrg NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | nir_var_mem_constant | nir_var_function_temp, NULL); 10317ec681f3Smrg 10327ec681f3Smrg // Fill out inline sampler metadata, now that they've been deduped and dead ones removed 10337ec681f3Smrg nir_foreach_variable_with_modes(var, nir, nir_var_uniform) { 10347ec681f3Smrg if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) { 10357ec681f3Smrg int_sampler_states[sampler_id].wrap[0] = 10367ec681f3Smrg int_sampler_states[sampler_id].wrap[1] = 10377ec681f3Smrg int_sampler_states[sampler_id].wrap[2] = 10387ec681f3Smrg wrap_from_cl_addressing(var->data.sampler.addressing_mode); 10397ec681f3Smrg 
int_sampler_states[sampler_id].is_nonnormalized_coords = 10407ec681f3Smrg !var->data.sampler.normalized_coordinates; 10417ec681f3Smrg int_sampler_states[sampler_id].is_linear_filtering = 10427ec681f3Smrg var->data.sampler.filter_mode == SAMPLER_FILTER_MODE_LINEAR; 10437ec681f3Smrg var->data.binding = sampler_id++; 10447ec681f3Smrg 10457ec681f3Smrg assert(metadata->num_const_samplers < CLC_MAX_SAMPLERS); 10467ec681f3Smrg metadata->const_samplers[metadata->num_const_samplers].sampler_id = var->data.binding; 10477ec681f3Smrg metadata->const_samplers[metadata->num_const_samplers].addressing_mode = var->data.sampler.addressing_mode; 10487ec681f3Smrg metadata->const_samplers[metadata->num_const_samplers].normalized_coords = var->data.sampler.normalized_coordinates; 10497ec681f3Smrg metadata->const_samplers[metadata->num_const_samplers].filter_mode = var->data.sampler.filter_mode; 10507ec681f3Smrg metadata->num_const_samplers++; 10517ec681f3Smrg } 10527ec681f3Smrg } 10537ec681f3Smrg 10547ec681f3Smrg // Needs to come before lower_explicit_io 10557ec681f3Smrg NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false); 10567ec681f3Smrg struct clc_image_lower_context image_lower_context = { metadata, &srv_id, &uav_id }; 10577ec681f3Smrg NIR_PASS_V(nir, clc_lower_images, &image_lower_context); 10587ec681f3Smrg NIR_PASS_V(nir, clc_lower_nonnormalized_samplers, int_sampler_states); 10597ec681f3Smrg NIR_PASS_V(nir, nir_lower_samplers); 10607ec681f3Smrg NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex, 10617ec681f3Smrg int_sampler_states, NULL, 14.0f); 10627ec681f3Smrg 10637ec681f3Smrg NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL); 10647ec681f3Smrg 10657ec681f3Smrg nir->scratch_size = 0; 10667ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, 10677ec681f3Smrg nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant, 10687ec681f3Smrg glsl_get_cl_type_size_align); 10697ec681f3Smrg 
10707ec681f3Smrg NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp); 10717ec681f3Smrg NIR_PASS_V(nir, clc_lower_constant_to_ssbo, out_dxil->kernel, &uav_id); 10727ec681f3Smrg NIR_PASS_V(nir, clc_lower_global_to_ssbo); 10737ec681f3Smrg 10747ec681f3Smrg bool has_printf = false; 10757ec681f3Smrg NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id); 10767ec681f3Smrg metadata->printf.uav_id = has_printf ? uav_id++ : -1; 10777ec681f3Smrg 10787ec681f3Smrg NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo); 10797ec681f3Smrg 10807ec681f3Smrg NIR_PASS_V(nir, split_unaligned_loads_stores); 10817ec681f3Smrg 10827ec681f3Smrg assert(nir->info.cs.ptr_size == 64); 10837ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo, 10847ec681f3Smrg nir_address_format_32bit_index_offset_pack64); 10857ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, 10867ec681f3Smrg nir_var_mem_shared | nir_var_function_temp | nir_var_uniform, 10877ec681f3Smrg nir_address_format_32bit_offset_as_64bit); 10887ec681f3Smrg 10897ec681f3Smrg NIR_PASS_V(nir, nir_lower_system_values); 10907ec681f3Smrg 10917ec681f3Smrg nir_lower_compute_system_values_options compute_options = { 10927ec681f3Smrg .has_base_global_invocation_id = (conf && conf->support_global_work_id_offsets), 10937ec681f3Smrg .has_base_workgroup_id = (conf && conf->support_workgroup_id_offsets), 10947ec681f3Smrg }; 10957ec681f3Smrg NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_options); 10967ec681f3Smrg 10977ec681f3Smrg NIR_PASS_V(nir, clc_lower_64bit_semantics); 10987ec681f3Smrg 10997ec681f3Smrg NIR_PASS_V(nir, nir_opt_deref); 11007ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_ssa); 11017ec681f3Smrg 11027ec681f3Smrg unsigned cbv_id = 0; 11037ec681f3Smrg 11047ec681f3Smrg nir_variable *inputs_var = 11057ec681f3Smrg add_kernel_inputs_var(out_dxil, nir, &cbv_id); 11067ec681f3Smrg nir_variable *work_properties_var = 11077ec681f3Smrg add_work_properties_var(out_dxil, nir, &cbv_id); 11087ec681f3Smrg 11097ec681f3Smrg 
memcpy(metadata->local_size, nir->info.workgroup_size, 11107ec681f3Smrg sizeof(metadata->local_size)); 11117ec681f3Smrg memcpy(metadata->local_size_hint, nir->info.cs.workgroup_size_hint, 11127ec681f3Smrg sizeof(metadata->local_size)); 11137ec681f3Smrg 11147ec681f3Smrg // Patch the localsize before calling clc_nir_lower_system_values(). 11157ec681f3Smrg if (conf) { 11167ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) { 11177ec681f3Smrg if (!conf->local_size[i] || 11187ec681f3Smrg conf->local_size[i] == nir->info.workgroup_size[i]) 11197ec681f3Smrg continue; 11207ec681f3Smrg 11217ec681f3Smrg if (nir->info.workgroup_size[i] && 11227ec681f3Smrg nir->info.workgroup_size[i] != conf->local_size[i]) { 11237ec681f3Smrg debug_printf("D3D12: runtime local size does not match reqd_work_group_size() values\n"); 11247ec681f3Smrg goto err_free_dxil; 11257ec681f3Smrg } 11267ec681f3Smrg 11277ec681f3Smrg nir->info.workgroup_size[i] = conf->local_size[i]; 11287ec681f3Smrg } 11297ec681f3Smrg memcpy(metadata->local_size, nir->info.workgroup_size, 11307ec681f3Smrg sizeof(metadata->local_size)); 11317ec681f3Smrg } else { 11327ec681f3Smrg /* Make sure there's at least one thread that's set to run */ 11337ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) { 11347ec681f3Smrg if (nir->info.workgroup_size[i] == 0) 11357ec681f3Smrg nir->info.workgroup_size[i] = 1; 11367ec681f3Smrg } 11377ec681f3Smrg } 11387ec681f3Smrg 11397ec681f3Smrg NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var); 11407ec681f3Smrg NIR_PASS_V(nir, split_unaligned_loads_stores); 11417ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo, 11427ec681f3Smrg nir_address_format_32bit_index_offset); 11437ec681f3Smrg NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var); 11447ec681f3Smrg NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil); 11457ec681f3Smrg NIR_PASS_V(nir, dxil_nir_opt_alu_deref_srcs); 11467ec681f3Smrg NIR_PASS_V(nir, 
dxil_nir_lower_atomics_to_dxil); 11477ec681f3Smrg NIR_PASS_V(nir, nir_lower_fp16_casts); 11487ec681f3Smrg NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL); 11497ec681f3Smrg 11507ec681f3Smrg // Convert pack to pack_split 11517ec681f3Smrg NIR_PASS_V(nir, nir_lower_pack); 11527ec681f3Smrg // Lower pack_split to bit math 11537ec681f3Smrg NIR_PASS_V(nir, nir_opt_algebraic); 11547ec681f3Smrg 11557ec681f3Smrg NIR_PASS_V(nir, nir_opt_dce); 11567ec681f3Smrg 11577ec681f3Smrg nir_validate_shader(nir, "Validate before feeding NIR to the DXIL compiler"); 11587ec681f3Smrg struct nir_to_dxil_options opts = { 11597ec681f3Smrg .interpolate_at_vertex = false, 11607ec681f3Smrg .lower_int16 = (conf && (conf->lower_bit_size & 16) != 0), 11617ec681f3Smrg .ubo_binding_offset = 0, 11627ec681f3Smrg .disable_math_refactoring = true, 11637ec681f3Smrg .num_kernel_globals = num_global_inputs, 11647ec681f3Smrg }; 11657ec681f3Smrg 11667ec681f3Smrg for (unsigned i = 0; i < out_dxil->kernel->num_args; i++) { 11677ec681f3Smrg if (out_dxil->kernel->args[i].address_qualifier != CLC_KERNEL_ARG_ADDRESS_LOCAL) 11687ec681f3Smrg continue; 11697ec681f3Smrg 11707ec681f3Smrg /* If we don't have the runtime conf yet, we just create a dummy variable. 11717ec681f3Smrg * This will be adjusted when clc_spirv_to_dxil() is called with a conf 11727ec681f3Smrg * argument. 11737ec681f3Smrg */ 11747ec681f3Smrg unsigned size = 4; 11757ec681f3Smrg if (conf && conf->args) 11767ec681f3Smrg size = conf->args[i].localptr.size; 11777ec681f3Smrg 11787ec681f3Smrg /* The alignment required for the pointee type is not easy to get from 11797ec681f3Smrg * here, so let's base our logic on the size itself. Anything bigger than 11807ec681f3Smrg * the maximum alignment constraint (which is 128 bytes, since ulong16 or 11817ec681f3Smrg * doubl16 size are the biggest base types) should be aligned on this 11827ec681f3Smrg * maximum alignment constraint. 
For smaller types, we use the size 11837ec681f3Smrg * itself to calculate the alignment. 11847ec681f3Smrg */ 11857ec681f3Smrg unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128; 11867ec681f3Smrg 11877ec681f3Smrg nir->info.shared_size = align(nir->info.shared_size, alignment); 11887ec681f3Smrg metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size; 11897ec681f3Smrg nir->info.shared_size += size; 11907ec681f3Smrg } 11917ec681f3Smrg 11927ec681f3Smrg metadata->local_mem_size = nir->info.shared_size; 11937ec681f3Smrg metadata->priv_mem_size = nir->scratch_size; 11947ec681f3Smrg 11957ec681f3Smrg /* DXIL double math is too limited compared to what NIR expects. Let's refuse 11967ec681f3Smrg * to compile a shader when it contains double operations until we have 11977ec681f3Smrg * double lowering hooked up. 11987ec681f3Smrg */ 11997ec681f3Smrg if (shader_has_double(nir)) { 12007ec681f3Smrg clc_error(logger, "NIR shader contains doubles, which we don't support yet"); 12017ec681f3Smrg goto err_free_dxil; 12027ec681f3Smrg } 12037ec681f3Smrg 12047ec681f3Smrg struct blob tmp; 12057ec681f3Smrg if (!nir_to_dxil(nir, &opts, &tmp)) { 12067ec681f3Smrg debug_printf("D3D12: nir_to_dxil failed\n"); 12077ec681f3Smrg goto err_free_dxil; 12087ec681f3Smrg } 12097ec681f3Smrg 12107ec681f3Smrg nir_foreach_variable_with_modes(var, nir, nir_var_mem_ssbo) { 12117ec681f3Smrg if (var->constant_initializer) { 12127ec681f3Smrg if (glsl_type_is_array(var->type)) { 12137ec681f3Smrg int size = align(glsl_get_cl_size(var->type), 4); 12147ec681f3Smrg uint8_t *data = malloc(size); 12157ec681f3Smrg if (!data) 12167ec681f3Smrg goto err_free_dxil; 12177ec681f3Smrg 12187ec681f3Smrg copy_const_initializer(var->constant_initializer, var->type, data); 12197ec681f3Smrg metadata->consts[metadata->num_consts].data = data; 12207ec681f3Smrg metadata->consts[metadata->num_consts].size = size; 12217ec681f3Smrg metadata->consts[metadata->num_consts].uav_id = var->data.binding; 12227ec681f3Smrg 
metadata->num_consts++; 12237ec681f3Smrg } else 12247ec681f3Smrg unreachable("unexpected constant initializer"); 12257ec681f3Smrg } 12267ec681f3Smrg } 12277ec681f3Smrg 12287ec681f3Smrg metadata->kernel_inputs_cbv_id = inputs_var ? inputs_var->data.binding : 0; 12297ec681f3Smrg metadata->work_properties_cbv_id = work_properties_var->data.binding; 12307ec681f3Smrg metadata->num_uavs = uav_id; 12317ec681f3Smrg metadata->num_srvs = srv_id; 12327ec681f3Smrg metadata->num_samplers = sampler_id; 12337ec681f3Smrg 12347ec681f3Smrg ralloc_free(nir); 12357ec681f3Smrg glsl_type_singleton_decref(); 12367ec681f3Smrg 12377ec681f3Smrg blob_finish_get_buffer(&tmp, &out_dxil->binary.data, 12387ec681f3Smrg &out_dxil->binary.size); 12397ec681f3Smrg return true; 12407ec681f3Smrg 12417ec681f3Smrgerr_free_dxil: 12427ec681f3Smrg clc_free_dxil_object(out_dxil); 12437ec681f3Smrg return false; 12447ec681f3Smrg} 12457ec681f3Smrg 12467ec681f3Smrgvoid clc_free_dxil_object(struct clc_dxil_object *dxil) 12477ec681f3Smrg{ 12487ec681f3Smrg for (unsigned i = 0; i < dxil->metadata.num_consts; i++) 12497ec681f3Smrg free(dxil->metadata.consts[i].data); 12507ec681f3Smrg 12517ec681f3Smrg for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) { 12527ec681f3Smrg free(dxil->metadata.printf.infos[i].arg_sizes); 12537ec681f3Smrg free(dxil->metadata.printf.infos[i].str); 12547ec681f3Smrg } 12557ec681f3Smrg free(dxil->metadata.printf.infos); 12567ec681f3Smrg 12577ec681f3Smrg free(dxil->binary.data); 12587ec681f3Smrg} 12597ec681f3Smrg 12607ec681f3Smrguint64_t clc_compiler_get_version() 12617ec681f3Smrg{ 12627ec681f3Smrg const char sha1[] = MESA_GIT_SHA1; 12637ec681f3Smrg const char* dash = strchr(sha1, '-'); 12647ec681f3Smrg if (dash) { 12657ec681f3Smrg return strtoull(dash + 1, NULL, 16); 12667ec681f3Smrg } 12677ec681f3Smrg return 0; 12687ec681f3Smrg} 1269