17ec681f3Smrg// 27ec681f3Smrg// Copyright 2019 Karol Herbst 37ec681f3Smrg// 47ec681f3Smrg// Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg// copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg// to deal in the Software without restriction, including without limitation 77ec681f3Smrg// the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg// and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg// Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg// 117ec681f3Smrg// The above copyright notice and this permission notice shall be included in 127ec681f3Smrg// all copies or substantial portions of the Software. 137ec681f3Smrg// 147ec681f3Smrg// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 157ec681f3Smrg// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 167ec681f3Smrg// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 177ec681f3Smrg// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 187ec681f3Smrg// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 197ec681f3Smrg// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 207ec681f3Smrg// OTHER DEALINGS IN THE SOFTWARE. 217ec681f3Smrg// 227ec681f3Smrg 237ec681f3Smrg#include "invocation.hpp" 247ec681f3Smrg 257ec681f3Smrg#include <tuple> 267ec681f3Smrg 277ec681f3Smrg#include "core/device.hpp" 287ec681f3Smrg#include "core/error.hpp" 297ec681f3Smrg#include "core/binary.hpp" 307ec681f3Smrg#include "pipe/p_state.h" 317ec681f3Smrg#include "util/algorithm.hpp" 327ec681f3Smrg#include "util/functional.hpp" 337ec681f3Smrg 347ec681f3Smrg#include <compiler/glsl_types.h> 357ec681f3Smrg#include <compiler/nir/nir_builder.h> 367ec681f3Smrg#include <compiler/nir/nir_serialize.h> 377ec681f3Smrg#include <compiler/spirv/nir_spirv.h> 387ec681f3Smrg#include <util/u_math.h> 397ec681f3Smrg 407ec681f3Smrgusing namespace clover; 417ec681f3Smrg 427ec681f3Smrg#ifdef HAVE_CLOVER_SPIRV 437ec681f3Smrg 447ec681f3Smrg// Refs and unrefs the glsl_type_singleton. 457ec681f3Smrgstatic class glsl_type_ref { 467ec681f3Smrgpublic: 477ec681f3Smrg glsl_type_ref() { 487ec681f3Smrg glsl_type_singleton_init_or_ref(); 497ec681f3Smrg } 507ec681f3Smrg 517ec681f3Smrg ~glsl_type_ref() { 527ec681f3Smrg glsl_type_singleton_decref(); 537ec681f3Smrg } 547ec681f3Smrg} glsl_type_ref; 557ec681f3Smrg 567ec681f3Smrgstatic const nir_shader_compiler_options * 577ec681f3Smrgdev_get_nir_compiler_options(const device &dev) 587ec681f3Smrg{ 597ec681f3Smrg const void *co = dev.get_compiler_options(PIPE_SHADER_IR_NIR); 607ec681f3Smrg return static_cast<const nir_shader_compiler_options*>(co); 617ec681f3Smrg} 627ec681f3Smrg 637ec681f3Smrgstatic void debug_function(void *private_data, 647ec681f3Smrg enum nir_spirv_debug_level level, size_t spirv_offset, 657ec681f3Smrg const char *message) 667ec681f3Smrg{ 677ec681f3Smrg assert(private_data); 687ec681f3Smrg auto r_log = reinterpret_cast<std::string *>(private_data); 697ec681f3Smrg *r_log += message; 707ec681f3Smrg} 717ec681f3Smrg 727ec681f3Smrgstatic void 737ec681f3Smrgclover_arg_size_align(const glsl_type *type, unsigned *size, unsigned *align) 747ec681f3Smrg{ 757ec681f3Smrg if (type == glsl_type::sampler_type) { 767ec681f3Smrg *size = 0; 777ec681f3Smrg *align = 1; 787ec681f3Smrg } else if (type->is_image()) { 797ec681f3Smrg *size = *align = sizeof(cl_mem); 807ec681f3Smrg } else { 817ec681f3Smrg *size = type->cl_size(); 827ec681f3Smrg *align = type->cl_alignment(); 837ec681f3Smrg } 847ec681f3Smrg} 857ec681f3Smrg 867ec681f3Smrgstatic bool 877ec681f3Smrgclover_nir_lower_images(nir_shader *shader) 887ec681f3Smrg{ 897ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(shader); 907ec681f3Smrg 917ec681f3Smrg ASSERTED int last_loc = -1; 927ec681f3Smrg int num_rd_images = 0, num_wr_images = 0, num_samplers = 0; 937ec681f3Smrg nir_foreach_uniform_variable(var, shader) { 947ec681f3Smrg if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) { 957ec681f3Smrg /* Assume they come in order */ 967ec681f3Smrg assert(var->data.location > last_loc); 977ec681f3Smrg last_loc = var->data.location; 987ec681f3Smrg } 997ec681f3Smrg 1007ec681f3Smrg /* TODO: Constant samplers */ 1017ec681f3Smrg if (var->type == glsl_bare_sampler_type()) { 1027ec681f3Smrg var->data.driver_location = num_samplers++; 1037ec681f3Smrg } else if (glsl_type_is_image(var->type)) { 1047ec681f3Smrg if (var->data.access & ACCESS_NON_WRITEABLE) 1057ec681f3Smrg var->data.driver_location = num_rd_images++; 1067ec681f3Smrg else 1077ec681f3Smrg var->data.driver_location = num_wr_images++; 1087ec681f3Smrg } else { 1097ec681f3Smrg /* CL shouldn't have any sampled images */ 1107ec681f3Smrg assert(!glsl_type_is_sampler(var->type)); 1117ec681f3Smrg } 1127ec681f3Smrg } 1137ec681f3Smrg shader->info.num_textures = num_rd_images; 1147ec681f3Smrg BITSET_ZERO(shader->info.textures_used); 1157ec681f3Smrg if (num_rd_images) 1167ec681f3Smrg BITSET_SET_RANGE_INSIDE_WORD(shader->info.textures_used, 0, num_rd_images - 1); 1177ec681f3Smrg shader->info.num_images = num_wr_images; 1187ec681f3Smrg 1197ec681f3Smrg nir_builder b; 1207ec681f3Smrg nir_builder_init(&b, impl); 1217ec681f3Smrg 1227ec681f3Smrg bool progress = false; 1237ec681f3Smrg nir_foreach_block_reverse(block, impl) { 1247ec681f3Smrg nir_foreach_instr_reverse_safe(instr, block) { 1257ec681f3Smrg switch (instr->type) { 1267ec681f3Smrg case nir_instr_type_deref: { 1277ec681f3Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 1287ec681f3Smrg if (deref->deref_type != nir_deref_type_var) 1297ec681f3Smrg break; 1307ec681f3Smrg 1317ec681f3Smrg if (!glsl_type_is_image(deref->type) && 1327ec681f3Smrg !glsl_type_is_sampler(deref->type)) 1337ec681f3Smrg break; 1347ec681f3Smrg 1357ec681f3Smrg b.cursor = nir_instr_remove(&deref->instr); 1367ec681f3Smrg nir_ssa_def *loc = 1377ec681f3Smrg nir_imm_intN_t(&b, deref->var->data.driver_location, 1387ec681f3Smrg deref->dest.ssa.bit_size); 1397ec681f3Smrg nir_ssa_def_rewrite_uses(&deref->dest.ssa, loc); 1407ec681f3Smrg progress = true; 1417ec681f3Smrg break; 1427ec681f3Smrg } 1437ec681f3Smrg 1447ec681f3Smrg case nir_instr_type_tex: { 1457ec681f3Smrg nir_tex_instr *tex = nir_instr_as_tex(instr); 1467ec681f3Smrg unsigned count = 0; 1477ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) { 1487ec681f3Smrg if (tex->src[i].src_type == nir_tex_src_texture_deref || 1497ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_deref) { 1507ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(tex->src[i].src); 1517ec681f3Smrg if (deref->deref_type == nir_deref_type_var) { 1527ec681f3Smrg /* In this case, we know the actual variable */ 1537ec681f3Smrg if (tex->src[i].src_type == nir_tex_src_texture_deref) 1547ec681f3Smrg tex->texture_index = deref->var->data.driver_location; 1557ec681f3Smrg else 1567ec681f3Smrg tex->sampler_index = deref->var->data.driver_location; 1577ec681f3Smrg /* This source gets discarded */ 1587ec681f3Smrg nir_instr_rewrite_src(&tex->instr, &tex->src[i].src, 1597ec681f3Smrg NIR_SRC_INIT); 1607ec681f3Smrg continue; 1617ec681f3Smrg } else { 1627ec681f3Smrg assert(tex->src[i].src.is_ssa); 1637ec681f3Smrg b.cursor = nir_before_instr(&tex->instr); 1647ec681f3Smrg /* Back-ends expect a 32-bit thing, not 64-bit */ 1657ec681f3Smrg nir_ssa_def *offset = nir_u2u32(&b, tex->src[i].src.ssa); 1667ec681f3Smrg if (tex->src[i].src_type == nir_tex_src_texture_deref) 1677ec681f3Smrg tex->src[count].src_type = nir_tex_src_texture_offset; 1687ec681f3Smrg else 1697ec681f3Smrg tex->src[count].src_type = nir_tex_src_sampler_offset; 1707ec681f3Smrg nir_instr_rewrite_src(&tex->instr, &tex->src[count].src, 1717ec681f3Smrg nir_src_for_ssa(offset)); 1727ec681f3Smrg } 1737ec681f3Smrg } else { 1747ec681f3Smrg /* If we've removed a source, move this one down */ 1757ec681f3Smrg if (count != i) { 1767ec681f3Smrg assert(count < i); 1777ec681f3Smrg tex->src[count].src_type = tex->src[i].src_type; 1787ec681f3Smrg nir_instr_move_src(&tex->instr, &tex->src[count].src, 1797ec681f3Smrg &tex->src[i].src); 1807ec681f3Smrg } 1817ec681f3Smrg } 1827ec681f3Smrg count++; 1837ec681f3Smrg } 1847ec681f3Smrg tex->num_srcs = count; 1857ec681f3Smrg progress = true; 1867ec681f3Smrg break; 1877ec681f3Smrg } 1887ec681f3Smrg 1897ec681f3Smrg case nir_instr_type_intrinsic: { 1907ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1917ec681f3Smrg switch (intrin->intrinsic) { 1927ec681f3Smrg case nir_intrinsic_image_deref_load: 1937ec681f3Smrg case nir_intrinsic_image_deref_store: 1947ec681f3Smrg case nir_intrinsic_image_deref_atomic_add: 1957ec681f3Smrg case nir_intrinsic_image_deref_atomic_imin: 1967ec681f3Smrg case nir_intrinsic_image_deref_atomic_umin: 1977ec681f3Smrg case nir_intrinsic_image_deref_atomic_imax: 1987ec681f3Smrg case nir_intrinsic_image_deref_atomic_umax: 1997ec681f3Smrg case nir_intrinsic_image_deref_atomic_and: 2007ec681f3Smrg case nir_intrinsic_image_deref_atomic_or: 2017ec681f3Smrg case nir_intrinsic_image_deref_atomic_xor: 2027ec681f3Smrg case nir_intrinsic_image_deref_atomic_exchange: 2037ec681f3Smrg case nir_intrinsic_image_deref_atomic_comp_swap: 2047ec681f3Smrg case nir_intrinsic_image_deref_atomic_fadd: 2057ec681f3Smrg case nir_intrinsic_image_deref_atomic_inc_wrap: 2067ec681f3Smrg case nir_intrinsic_image_deref_atomic_dec_wrap: 2077ec681f3Smrg case nir_intrinsic_image_deref_size: 2087ec681f3Smrg case nir_intrinsic_image_deref_samples: { 2097ec681f3Smrg assert(intrin->src[0].is_ssa); 2107ec681f3Smrg b.cursor = nir_before_instr(&intrin->instr); 2117ec681f3Smrg /* Back-ends expect a 32-bit thing, not 64-bit */ 2127ec681f3Smrg nir_ssa_def *offset = nir_u2u32(&b, intrin->src[0].ssa); 2137ec681f3Smrg nir_rewrite_image_intrinsic(intrin, offset, false); 2147ec681f3Smrg progress = true; 2157ec681f3Smrg break; 2167ec681f3Smrg } 2177ec681f3Smrg 2187ec681f3Smrg default: 2197ec681f3Smrg break; 2207ec681f3Smrg } 2217ec681f3Smrg break; 2227ec681f3Smrg } 2237ec681f3Smrg 2247ec681f3Smrg default: 2257ec681f3Smrg break; 2267ec681f3Smrg } 2277ec681f3Smrg } 2287ec681f3Smrg } 2297ec681f3Smrg 2307ec681f3Smrg if (progress) { 2317ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_block_index | 2327ec681f3Smrg nir_metadata_dominance); 2337ec681f3Smrg } else { 2347ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 2357ec681f3Smrg } 2367ec681f3Smrg 2377ec681f3Smrg return progress; 2387ec681f3Smrg} 2397ec681f3Smrg 2407ec681f3Smrgstruct clover_lower_nir_state { 2417ec681f3Smrg std::vector<binary::argument> &args; 2427ec681f3Smrg uint32_t global_dims; 2437ec681f3Smrg nir_variable *constant_var; 2447ec681f3Smrg nir_variable *printf_buffer; 2457ec681f3Smrg nir_variable *offset_vars[3]; 2467ec681f3Smrg}; 2477ec681f3Smrg 2487ec681f3Smrgstatic bool 2497ec681f3Smrgclover_lower_nir_filter(const nir_instr *instr, const void *) 2507ec681f3Smrg{ 2517ec681f3Smrg return instr->type == nir_instr_type_intrinsic; 2527ec681f3Smrg} 2537ec681f3Smrg 2547ec681f3Smrgstatic nir_ssa_def * 2557ec681f3Smrgclover_lower_nir_instr(nir_builder *b, nir_instr *instr, void *_state) 2567ec681f3Smrg{ 2577ec681f3Smrg clover_lower_nir_state *state = reinterpret_cast<clover_lower_nir_state*>(_state); 2587ec681f3Smrg nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); 2597ec681f3Smrg 2607ec681f3Smrg switch (intrinsic->intrinsic) { 2617ec681f3Smrg case nir_intrinsic_load_printf_buffer_address: { 2627ec681f3Smrg if (!state->printf_buffer) { 2637ec681f3Smrg unsigned location = state->args.size(); 2647ec681f3Smrg state->args.emplace_back(binary::argument::global, sizeof(size_t), 2657ec681f3Smrg 8, 8, binary::argument::zero_ext, 2667ec681f3Smrg binary::argument::printf_buffer); 2677ec681f3Smrg 2687ec681f3Smrg const glsl_type *type = glsl_uint64_t_type(); 2697ec681f3Smrg state->printf_buffer = nir_variable_create(b->shader, nir_var_uniform, 2707ec681f3Smrg type, "global_printf_buffer"); 2717ec681f3Smrg state->printf_buffer->data.location = location; 2727ec681f3Smrg } 2737ec681f3Smrg return nir_load_var(b, state->printf_buffer); 2747ec681f3Smrg } 2757ec681f3Smrg case nir_intrinsic_load_base_global_invocation_id: { 2767ec681f3Smrg nir_ssa_def *loads[3]; 2777ec681f3Smrg 2787ec681f3Smrg /* create variables if we didn't do so alrady */ 2797ec681f3Smrg if (!state->offset_vars[0]) { 2807ec681f3Smrg /* TODO: fix for 64 bit */ 2817ec681f3Smrg /* Even though we only place one scalar argument, clover will bind up to 2827ec681f3Smrg * three 32 bit values 2837ec681f3Smrg */ 2847ec681f3Smrg unsigned location = state->args.size(); 2857ec681f3Smrg state->args.emplace_back(binary::argument::scalar, 4, 4, 4, 2867ec681f3Smrg binary::argument::zero_ext, 2877ec681f3Smrg binary::argument::grid_offset); 2887ec681f3Smrg 2897ec681f3Smrg const glsl_type *type = glsl_uint_type(); 2907ec681f3Smrg for (uint32_t i = 0; i < 3; i++) { 2917ec681f3Smrg state->offset_vars[i] = 2927ec681f3Smrg nir_variable_create(b->shader, nir_var_uniform, type, 2937ec681f3Smrg "global_invocation_id_offsets"); 2947ec681f3Smrg state->offset_vars[i]->data.location = location + i; 2957ec681f3Smrg } 2967ec681f3Smrg } 2977ec681f3Smrg 2987ec681f3Smrg for (int i = 0; i < 3; i++) { 2997ec681f3Smrg nir_variable *var = state->offset_vars[i]; 3007ec681f3Smrg loads[i] = var ? nir_load_var(b, var) : nir_imm_int(b, 0); 3017ec681f3Smrg } 3027ec681f3Smrg 3037ec681f3Smrg return nir_u2u(b, nir_vec(b, loads, state->global_dims), 3047ec681f3Smrg nir_dest_bit_size(intrinsic->dest)); 3057ec681f3Smrg } 3067ec681f3Smrg case nir_intrinsic_load_constant_base_ptr: { 3077ec681f3Smrg return nir_load_var(b, state->constant_var); 3087ec681f3Smrg } 3097ec681f3Smrg 3107ec681f3Smrg default: 3117ec681f3Smrg return NULL; 3127ec681f3Smrg } 3137ec681f3Smrg} 3147ec681f3Smrg 3157ec681f3Smrgstatic bool 3167ec681f3Smrgclover_lower_nir(nir_shader *nir, std::vector<binary::argument> &args, 3177ec681f3Smrg uint32_t dims, uint32_t pointer_bit_size) 3187ec681f3Smrg{ 3197ec681f3Smrg nir_variable *constant_var = NULL; 3207ec681f3Smrg if (nir->constant_data_size) { 3217ec681f3Smrg const glsl_type *type = pointer_bit_size == 64 ? glsl_uint64_t_type() : glsl_uint_type(); 3227ec681f3Smrg 3237ec681f3Smrg constant_var = nir_variable_create(nir, nir_var_uniform, type, 3247ec681f3Smrg "constant_buffer_addr"); 3257ec681f3Smrg constant_var->data.location = args.size(); 3267ec681f3Smrg 3277ec681f3Smrg args.emplace_back(binary::argument::global, sizeof(cl_mem), 3287ec681f3Smrg pointer_bit_size / 8, pointer_bit_size / 8, 3297ec681f3Smrg binary::argument::zero_ext, 3307ec681f3Smrg binary::argument::constant_buffer); 3317ec681f3Smrg } 3327ec681f3Smrg 3337ec681f3Smrg clover_lower_nir_state state = { args, dims, constant_var }; 3347ec681f3Smrg return nir_shader_lower_instructions(nir, 3357ec681f3Smrg clover_lower_nir_filter, clover_lower_nir_instr, &state); 3367ec681f3Smrg} 3377ec681f3Smrg 3387ec681f3Smrgstatic spirv_to_nir_options 3397ec681f3Smrgcreate_spirv_options(const device &dev, std::string &r_log) 3407ec681f3Smrg{ 3417ec681f3Smrg struct spirv_to_nir_options spirv_options = {}; 3427ec681f3Smrg spirv_options.environment = NIR_SPIRV_OPENCL; 3437ec681f3Smrg if (dev.address_bits() == 32u) { 3447ec681f3Smrg spirv_options.shared_addr_format = nir_address_format_32bit_offset; 3457ec681f3Smrg spirv_options.global_addr_format = nir_address_format_32bit_global; 3467ec681f3Smrg spirv_options.temp_addr_format = nir_address_format_32bit_offset; 3477ec681f3Smrg spirv_options.constant_addr_format = nir_address_format_32bit_global; 3487ec681f3Smrg } else { 3497ec681f3Smrg spirv_options.shared_addr_format = nir_address_format_32bit_offset_as_64bit; 3507ec681f3Smrg spirv_options.global_addr_format = nir_address_format_64bit_global; 3517ec681f3Smrg spirv_options.temp_addr_format = nir_address_format_32bit_offset_as_64bit; 3527ec681f3Smrg spirv_options.constant_addr_format = nir_address_format_64bit_global; 3537ec681f3Smrg } 3547ec681f3Smrg spirv_options.caps.address = true; 3557ec681f3Smrg spirv_options.caps.float64 = true; 3567ec681f3Smrg spirv_options.caps.int8 = true; 3577ec681f3Smrg spirv_options.caps.int16 = true; 3587ec681f3Smrg spirv_options.caps.int64 = true; 3597ec681f3Smrg spirv_options.caps.kernel = true; 3607ec681f3Smrg spirv_options.caps.kernel_image = dev.image_support(); 3617ec681f3Smrg spirv_options.caps.int64_atomics = dev.has_int64_atomics(); 3627ec681f3Smrg spirv_options.debug.func = &debug_function; 3637ec681f3Smrg spirv_options.debug.private_data = &r_log; 3647ec681f3Smrg spirv_options.caps.printf = true; 3657ec681f3Smrg return spirv_options; 3667ec681f3Smrg} 3677ec681f3Smrg 3687ec681f3Smrgstruct disk_cache *clover::nir::create_clc_disk_cache(void) 3697ec681f3Smrg{ 3707ec681f3Smrg struct mesa_sha1 ctx; 3717ec681f3Smrg unsigned char sha1[20]; 3727ec681f3Smrg char cache_id[20 * 2 + 1]; 3737ec681f3Smrg _mesa_sha1_init(&ctx); 3747ec681f3Smrg 3757ec681f3Smrg if (!disk_cache_get_function_identifier((void *)clover::nir::create_clc_disk_cache, &ctx)) 3767ec681f3Smrg return NULL; 3777ec681f3Smrg 3787ec681f3Smrg _mesa_sha1_final(&ctx, sha1); 3797ec681f3Smrg 3807ec681f3Smrg disk_cache_format_hex_id(cache_id, sha1, 20 * 2); 3817ec681f3Smrg return disk_cache_create("clover-clc", cache_id, 0); 3827ec681f3Smrg} 3837ec681f3Smrg 3847ec681f3Smrgvoid clover::nir::check_for_libclc(const device &dev) 3857ec681f3Smrg{ 3867ec681f3Smrg if (!nir_can_find_libclc(dev.address_bits())) 3877ec681f3Smrg throw error(CL_COMPILER_NOT_AVAILABLE); 3887ec681f3Smrg} 3897ec681f3Smrg 3907ec681f3Smrgnir_shader *clover::nir::load_libclc_nir(const device &dev, std::string &r_log) 3917ec681f3Smrg{ 3927ec681f3Smrg spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log); 3937ec681f3Smrg auto *compiler_options = dev_get_nir_compiler_options(dev); 3947ec681f3Smrg 3957ec681f3Smrg return nir_load_libclc_shader(dev.address_bits(), dev.clc_cache, 3967ec681f3Smrg &spirv_options, compiler_options); 3977ec681f3Smrg} 3987ec681f3Smrg 3997ec681f3Smrgstatic bool 4007ec681f3Smrgcan_remove_var(nir_variable *var, void *data) 4017ec681f3Smrg{ 4027ec681f3Smrg return !(var->type->is_sampler() || var->type->is_image()); 4037ec681f3Smrg} 4047ec681f3Smrg 4057ec681f3Smrgbinary clover::nir::spirv_to_nir(const binary &mod, const device &dev, 4067ec681f3Smrg std::string &r_log) 4077ec681f3Smrg{ 4087ec681f3Smrg spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log); 4097ec681f3Smrg std::shared_ptr<nir_shader> nir = dev.clc_nir; 4107ec681f3Smrg spirv_options.clc_shader = nir.get(); 4117ec681f3Smrg 4127ec681f3Smrg binary b; 4137ec681f3Smrg // We only insert one section. 4147ec681f3Smrg assert(mod.secs.size() == 1); 4157ec681f3Smrg auto §ion = mod.secs[0]; 4167ec681f3Smrg 4177ec681f3Smrg binary::resource_id section_id = 0; 4187ec681f3Smrg for (const auto &sym : mod.syms) { 4197ec681f3Smrg assert(sym.section == 0); 4207ec681f3Smrg 4217ec681f3Smrg const auto *binary = 4227ec681f3Smrg reinterpret_cast<const pipe_binary_program_header *>(section.data.data()); 4237ec681f3Smrg const uint32_t *data = reinterpret_cast<const uint32_t *>(binary->blob); 4247ec681f3Smrg const size_t num_words = binary->num_bytes / 4; 4257ec681f3Smrg const char *name = sym.name.c_str(); 4267ec681f3Smrg auto *compiler_options = dev_get_nir_compiler_options(dev); 4277ec681f3Smrg 4287ec681f3Smrg nir_shader *nir = spirv_to_nir(data, num_words, nullptr, 0, 4297ec681f3Smrg MESA_SHADER_KERNEL, name, 4307ec681f3Smrg &spirv_options, compiler_options); 4317ec681f3Smrg if (!nir) { 4327ec681f3Smrg r_log += "Translation from SPIR-V to NIR for kernel \"" + sym.name + 4337ec681f3Smrg "\" failed.\n"; 4347ec681f3Smrg throw build_error(); 4357ec681f3Smrg } 4367ec681f3Smrg 4377ec681f3Smrg nir->info.workgroup_size_variable = sym.reqd_work_group_size[0] == 0; 4387ec681f3Smrg nir->info.workgroup_size[0] = sym.reqd_work_group_size[0]; 4397ec681f3Smrg nir->info.workgroup_size[1] = sym.reqd_work_group_size[1]; 4407ec681f3Smrg nir->info.workgroup_size[2] = sym.reqd_work_group_size[2]; 4417ec681f3Smrg nir_validate_shader(nir, "clover"); 4427ec681f3Smrg 4437ec681f3Smrg // Inline all functions first. 4447ec681f3Smrg // according to the comment on nir_inline_functions 4457ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); 4467ec681f3Smrg NIR_PASS_V(nir, nir_lower_returns); 4477ec681f3Smrg NIR_PASS_V(nir, nir_lower_libclc, spirv_options.clc_shader); 4487ec681f3Smrg 4497ec681f3Smrg NIR_PASS_V(nir, nir_inline_functions); 4507ec681f3Smrg NIR_PASS_V(nir, nir_copy_prop); 4517ec681f3Smrg NIR_PASS_V(nir, nir_opt_deref); 4527ec681f3Smrg 4537ec681f3Smrg // Pick off the single entrypoint that we want. 4547ec681f3Smrg foreach_list_typed_safe(nir_function, func, node, &nir->functions) { 4557ec681f3Smrg if (!func->is_entrypoint) 4567ec681f3Smrg exec_node_remove(&func->node); 4577ec681f3Smrg } 4587ec681f3Smrg assert(exec_list_length(&nir->functions) == 1); 4597ec681f3Smrg 4607ec681f3Smrg nir_validate_shader(nir, "clover after function inlining"); 4617ec681f3Smrg 4627ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp); 4637ec681f3Smrg 4647ec681f3Smrg struct nir_lower_printf_options printf_options; 4657ec681f3Smrg printf_options.treat_doubles_as_floats = false; 4667ec681f3Smrg printf_options.max_buffer_size = dev.max_printf_buffer_size(); 4677ec681f3Smrg 4687ec681f3Smrg NIR_PASS_V(nir, nir_lower_printf, &printf_options); 4697ec681f3Smrg 4707ec681f3Smrg NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); 4717ec681f3Smrg 4727ec681f3Smrg // copy propagate to prepare for lower_explicit_io 4737ec681f3Smrg NIR_PASS_V(nir, nir_split_var_copies); 4747ec681f3Smrg NIR_PASS_V(nir, nir_opt_copy_prop_vars); 4757ec681f3Smrg NIR_PASS_V(nir, nir_lower_var_copies); 4767ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_ssa); 4777ec681f3Smrg NIR_PASS_V(nir, nir_opt_dce); 4787ec681f3Smrg NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL); 4797ec681f3Smrg 4807ec681f3Smrg NIR_PASS_V(nir, nir_lower_system_values); 4817ec681f3Smrg nir_lower_compute_system_values_options sysval_options = { 0 }; 4827ec681f3Smrg sysval_options.has_base_global_invocation_id = true; 4837ec681f3Smrg NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options); 4847ec681f3Smrg 4857ec681f3Smrg // constant fold before lowering mem constants 4867ec681f3Smrg NIR_PASS_V(nir, nir_opt_constant_folding); 4877ec681f3Smrg 4887ec681f3Smrg NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_constant, NULL); 4897ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_constant, 4907ec681f3Smrg glsl_get_cl_type_size_align); 4917ec681f3Smrg if (nir->constant_data_size > 0) { 4927ec681f3Smrg assert(nir->constant_data == NULL); 4937ec681f3Smrg nir->constant_data = rzalloc_size(nir, nir->constant_data_size); 4947ec681f3Smrg nir_gather_explicit_io_initializers(nir, nir->constant_data, 4957ec681f3Smrg nir->constant_data_size, 4967ec681f3Smrg nir_var_mem_constant); 4977ec681f3Smrg } 4987ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant, 4997ec681f3Smrg spirv_options.constant_addr_format); 5007ec681f3Smrg 5017ec681f3Smrg auto args = sym.args; 5027ec681f3Smrg NIR_PASS_V(nir, clover_lower_nir, args, dev.max_block_size().size(), 5037ec681f3Smrg dev.address_bits()); 5047ec681f3Smrg 5057ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, 5067ec681f3Smrg nir_var_uniform, clover_arg_size_align); 5077ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, 5087ec681f3Smrg nir_var_mem_shared | nir_var_mem_global | 5097ec681f3Smrg nir_var_function_temp, 5107ec681f3Smrg glsl_get_cl_type_size_align); 5117ec681f3Smrg 5127ec681f3Smrg NIR_PASS_V(nir, nir_opt_deref); 5137ec681f3Smrg NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false); 5147ec681f3Smrg NIR_PASS_V(nir, clover_nir_lower_images); 5157ec681f3Smrg NIR_PASS_V(nir, nir_lower_memcpy); 5167ec681f3Smrg 5177ec681f3Smrg /* use offsets for kernel inputs (uniform) */ 5187ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform, 5197ec681f3Smrg nir->info.cs.ptr_size == 64 ? 5207ec681f3Smrg nir_address_format_32bit_offset_as_64bit : 5217ec681f3Smrg nir_address_format_32bit_offset); 5227ec681f3Smrg 5237ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant, 5247ec681f3Smrg spirv_options.constant_addr_format); 5257ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, 5267ec681f3Smrg spirv_options.shared_addr_format); 5277ec681f3Smrg 5287ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp, 5297ec681f3Smrg spirv_options.temp_addr_format); 5307ec681f3Smrg 5317ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global, 5327ec681f3Smrg spirv_options.global_addr_format); 5337ec681f3Smrg 5347ec681f3Smrg struct nir_remove_dead_variables_options remove_dead_variables_options = { 5357ec681f3Smrg .can_remove_var = can_remove_var, 5367ec681f3Smrg }; 5377ec681f3Smrg NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, &remove_dead_variables_options); 5387ec681f3Smrg 5397ec681f3Smrg if (compiler_options->lower_int64_options) 5407ec681f3Smrg NIR_PASS_V(nir, nir_lower_int64); 5417ec681f3Smrg 5427ec681f3Smrg NIR_PASS_V(nir, nir_opt_dce); 5437ec681f3Smrg 5447ec681f3Smrg if (nir->constant_data_size) { 5457ec681f3Smrg const char *ptr = reinterpret_cast<const char *>(nir->constant_data); 5467ec681f3Smrg const binary::section constants { 5477ec681f3Smrg section_id, 5487ec681f3Smrg binary::section::data_constant, 5497ec681f3Smrg nir->constant_data_size, 5507ec681f3Smrg { ptr, ptr + nir->constant_data_size } 5517ec681f3Smrg }; 5527ec681f3Smrg nir->constant_data = NULL; 5537ec681f3Smrg nir->constant_data_size = 0; 5547ec681f3Smrg b.secs.push_back(constants); 5557ec681f3Smrg } 5567ec681f3Smrg 5577ec681f3Smrg void *mem_ctx = ralloc_context(NULL); 5587ec681f3Smrg unsigned printf_info_count = nir->printf_info_count; 5597ec681f3Smrg nir_printf_info *printf_infos = nir->printf_info; 5607ec681f3Smrg 5617ec681f3Smrg ralloc_steal(mem_ctx, printf_infos); 5627ec681f3Smrg 5637ec681f3Smrg struct blob blob; 5647ec681f3Smrg blob_init(&blob); 5657ec681f3Smrg nir_serialize(&blob, nir, false); 5667ec681f3Smrg 5677ec681f3Smrg ralloc_free(nir); 5687ec681f3Smrg 5697ec681f3Smrg const pipe_binary_program_header header { uint32_t(blob.size) }; 5707ec681f3Smrg binary::section text { section_id, binary::section::text_executable, header.num_bytes, {} }; 5717ec681f3Smrg text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header), 5727ec681f3Smrg reinterpret_cast<const char *>(&header) + sizeof(header)); 5737ec681f3Smrg text.data.insert(text.data.end(), blob.data, blob.data + blob.size); 5747ec681f3Smrg 5757ec681f3Smrg free(blob.data); 5767ec681f3Smrg 5777ec681f3Smrg b.printf_strings_in_buffer = false; 5787ec681f3Smrg b.printf_infos.reserve(printf_info_count); 5797ec681f3Smrg for (unsigned i = 0; i < printf_info_count; i++) { 5807ec681f3Smrg binary::printf_info info; 5817ec681f3Smrg 5827ec681f3Smrg info.arg_sizes.reserve(printf_infos[i].num_args); 5837ec681f3Smrg for (unsigned j = 0; j < printf_infos[i].num_args; j++) 5847ec681f3Smrg info.arg_sizes.push_back(printf_infos[i].arg_sizes[j]); 5857ec681f3Smrg 5867ec681f3Smrg info.strings.resize(printf_infos[i].string_size); 5877ec681f3Smrg memcpy(info.strings.data(), printf_infos[i].strings, printf_infos[i].string_size); 5887ec681f3Smrg b.printf_infos.push_back(info); 5897ec681f3Smrg } 5907ec681f3Smrg 5917ec681f3Smrg ralloc_free(mem_ctx); 5927ec681f3Smrg 5937ec681f3Smrg b.syms.emplace_back(sym.name, sym.attributes, 5947ec681f3Smrg sym.reqd_work_group_size, section_id, 0, args); 5957ec681f3Smrg b.secs.push_back(text); 5967ec681f3Smrg section_id++; 5977ec681f3Smrg } 5987ec681f3Smrg return b; 5997ec681f3Smrg} 6007ec681f3Smrg#else 6017ec681f3Smrgbinary clover::nir::spirv_to_nir(const binary &mod, const device &dev, std::string &r_log) 6027ec681f3Smrg{ 6037ec681f3Smrg r_log += "SPIR-V support in clover is not enabled.\n"; 6047ec681f3Smrg throw error(CL_LINKER_NOT_AVAILABLE); 6057ec681f3Smrg} 6067ec681f3Smrg#endif 607