17ec681f3Smrg//
27ec681f3Smrg// Copyright 2019 Karol Herbst
37ec681f3Smrg//
47ec681f3Smrg// Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg// copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg// to deal in the Software without restriction, including without limitation
77ec681f3Smrg// the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg// and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg// Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg//
117ec681f3Smrg// The above copyright notice and this permission notice shall be included in
127ec681f3Smrg// all copies or substantial portions of the Software.
137ec681f3Smrg//
147ec681f3Smrg// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
157ec681f3Smrg// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
167ec681f3Smrg// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
177ec681f3Smrg// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
187ec681f3Smrg// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
197ec681f3Smrg// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
207ec681f3Smrg// OTHER DEALINGS IN THE SOFTWARE.
217ec681f3Smrg//
227ec681f3Smrg
237ec681f3Smrg#include "invocation.hpp"
247ec681f3Smrg
257ec681f3Smrg#include <tuple>
267ec681f3Smrg
277ec681f3Smrg#include "core/device.hpp"
287ec681f3Smrg#include "core/error.hpp"
297ec681f3Smrg#include "core/binary.hpp"
307ec681f3Smrg#include "pipe/p_state.h"
317ec681f3Smrg#include "util/algorithm.hpp"
327ec681f3Smrg#include "util/functional.hpp"
337ec681f3Smrg
347ec681f3Smrg#include <compiler/glsl_types.h>
357ec681f3Smrg#include <compiler/nir/nir_builder.h>
367ec681f3Smrg#include <compiler/nir/nir_serialize.h>
377ec681f3Smrg#include <compiler/spirv/nir_spirv.h>
387ec681f3Smrg#include <util/u_math.h>
397ec681f3Smrg
407ec681f3Smrgusing namespace clover;
417ec681f3Smrg
427ec681f3Smrg#ifdef HAVE_CLOVER_SPIRV
437ec681f3Smrg
447ec681f3Smrg// Refs and unrefs the glsl_type_singleton.
457ec681f3Smrgstatic class glsl_type_ref {
467ec681f3Smrgpublic:
477ec681f3Smrg   glsl_type_ref() {
487ec681f3Smrg      glsl_type_singleton_init_or_ref();
497ec681f3Smrg   }
507ec681f3Smrg
517ec681f3Smrg   ~glsl_type_ref() {
527ec681f3Smrg      glsl_type_singleton_decref();
537ec681f3Smrg   }
547ec681f3Smrg} glsl_type_ref;
557ec681f3Smrg
567ec681f3Smrgstatic const nir_shader_compiler_options *
577ec681f3Smrgdev_get_nir_compiler_options(const device &dev)
587ec681f3Smrg{
597ec681f3Smrg   const void *co = dev.get_compiler_options(PIPE_SHADER_IR_NIR);
607ec681f3Smrg   return static_cast<const nir_shader_compiler_options*>(co);
617ec681f3Smrg}
627ec681f3Smrg
637ec681f3Smrgstatic void debug_function(void *private_data,
647ec681f3Smrg                   enum nir_spirv_debug_level level, size_t spirv_offset,
657ec681f3Smrg                   const char *message)
667ec681f3Smrg{
677ec681f3Smrg   assert(private_data);
687ec681f3Smrg   auto r_log = reinterpret_cast<std::string *>(private_data);
697ec681f3Smrg   *r_log += message;
707ec681f3Smrg}
717ec681f3Smrg
727ec681f3Smrgstatic void
737ec681f3Smrgclover_arg_size_align(const glsl_type *type, unsigned *size, unsigned *align)
747ec681f3Smrg{
757ec681f3Smrg   if (type == glsl_type::sampler_type) {
767ec681f3Smrg      *size = 0;
777ec681f3Smrg      *align = 1;
787ec681f3Smrg   } else if (type->is_image()) {
797ec681f3Smrg      *size = *align = sizeof(cl_mem);
807ec681f3Smrg   } else {
817ec681f3Smrg      *size = type->cl_size();
827ec681f3Smrg      *align = type->cl_alignment();
837ec681f3Smrg   }
847ec681f3Smrg}
857ec681f3Smrg
867ec681f3Smrgstatic bool
877ec681f3Smrgclover_nir_lower_images(nir_shader *shader)
887ec681f3Smrg{
897ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
907ec681f3Smrg
917ec681f3Smrg   ASSERTED int last_loc = -1;
927ec681f3Smrg   int num_rd_images = 0, num_wr_images = 0, num_samplers = 0;
937ec681f3Smrg   nir_foreach_uniform_variable(var, shader) {
947ec681f3Smrg      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
957ec681f3Smrg         /* Assume they come in order */
967ec681f3Smrg         assert(var->data.location > last_loc);
977ec681f3Smrg         last_loc = var->data.location;
987ec681f3Smrg      }
997ec681f3Smrg
1007ec681f3Smrg      /* TODO: Constant samplers */
1017ec681f3Smrg      if (var->type == glsl_bare_sampler_type()) {
1027ec681f3Smrg         var->data.driver_location = num_samplers++;
1037ec681f3Smrg      } else if (glsl_type_is_image(var->type)) {
1047ec681f3Smrg         if (var->data.access & ACCESS_NON_WRITEABLE)
1057ec681f3Smrg            var->data.driver_location = num_rd_images++;
1067ec681f3Smrg         else
1077ec681f3Smrg            var->data.driver_location = num_wr_images++;
1087ec681f3Smrg      } else {
1097ec681f3Smrg         /* CL shouldn't have any sampled images */
1107ec681f3Smrg         assert(!glsl_type_is_sampler(var->type));
1117ec681f3Smrg      }
1127ec681f3Smrg   }
1137ec681f3Smrg   shader->info.num_textures = num_rd_images;
1147ec681f3Smrg   BITSET_ZERO(shader->info.textures_used);
1157ec681f3Smrg   if (num_rd_images)
1167ec681f3Smrg      BITSET_SET_RANGE_INSIDE_WORD(shader->info.textures_used, 0, num_rd_images - 1);
1177ec681f3Smrg   shader->info.num_images = num_wr_images;
1187ec681f3Smrg
1197ec681f3Smrg   nir_builder b;
1207ec681f3Smrg   nir_builder_init(&b, impl);
1217ec681f3Smrg
1227ec681f3Smrg   bool progress = false;
1237ec681f3Smrg   nir_foreach_block_reverse(block, impl) {
1247ec681f3Smrg      nir_foreach_instr_reverse_safe(instr, block) {
1257ec681f3Smrg         switch (instr->type) {
1267ec681f3Smrg         case nir_instr_type_deref: {
1277ec681f3Smrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
1287ec681f3Smrg            if (deref->deref_type != nir_deref_type_var)
1297ec681f3Smrg               break;
1307ec681f3Smrg
1317ec681f3Smrg            if (!glsl_type_is_image(deref->type) &&
1327ec681f3Smrg                !glsl_type_is_sampler(deref->type))
1337ec681f3Smrg               break;
1347ec681f3Smrg
1357ec681f3Smrg            b.cursor = nir_instr_remove(&deref->instr);
1367ec681f3Smrg            nir_ssa_def *loc =
1377ec681f3Smrg               nir_imm_intN_t(&b, deref->var->data.driver_location,
1387ec681f3Smrg                                  deref->dest.ssa.bit_size);
1397ec681f3Smrg            nir_ssa_def_rewrite_uses(&deref->dest.ssa, loc);
1407ec681f3Smrg            progress = true;
1417ec681f3Smrg            break;
1427ec681f3Smrg         }
1437ec681f3Smrg
1447ec681f3Smrg         case nir_instr_type_tex: {
1457ec681f3Smrg            nir_tex_instr *tex = nir_instr_as_tex(instr);
1467ec681f3Smrg            unsigned count = 0;
1477ec681f3Smrg            for (unsigned i = 0; i < tex->num_srcs; i++) {
1487ec681f3Smrg               if (tex->src[i].src_type == nir_tex_src_texture_deref ||
1497ec681f3Smrg                   tex->src[i].src_type == nir_tex_src_sampler_deref) {
1507ec681f3Smrg                  nir_deref_instr *deref = nir_src_as_deref(tex->src[i].src);
1517ec681f3Smrg                  if (deref->deref_type == nir_deref_type_var) {
1527ec681f3Smrg                     /* In this case, we know the actual variable */
1537ec681f3Smrg                     if (tex->src[i].src_type == nir_tex_src_texture_deref)
1547ec681f3Smrg                        tex->texture_index = deref->var->data.driver_location;
1557ec681f3Smrg                     else
1567ec681f3Smrg                        tex->sampler_index = deref->var->data.driver_location;
1577ec681f3Smrg                     /* This source gets discarded */
1587ec681f3Smrg                     nir_instr_rewrite_src(&tex->instr, &tex->src[i].src,
1597ec681f3Smrg                                           NIR_SRC_INIT);
1607ec681f3Smrg                     continue;
1617ec681f3Smrg                  } else {
1627ec681f3Smrg                     assert(tex->src[i].src.is_ssa);
1637ec681f3Smrg                     b.cursor = nir_before_instr(&tex->instr);
1647ec681f3Smrg                     /* Back-ends expect a 32-bit thing, not 64-bit */
1657ec681f3Smrg                     nir_ssa_def *offset = nir_u2u32(&b, tex->src[i].src.ssa);
1667ec681f3Smrg                     if (tex->src[i].src_type == nir_tex_src_texture_deref)
1677ec681f3Smrg                        tex->src[count].src_type = nir_tex_src_texture_offset;
1687ec681f3Smrg                     else
1697ec681f3Smrg                        tex->src[count].src_type = nir_tex_src_sampler_offset;
1707ec681f3Smrg                     nir_instr_rewrite_src(&tex->instr, &tex->src[count].src,
1717ec681f3Smrg                                           nir_src_for_ssa(offset));
1727ec681f3Smrg                  }
1737ec681f3Smrg               } else {
1747ec681f3Smrg                  /* If we've removed a source, move this one down */
1757ec681f3Smrg                  if (count != i) {
1767ec681f3Smrg                     assert(count < i);
1777ec681f3Smrg                     tex->src[count].src_type = tex->src[i].src_type;
1787ec681f3Smrg                     nir_instr_move_src(&tex->instr, &tex->src[count].src,
1797ec681f3Smrg                                        &tex->src[i].src);
1807ec681f3Smrg                  }
1817ec681f3Smrg               }
1827ec681f3Smrg               count++;
1837ec681f3Smrg            }
1847ec681f3Smrg            tex->num_srcs = count;
1857ec681f3Smrg            progress = true;
1867ec681f3Smrg            break;
1877ec681f3Smrg         }
1887ec681f3Smrg
1897ec681f3Smrg         case nir_instr_type_intrinsic: {
1907ec681f3Smrg            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1917ec681f3Smrg            switch (intrin->intrinsic) {
1927ec681f3Smrg            case nir_intrinsic_image_deref_load:
1937ec681f3Smrg            case nir_intrinsic_image_deref_store:
1947ec681f3Smrg            case nir_intrinsic_image_deref_atomic_add:
1957ec681f3Smrg            case nir_intrinsic_image_deref_atomic_imin:
1967ec681f3Smrg            case nir_intrinsic_image_deref_atomic_umin:
1977ec681f3Smrg            case nir_intrinsic_image_deref_atomic_imax:
1987ec681f3Smrg            case nir_intrinsic_image_deref_atomic_umax:
1997ec681f3Smrg            case nir_intrinsic_image_deref_atomic_and:
2007ec681f3Smrg            case nir_intrinsic_image_deref_atomic_or:
2017ec681f3Smrg            case nir_intrinsic_image_deref_atomic_xor:
2027ec681f3Smrg            case nir_intrinsic_image_deref_atomic_exchange:
2037ec681f3Smrg            case nir_intrinsic_image_deref_atomic_comp_swap:
2047ec681f3Smrg            case nir_intrinsic_image_deref_atomic_fadd:
2057ec681f3Smrg            case nir_intrinsic_image_deref_atomic_inc_wrap:
2067ec681f3Smrg            case nir_intrinsic_image_deref_atomic_dec_wrap:
2077ec681f3Smrg            case nir_intrinsic_image_deref_size:
2087ec681f3Smrg            case nir_intrinsic_image_deref_samples: {
2097ec681f3Smrg               assert(intrin->src[0].is_ssa);
2107ec681f3Smrg               b.cursor = nir_before_instr(&intrin->instr);
2117ec681f3Smrg               /* Back-ends expect a 32-bit thing, not 64-bit */
2127ec681f3Smrg               nir_ssa_def *offset = nir_u2u32(&b, intrin->src[0].ssa);
2137ec681f3Smrg               nir_rewrite_image_intrinsic(intrin, offset, false);
2147ec681f3Smrg               progress = true;
2157ec681f3Smrg               break;
2167ec681f3Smrg            }
2177ec681f3Smrg
2187ec681f3Smrg            default:
2197ec681f3Smrg               break;
2207ec681f3Smrg            }
2217ec681f3Smrg            break;
2227ec681f3Smrg         }
2237ec681f3Smrg
2247ec681f3Smrg         default:
2257ec681f3Smrg            break;
2267ec681f3Smrg         }
2277ec681f3Smrg      }
2287ec681f3Smrg   }
2297ec681f3Smrg
2307ec681f3Smrg   if (progress) {
2317ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_block_index |
2327ec681f3Smrg                                  nir_metadata_dominance);
2337ec681f3Smrg   } else {
2347ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
2357ec681f3Smrg   }
2367ec681f3Smrg
2377ec681f3Smrg   return progress;
2387ec681f3Smrg}
2397ec681f3Smrg
2407ec681f3Smrgstruct clover_lower_nir_state {
2417ec681f3Smrg   std::vector<binary::argument> &args;
2427ec681f3Smrg   uint32_t global_dims;
2437ec681f3Smrg   nir_variable *constant_var;
2447ec681f3Smrg   nir_variable *printf_buffer;
2457ec681f3Smrg   nir_variable *offset_vars[3];
2467ec681f3Smrg};
2477ec681f3Smrg
2487ec681f3Smrgstatic bool
2497ec681f3Smrgclover_lower_nir_filter(const nir_instr *instr, const void *)
2507ec681f3Smrg{
2517ec681f3Smrg   return instr->type == nir_instr_type_intrinsic;
2527ec681f3Smrg}
2537ec681f3Smrg
2547ec681f3Smrgstatic nir_ssa_def *
2557ec681f3Smrgclover_lower_nir_instr(nir_builder *b, nir_instr *instr, void *_state)
2567ec681f3Smrg{
2577ec681f3Smrg   clover_lower_nir_state *state = reinterpret_cast<clover_lower_nir_state*>(_state);
2587ec681f3Smrg   nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
2597ec681f3Smrg
2607ec681f3Smrg   switch (intrinsic->intrinsic) {
2617ec681f3Smrg   case nir_intrinsic_load_printf_buffer_address: {
2627ec681f3Smrg      if (!state->printf_buffer) {
2637ec681f3Smrg         unsigned location = state->args.size();
2647ec681f3Smrg         state->args.emplace_back(binary::argument::global, sizeof(size_t),
2657ec681f3Smrg                                  8, 8, binary::argument::zero_ext,
2667ec681f3Smrg                                  binary::argument::printf_buffer);
2677ec681f3Smrg
2687ec681f3Smrg         const glsl_type *type = glsl_uint64_t_type();
2697ec681f3Smrg         state->printf_buffer = nir_variable_create(b->shader, nir_var_uniform,
2707ec681f3Smrg                                                    type, "global_printf_buffer");
2717ec681f3Smrg         state->printf_buffer->data.location = location;
2727ec681f3Smrg      }
2737ec681f3Smrg      return nir_load_var(b, state->printf_buffer);
2747ec681f3Smrg   }
2757ec681f3Smrg   case nir_intrinsic_load_base_global_invocation_id: {
2767ec681f3Smrg      nir_ssa_def *loads[3];
2777ec681f3Smrg
2787ec681f3Smrg      /* create variables if we didn't do so alrady */
2797ec681f3Smrg      if (!state->offset_vars[0]) {
2807ec681f3Smrg         /* TODO: fix for 64 bit */
2817ec681f3Smrg         /* Even though we only place one scalar argument, clover will bind up to
2827ec681f3Smrg          * three 32 bit values
2837ec681f3Smrg         */
2847ec681f3Smrg         unsigned location = state->args.size();
2857ec681f3Smrg         state->args.emplace_back(binary::argument::scalar, 4, 4, 4,
2867ec681f3Smrg                                  binary::argument::zero_ext,
2877ec681f3Smrg                                  binary::argument::grid_offset);
2887ec681f3Smrg
2897ec681f3Smrg         const glsl_type *type = glsl_uint_type();
2907ec681f3Smrg         for (uint32_t i = 0; i < 3; i++) {
2917ec681f3Smrg            state->offset_vars[i] =
2927ec681f3Smrg               nir_variable_create(b->shader, nir_var_uniform, type,
2937ec681f3Smrg                                   "global_invocation_id_offsets");
2947ec681f3Smrg            state->offset_vars[i]->data.location = location + i;
2957ec681f3Smrg         }
2967ec681f3Smrg      }
2977ec681f3Smrg
2987ec681f3Smrg      for (int i = 0; i < 3; i++) {
2997ec681f3Smrg         nir_variable *var = state->offset_vars[i];
3007ec681f3Smrg         loads[i] = var ? nir_load_var(b, var) : nir_imm_int(b, 0);
3017ec681f3Smrg      }
3027ec681f3Smrg
3037ec681f3Smrg      return nir_u2u(b, nir_vec(b, loads, state->global_dims),
3047ec681f3Smrg                     nir_dest_bit_size(intrinsic->dest));
3057ec681f3Smrg   }
3067ec681f3Smrg   case nir_intrinsic_load_constant_base_ptr: {
3077ec681f3Smrg      return nir_load_var(b, state->constant_var);
3087ec681f3Smrg   }
3097ec681f3Smrg
3107ec681f3Smrg   default:
3117ec681f3Smrg      return NULL;
3127ec681f3Smrg   }
3137ec681f3Smrg}
3147ec681f3Smrg
3157ec681f3Smrgstatic bool
3167ec681f3Smrgclover_lower_nir(nir_shader *nir, std::vector<binary::argument> &args,
3177ec681f3Smrg                 uint32_t dims, uint32_t pointer_bit_size)
3187ec681f3Smrg{
3197ec681f3Smrg   nir_variable *constant_var = NULL;
3207ec681f3Smrg   if (nir->constant_data_size) {
3217ec681f3Smrg      const glsl_type *type = pointer_bit_size == 64 ? glsl_uint64_t_type() : glsl_uint_type();
3227ec681f3Smrg
3237ec681f3Smrg      constant_var = nir_variable_create(nir, nir_var_uniform, type,
3247ec681f3Smrg                                         "constant_buffer_addr");
3257ec681f3Smrg      constant_var->data.location = args.size();
3267ec681f3Smrg
3277ec681f3Smrg      args.emplace_back(binary::argument::global, sizeof(cl_mem),
3287ec681f3Smrg                        pointer_bit_size / 8, pointer_bit_size / 8,
3297ec681f3Smrg                        binary::argument::zero_ext,
3307ec681f3Smrg                        binary::argument::constant_buffer);
3317ec681f3Smrg   }
3327ec681f3Smrg
3337ec681f3Smrg   clover_lower_nir_state state = { args, dims, constant_var };
3347ec681f3Smrg   return nir_shader_lower_instructions(nir,
3357ec681f3Smrg      clover_lower_nir_filter, clover_lower_nir_instr, &state);
3367ec681f3Smrg}
3377ec681f3Smrg
3387ec681f3Smrgstatic spirv_to_nir_options
3397ec681f3Smrgcreate_spirv_options(const device &dev, std::string &r_log)
3407ec681f3Smrg{
3417ec681f3Smrg   struct spirv_to_nir_options spirv_options = {};
3427ec681f3Smrg   spirv_options.environment = NIR_SPIRV_OPENCL;
3437ec681f3Smrg   if (dev.address_bits() == 32u) {
3447ec681f3Smrg      spirv_options.shared_addr_format = nir_address_format_32bit_offset;
3457ec681f3Smrg      spirv_options.global_addr_format = nir_address_format_32bit_global;
3467ec681f3Smrg      spirv_options.temp_addr_format = nir_address_format_32bit_offset;
3477ec681f3Smrg      spirv_options.constant_addr_format = nir_address_format_32bit_global;
3487ec681f3Smrg   } else {
3497ec681f3Smrg      spirv_options.shared_addr_format = nir_address_format_32bit_offset_as_64bit;
3507ec681f3Smrg      spirv_options.global_addr_format = nir_address_format_64bit_global;
3517ec681f3Smrg      spirv_options.temp_addr_format = nir_address_format_32bit_offset_as_64bit;
3527ec681f3Smrg      spirv_options.constant_addr_format = nir_address_format_64bit_global;
3537ec681f3Smrg   }
3547ec681f3Smrg   spirv_options.caps.address = true;
3557ec681f3Smrg   spirv_options.caps.float64 = true;
3567ec681f3Smrg   spirv_options.caps.int8 = true;
3577ec681f3Smrg   spirv_options.caps.int16 = true;
3587ec681f3Smrg   spirv_options.caps.int64 = true;
3597ec681f3Smrg   spirv_options.caps.kernel = true;
3607ec681f3Smrg   spirv_options.caps.kernel_image = dev.image_support();
3617ec681f3Smrg   spirv_options.caps.int64_atomics = dev.has_int64_atomics();
3627ec681f3Smrg   spirv_options.debug.func = &debug_function;
3637ec681f3Smrg   spirv_options.debug.private_data = &r_log;
3647ec681f3Smrg   spirv_options.caps.printf = true;
3657ec681f3Smrg   return spirv_options;
3667ec681f3Smrg}
3677ec681f3Smrg
3687ec681f3Smrgstruct disk_cache *clover::nir::create_clc_disk_cache(void)
3697ec681f3Smrg{
3707ec681f3Smrg   struct mesa_sha1 ctx;
3717ec681f3Smrg   unsigned char sha1[20];
3727ec681f3Smrg   char cache_id[20 * 2 + 1];
3737ec681f3Smrg   _mesa_sha1_init(&ctx);
3747ec681f3Smrg
3757ec681f3Smrg   if (!disk_cache_get_function_identifier((void *)clover::nir::create_clc_disk_cache, &ctx))
3767ec681f3Smrg      return NULL;
3777ec681f3Smrg
3787ec681f3Smrg   _mesa_sha1_final(&ctx, sha1);
3797ec681f3Smrg
3807ec681f3Smrg   disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
3817ec681f3Smrg   return disk_cache_create("clover-clc", cache_id, 0);
3827ec681f3Smrg}
3837ec681f3Smrg
3847ec681f3Smrgvoid clover::nir::check_for_libclc(const device &dev)
3857ec681f3Smrg{
3867ec681f3Smrg   if (!nir_can_find_libclc(dev.address_bits()))
3877ec681f3Smrg      throw error(CL_COMPILER_NOT_AVAILABLE);
3887ec681f3Smrg}
3897ec681f3Smrg
3907ec681f3Smrgnir_shader *clover::nir::load_libclc_nir(const device &dev, std::string &r_log)
3917ec681f3Smrg{
3927ec681f3Smrg   spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log);
3937ec681f3Smrg   auto *compiler_options = dev_get_nir_compiler_options(dev);
3947ec681f3Smrg
3957ec681f3Smrg   return nir_load_libclc_shader(dev.address_bits(), dev.clc_cache,
3967ec681f3Smrg				 &spirv_options, compiler_options);
3977ec681f3Smrg}
3987ec681f3Smrg
3997ec681f3Smrgstatic bool
4007ec681f3Smrgcan_remove_var(nir_variable *var, void *data)
4017ec681f3Smrg{
4027ec681f3Smrg   return !(var->type->is_sampler() || var->type->is_image());
4037ec681f3Smrg}
4047ec681f3Smrg
4057ec681f3Smrgbinary clover::nir::spirv_to_nir(const binary &mod, const device &dev,
4067ec681f3Smrg                                 std::string &r_log)
4077ec681f3Smrg{
4087ec681f3Smrg   spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log);
4097ec681f3Smrg   std::shared_ptr<nir_shader> nir = dev.clc_nir;
4107ec681f3Smrg   spirv_options.clc_shader = nir.get();
4117ec681f3Smrg
4127ec681f3Smrg   binary b;
4137ec681f3Smrg   // We only insert one section.
4147ec681f3Smrg   assert(mod.secs.size() == 1);
4157ec681f3Smrg   auto &section = mod.secs[0];
4167ec681f3Smrg
4177ec681f3Smrg   binary::resource_id section_id = 0;
4187ec681f3Smrg   for (const auto &sym : mod.syms) {
4197ec681f3Smrg      assert(sym.section == 0);
4207ec681f3Smrg
4217ec681f3Smrg      const auto *binary =
4227ec681f3Smrg         reinterpret_cast<const pipe_binary_program_header *>(section.data.data());
4237ec681f3Smrg      const uint32_t *data = reinterpret_cast<const uint32_t *>(binary->blob);
4247ec681f3Smrg      const size_t num_words = binary->num_bytes / 4;
4257ec681f3Smrg      const char *name = sym.name.c_str();
4267ec681f3Smrg      auto *compiler_options = dev_get_nir_compiler_options(dev);
4277ec681f3Smrg
4287ec681f3Smrg      nir_shader *nir = spirv_to_nir(data, num_words, nullptr, 0,
4297ec681f3Smrg                                     MESA_SHADER_KERNEL, name,
4307ec681f3Smrg                                     &spirv_options, compiler_options);
4317ec681f3Smrg      if (!nir) {
4327ec681f3Smrg         r_log += "Translation from SPIR-V to NIR for kernel \"" + sym.name +
4337ec681f3Smrg                  "\" failed.\n";
4347ec681f3Smrg         throw build_error();
4357ec681f3Smrg      }
4367ec681f3Smrg
4377ec681f3Smrg      nir->info.workgroup_size_variable = sym.reqd_work_group_size[0] == 0;
4387ec681f3Smrg      nir->info.workgroup_size[0] = sym.reqd_work_group_size[0];
4397ec681f3Smrg      nir->info.workgroup_size[1] = sym.reqd_work_group_size[1];
4407ec681f3Smrg      nir->info.workgroup_size[2] = sym.reqd_work_group_size[2];
4417ec681f3Smrg      nir_validate_shader(nir, "clover");
4427ec681f3Smrg
4437ec681f3Smrg      // Inline all functions first.
4447ec681f3Smrg      // according to the comment on nir_inline_functions
4457ec681f3Smrg      NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
4467ec681f3Smrg      NIR_PASS_V(nir, nir_lower_returns);
4477ec681f3Smrg      NIR_PASS_V(nir, nir_lower_libclc, spirv_options.clc_shader);
4487ec681f3Smrg
4497ec681f3Smrg      NIR_PASS_V(nir, nir_inline_functions);
4507ec681f3Smrg      NIR_PASS_V(nir, nir_copy_prop);
4517ec681f3Smrg      NIR_PASS_V(nir, nir_opt_deref);
4527ec681f3Smrg
4537ec681f3Smrg      // Pick off the single entrypoint that we want.
4547ec681f3Smrg      foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
4557ec681f3Smrg         if (!func->is_entrypoint)
4567ec681f3Smrg            exec_node_remove(&func->node);
4577ec681f3Smrg      }
4587ec681f3Smrg      assert(exec_list_length(&nir->functions) == 1);
4597ec681f3Smrg
4607ec681f3Smrg      nir_validate_shader(nir, "clover after function inlining");
4617ec681f3Smrg
4627ec681f3Smrg      NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
4637ec681f3Smrg
4647ec681f3Smrg      struct nir_lower_printf_options printf_options;
4657ec681f3Smrg      printf_options.treat_doubles_as_floats = false;
4667ec681f3Smrg      printf_options.max_buffer_size = dev.max_printf_buffer_size();
4677ec681f3Smrg
4687ec681f3Smrg      NIR_PASS_V(nir, nir_lower_printf, &printf_options);
4697ec681f3Smrg
4707ec681f3Smrg      NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4717ec681f3Smrg
4727ec681f3Smrg      // copy propagate to prepare for lower_explicit_io
4737ec681f3Smrg      NIR_PASS_V(nir, nir_split_var_copies);
4747ec681f3Smrg      NIR_PASS_V(nir, nir_opt_copy_prop_vars);
4757ec681f3Smrg      NIR_PASS_V(nir, nir_lower_var_copies);
4767ec681f3Smrg      NIR_PASS_V(nir, nir_lower_vars_to_ssa);
4777ec681f3Smrg      NIR_PASS_V(nir, nir_opt_dce);
4787ec681f3Smrg      NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);
4797ec681f3Smrg
4807ec681f3Smrg      NIR_PASS_V(nir, nir_lower_system_values);
4817ec681f3Smrg      nir_lower_compute_system_values_options sysval_options = { 0 };
4827ec681f3Smrg      sysval_options.has_base_global_invocation_id = true;
4837ec681f3Smrg      NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
4847ec681f3Smrg
4857ec681f3Smrg      // constant fold before lowering mem constants
4867ec681f3Smrg      NIR_PASS_V(nir, nir_opt_constant_folding);
4877ec681f3Smrg
4887ec681f3Smrg      NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_constant, NULL);
4897ec681f3Smrg      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_constant,
4907ec681f3Smrg                 glsl_get_cl_type_size_align);
4917ec681f3Smrg      if (nir->constant_data_size > 0) {
4927ec681f3Smrg         assert(nir->constant_data == NULL);
4937ec681f3Smrg         nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
4947ec681f3Smrg         nir_gather_explicit_io_initializers(nir, nir->constant_data,
4957ec681f3Smrg                                             nir->constant_data_size,
4967ec681f3Smrg                                             nir_var_mem_constant);
4977ec681f3Smrg      }
4987ec681f3Smrg      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
4997ec681f3Smrg                 spirv_options.constant_addr_format);
5007ec681f3Smrg
5017ec681f3Smrg      auto args = sym.args;
5027ec681f3Smrg      NIR_PASS_V(nir, clover_lower_nir, args, dev.max_block_size().size(),
5037ec681f3Smrg                 dev.address_bits());
5047ec681f3Smrg
5057ec681f3Smrg      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
5067ec681f3Smrg                 nir_var_uniform, clover_arg_size_align);
5077ec681f3Smrg      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
5087ec681f3Smrg                 nir_var_mem_shared | nir_var_mem_global |
5097ec681f3Smrg                 nir_var_function_temp,
5107ec681f3Smrg                 glsl_get_cl_type_size_align);
5117ec681f3Smrg
5127ec681f3Smrg      NIR_PASS_V(nir, nir_opt_deref);
5137ec681f3Smrg      NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false);
5147ec681f3Smrg      NIR_PASS_V(nir, clover_nir_lower_images);
5157ec681f3Smrg      NIR_PASS_V(nir, nir_lower_memcpy);
5167ec681f3Smrg
5177ec681f3Smrg      /* use offsets for kernel inputs (uniform) */
5187ec681f3Smrg      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
5197ec681f3Smrg                 nir->info.cs.ptr_size == 64 ?
5207ec681f3Smrg                 nir_address_format_32bit_offset_as_64bit :
5217ec681f3Smrg                 nir_address_format_32bit_offset);
5227ec681f3Smrg
5237ec681f3Smrg      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
5247ec681f3Smrg                 spirv_options.constant_addr_format);
5257ec681f3Smrg      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared,
5267ec681f3Smrg                 spirv_options.shared_addr_format);
5277ec681f3Smrg
5287ec681f3Smrg      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
5297ec681f3Smrg                 spirv_options.temp_addr_format);
5307ec681f3Smrg
5317ec681f3Smrg      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
5327ec681f3Smrg                 spirv_options.global_addr_format);
5337ec681f3Smrg
5347ec681f3Smrg      struct nir_remove_dead_variables_options remove_dead_variables_options = {
5357ec681f3Smrg            .can_remove_var = can_remove_var,
5367ec681f3Smrg      };
5377ec681f3Smrg      NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, &remove_dead_variables_options);
5387ec681f3Smrg
5397ec681f3Smrg      if (compiler_options->lower_int64_options)
5407ec681f3Smrg         NIR_PASS_V(nir, nir_lower_int64);
5417ec681f3Smrg
5427ec681f3Smrg      NIR_PASS_V(nir, nir_opt_dce);
5437ec681f3Smrg
5447ec681f3Smrg      if (nir->constant_data_size) {
5457ec681f3Smrg         const char *ptr = reinterpret_cast<const char *>(nir->constant_data);
5467ec681f3Smrg         const binary::section constants {
5477ec681f3Smrg            section_id,
5487ec681f3Smrg            binary::section::data_constant,
5497ec681f3Smrg            nir->constant_data_size,
5507ec681f3Smrg            { ptr, ptr + nir->constant_data_size }
5517ec681f3Smrg         };
5527ec681f3Smrg         nir->constant_data = NULL;
5537ec681f3Smrg         nir->constant_data_size = 0;
5547ec681f3Smrg         b.secs.push_back(constants);
5557ec681f3Smrg      }
5567ec681f3Smrg
5577ec681f3Smrg      void *mem_ctx = ralloc_context(NULL);
5587ec681f3Smrg      unsigned printf_info_count = nir->printf_info_count;
5597ec681f3Smrg      nir_printf_info *printf_infos = nir->printf_info;
5607ec681f3Smrg
5617ec681f3Smrg      ralloc_steal(mem_ctx, printf_infos);
5627ec681f3Smrg
5637ec681f3Smrg      struct blob blob;
5647ec681f3Smrg      blob_init(&blob);
5657ec681f3Smrg      nir_serialize(&blob, nir, false);
5667ec681f3Smrg
5677ec681f3Smrg      ralloc_free(nir);
5687ec681f3Smrg
5697ec681f3Smrg      const pipe_binary_program_header header { uint32_t(blob.size) };
5707ec681f3Smrg      binary::section text { section_id, binary::section::text_executable, header.num_bytes, {} };
5717ec681f3Smrg      text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header),
5727ec681f3Smrg                       reinterpret_cast<const char *>(&header) + sizeof(header));
5737ec681f3Smrg      text.data.insert(text.data.end(), blob.data, blob.data + blob.size);
5747ec681f3Smrg
5757ec681f3Smrg      free(blob.data);
5767ec681f3Smrg
5777ec681f3Smrg      b.printf_strings_in_buffer = false;
5787ec681f3Smrg      b.printf_infos.reserve(printf_info_count);
5797ec681f3Smrg      for (unsigned i = 0; i < printf_info_count; i++) {
5807ec681f3Smrg         binary::printf_info info;
5817ec681f3Smrg
5827ec681f3Smrg         info.arg_sizes.reserve(printf_infos[i].num_args);
5837ec681f3Smrg         for (unsigned j = 0; j < printf_infos[i].num_args; j++)
5847ec681f3Smrg            info.arg_sizes.push_back(printf_infos[i].arg_sizes[j]);
5857ec681f3Smrg
5867ec681f3Smrg         info.strings.resize(printf_infos[i].string_size);
5877ec681f3Smrg         memcpy(info.strings.data(), printf_infos[i].strings, printf_infos[i].string_size);
5887ec681f3Smrg         b.printf_infos.push_back(info);
5897ec681f3Smrg      }
5907ec681f3Smrg
5917ec681f3Smrg      ralloc_free(mem_ctx);
5927ec681f3Smrg
5937ec681f3Smrg      b.syms.emplace_back(sym.name, sym.attributes,
5947ec681f3Smrg                          sym.reqd_work_group_size, section_id, 0, args);
5957ec681f3Smrg      b.secs.push_back(text);
5967ec681f3Smrg      section_id++;
5977ec681f3Smrg   }
5987ec681f3Smrg   return b;
5997ec681f3Smrg}
6007ec681f3Smrg#else
6017ec681f3Smrgbinary clover::nir::spirv_to_nir(const binary &mod, const device &dev, std::string &r_log)
6027ec681f3Smrg{
6037ec681f3Smrg   r_log += "SPIR-V support in clover is not enabled.\n";
6047ec681f3Smrg   throw error(CL_LINKER_NOT_AVAILABLE);
6057ec681f3Smrg}
6067ec681f3Smrg#endif
607