/*
 * Copyright (c) 2017-2019 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
249f464c52Smaya
259f464c52Smaya#include "util/u_memory.h"
269f464c52Smaya#include "util/ralloc.h"
279f464c52Smaya#include "util/u_debug.h"
289f464c52Smaya
299f464c52Smaya#include "tgsi/tgsi_dump.h"
309f464c52Smaya#include "compiler/nir/nir.h"
317ec681f3Smrg#include "compiler/nir/nir_serialize.h"
329f464c52Smaya#include "nir/tgsi_to_nir.h"
339f464c52Smaya
349f464c52Smaya#include "pipe/p_state.h"
359f464c52Smaya
369f464c52Smaya#include "lima_screen.h"
379f464c52Smaya#include "lima_context.h"
387ec681f3Smrg#include "lima_job.h"
399f464c52Smaya#include "lima_program.h"
409f464c52Smaya#include "lima_bo.h"
417ec681f3Smrg#include "lima_disk_cache.h"
427ec681f3Smrg
439f464c52Smaya#include "ir/lima_ir.h"
449f464c52Smaya
459f464c52Smayastatic const nir_shader_compiler_options vs_nir_options = {
467ec681f3Smrg   .lower_ffma16 = true,
477ec681f3Smrg   .lower_ffma32 = true,
487ec681f3Smrg   .lower_ffma64 = true,
499f464c52Smaya   .lower_fpow = true,
509f464c52Smaya   .lower_ffract = true,
519f464c52Smaya   .lower_fdiv = true,
527ec681f3Smrg   .lower_fmod = true,
539f464c52Smaya   .lower_fsqrt = true,
549f464c52Smaya   .lower_flrp32 = true,
559f464c52Smaya   .lower_flrp64 = true,
569f464c52Smaya   /* could be implemented by clamp */
579f464c52Smaya   .lower_fsat = true,
587ec681f3Smrg   .lower_bitops = true,
597ec681f3Smrg   .lower_rotate = true,
607ec681f3Smrg   .lower_sincos = true,
617ec681f3Smrg   .lower_fceil = true,
627ec681f3Smrg   .lower_insert_byte = true,
637ec681f3Smrg   .lower_insert_word = true,
647ec681f3Smrg   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
659f464c52Smaya};
669f464c52Smaya
679f464c52Smayastatic const nir_shader_compiler_options fs_nir_options = {
687ec681f3Smrg   .lower_ffma16 = true,
697ec681f3Smrg   .lower_ffma32 = true,
707ec681f3Smrg   .lower_ffma64 = true,
719f464c52Smaya   .lower_fpow = true,
729f464c52Smaya   .lower_fdiv = true,
737ec681f3Smrg   .lower_fmod = true,
749f464c52Smaya   .lower_flrp32 = true,
759f464c52Smaya   .lower_flrp64 = true,
769f464c52Smaya   .lower_fsign = true,
777ec681f3Smrg   .lower_rotate = true,
787ec681f3Smrg   .lower_fdot = true,
797ec681f3Smrg   .lower_fdph = true,
807ec681f3Smrg   .lower_insert_byte = true,
817ec681f3Smrg   .lower_insert_word = true,
827ec681f3Smrg   .lower_bitops = true,
837ec681f3Smrg   .lower_vector_cmp = true,
847ec681f3Smrg   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
859f464c52Smaya};
869f464c52Smaya
879f464c52Smayaconst void *
889f464c52Smayalima_program_get_compiler_options(enum pipe_shader_type shader)
899f464c52Smaya{
909f464c52Smaya   switch (shader) {
919f464c52Smaya   case PIPE_SHADER_VERTEX:
929f464c52Smaya      return &vs_nir_options;
939f464c52Smaya   case PIPE_SHADER_FRAGMENT:
949f464c52Smaya      return &fs_nir_options;
959f464c52Smaya   default:
969f464c52Smaya      return NULL;
979f464c52Smaya   }
989f464c52Smaya}
999f464c52Smaya
/* Size callback passed to nir_lower_io: the number of attribute slots
 * taken by 'type'.  The 'bindless' argument is part of the callback
 * signature but is not consulted here.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   (void)bindless;

   const int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
1059f464c52Smaya
1067ec681f3Smrgvoid
1079f464c52Smayalima_program_optimize_vs_nir(struct nir_shader *s)
1089f464c52Smaya{
1099f464c52Smaya   bool progress;
1109f464c52Smaya
1117ec681f3Smrg   NIR_PASS_V(s, nir_lower_viewport_transform);
1127ec681f3Smrg   NIR_PASS_V(s, nir_lower_point_size, 1.0f, 100.0f);
1137ec681f3Smrg   NIR_PASS_V(s, nir_lower_io,
1147ec681f3Smrg	      nir_var_shader_in | nir_var_shader_out, type_size, 0);
1159f464c52Smaya   NIR_PASS_V(s, nir_lower_load_const_to_scalar);
1169f464c52Smaya   NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
1179f464c52Smaya   NIR_PASS_V(s, nir_lower_io_to_scalar,
1189f464c52Smaya              nir_var_shader_in|nir_var_shader_out);
1199f464c52Smaya
1209f464c52Smaya   do {
1219f464c52Smaya      progress = false;
1229f464c52Smaya
1239f464c52Smaya      NIR_PASS_V(s, nir_lower_vars_to_ssa);
1247ec681f3Smrg      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
1257ec681f3Smrg      NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1269f464c52Smaya      NIR_PASS(progress, s, nir_copy_prop);
1279f464c52Smaya      NIR_PASS(progress, s, nir_opt_remove_phis);
1289f464c52Smaya      NIR_PASS(progress, s, nir_opt_dce);
1299f464c52Smaya      NIR_PASS(progress, s, nir_opt_dead_cf);
1309f464c52Smaya      NIR_PASS(progress, s, nir_opt_cse);
1319f464c52Smaya      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1329f464c52Smaya      NIR_PASS(progress, s, nir_opt_algebraic);
1337ec681f3Smrg      NIR_PASS(progress, s, lima_nir_lower_ftrunc);
1349f464c52Smaya      NIR_PASS(progress, s, nir_opt_constant_folding);
1359f464c52Smaya      NIR_PASS(progress, s, nir_opt_undef);
1367ec681f3Smrg      NIR_PASS(progress, s, nir_opt_loop_unroll);
1379f464c52Smaya   } while (progress);
1389f464c52Smaya
1397ec681f3Smrg   NIR_PASS_V(s, nir_lower_int_to_float);
1407ec681f3Smrg   /* int_to_float pass generates ftrunc, so lower it */
1417ec681f3Smrg   NIR_PASS(progress, s, lima_nir_lower_ftrunc);
1427ec681f3Smrg   NIR_PASS_V(s, nir_lower_bool_to_float);
1437ec681f3Smrg
1447ec681f3Smrg   NIR_PASS_V(s, nir_copy_prop);
1457ec681f3Smrg   NIR_PASS_V(s, nir_opt_dce);
1467ec681f3Smrg   NIR_PASS_V(s, lima_nir_split_loads);
1479f464c52Smaya   NIR_PASS_V(s, nir_lower_locals_to_regs);
1489f464c52Smaya   NIR_PASS_V(s, nir_convert_from_ssa, true);
1497ec681f3Smrg   NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
1509f464c52Smaya   nir_sweep(s);
1519f464c52Smaya}
1529f464c52Smaya
1537ec681f3Smrgstatic bool
1547ec681f3Smrglima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
1557ec681f3Smrg{
1567ec681f3Smrg   if (instr->type != nir_instr_type_alu)
1577ec681f3Smrg      return false;
1587ec681f3Smrg
1597ec681f3Smrg   nir_alu_instr *alu = nir_instr_as_alu(instr);
1607ec681f3Smrg   switch (alu->op) {
1617ec681f3Smrg   case nir_op_frcp:
1627ec681f3Smrg   case nir_op_frsq:
1637ec681f3Smrg   case nir_op_flog2:
1647ec681f3Smrg   case nir_op_fexp2:
1657ec681f3Smrg   case nir_op_fsqrt:
1667ec681f3Smrg   case nir_op_fsin:
1677ec681f3Smrg   case nir_op_fcos:
1687ec681f3Smrg      return true;
1697ec681f3Smrg   default:
1707ec681f3Smrg      break;
1717ec681f3Smrg   }
1727ec681f3Smrg
1737ec681f3Smrg   /* nir vec4 fcsel assumes that each component of the condition will be
1747ec681f3Smrg    * used to select the same component from the two options, but Utgard PP
1757ec681f3Smrg    * has only 1 component condition. If all condition components are not the
1767ec681f3Smrg    * same we need to lower it to scalar.
1777ec681f3Smrg    */
1787ec681f3Smrg   switch (alu->op) {
1797ec681f3Smrg   case nir_op_bcsel:
1807ec681f3Smrg   case nir_op_fcsel:
1817ec681f3Smrg      break;
1827ec681f3Smrg   default:
1837ec681f3Smrg      return false;
1847ec681f3Smrg   }
1857ec681f3Smrg
1867ec681f3Smrg   int num_components = nir_dest_num_components(alu->dest.dest);
1877ec681f3Smrg
1887ec681f3Smrg   uint8_t swizzle = alu->src[0].swizzle[0];
1897ec681f3Smrg
1907ec681f3Smrg   for (int i = 1; i < num_components; i++)
1917ec681f3Smrg      if (alu->src[0].swizzle[i] != swizzle)
1927ec681f3Smrg         return true;
1937ec681f3Smrg
1947ec681f3Smrg   return false;
1957ec681f3Smrg}
1967ec681f3Smrg
1977ec681f3Smrgstatic bool
1987ec681f3Smrglima_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask,
1997ec681f3Smrg                           const void *data)
2007ec681f3Smrg{
2017ec681f3Smrg   assert(writemask > 0);
2027ec681f3Smrg   if (util_bitcount(writemask) == 1)
2037ec681f3Smrg      return true;
2047ec681f3Smrg
2057ec681f3Smrg   return !lima_alu_to_scalar_filter_cb(instr, data);
2067ec681f3Smrg}
2077ec681f3Smrg
2087ec681f3Smrgvoid
2097ec681f3Smrglima_program_optimize_fs_nir(struct nir_shader *s,
2107ec681f3Smrg                             struct nir_lower_tex_options *tex_options)
2119f464c52Smaya{
2129f464c52Smaya   bool progress;
2139f464c52Smaya
2149f464c52Smaya   NIR_PASS_V(s, nir_lower_fragcoord_wtrans);
2157ec681f3Smrg   NIR_PASS_V(s, nir_lower_io,
2167ec681f3Smrg	      nir_var_shader_in | nir_var_shader_out, type_size, 0);
2179f464c52Smaya   NIR_PASS_V(s, nir_lower_regs_to_ssa);
2187ec681f3Smrg   NIR_PASS_V(s, nir_lower_tex, tex_options);
2197ec681f3Smrg
2207ec681f3Smrg   do {
2217ec681f3Smrg      progress = false;
2227ec681f3Smrg      NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL);
2237ec681f3Smrg   } while (progress);
2249f464c52Smaya
2259f464c52Smaya   do {
2269f464c52Smaya      progress = false;
2279f464c52Smaya
2289f464c52Smaya      NIR_PASS_V(s, nir_lower_vars_to_ssa);
2297ec681f3Smrg      NIR_PASS(progress, s, nir_lower_alu_to_scalar, lima_alu_to_scalar_filter_cb, NULL);
2309f464c52Smaya      NIR_PASS(progress, s, nir_copy_prop);
2319f464c52Smaya      NIR_PASS(progress, s, nir_opt_remove_phis);
2329f464c52Smaya      NIR_PASS(progress, s, nir_opt_dce);
2339f464c52Smaya      NIR_PASS(progress, s, nir_opt_dead_cf);
2349f464c52Smaya      NIR_PASS(progress, s, nir_opt_cse);
2359f464c52Smaya      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
2369f464c52Smaya      NIR_PASS(progress, s, nir_opt_algebraic);
2379f464c52Smaya      NIR_PASS(progress, s, nir_opt_constant_folding);
2389f464c52Smaya      NIR_PASS(progress, s, nir_opt_undef);
2397ec681f3Smrg      NIR_PASS(progress, s, nir_opt_loop_unroll);
2407ec681f3Smrg      NIR_PASS(progress, s, lima_nir_split_load_input);
2419f464c52Smaya   } while (progress);
2429f464c52Smaya
2437ec681f3Smrg   NIR_PASS_V(s, nir_lower_int_to_float);
2447ec681f3Smrg   NIR_PASS_V(s, nir_lower_bool_to_float);
2457ec681f3Smrg
2467ec681f3Smrg   /* Some ops must be lowered after being converted from int ops,
2477ec681f3Smrg    * so re-run nir_opt_algebraic after int lowering. */
2487ec681f3Smrg   do {
2497ec681f3Smrg      progress = false;
2507ec681f3Smrg      NIR_PASS(progress, s, nir_opt_algebraic);
2517ec681f3Smrg   } while (progress);
2527ec681f3Smrg
2537ec681f3Smrg   /* Must be run after optimization loop */
2547ec681f3Smrg   NIR_PASS_V(s, lima_nir_scale_trig);
2557ec681f3Smrg
2569f464c52Smaya   /* Lower modifiers */
2579f464c52Smaya   NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods);
2589f464c52Smaya   NIR_PASS_V(s, nir_copy_prop);
2599f464c52Smaya   NIR_PASS_V(s, nir_opt_dce);
2609f464c52Smaya
2619f464c52Smaya   NIR_PASS_V(s, nir_lower_locals_to_regs);
2629f464c52Smaya   NIR_PASS_V(s, nir_convert_from_ssa, true);
2637ec681f3Smrg   NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
2649f464c52Smaya
2659f464c52Smaya   NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
2667ec681f3Smrg   NIR_PASS_V(s, nir_lower_vec_to_movs, lima_vec_to_movs_filter_cb, NULL);
2677ec681f3Smrg   NIR_PASS_V(s, nir_opt_dce); /* clean up any new dead code from vec to movs */
2687ec681f3Smrg
2697ec681f3Smrg   NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
2707ec681f3Smrg   NIR_PASS_V(s, lima_nir_duplicate_load_inputs);
2717ec681f3Smrg   NIR_PASS_V(s, lima_nir_duplicate_load_consts);
2729f464c52Smaya
2739f464c52Smaya   nir_sweep(s);
2749f464c52Smaya}
2759f464c52Smaya
2767ec681f3Smrgstatic bool
2777ec681f3Smrglima_fs_compile_shader(struct lima_context *ctx,
2787ec681f3Smrg                       struct lima_fs_key *key,
2797ec681f3Smrg                       struct lima_fs_uncompiled_shader *ufs,
2807ec681f3Smrg                       struct lima_fs_compiled_shader *fs)
2817ec681f3Smrg{
2827ec681f3Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
2837ec681f3Smrg   nir_shader *nir = nir_shader_clone(fs, ufs->base.ir.nir);
2847ec681f3Smrg
2857ec681f3Smrg   struct nir_lower_tex_options tex_options = {
2867ec681f3Smrg      .lower_txp = ~0u,
2877ec681f3Smrg      .swizzle_result = ~0u,
2887ec681f3Smrg   };
2897ec681f3Smrg
2907ec681f3Smrg   for (int i = 0; i < ARRAY_SIZE(key->tex); i++) {
2917ec681f3Smrg      for (int j = 0; j < 4; j++)
2927ec681f3Smrg         tex_options.swizzles[i][j] = key->tex[i].swizzle[j];
2937ec681f3Smrg   }
2947ec681f3Smrg
2957ec681f3Smrg   lima_program_optimize_fs_nir(nir, &tex_options);
2967ec681f3Smrg
2977ec681f3Smrg   if (lima_debug & LIMA_DEBUG_PP)
2987ec681f3Smrg      nir_print_shader(nir, stdout);
2997ec681f3Smrg
3007ec681f3Smrg   if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->debug)) {
3017ec681f3Smrg      ralloc_free(nir);
3027ec681f3Smrg      return false;
3037ec681f3Smrg   }
3047ec681f3Smrg
3057ec681f3Smrg   fs->state.uses_discard = nir->info.fs.uses_discard;
3067ec681f3Smrg   ralloc_free(nir);
3077ec681f3Smrg
3087ec681f3Smrg   return true;
3097ec681f3Smrg}
3107ec681f3Smrg
3117ec681f3Smrgstatic bool
3127ec681f3Smrglima_fs_upload_shader(struct lima_context *ctx,
3137ec681f3Smrg                      struct lima_fs_compiled_shader *fs)
3147ec681f3Smrg{
3157ec681f3Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
3167ec681f3Smrg
3177ec681f3Smrg   fs->bo = lima_bo_create(screen, fs->state.shader_size, 0);
3187ec681f3Smrg   if (!fs->bo) {
3197ec681f3Smrg      fprintf(stderr, "lima: create fs shader bo fail\n");
3207ec681f3Smrg      return false;
3217ec681f3Smrg   }
3227ec681f3Smrg
3237ec681f3Smrg   memcpy(lima_bo_map(fs->bo), fs->shader, fs->state.shader_size);
3247ec681f3Smrg
3257ec681f3Smrg   return true;
3267ec681f3Smrg}
3277ec681f3Smrg
3287ec681f3Smrgstatic struct lima_fs_compiled_shader *
3297ec681f3Smrglima_get_compiled_fs(struct lima_context *ctx,
3307ec681f3Smrg                     struct lima_fs_uncompiled_shader *ufs,
3317ec681f3Smrg                     struct lima_fs_key *key)
3327ec681f3Smrg{
3337ec681f3Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
3347ec681f3Smrg   struct hash_table *ht;
3357ec681f3Smrg   uint32_t key_size;
3367ec681f3Smrg
3377ec681f3Smrg   ht = ctx->fs_cache;
3387ec681f3Smrg   key_size = sizeof(struct lima_fs_key);
3397ec681f3Smrg
3407ec681f3Smrg   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
3417ec681f3Smrg   if (entry)
3427ec681f3Smrg      return entry->data;
3437ec681f3Smrg
3447ec681f3Smrg   /* Not on memory cache, try disk cache */
3457ec681f3Smrg   struct lima_fs_compiled_shader *fs =
3467ec681f3Smrg      lima_fs_disk_cache_retrieve(screen->disk_cache, key);
3477ec681f3Smrg
3487ec681f3Smrg   if (!fs) {
3497ec681f3Smrg      /* Not on disk cache, compile and insert into disk cache*/
3507ec681f3Smrg      fs = rzalloc(NULL, struct lima_fs_compiled_shader);
3517ec681f3Smrg      if (!fs)
3527ec681f3Smrg         return NULL;
3537ec681f3Smrg
3547ec681f3Smrg      if (!lima_fs_compile_shader(ctx, key, ufs, fs))
3557ec681f3Smrg         goto err;
3567ec681f3Smrg
3577ec681f3Smrg      lima_fs_disk_cache_store(screen->disk_cache, key, fs);
3587ec681f3Smrg   }
3597ec681f3Smrg
3607ec681f3Smrg   if (!lima_fs_upload_shader(ctx, fs))
3617ec681f3Smrg      goto err;
3627ec681f3Smrg
3637ec681f3Smrg   ralloc_free(fs->shader);
3647ec681f3Smrg   fs->shader = NULL;
3657ec681f3Smrg
3667ec681f3Smrg   /* Insert into memory cache */
3677ec681f3Smrg   struct lima_key *dup_key;
3687ec681f3Smrg   dup_key = rzalloc_size(fs, key_size);
3697ec681f3Smrg   memcpy(dup_key, key, key_size);
3707ec681f3Smrg   _mesa_hash_table_insert(ht, dup_key, fs);
3717ec681f3Smrg
3727ec681f3Smrg   return fs;
3737ec681f3Smrg
3747ec681f3Smrgerr:
3757ec681f3Smrg   ralloc_free(fs);
3767ec681f3Smrg   return NULL;
3777ec681f3Smrg}
3787ec681f3Smrg
3799f464c52Smayastatic void *
3809f464c52Smayalima_create_fs_state(struct pipe_context *pctx,
3819f464c52Smaya                     const struct pipe_shader_state *cso)
3829f464c52Smaya{
3837ec681f3Smrg   struct lima_context *ctx = lima_context(pctx);
3847ec681f3Smrg   struct lima_fs_uncompiled_shader *so = rzalloc(NULL, struct lima_fs_uncompiled_shader);
3859f464c52Smaya
3869f464c52Smaya   if (!so)
3879f464c52Smaya      return NULL;
3889f464c52Smaya
3899f464c52Smaya   nir_shader *nir;
3909f464c52Smaya   if (cso->type == PIPE_SHADER_IR_NIR)
3917ec681f3Smrg      /* The backend takes ownership of the NIR shader on state
3927ec681f3Smrg       * creation. */
3939f464c52Smaya      nir = cso->ir.nir;
3949f464c52Smaya   else {
3959f464c52Smaya      assert(cso->type == PIPE_SHADER_IR_TGSI);
3969f464c52Smaya
3977ec681f3Smrg      nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
3989f464c52Smaya   }
3999f464c52Smaya
4007ec681f3Smrg   so->base.type = PIPE_SHADER_IR_NIR;
4017ec681f3Smrg   so->base.ir.nir = nir;
4027ec681f3Smrg
4037ec681f3Smrg   /* Serialize the NIR to a binary blob that we can hash for the disk
4047ec681f3Smrg    * cache.  Drop unnecessary information (like variable names)
4057ec681f3Smrg    * so the serialized NIR is smaller, and also to let us detect more
4067ec681f3Smrg    * isomorphic shaders when hashing, increasing cache hits.
4077ec681f3Smrg    */
4087ec681f3Smrg   struct blob blob;
4097ec681f3Smrg   blob_init(&blob);
4107ec681f3Smrg   nir_serialize(&blob, nir, true);
4117ec681f3Smrg   _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
4127ec681f3Smrg   blob_finish(&blob);
4137ec681f3Smrg
4147ec681f3Smrg   if (lima_debug & LIMA_DEBUG_PRECOMPILE) {
4157ec681f3Smrg      /* Trigger initial compilation with default settings */
4167ec681f3Smrg      struct lima_fs_key key;
4177ec681f3Smrg      memset(&key, 0, sizeof(key));
4187ec681f3Smrg      memcpy(key.nir_sha1, so->nir_sha1, sizeof(so->nir_sha1));
4197ec681f3Smrg      for (int i = 0; i < ARRAY_SIZE(key.tex); i++) {
4207ec681f3Smrg         for (int j = 0; j < 4; j++)
4217ec681f3Smrg            key.tex[i].swizzle[j] = j;
4227ec681f3Smrg      }
4237ec681f3Smrg      lima_get_compiled_fs(ctx, so, &key);
4249f464c52Smaya   }
4259f464c52Smaya
4269f464c52Smaya   return so;
4279f464c52Smaya}
4289f464c52Smaya
4299f464c52Smayastatic void
4309f464c52Smayalima_bind_fs_state(struct pipe_context *pctx, void *hwcso)
4319f464c52Smaya{
4329f464c52Smaya   struct lima_context *ctx = lima_context(pctx);
4339f464c52Smaya
4347ec681f3Smrg   ctx->uncomp_fs = hwcso;
4357ec681f3Smrg   ctx->dirty |= LIMA_CONTEXT_DIRTY_UNCOMPILED_FS;
4369f464c52Smaya}
4379f464c52Smaya
4389f464c52Smayastatic void
4399f464c52Smayalima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
4409f464c52Smaya{
4417ec681f3Smrg   struct lima_context *ctx = lima_context(pctx);
4427ec681f3Smrg   struct lima_fs_uncompiled_shader *so = hwcso;
4439f464c52Smaya
4447ec681f3Smrg   hash_table_foreach(ctx->fs_cache, entry) {
4457ec681f3Smrg      const struct lima_fs_key *key = entry->key;
4467ec681f3Smrg      if (!memcmp(key->nir_sha1, so->nir_sha1, sizeof(so->nir_sha1))) {
4477ec681f3Smrg         struct lima_fs_compiled_shader *fs = entry->data;
4487ec681f3Smrg         _mesa_hash_table_remove(ctx->fs_cache, entry);
4497ec681f3Smrg         if (fs->bo)
4507ec681f3Smrg            lima_bo_unreference(fs->bo);
4519f464c52Smaya
4527ec681f3Smrg         if (fs == ctx->fs)
4537ec681f3Smrg            ctx->fs = NULL;
4547ec681f3Smrg
4557ec681f3Smrg         ralloc_free(fs);
4567ec681f3Smrg      }
4577ec681f3Smrg   }
4587ec681f3Smrg
4597ec681f3Smrg   ralloc_free(so->base.ir.nir);
4609f464c52Smaya   ralloc_free(so);
4619f464c52Smaya}
4629f464c52Smaya
4637ec681f3Smrgstatic bool
4647ec681f3Smrglima_vs_compile_shader(struct lima_context *ctx,
4657ec681f3Smrg                       struct lima_vs_key *key,
4667ec681f3Smrg                       struct lima_vs_uncompiled_shader *uvs,
4677ec681f3Smrg                       struct lima_vs_compiled_shader *vs)
4687ec681f3Smrg{
4697ec681f3Smrg   nir_shader *nir = nir_shader_clone(vs, uvs->base.ir.nir);
4707ec681f3Smrg
4717ec681f3Smrg   lima_program_optimize_vs_nir(nir);
4727ec681f3Smrg
4737ec681f3Smrg   if (lima_debug & LIMA_DEBUG_GP)
4747ec681f3Smrg      nir_print_shader(nir, stdout);
4757ec681f3Smrg
4767ec681f3Smrg   if (!gpir_compile_nir(vs, nir, &ctx->debug)) {
4777ec681f3Smrg      ralloc_free(nir);
4787ec681f3Smrg      return false;
4797ec681f3Smrg   }
4807ec681f3Smrg
4817ec681f3Smrg   ralloc_free(nir);
4827ec681f3Smrg
4837ec681f3Smrg   return true;
4847ec681f3Smrg}
4857ec681f3Smrg
4867ec681f3Smrgstatic bool
4877ec681f3Smrglima_vs_upload_shader(struct lima_context *ctx,
4887ec681f3Smrg                      struct lima_vs_compiled_shader *vs)
4899f464c52Smaya{
4907ec681f3Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
4917ec681f3Smrg   vs->bo = lima_bo_create(screen, vs->state.shader_size, 0);
4929f464c52Smaya   if (!vs->bo) {
4937ec681f3Smrg      fprintf(stderr, "lima: create vs shader bo fail\n");
4947ec681f3Smrg      return false;
4957ec681f3Smrg   }
4969f464c52Smaya
4977ec681f3Smrg   memcpy(lima_bo_map(vs->bo), vs->shader, vs->state.shader_size);
4987ec681f3Smrg
4997ec681f3Smrg   return true;
5007ec681f3Smrg}
5017ec681f3Smrg
5027ec681f3Smrgstatic struct lima_vs_compiled_shader *
5037ec681f3Smrglima_get_compiled_vs(struct lima_context *ctx,
5047ec681f3Smrg                     struct lima_vs_uncompiled_shader *uvs,
5057ec681f3Smrg                     struct lima_vs_key *key)
5067ec681f3Smrg{
5077ec681f3Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
5087ec681f3Smrg   struct hash_table *ht;
5097ec681f3Smrg   uint32_t key_size;
5107ec681f3Smrg
5117ec681f3Smrg   ht = ctx->vs_cache;
5127ec681f3Smrg   key_size = sizeof(struct lima_vs_key);
5137ec681f3Smrg
5147ec681f3Smrg   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
5157ec681f3Smrg   if (entry)
5167ec681f3Smrg      return entry->data;
5177ec681f3Smrg
5187ec681f3Smrg   /* Not on memory cache, try disk cache */
5197ec681f3Smrg   struct lima_vs_compiled_shader *vs =
5207ec681f3Smrg      lima_vs_disk_cache_retrieve(screen->disk_cache, key);
5217ec681f3Smrg
5227ec681f3Smrg   if (!vs) {
5237ec681f3Smrg      /* Not on disk cache, compile and insert into disk cache */
5247ec681f3Smrg      vs = rzalloc(NULL, struct lima_vs_compiled_shader);
5257ec681f3Smrg      if (!vs)
5267ec681f3Smrg         return NULL;
5277ec681f3Smrg      if (!lima_vs_compile_shader(ctx, key, uvs, vs))
5287ec681f3Smrg         goto err;
5297ec681f3Smrg
5307ec681f3Smrg      lima_vs_disk_cache_store(screen->disk_cache, key, vs);
5319f464c52Smaya   }
5329f464c52Smaya
5337ec681f3Smrg   if (!lima_vs_upload_shader(ctx, vs))
5347ec681f3Smrg      goto err;
5357ec681f3Smrg
5367ec681f3Smrg   ralloc_free(vs->shader);
5377ec681f3Smrg   vs->shader = NULL;
5387ec681f3Smrg
5397ec681f3Smrg   struct lima_key *dup_key;
5407ec681f3Smrg   dup_key = rzalloc_size(vs, key_size);
5417ec681f3Smrg   memcpy(dup_key, key, key_size);
5427ec681f3Smrg   _mesa_hash_table_insert(ht, dup_key, vs);
5437ec681f3Smrg
5447ec681f3Smrg   return vs;
5457ec681f3Smrg
5467ec681f3Smrgerr:
5477ec681f3Smrg   ralloc_free(vs);
5487ec681f3Smrg   return NULL;
5497ec681f3Smrg}
5507ec681f3Smrg
5517ec681f3Smrgbool
5527ec681f3Smrglima_update_vs_state(struct lima_context *ctx)
5537ec681f3Smrg{
5547ec681f3Smrg   if (!(ctx->dirty & LIMA_CONTEXT_DIRTY_UNCOMPILED_VS)) {
5557ec681f3Smrg      return true;
5567ec681f3Smrg   }
5577ec681f3Smrg
5587ec681f3Smrg   struct lima_vs_key local_key;
5597ec681f3Smrg   struct lima_vs_key *key = &local_key;
5607ec681f3Smrg   memset(key, 0, sizeof(*key));
5617ec681f3Smrg   memcpy(key->nir_sha1, ctx->uncomp_vs->nir_sha1,
5627ec681f3Smrg          sizeof(ctx->uncomp_vs->nir_sha1));
5637ec681f3Smrg
5647ec681f3Smrg   struct lima_vs_compiled_shader *old_vs = ctx->vs;
5657ec681f3Smrg   struct lima_vs_compiled_shader *vs = lima_get_compiled_vs(ctx,
5667ec681f3Smrg                                                             ctx->uncomp_vs,
5677ec681f3Smrg                                                             key);
5687ec681f3Smrg   if (!vs)
5697ec681f3Smrg      return false;
5707ec681f3Smrg
5717ec681f3Smrg   ctx->vs = vs;
5727ec681f3Smrg
5737ec681f3Smrg   if (ctx->vs != old_vs)
5747ec681f3Smrg      ctx->dirty |= LIMA_CONTEXT_DIRTY_COMPILED_VS;
5757ec681f3Smrg
5769f464c52Smaya   return true;
5779f464c52Smaya}
5789f464c52Smaya
5799f464c52Smayabool
5809f464c52Smayalima_update_fs_state(struct lima_context *ctx)
5819f464c52Smaya{
5827ec681f3Smrg   if (!(ctx->dirty & (LIMA_CONTEXT_DIRTY_UNCOMPILED_FS |
5837ec681f3Smrg                       LIMA_CONTEXT_DIRTY_TEXTURES))) {
5847ec681f3Smrg      return true;
5857ec681f3Smrg   }
5869f464c52Smaya
5877ec681f3Smrg   struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj;
5887ec681f3Smrg   struct lima_fs_key local_key;
5897ec681f3Smrg   struct lima_fs_key *key = &local_key;
5907ec681f3Smrg   memset(key, 0, sizeof(*key));
5917ec681f3Smrg   memcpy(key->nir_sha1, ctx->uncomp_fs->nir_sha1,
5927ec681f3Smrg          sizeof(ctx->uncomp_fs->nir_sha1));
5937ec681f3Smrg
5947ec681f3Smrg   for (int i = 0; i < lima_tex->num_textures; i++) {
5957ec681f3Smrg      struct lima_sampler_view *sampler = lima_sampler_view(lima_tex->textures[i]);
5967ec681f3Smrg      for (int j = 0; j < 4; j++)
5977ec681f3Smrg         key->tex[i].swizzle[j] = sampler->swizzle[j];
5989f464c52Smaya   }
5999f464c52Smaya
6007ec681f3Smrg   /* Fill rest with identity swizzle */
6017ec681f3Smrg   uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
6027ec681f3Smrg                           PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
6037ec681f3Smrg   for (int i = lima_tex->num_textures; i < ARRAY_SIZE(key->tex); i++)
6047ec681f3Smrg      memcpy(key->tex[i].swizzle, identity, 4);
6057ec681f3Smrg
6067ec681f3Smrg   struct lima_fs_compiled_shader *old_fs = ctx->fs;
6077ec681f3Smrg
6087ec681f3Smrg   struct lima_fs_compiled_shader *fs = lima_get_compiled_fs(ctx,
6097ec681f3Smrg                                                             ctx->uncomp_fs,
6107ec681f3Smrg                                                             key);
6117ec681f3Smrg   if (!fs)
6127ec681f3Smrg      return false;
6137ec681f3Smrg
6147ec681f3Smrg   ctx->fs = fs;
6157ec681f3Smrg
6167ec681f3Smrg   if (ctx->fs != old_fs)
6177ec681f3Smrg      ctx->dirty |= LIMA_CONTEXT_DIRTY_COMPILED_FS;
6187ec681f3Smrg
6199f464c52Smaya   return true;
6209f464c52Smaya}
6219f464c52Smaya
6229f464c52Smayastatic void *
6239f464c52Smayalima_create_vs_state(struct pipe_context *pctx,
6249f464c52Smaya                     const struct pipe_shader_state *cso)
6259f464c52Smaya{
6267ec681f3Smrg   struct lima_context *ctx = lima_context(pctx);
6277ec681f3Smrg   struct lima_vs_uncompiled_shader *so = rzalloc(NULL, struct lima_vs_uncompiled_shader);
6289f464c52Smaya
6299f464c52Smaya   if (!so)
6309f464c52Smaya      return NULL;
6319f464c52Smaya
6329f464c52Smaya   nir_shader *nir;
6339f464c52Smaya   if (cso->type == PIPE_SHADER_IR_NIR)
6347ec681f3Smrg      /* The backend takes ownership of the NIR shader on state
6357ec681f3Smrg       * creation. */
6369f464c52Smaya      nir = cso->ir.nir;
6379f464c52Smaya   else {
6389f464c52Smaya      assert(cso->type == PIPE_SHADER_IR_TGSI);
6399f464c52Smaya
6407ec681f3Smrg      nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
6419f464c52Smaya   }
6429f464c52Smaya
6437ec681f3Smrg   so->base.type = PIPE_SHADER_IR_NIR;
6447ec681f3Smrg   so->base.ir.nir = nir;
6457ec681f3Smrg
6467ec681f3Smrg   /* Serialize the NIR to a binary blob that we can hash for the disk
6477ec681f3Smrg    * cache.  Drop unnecessary information (like variable names)
6487ec681f3Smrg    * so the serialized NIR is smaller, and also to let us detect more
6497ec681f3Smrg    * isomorphic shaders when hashing, increasing cache hits.
6507ec681f3Smrg    */
6517ec681f3Smrg   struct blob blob;
6527ec681f3Smrg   blob_init(&blob);
6537ec681f3Smrg   nir_serialize(&blob, nir, true);
6547ec681f3Smrg   _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
6557ec681f3Smrg   blob_finish(&blob);
6567ec681f3Smrg
6577ec681f3Smrg   if (lima_debug & LIMA_DEBUG_PRECOMPILE) {
6587ec681f3Smrg      /* Trigger initial compilation with default settings */
6597ec681f3Smrg      struct lima_vs_key key;
6607ec681f3Smrg      memset(&key, 0, sizeof(key));
6617ec681f3Smrg      memcpy(key.nir_sha1, so->nir_sha1, sizeof(so->nir_sha1));
6627ec681f3Smrg      lima_get_compiled_vs(ctx, so, &key);
6639f464c52Smaya   }
6649f464c52Smaya
6659f464c52Smaya   return so;
6669f464c52Smaya}
6679f464c52Smaya
6689f464c52Smayastatic void
6699f464c52Smayalima_bind_vs_state(struct pipe_context *pctx, void *hwcso)
6709f464c52Smaya{
6719f464c52Smaya   struct lima_context *ctx = lima_context(pctx);
6729f464c52Smaya
6737ec681f3Smrg   ctx->uncomp_vs = hwcso;
6747ec681f3Smrg   ctx->dirty |= LIMA_CONTEXT_DIRTY_UNCOMPILED_VS;
6759f464c52Smaya}
6769f464c52Smaya
6779f464c52Smayastatic void
6789f464c52Smayalima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
6799f464c52Smaya{
6807ec681f3Smrg   struct lima_context *ctx = lima_context(pctx);
6817ec681f3Smrg   struct lima_vs_uncompiled_shader *so = hwcso;
6827ec681f3Smrg
6837ec681f3Smrg   hash_table_foreach(ctx->vs_cache, entry) {
6847ec681f3Smrg      const struct lima_vs_key *key = entry->key;
6857ec681f3Smrg      if (!memcmp(key->nir_sha1, so->nir_sha1, sizeof(so->nir_sha1))) {
6867ec681f3Smrg         struct lima_vs_compiled_shader *vs = entry->data;
6877ec681f3Smrg         _mesa_hash_table_remove(ctx->vs_cache, entry);
6887ec681f3Smrg         if (vs->bo)
6897ec681f3Smrg            lima_bo_unreference(vs->bo);
6909f464c52Smaya
6917ec681f3Smrg         if (vs == ctx->vs)
6927ec681f3Smrg            ctx->vs = NULL;
6937ec681f3Smrg
6947ec681f3Smrg         ralloc_free(vs);
6957ec681f3Smrg      }
6967ec681f3Smrg   }
6979f464c52Smaya
6987ec681f3Smrg   ralloc_free(so->base.ir.nir);
6999f464c52Smaya   ralloc_free(so);
7009f464c52Smaya}
7019f464c52Smaya
7027ec681f3Smrgstatic uint32_t
7037ec681f3Smrglima_fs_cache_hash(const void *key)
7047ec681f3Smrg{
7057ec681f3Smrg   return _mesa_hash_data(key, sizeof(struct lima_fs_key));
7067ec681f3Smrg}
7077ec681f3Smrg
7087ec681f3Smrgstatic uint32_t
7097ec681f3Smrglima_vs_cache_hash(const void *key)
7107ec681f3Smrg{
7117ec681f3Smrg   return _mesa_hash_data(key, sizeof(struct lima_vs_key));
7127ec681f3Smrg}
7137ec681f3Smrg
7147ec681f3Smrgstatic bool
7157ec681f3Smrglima_fs_cache_compare(const void *key1, const void *key2)
7167ec681f3Smrg{
7177ec681f3Smrg   return memcmp(key1, key2, sizeof(struct lima_fs_key)) == 0;
7187ec681f3Smrg}
7197ec681f3Smrg
7207ec681f3Smrgstatic bool
7217ec681f3Smrglima_vs_cache_compare(const void *key1, const void *key2)
7227ec681f3Smrg{
7237ec681f3Smrg   return memcmp(key1, key2, sizeof(struct lima_vs_key)) == 0;
7247ec681f3Smrg}
7257ec681f3Smrg
7269f464c52Smayavoid
7279f464c52Smayalima_program_init(struct lima_context *ctx)
7289f464c52Smaya{
7299f464c52Smaya   ctx->base.create_fs_state = lima_create_fs_state;
7309f464c52Smaya   ctx->base.bind_fs_state = lima_bind_fs_state;
7319f464c52Smaya   ctx->base.delete_fs_state = lima_delete_fs_state;
7329f464c52Smaya
7339f464c52Smaya   ctx->base.create_vs_state = lima_create_vs_state;
7349f464c52Smaya   ctx->base.bind_vs_state = lima_bind_vs_state;
7359f464c52Smaya   ctx->base.delete_vs_state = lima_delete_vs_state;
7367ec681f3Smrg
7377ec681f3Smrg   ctx->fs_cache = _mesa_hash_table_create(ctx, lima_fs_cache_hash,
7387ec681f3Smrg                                           lima_fs_cache_compare);
7397ec681f3Smrg   ctx->vs_cache = _mesa_hash_table_create(ctx, lima_vs_cache_hash,
7407ec681f3Smrg                                           lima_vs_cache_compare);
7417ec681f3Smrg}
7427ec681f3Smrg
7437ec681f3Smrgvoid
7447ec681f3Smrglima_program_fini(struct lima_context *ctx)
7457ec681f3Smrg{
7467ec681f3Smrg   hash_table_foreach(ctx->vs_cache, entry) {
7477ec681f3Smrg      struct lima_vs_compiled_shader *vs = entry->data;
7487ec681f3Smrg      if (vs->bo)
7497ec681f3Smrg         lima_bo_unreference(vs->bo);
7507ec681f3Smrg      ralloc_free(vs);
7517ec681f3Smrg      _mesa_hash_table_remove(ctx->vs_cache, entry);
7527ec681f3Smrg   }
7537ec681f3Smrg
7547ec681f3Smrg   hash_table_foreach(ctx->fs_cache, entry) {
7557ec681f3Smrg      struct lima_fs_compiled_shader *fs = entry->data;
7567ec681f3Smrg      if (fs->bo)
7577ec681f3Smrg         lima_bo_unreference(fs->bo);
7587ec681f3Smrg      ralloc_free(fs);
7597ec681f3Smrg      _mesa_hash_table_remove(ctx->fs_cache, entry);
7607ec681f3Smrg   }
7619f464c52Smaya}
762