19f464c52Smaya/* 29f464c52Smaya * Copyright (c) 2017-2019 Lima Project 39f464c52Smaya * 49f464c52Smaya * Permission is hereby granted, free of charge, to any person obtaining a 59f464c52Smaya * copy of this software and associated documentation files (the "Software"), 69f464c52Smaya * to deal in the Software without restriction, including without limitation 79f464c52Smaya * the rights to use, copy, modify, merge, publish, distribute, sub license, 89f464c52Smaya * and/or sell copies of the Software, and to permit persons to whom the 99f464c52Smaya * Software is furnished to do so, subject to the following conditions: 109f464c52Smaya * 119f464c52Smaya * The above copyright notice and this permission notice (including the 129f464c52Smaya * next paragraph) shall be included in all copies or substantial portions 139f464c52Smaya * of the Software. 149f464c52Smaya * 159f464c52Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 169f464c52Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 179f464c52Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 189f464c52Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 199f464c52Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 209f464c52Smaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 219f464c52Smaya * DEALINGS IN THE SOFTWARE. 229f464c52Smaya * 239f464c52Smaya */ 249f464c52Smaya 259f464c52Smaya#include "util/u_memory.h" 269f464c52Smaya#include "util/ralloc.h" 279f464c52Smaya#include "util/u_debug.h" 289f464c52Smaya 299f464c52Smaya#include "tgsi/tgsi_dump.h" 309f464c52Smaya#include "compiler/nir/nir.h" 317ec681f3Smrg#include "compiler/nir/nir_serialize.h" 329f464c52Smaya#include "nir/tgsi_to_nir.h" 339f464c52Smaya 349f464c52Smaya#include "pipe/p_state.h" 359f464c52Smaya 369f464c52Smaya#include "lima_screen.h" 379f464c52Smaya#include "lima_context.h" 387ec681f3Smrg#include "lima_job.h" 399f464c52Smaya#include "lima_program.h" 409f464c52Smaya#include "lima_bo.h" 417ec681f3Smrg#include "lima_disk_cache.h" 427ec681f3Smrg 439f464c52Smaya#include "ir/lima_ir.h" 449f464c52Smaya 459f464c52Smayastatic const nir_shader_compiler_options vs_nir_options = { 467ec681f3Smrg .lower_ffma16 = true, 477ec681f3Smrg .lower_ffma32 = true, 487ec681f3Smrg .lower_ffma64 = true, 499f464c52Smaya .lower_fpow = true, 509f464c52Smaya .lower_ffract = true, 519f464c52Smaya .lower_fdiv = true, 527ec681f3Smrg .lower_fmod = true, 539f464c52Smaya .lower_fsqrt = true, 549f464c52Smaya .lower_flrp32 = true, 559f464c52Smaya .lower_flrp64 = true, 569f464c52Smaya /* could be implemented by clamp */ 579f464c52Smaya .lower_fsat = true, 587ec681f3Smrg .lower_bitops = true, 597ec681f3Smrg .lower_rotate = true, 607ec681f3Smrg .lower_sincos = true, 617ec681f3Smrg .lower_fceil = true, 627ec681f3Smrg .lower_insert_byte = true, 637ec681f3Smrg .lower_insert_word = true, 647ec681f3Smrg .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), 659f464c52Smaya}; 669f464c52Smaya 679f464c52Smayastatic const nir_shader_compiler_options fs_nir_options = { 687ec681f3Smrg .lower_ffma16 = true, 697ec681f3Smrg .lower_ffma32 = true, 707ec681f3Smrg .lower_ffma64 = true, 719f464c52Smaya .lower_fpow = true, 729f464c52Smaya .lower_fdiv = true, 737ec681f3Smrg .lower_fmod = true, 749f464c52Smaya .lower_flrp32 = true, 759f464c52Smaya .lower_flrp64 = true, 769f464c52Smaya .lower_fsign = true, 777ec681f3Smrg .lower_rotate = true, 787ec681f3Smrg .lower_fdot = true, 797ec681f3Smrg .lower_fdph = true, 807ec681f3Smrg .lower_insert_byte = true, 817ec681f3Smrg .lower_insert_word = true, 827ec681f3Smrg .lower_bitops = true, 837ec681f3Smrg .lower_vector_cmp = true, 847ec681f3Smrg .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), 859f464c52Smaya}; 869f464c52Smaya 879f464c52Smayaconst void * 889f464c52Smayalima_program_get_compiler_options(enum pipe_shader_type shader) 899f464c52Smaya{ 909f464c52Smaya switch (shader) { 919f464c52Smaya case PIPE_SHADER_VERTEX: 929f464c52Smaya return &vs_nir_options; 939f464c52Smaya case PIPE_SHADER_FRAGMENT: 949f464c52Smaya return &fs_nir_options; 959f464c52Smaya default: 969f464c52Smaya return NULL; 979f464c52Smaya } 989f464c52Smaya} 999f464c52Smaya 1009f464c52Smayastatic int 1019f464c52Smayatype_size(const struct glsl_type *type, bool bindless) 1029f464c52Smaya{ 1039f464c52Smaya return glsl_count_attribute_slots(type, false); 1049f464c52Smaya} 1059f464c52Smaya 1067ec681f3Smrgvoid 1079f464c52Smayalima_program_optimize_vs_nir(struct nir_shader *s) 1089f464c52Smaya{ 1099f464c52Smaya bool progress; 1109f464c52Smaya 1117ec681f3Smrg NIR_PASS_V(s, nir_lower_viewport_transform); 1127ec681f3Smrg NIR_PASS_V(s, nir_lower_point_size, 1.0f, 100.0f); 1137ec681f3Smrg NIR_PASS_V(s, nir_lower_io, 1147ec681f3Smrg nir_var_shader_in | nir_var_shader_out, type_size, 0); 1159f464c52Smaya NIR_PASS_V(s, nir_lower_load_const_to_scalar); 1169f464c52Smaya NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar); 1179f464c52Smaya NIR_PASS_V(s, nir_lower_io_to_scalar, 1189f464c52Smaya nir_var_shader_in|nir_var_shader_out); 1199f464c52Smaya 1209f464c52Smaya do { 1219f464c52Smaya progress = false; 1229f464c52Smaya 1239f464c52Smaya NIR_PASS_V(s, nir_lower_vars_to_ssa); 1247ec681f3Smrg NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); 1257ec681f3Smrg NIR_PASS(progress, s, nir_lower_phis_to_scalar, false); 1269f464c52Smaya NIR_PASS(progress, s, nir_copy_prop); 1279f464c52Smaya NIR_PASS(progress, s, nir_opt_remove_phis); 1289f464c52Smaya NIR_PASS(progress, s, nir_opt_dce); 1299f464c52Smaya NIR_PASS(progress, s, nir_opt_dead_cf); 1309f464c52Smaya NIR_PASS(progress, s, nir_opt_cse); 1319f464c52Smaya NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); 1329f464c52Smaya NIR_PASS(progress, s, nir_opt_algebraic); 1337ec681f3Smrg NIR_PASS(progress, s, lima_nir_lower_ftrunc); 1349f464c52Smaya NIR_PASS(progress, s, nir_opt_constant_folding); 1359f464c52Smaya NIR_PASS(progress, s, nir_opt_undef); 1367ec681f3Smrg NIR_PASS(progress, s, nir_opt_loop_unroll); 1379f464c52Smaya } while (progress); 1389f464c52Smaya 1397ec681f3Smrg NIR_PASS_V(s, nir_lower_int_to_float); 1407ec681f3Smrg /* int_to_float pass generates ftrunc, so lower it */ 1417ec681f3Smrg NIR_PASS(progress, s, lima_nir_lower_ftrunc); 1427ec681f3Smrg NIR_PASS_V(s, nir_lower_bool_to_float); 1437ec681f3Smrg 1447ec681f3Smrg NIR_PASS_V(s, nir_copy_prop); 1457ec681f3Smrg NIR_PASS_V(s, nir_opt_dce); 1467ec681f3Smrg NIR_PASS_V(s, lima_nir_split_loads); 1479f464c52Smaya NIR_PASS_V(s, nir_lower_locals_to_regs); 1489f464c52Smaya NIR_PASS_V(s, nir_convert_from_ssa, true); 1497ec681f3Smrg NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); 1509f464c52Smaya nir_sweep(s); 1519f464c52Smaya} 1529f464c52Smaya 1537ec681f3Smrgstatic bool 1547ec681f3Smrglima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) 1557ec681f3Smrg{ 1567ec681f3Smrg if (instr->type != nir_instr_type_alu) 1577ec681f3Smrg return false; 1587ec681f3Smrg 1597ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 1607ec681f3Smrg switch (alu->op) { 1617ec681f3Smrg case nir_op_frcp: 1627ec681f3Smrg case nir_op_frsq: 1637ec681f3Smrg case nir_op_flog2: 1647ec681f3Smrg case nir_op_fexp2: 1657ec681f3Smrg case nir_op_fsqrt: 1667ec681f3Smrg case nir_op_fsin: 1677ec681f3Smrg case nir_op_fcos: 1687ec681f3Smrg return true; 1697ec681f3Smrg default: 1707ec681f3Smrg break; 1717ec681f3Smrg } 1727ec681f3Smrg 1737ec681f3Smrg /* nir vec4 fcsel assumes that each component of the condition will be 1747ec681f3Smrg * used to select the same component from the two options, but Utgard PP 1757ec681f3Smrg * has only 1 component condition. If all condition components are not the 1767ec681f3Smrg * same we need to lower it to scalar. 1777ec681f3Smrg */ 1787ec681f3Smrg switch (alu->op) { 1797ec681f3Smrg case nir_op_bcsel: 1807ec681f3Smrg case nir_op_fcsel: 1817ec681f3Smrg break; 1827ec681f3Smrg default: 1837ec681f3Smrg return false; 1847ec681f3Smrg } 1857ec681f3Smrg 1867ec681f3Smrg int num_components = nir_dest_num_components(alu->dest.dest); 1877ec681f3Smrg 1887ec681f3Smrg uint8_t swizzle = alu->src[0].swizzle[0]; 1897ec681f3Smrg 1907ec681f3Smrg for (int i = 1; i < num_components; i++) 1917ec681f3Smrg if (alu->src[0].swizzle[i] != swizzle) 1927ec681f3Smrg return true; 1937ec681f3Smrg 1947ec681f3Smrg return false; 1957ec681f3Smrg} 1967ec681f3Smrg 1977ec681f3Smrgstatic bool 1987ec681f3Smrglima_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask, 1997ec681f3Smrg const void *data) 2007ec681f3Smrg{ 2017ec681f3Smrg assert(writemask > 0); 2027ec681f3Smrg if (util_bitcount(writemask) == 1) 2037ec681f3Smrg return true; 2047ec681f3Smrg 2057ec681f3Smrg return !lima_alu_to_scalar_filter_cb(instr, data); 2067ec681f3Smrg} 2077ec681f3Smrg 2087ec681f3Smrgvoid 2097ec681f3Smrglima_program_optimize_fs_nir(struct nir_shader *s, 2107ec681f3Smrg struct nir_lower_tex_options *tex_options) 2119f464c52Smaya{ 2129f464c52Smaya bool progress; 2139f464c52Smaya 2149f464c52Smaya NIR_PASS_V(s, nir_lower_fragcoord_wtrans); 2157ec681f3Smrg NIR_PASS_V(s, nir_lower_io, 2167ec681f3Smrg nir_var_shader_in | nir_var_shader_out, type_size, 0); 2179f464c52Smaya NIR_PASS_V(s, nir_lower_regs_to_ssa); 2187ec681f3Smrg NIR_PASS_V(s, nir_lower_tex, tex_options); 2197ec681f3Smrg 2207ec681f3Smrg do { 2217ec681f3Smrg progress = false; 2227ec681f3Smrg NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL); 2237ec681f3Smrg } while (progress); 2249f464c52Smaya 2259f464c52Smaya do { 2269f464c52Smaya progress = false; 2279f464c52Smaya 2289f464c52Smaya NIR_PASS_V(s, nir_lower_vars_to_ssa); 2297ec681f3Smrg NIR_PASS(progress, s, nir_lower_alu_to_scalar, lima_alu_to_scalar_filter_cb, NULL); 2309f464c52Smaya NIR_PASS(progress, s, nir_copy_prop); 2319f464c52Smaya NIR_PASS(progress, s, nir_opt_remove_phis); 2329f464c52Smaya NIR_PASS(progress, s, nir_opt_dce); 2339f464c52Smaya NIR_PASS(progress, s, nir_opt_dead_cf); 2349f464c52Smaya NIR_PASS(progress, s, nir_opt_cse); 2359f464c52Smaya NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); 2369f464c52Smaya NIR_PASS(progress, s, nir_opt_algebraic); 2379f464c52Smaya NIR_PASS(progress, s, nir_opt_constant_folding); 2389f464c52Smaya NIR_PASS(progress, s, nir_opt_undef); 2397ec681f3Smrg NIR_PASS(progress, s, nir_opt_loop_unroll); 2407ec681f3Smrg NIR_PASS(progress, s, lima_nir_split_load_input); 2419f464c52Smaya } while (progress); 2429f464c52Smaya 2437ec681f3Smrg NIR_PASS_V(s, nir_lower_int_to_float); 2447ec681f3Smrg NIR_PASS_V(s, nir_lower_bool_to_float); 2457ec681f3Smrg 2467ec681f3Smrg /* Some ops must be lowered after being converted from int ops, 2477ec681f3Smrg * so re-run nir_opt_algebraic after int lowering. */ 2487ec681f3Smrg do { 2497ec681f3Smrg progress = false; 2507ec681f3Smrg NIR_PASS(progress, s, nir_opt_algebraic); 2517ec681f3Smrg } while (progress); 2527ec681f3Smrg 2537ec681f3Smrg /* Must be run after optimization loop */ 2547ec681f3Smrg NIR_PASS_V(s, lima_nir_scale_trig); 2557ec681f3Smrg 2569f464c52Smaya /* Lower modifiers */ 2579f464c52Smaya NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods); 2589f464c52Smaya NIR_PASS_V(s, nir_copy_prop); 2599f464c52Smaya NIR_PASS_V(s, nir_opt_dce); 2609f464c52Smaya 2619f464c52Smaya NIR_PASS_V(s, nir_lower_locals_to_regs); 2629f464c52Smaya NIR_PASS_V(s, nir_convert_from_ssa, true); 2637ec681f3Smrg NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); 2649f464c52Smaya 2659f464c52Smaya NIR_PASS_V(s, nir_move_vec_src_uses_to_dest); 2667ec681f3Smrg NIR_PASS_V(s, nir_lower_vec_to_movs, lima_vec_to_movs_filter_cb, NULL); 2677ec681f3Smrg NIR_PASS_V(s, nir_opt_dce); /* clean up any new dead code from vec to movs */ 2687ec681f3Smrg 2697ec681f3Smrg NIR_PASS_V(s, lima_nir_duplicate_load_uniforms); 2707ec681f3Smrg NIR_PASS_V(s, lima_nir_duplicate_load_inputs); 2717ec681f3Smrg NIR_PASS_V(s, lima_nir_duplicate_load_consts); 2729f464c52Smaya 2739f464c52Smaya nir_sweep(s); 2749f464c52Smaya} 2759f464c52Smaya 2767ec681f3Smrgstatic bool 2777ec681f3Smrglima_fs_compile_shader(struct lima_context *ctx, 2787ec681f3Smrg struct lima_fs_key *key, 2797ec681f3Smrg struct lima_fs_uncompiled_shader *ufs, 2807ec681f3Smrg struct lima_fs_compiled_shader *fs) 2817ec681f3Smrg{ 2827ec681f3Smrg struct lima_screen *screen = lima_screen(ctx->base.screen); 2837ec681f3Smrg nir_shader *nir = nir_shader_clone(fs, ufs->base.ir.nir); 2847ec681f3Smrg 2857ec681f3Smrg struct nir_lower_tex_options tex_options = { 2867ec681f3Smrg .lower_txp = ~0u, 2877ec681f3Smrg .swizzle_result = ~0u, 2887ec681f3Smrg }; 2897ec681f3Smrg 2907ec681f3Smrg for (int i = 0; i < ARRAY_SIZE(key->tex); i++) { 2917ec681f3Smrg for (int j = 0; j < 4; j++) 2927ec681f3Smrg tex_options.swizzles[i][j] = key->tex[i].swizzle[j]; 2937ec681f3Smrg } 2947ec681f3Smrg 2957ec681f3Smrg lima_program_optimize_fs_nir(nir, &tex_options); 2967ec681f3Smrg 2977ec681f3Smrg if (lima_debug & LIMA_DEBUG_PP) 2987ec681f3Smrg nir_print_shader(nir, stdout); 2997ec681f3Smrg 3007ec681f3Smrg if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->debug)) { 3017ec681f3Smrg ralloc_free(nir); 3027ec681f3Smrg return false; 3037ec681f3Smrg } 3047ec681f3Smrg 3057ec681f3Smrg fs->state.uses_discard = nir->info.fs.uses_discard; 3067ec681f3Smrg ralloc_free(nir); 3077ec681f3Smrg 3087ec681f3Smrg return true; 3097ec681f3Smrg} 3107ec681f3Smrg 3117ec681f3Smrgstatic bool 3127ec681f3Smrglima_fs_upload_shader(struct lima_context *ctx, 3137ec681f3Smrg struct lima_fs_compiled_shader *fs) 3147ec681f3Smrg{ 3157ec681f3Smrg struct lima_screen *screen = lima_screen(ctx->base.screen); 3167ec681f3Smrg 3177ec681f3Smrg fs->bo = lima_bo_create(screen, fs->state.shader_size, 0); 3187ec681f3Smrg if (!fs->bo) { 3197ec681f3Smrg fprintf(stderr, "lima: create fs shader bo fail\n"); 3207ec681f3Smrg return false; 3217ec681f3Smrg } 3227ec681f3Smrg 3237ec681f3Smrg memcpy(lima_bo_map(fs->bo), fs->shader, fs->state.shader_size); 3247ec681f3Smrg 3257ec681f3Smrg return true; 3267ec681f3Smrg} 3277ec681f3Smrg 3287ec681f3Smrgstatic struct lima_fs_compiled_shader * 3297ec681f3Smrglima_get_compiled_fs(struct lima_context *ctx, 3307ec681f3Smrg struct lima_fs_uncompiled_shader *ufs, 3317ec681f3Smrg struct lima_fs_key *key) 3327ec681f3Smrg{ 3337ec681f3Smrg struct lima_screen *screen = lima_screen(ctx->base.screen); 3347ec681f3Smrg struct hash_table *ht; 3357ec681f3Smrg uint32_t key_size; 3367ec681f3Smrg 3377ec681f3Smrg ht = ctx->fs_cache; 3387ec681f3Smrg key_size = sizeof(struct lima_fs_key); 3397ec681f3Smrg 3407ec681f3Smrg struct hash_entry *entry = _mesa_hash_table_search(ht, key); 3417ec681f3Smrg if (entry) 3427ec681f3Smrg return entry->data; 3437ec681f3Smrg 3447ec681f3Smrg /* Not on memory cache, try disk cache */ 3457ec681f3Smrg struct lima_fs_compiled_shader *fs = 3467ec681f3Smrg lima_fs_disk_cache_retrieve(screen->disk_cache, key); 3477ec681f3Smrg 3487ec681f3Smrg if (!fs) { 3497ec681f3Smrg /* Not on disk cache, compile and insert into disk cache*/ 3507ec681f3Smrg fs = rzalloc(NULL, struct lima_fs_compiled_shader); 3517ec681f3Smrg if (!fs) 3527ec681f3Smrg return NULL; 3537ec681f3Smrg 3547ec681f3Smrg if (!lima_fs_compile_shader(ctx, key, ufs, fs)) 3557ec681f3Smrg goto err; 3567ec681f3Smrg 3577ec681f3Smrg lima_fs_disk_cache_store(screen->disk_cache, key, fs); 3587ec681f3Smrg } 3597ec681f3Smrg 3607ec681f3Smrg if (!lima_fs_upload_shader(ctx, fs)) 3617ec681f3Smrg goto err; 3627ec681f3Smrg 3637ec681f3Smrg ralloc_free(fs->shader); 3647ec681f3Smrg fs->shader = NULL; 3657ec681f3Smrg 3667ec681f3Smrg /* Insert into memory cache */ 3677ec681f3Smrg struct lima_key *dup_key; 3687ec681f3Smrg dup_key = rzalloc_size(fs, key_size); 3697ec681f3Smrg memcpy(dup_key, key, key_size); 3707ec681f3Smrg _mesa_hash_table_insert(ht, dup_key, fs); 3717ec681f3Smrg 3727ec681f3Smrg return fs; 3737ec681f3Smrg 3747ec681f3Smrgerr: 3757ec681f3Smrg ralloc_free(fs); 3767ec681f3Smrg return NULL; 3777ec681f3Smrg} 3787ec681f3Smrg 3799f464c52Smayastatic void * 3809f464c52Smayalima_create_fs_state(struct pipe_context *pctx, 3819f464c52Smaya const struct pipe_shader_state *cso) 3829f464c52Smaya{ 3837ec681f3Smrg struct lima_context *ctx = lima_context(pctx); 3847ec681f3Smrg struct lima_fs_uncompiled_shader *so = rzalloc(NULL, struct lima_fs_uncompiled_shader); 3859f464c52Smaya 3869f464c52Smaya if (!so) 3879f464c52Smaya return NULL; 3889f464c52Smaya 3899f464c52Smaya nir_shader *nir; 3909f464c52Smaya if (cso->type == PIPE_SHADER_IR_NIR) 3917ec681f3Smrg /* The backend takes ownership of the NIR shader on state 3927ec681f3Smrg * creation. */ 3939f464c52Smaya nir = cso->ir.nir; 3949f464c52Smaya else { 3959f464c52Smaya assert(cso->type == PIPE_SHADER_IR_TGSI); 3969f464c52Smaya 3977ec681f3Smrg nir = tgsi_to_nir(cso->tokens, pctx->screen, false); 3989f464c52Smaya } 3999f464c52Smaya 4007ec681f3Smrg so->base.type = PIPE_SHADER_IR_NIR; 4017ec681f3Smrg so->base.ir.nir = nir; 4027ec681f3Smrg 4037ec681f3Smrg /* Serialize the NIR to a binary blob that we can hash for the disk 4047ec681f3Smrg * cache. Drop unnecessary information (like variable names) 4057ec681f3Smrg * so the serialized NIR is smaller, and also to let us detect more 4067ec681f3Smrg * isomorphic shaders when hashing, increasing cache hits. 4077ec681f3Smrg */ 4087ec681f3Smrg struct blob blob; 4097ec681f3Smrg blob_init(&blob); 4107ec681f3Smrg nir_serialize(&blob, nir, true); 4117ec681f3Smrg _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1); 4127ec681f3Smrg blob_finish(&blob); 4137ec681f3Smrg 4147ec681f3Smrg if (lima_debug & LIMA_DEBUG_PRECOMPILE) { 4157ec681f3Smrg /* Trigger initial compilation with default settings */ 4167ec681f3Smrg struct lima_fs_key key; 4177ec681f3Smrg memset(&key, 0, sizeof(key)); 4187ec681f3Smrg memcpy(key.nir_sha1, so->nir_sha1, sizeof(so->nir_sha1)); 4197ec681f3Smrg for (int i = 0; i < ARRAY_SIZE(key.tex); i++) { 4207ec681f3Smrg for (int j = 0; j < 4; j++) 4217ec681f3Smrg key.tex[i].swizzle[j] = j; 4227ec681f3Smrg } 4237ec681f3Smrg lima_get_compiled_fs(ctx, so, &key); 4249f464c52Smaya } 4259f464c52Smaya 4269f464c52Smaya return so; 4279f464c52Smaya} 4289f464c52Smaya 4299f464c52Smayastatic void 4309f464c52Smayalima_bind_fs_state(struct pipe_context *pctx, void *hwcso) 4319f464c52Smaya{ 4329f464c52Smaya struct lima_context *ctx = lima_context(pctx); 4339f464c52Smaya 4347ec681f3Smrg ctx->uncomp_fs = hwcso; 4357ec681f3Smrg ctx->dirty |= LIMA_CONTEXT_DIRTY_UNCOMPILED_FS; 4369f464c52Smaya} 4379f464c52Smaya 4389f464c52Smayastatic void 4399f464c52Smayalima_delete_fs_state(struct pipe_context *pctx, void *hwcso) 4409f464c52Smaya{ 4417ec681f3Smrg struct lima_context *ctx = lima_context(pctx); 4427ec681f3Smrg struct lima_fs_uncompiled_shader *so = hwcso; 4439f464c52Smaya 4447ec681f3Smrg hash_table_foreach(ctx->fs_cache, entry) { 4457ec681f3Smrg const struct lima_fs_key *key = entry->key; 4467ec681f3Smrg if (!memcmp(key->nir_sha1, so->nir_sha1, sizeof(so->nir_sha1))) { 4477ec681f3Smrg struct lima_fs_compiled_shader *fs = entry->data; 4487ec681f3Smrg _mesa_hash_table_remove(ctx->fs_cache, entry); 4497ec681f3Smrg if (fs->bo) 4507ec681f3Smrg lima_bo_unreference(fs->bo); 4519f464c52Smaya 4527ec681f3Smrg if (fs == ctx->fs) 4537ec681f3Smrg ctx->fs = NULL; 4547ec681f3Smrg 4557ec681f3Smrg ralloc_free(fs); 4567ec681f3Smrg } 4577ec681f3Smrg } 4587ec681f3Smrg 4597ec681f3Smrg ralloc_free(so->base.ir.nir); 4609f464c52Smaya ralloc_free(so); 4619f464c52Smaya} 4629f464c52Smaya 4637ec681f3Smrgstatic bool 4647ec681f3Smrglima_vs_compile_shader(struct lima_context *ctx, 4657ec681f3Smrg struct lima_vs_key *key, 4667ec681f3Smrg struct lima_vs_uncompiled_shader *uvs, 4677ec681f3Smrg struct lima_vs_compiled_shader *vs) 4687ec681f3Smrg{ 4697ec681f3Smrg nir_shader *nir = nir_shader_clone(vs, uvs->base.ir.nir); 4707ec681f3Smrg 4717ec681f3Smrg lima_program_optimize_vs_nir(nir); 4727ec681f3Smrg 4737ec681f3Smrg if (lima_debug & LIMA_DEBUG_GP) 4747ec681f3Smrg nir_print_shader(nir, stdout); 4757ec681f3Smrg 4767ec681f3Smrg if (!gpir_compile_nir(vs, nir, &ctx->debug)) { 4777ec681f3Smrg ralloc_free(nir); 4787ec681f3Smrg return false; 4797ec681f3Smrg } 4807ec681f3Smrg 4817ec681f3Smrg ralloc_free(nir); 4827ec681f3Smrg 4837ec681f3Smrg return true; 4847ec681f3Smrg} 4857ec681f3Smrg 4867ec681f3Smrgstatic bool 4877ec681f3Smrglima_vs_upload_shader(struct lima_context *ctx, 4887ec681f3Smrg struct lima_vs_compiled_shader *vs) 4899f464c52Smaya{ 4907ec681f3Smrg struct lima_screen *screen = lima_screen(ctx->base.screen); 4917ec681f3Smrg vs->bo = lima_bo_create(screen, vs->state.shader_size, 0); 4929f464c52Smaya if (!vs->bo) { 4937ec681f3Smrg fprintf(stderr, "lima: create vs shader bo fail\n"); 4947ec681f3Smrg return false; 4957ec681f3Smrg } 4969f464c52Smaya 4977ec681f3Smrg memcpy(lima_bo_map(vs->bo), vs->shader, vs->state.shader_size); 4987ec681f3Smrg 4997ec681f3Smrg return true; 5007ec681f3Smrg} 5017ec681f3Smrg 5027ec681f3Smrgstatic struct lima_vs_compiled_shader * 5037ec681f3Smrglima_get_compiled_vs(struct lima_context *ctx, 5047ec681f3Smrg struct lima_vs_uncompiled_shader *uvs, 5057ec681f3Smrg struct lima_vs_key *key) 5067ec681f3Smrg{ 5077ec681f3Smrg struct lima_screen *screen = lima_screen(ctx->base.screen); 5087ec681f3Smrg struct hash_table *ht; 5097ec681f3Smrg uint32_t key_size; 5107ec681f3Smrg 5117ec681f3Smrg ht = ctx->vs_cache; 5127ec681f3Smrg key_size = sizeof(struct lima_vs_key); 5137ec681f3Smrg 5147ec681f3Smrg struct hash_entry *entry = _mesa_hash_table_search(ht, key); 5157ec681f3Smrg if (entry) 5167ec681f3Smrg return entry->data; 5177ec681f3Smrg 5187ec681f3Smrg /* Not on memory cache, try disk cache */ 5197ec681f3Smrg struct lima_vs_compiled_shader *vs = 5207ec681f3Smrg lima_vs_disk_cache_retrieve(screen->disk_cache, key); 5217ec681f3Smrg 5227ec681f3Smrg if (!vs) { 5237ec681f3Smrg /* Not on disk cache, compile and insert into disk cache */ 5247ec681f3Smrg vs = rzalloc(NULL, struct lima_vs_compiled_shader); 5257ec681f3Smrg if (!vs) 5267ec681f3Smrg return NULL; 5277ec681f3Smrg if (!lima_vs_compile_shader(ctx, key, uvs, vs)) 5287ec681f3Smrg goto err; 5297ec681f3Smrg 5307ec681f3Smrg lima_vs_disk_cache_store(screen->disk_cache, key, vs); 5319f464c52Smaya } 5329f464c52Smaya 5337ec681f3Smrg if (!lima_vs_upload_shader(ctx, vs)) 5347ec681f3Smrg goto err; 5357ec681f3Smrg 5367ec681f3Smrg ralloc_free(vs->shader); 5377ec681f3Smrg vs->shader = NULL; 5387ec681f3Smrg 5397ec681f3Smrg struct lima_key *dup_key; 5407ec681f3Smrg dup_key = rzalloc_size(vs, key_size); 5417ec681f3Smrg memcpy(dup_key, key, key_size); 5427ec681f3Smrg _mesa_hash_table_insert(ht, dup_key, vs); 5437ec681f3Smrg 5447ec681f3Smrg return vs; 5457ec681f3Smrg 5467ec681f3Smrgerr: 5477ec681f3Smrg ralloc_free(vs); 5487ec681f3Smrg return NULL; 5497ec681f3Smrg} 5507ec681f3Smrg 5517ec681f3Smrgbool 5527ec681f3Smrglima_update_vs_state(struct lima_context *ctx) 5537ec681f3Smrg{ 5547ec681f3Smrg if (!(ctx->dirty & LIMA_CONTEXT_DIRTY_UNCOMPILED_VS)) { 5557ec681f3Smrg return true; 5567ec681f3Smrg } 5577ec681f3Smrg 5587ec681f3Smrg struct lima_vs_key local_key; 5597ec681f3Smrg struct lima_vs_key *key = &local_key; 5607ec681f3Smrg memset(key, 0, sizeof(*key)); 5617ec681f3Smrg memcpy(key->nir_sha1, ctx->uncomp_vs->nir_sha1, 5627ec681f3Smrg sizeof(ctx->uncomp_vs->nir_sha1)); 5637ec681f3Smrg 5647ec681f3Smrg struct lima_vs_compiled_shader *old_vs = ctx->vs; 5657ec681f3Smrg struct lima_vs_compiled_shader *vs = lima_get_compiled_vs(ctx, 5667ec681f3Smrg ctx->uncomp_vs, 5677ec681f3Smrg key); 5687ec681f3Smrg if (!vs) 5697ec681f3Smrg return false; 5707ec681f3Smrg 5717ec681f3Smrg ctx->vs = vs; 5727ec681f3Smrg 5737ec681f3Smrg if (ctx->vs != old_vs) 5747ec681f3Smrg ctx->dirty |= LIMA_CONTEXT_DIRTY_COMPILED_VS; 5757ec681f3Smrg 5769f464c52Smaya return true; 5779f464c52Smaya} 5789f464c52Smaya 5799f464c52Smayabool 5809f464c52Smayalima_update_fs_state(struct lima_context *ctx) 5819f464c52Smaya{ 5827ec681f3Smrg if (!(ctx->dirty & (LIMA_CONTEXT_DIRTY_UNCOMPILED_FS | 5837ec681f3Smrg LIMA_CONTEXT_DIRTY_TEXTURES))) { 5847ec681f3Smrg return true; 5857ec681f3Smrg } 5869f464c52Smaya 5877ec681f3Smrg struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj; 5887ec681f3Smrg struct lima_fs_key local_key; 5897ec681f3Smrg struct lima_fs_key *key = &local_key; 5907ec681f3Smrg memset(key, 0, sizeof(*key)); 5917ec681f3Smrg memcpy(key->nir_sha1, ctx->uncomp_fs->nir_sha1, 5927ec681f3Smrg sizeof(ctx->uncomp_fs->nir_sha1)); 5937ec681f3Smrg 5947ec681f3Smrg for (int i = 0; i < lima_tex->num_textures; i++) { 5957ec681f3Smrg struct lima_sampler_view *sampler = lima_sampler_view(lima_tex->textures[i]); 5967ec681f3Smrg for (int j = 0; j < 4; j++) 5977ec681f3Smrg key->tex[i].swizzle[j] = sampler->swizzle[j]; 5989f464c52Smaya } 5999f464c52Smaya 6007ec681f3Smrg /* Fill rest with identity swizzle */ 6017ec681f3Smrg uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, 6027ec681f3Smrg PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }; 6037ec681f3Smrg for (int i = lima_tex->num_textures; i < ARRAY_SIZE(key->tex); i++) 6047ec681f3Smrg memcpy(key->tex[i].swizzle, identity, 4); 6057ec681f3Smrg 6067ec681f3Smrg struct lima_fs_compiled_shader *old_fs = ctx->fs; 6077ec681f3Smrg 6087ec681f3Smrg struct lima_fs_compiled_shader *fs = lima_get_compiled_fs(ctx, 6097ec681f3Smrg ctx->uncomp_fs, 6107ec681f3Smrg key); 6117ec681f3Smrg if (!fs) 6127ec681f3Smrg return false; 6137ec681f3Smrg 6147ec681f3Smrg ctx->fs = fs; 6157ec681f3Smrg 6167ec681f3Smrg if (ctx->fs != old_fs) 6177ec681f3Smrg ctx->dirty |= LIMA_CONTEXT_DIRTY_COMPILED_FS; 6187ec681f3Smrg 6199f464c52Smaya return true; 6209f464c52Smaya} 6219f464c52Smaya 6229f464c52Smayastatic void * 6239f464c52Smayalima_create_vs_state(struct pipe_context *pctx, 6249f464c52Smaya const struct pipe_shader_state *cso) 6259f464c52Smaya{ 6267ec681f3Smrg struct lima_context *ctx = lima_context(pctx); 6277ec681f3Smrg struct lima_vs_uncompiled_shader *so = rzalloc(NULL, struct lima_vs_uncompiled_shader); 6289f464c52Smaya 6299f464c52Smaya if (!so) 6309f464c52Smaya return NULL; 6319f464c52Smaya 6329f464c52Smaya nir_shader *nir; 6339f464c52Smaya if (cso->type == PIPE_SHADER_IR_NIR) 6347ec681f3Smrg /* The backend takes ownership of the NIR shader on state 6357ec681f3Smrg * creation. */ 6369f464c52Smaya nir = cso->ir.nir; 6379f464c52Smaya else { 6389f464c52Smaya assert(cso->type == PIPE_SHADER_IR_TGSI); 6399f464c52Smaya 6407ec681f3Smrg nir = tgsi_to_nir(cso->tokens, pctx->screen, false); 6419f464c52Smaya } 6429f464c52Smaya 6437ec681f3Smrg so->base.type = PIPE_SHADER_IR_NIR; 6447ec681f3Smrg so->base.ir.nir = nir; 6457ec681f3Smrg 6467ec681f3Smrg /* Serialize the NIR to a binary blob that we can hash for the disk 6477ec681f3Smrg * cache. Drop unnecessary information (like variable names) 6487ec681f3Smrg * so the serialized NIR is smaller, and also to let us detect more 6497ec681f3Smrg * isomorphic shaders when hashing, increasing cache hits. 6507ec681f3Smrg */ 6517ec681f3Smrg struct blob blob; 6527ec681f3Smrg blob_init(&blob); 6537ec681f3Smrg nir_serialize(&blob, nir, true); 6547ec681f3Smrg _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1); 6557ec681f3Smrg blob_finish(&blob); 6567ec681f3Smrg 6577ec681f3Smrg if (lima_debug & LIMA_DEBUG_PRECOMPILE) { 6587ec681f3Smrg /* Trigger initial compilation with default settings */ 6597ec681f3Smrg struct lima_vs_key key; 6607ec681f3Smrg memset(&key, 0, sizeof(key)); 6617ec681f3Smrg memcpy(key.nir_sha1, so->nir_sha1, sizeof(so->nir_sha1)); 6627ec681f3Smrg lima_get_compiled_vs(ctx, so, &key); 6639f464c52Smaya } 6649f464c52Smaya 6659f464c52Smaya return so; 6669f464c52Smaya} 6679f464c52Smaya 6689f464c52Smayastatic void 6699f464c52Smayalima_bind_vs_state(struct pipe_context *pctx, void *hwcso) 6709f464c52Smaya{ 6719f464c52Smaya struct lima_context *ctx = lima_context(pctx); 6729f464c52Smaya 6737ec681f3Smrg ctx->uncomp_vs = hwcso; 6747ec681f3Smrg ctx->dirty |= LIMA_CONTEXT_DIRTY_UNCOMPILED_VS; 6759f464c52Smaya} 6769f464c52Smaya 6779f464c52Smayastatic void 6789f464c52Smayalima_delete_vs_state(struct pipe_context *pctx, void *hwcso) 6799f464c52Smaya{ 6807ec681f3Smrg struct lima_context *ctx = lima_context(pctx); 6817ec681f3Smrg struct lima_vs_uncompiled_shader *so = hwcso; 6827ec681f3Smrg 6837ec681f3Smrg hash_table_foreach(ctx->vs_cache, entry) { 6847ec681f3Smrg const struct lima_vs_key *key = entry->key; 6857ec681f3Smrg if (!memcmp(key->nir_sha1, so->nir_sha1, sizeof(so->nir_sha1))) { 6867ec681f3Smrg struct lima_vs_compiled_shader *vs = entry->data; 6877ec681f3Smrg _mesa_hash_table_remove(ctx->vs_cache, entry); 6887ec681f3Smrg if (vs->bo) 6897ec681f3Smrg lima_bo_unreference(vs->bo); 6909f464c52Smaya 6917ec681f3Smrg if (vs == ctx->vs) 6927ec681f3Smrg ctx->vs = NULL; 6937ec681f3Smrg 6947ec681f3Smrg ralloc_free(vs); 6957ec681f3Smrg } 6967ec681f3Smrg } 6979f464c52Smaya 6987ec681f3Smrg ralloc_free(so->base.ir.nir); 6999f464c52Smaya ralloc_free(so); 7009f464c52Smaya} 7019f464c52Smaya 7027ec681f3Smrgstatic uint32_t 7037ec681f3Smrglima_fs_cache_hash(const void *key) 7047ec681f3Smrg{ 7057ec681f3Smrg return _mesa_hash_data(key, sizeof(struct lima_fs_key)); 7067ec681f3Smrg} 7077ec681f3Smrg 7087ec681f3Smrgstatic uint32_t 7097ec681f3Smrglima_vs_cache_hash(const void *key) 7107ec681f3Smrg{ 7117ec681f3Smrg return _mesa_hash_data(key, sizeof(struct lima_vs_key)); 7127ec681f3Smrg} 7137ec681f3Smrg 7147ec681f3Smrgstatic bool 7157ec681f3Smrglima_fs_cache_compare(const void *key1, const void *key2) 7167ec681f3Smrg{ 7177ec681f3Smrg return memcmp(key1, key2, sizeof(struct lima_fs_key)) == 0; 7187ec681f3Smrg} 7197ec681f3Smrg 7207ec681f3Smrgstatic bool 7217ec681f3Smrglima_vs_cache_compare(const void *key1, const void *key2) 7227ec681f3Smrg{ 7237ec681f3Smrg return memcmp(key1, key2, sizeof(struct lima_vs_key)) == 0; 7247ec681f3Smrg} 7257ec681f3Smrg 7269f464c52Smayavoid 7279f464c52Smayalima_program_init(struct lima_context *ctx) 7289f464c52Smaya{ 7299f464c52Smaya ctx->base.create_fs_state = lima_create_fs_state; 7309f464c52Smaya ctx->base.bind_fs_state = lima_bind_fs_state; 7319f464c52Smaya ctx->base.delete_fs_state = lima_delete_fs_state; 7329f464c52Smaya 7339f464c52Smaya ctx->base.create_vs_state = lima_create_vs_state; 7349f464c52Smaya ctx->base.bind_vs_state = lima_bind_vs_state; 7359f464c52Smaya ctx->base.delete_vs_state = lima_delete_vs_state; 7367ec681f3Smrg 7377ec681f3Smrg ctx->fs_cache = _mesa_hash_table_create(ctx, lima_fs_cache_hash, 7387ec681f3Smrg lima_fs_cache_compare); 7397ec681f3Smrg ctx->vs_cache = _mesa_hash_table_create(ctx, lima_vs_cache_hash, 7407ec681f3Smrg lima_vs_cache_compare); 7417ec681f3Smrg} 7427ec681f3Smrg 7437ec681f3Smrgvoid 7447ec681f3Smrglima_program_fini(struct lima_context *ctx) 7457ec681f3Smrg{ 7467ec681f3Smrg hash_table_foreach(ctx->vs_cache, entry) { 7477ec681f3Smrg struct lima_vs_compiled_shader *vs = entry->data; 7487ec681f3Smrg if (vs->bo) 7497ec681f3Smrg lima_bo_unreference(vs->bo); 7507ec681f3Smrg ralloc_free(vs); 7517ec681f3Smrg _mesa_hash_table_remove(ctx->vs_cache, entry); 7527ec681f3Smrg } 7537ec681f3Smrg 7547ec681f3Smrg hash_table_foreach(ctx->fs_cache, entry) { 7557ec681f3Smrg struct lima_fs_compiled_shader *fs = entry->data; 7567ec681f3Smrg if (fs->bo) 7577ec681f3Smrg lima_bo_unreference(fs->bo); 7587ec681f3Smrg ralloc_free(fs); 7597ec681f3Smrg _mesa_hash_table_remove(ctx->fs_cache, entry); 7607ec681f3Smrg } 7619f464c52Smaya} 762