17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io> 37ec681f3Smrg * Copyright (C) 2019-2020 Collabora, Ltd. 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 217ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 227ec681f3Smrg * SOFTWARE. 
237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#include <sys/types.h> 267ec681f3Smrg#include <sys/stat.h> 277ec681f3Smrg#include <sys/mman.h> 287ec681f3Smrg#include <fcntl.h> 297ec681f3Smrg#include <stdint.h> 307ec681f3Smrg#include <stdlib.h> 317ec681f3Smrg#include <stdio.h> 327ec681f3Smrg#include <err.h> 337ec681f3Smrg 347ec681f3Smrg#include "main/mtypes.h" 357ec681f3Smrg#include "compiler/glsl/glsl_to_nir.h" 367ec681f3Smrg#include "compiler/nir_types.h" 377ec681f3Smrg#include "compiler/nir/nir_builder.h" 387ec681f3Smrg#include "util/half_float.h" 397ec681f3Smrg#include "util/u_math.h" 407ec681f3Smrg#include "util/u_debug.h" 417ec681f3Smrg#include "util/u_dynarray.h" 427ec681f3Smrg#include "util/list.h" 437ec681f3Smrg#include "main/mtypes.h" 447ec681f3Smrg 457ec681f3Smrg#include "midgard.h" 467ec681f3Smrg#include "midgard_nir.h" 477ec681f3Smrg#include "midgard_compile.h" 487ec681f3Smrg#include "midgard_ops.h" 497ec681f3Smrg#include "helpers.h" 507ec681f3Smrg#include "compiler.h" 517ec681f3Smrg#include "midgard_quirks.h" 527ec681f3Smrg#include "panfrost-quirks.h" 537ec681f3Smrg#include "panfrost/util/pan_lower_framebuffer.h" 547ec681f3Smrg 557ec681f3Smrg#include "disassemble.h" 567ec681f3Smrg 577ec681f3Smrgstatic const struct debug_named_value midgard_debug_options[] = { 587ec681f3Smrg {"msgs", MIDGARD_DBG_MSGS, "Print debug messages"}, 597ec681f3Smrg {"shaders", MIDGARD_DBG_SHADERS, "Dump shaders in NIR and MIR"}, 607ec681f3Smrg {"shaderdb", MIDGARD_DBG_SHADERDB, "Prints shader-db statistics"}, 617ec681f3Smrg {"inorder", MIDGARD_DBG_INORDER, "Disables out-of-order scheduling"}, 627ec681f3Smrg {"verbose", MIDGARD_DBG_VERBOSE, "Dump shaders verbosely"}, 637ec681f3Smrg {"internal", MIDGARD_DBG_INTERNAL, "Dump internal shaders"}, 647ec681f3Smrg DEBUG_NAMED_VALUE_END 657ec681f3Smrg}; 667ec681f3Smrg 677ec681f3SmrgDEBUG_GET_ONCE_FLAGS_OPTION(midgard_debug, "MIDGARD_MESA_DEBUG", midgard_debug_options, 0) 687ec681f3Smrg 697ec681f3Smrgint midgard_debug = 0; 707ec681f3Smrg 
717ec681f3Smrg#define DBG(fmt, ...) \ 727ec681f3Smrg do { if (midgard_debug & MIDGARD_DBG_MSGS) \ 737ec681f3Smrg fprintf(stderr, "%s:%d: "fmt, \ 747ec681f3Smrg __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) 757ec681f3Smrgstatic midgard_block * 767ec681f3Smrgcreate_empty_block(compiler_context *ctx) 777ec681f3Smrg{ 787ec681f3Smrg midgard_block *blk = rzalloc(ctx, midgard_block); 797ec681f3Smrg 807ec681f3Smrg blk->base.predecessors = _mesa_set_create(blk, 817ec681f3Smrg _mesa_hash_pointer, 827ec681f3Smrg _mesa_key_pointer_equal); 837ec681f3Smrg 847ec681f3Smrg blk->base.name = ctx->block_source_count++; 857ec681f3Smrg 867ec681f3Smrg return blk; 877ec681f3Smrg} 887ec681f3Smrg 897ec681f3Smrgstatic void 907ec681f3Smrgschedule_barrier(compiler_context *ctx) 917ec681f3Smrg{ 927ec681f3Smrg midgard_block *temp = ctx->after_block; 937ec681f3Smrg ctx->after_block = create_empty_block(ctx); 947ec681f3Smrg ctx->block_count++; 957ec681f3Smrg list_addtail(&ctx->after_block->base.link, &ctx->blocks); 967ec681f3Smrg list_inithead(&ctx->after_block->base.instructions); 977ec681f3Smrg pan_block_add_successor(&ctx->current_block->base, &ctx->after_block->base); 987ec681f3Smrg ctx->current_block = ctx->after_block; 997ec681f3Smrg ctx->after_block = temp; 1007ec681f3Smrg} 1017ec681f3Smrg 1027ec681f3Smrg/* Helpers to generate midgard_instruction's using macro magic, since every 1037ec681f3Smrg * driver seems to do it that way */ 1047ec681f3Smrg 1057ec681f3Smrg#define EMIT(op, ...) 
emit_mir_instruction(ctx, v_##op(__VA_ARGS__)); 1067ec681f3Smrg 1077ec681f3Smrg#define M_LOAD_STORE(name, store, T) \ 1087ec681f3Smrg static midgard_instruction m_##name(unsigned ssa, unsigned address) { \ 1097ec681f3Smrg midgard_instruction i = { \ 1107ec681f3Smrg .type = TAG_LOAD_STORE_4, \ 1117ec681f3Smrg .mask = 0xF, \ 1127ec681f3Smrg .dest = ~0, \ 1137ec681f3Smrg .src = { ~0, ~0, ~0, ~0 }, \ 1147ec681f3Smrg .swizzle = SWIZZLE_IDENTITY_4, \ 1157ec681f3Smrg .op = midgard_op_##name, \ 1167ec681f3Smrg .load_store = { \ 1177ec681f3Smrg .signed_offset = address \ 1187ec681f3Smrg } \ 1197ec681f3Smrg }; \ 1207ec681f3Smrg \ 1217ec681f3Smrg if (store) { \ 1227ec681f3Smrg i.src[0] = ssa; \ 1237ec681f3Smrg i.src_types[0] = T; \ 1247ec681f3Smrg i.dest_type = T; \ 1257ec681f3Smrg } else { \ 1267ec681f3Smrg i.dest = ssa; \ 1277ec681f3Smrg i.dest_type = T; \ 1287ec681f3Smrg } \ 1297ec681f3Smrg return i; \ 1307ec681f3Smrg } 1317ec681f3Smrg 1327ec681f3Smrg#define M_LOAD(name, T) M_LOAD_STORE(name, false, T) 1337ec681f3Smrg#define M_STORE(name, T) M_LOAD_STORE(name, true, T) 1347ec681f3Smrg 1357ec681f3SmrgM_LOAD(ld_attr_32, nir_type_uint32); 1367ec681f3SmrgM_LOAD(ld_vary_32, nir_type_uint32); 1377ec681f3SmrgM_LOAD(ld_ubo_32, nir_type_uint32); 1387ec681f3SmrgM_LOAD(ld_ubo_64, nir_type_uint32); 1397ec681f3SmrgM_LOAD(ld_ubo_128, nir_type_uint32); 1407ec681f3SmrgM_LOAD(ld_32, nir_type_uint32); 1417ec681f3SmrgM_LOAD(ld_64, nir_type_uint32); 1427ec681f3SmrgM_LOAD(ld_128, nir_type_uint32); 1437ec681f3SmrgM_STORE(st_32, nir_type_uint32); 1447ec681f3SmrgM_STORE(st_64, nir_type_uint32); 1457ec681f3SmrgM_STORE(st_128, nir_type_uint32); 1467ec681f3SmrgM_LOAD(ld_tilebuffer_raw, nir_type_uint32); 1477ec681f3SmrgM_LOAD(ld_tilebuffer_16f, nir_type_float16); 1487ec681f3SmrgM_LOAD(ld_tilebuffer_32f, nir_type_float32); 1497ec681f3SmrgM_STORE(st_vary_32, nir_type_uint32); 1507ec681f3SmrgM_LOAD(ld_cubemap_coords, nir_type_uint32); 1517ec681f3SmrgM_LOAD(ldst_mov, nir_type_uint32); 
1527ec681f3SmrgM_LOAD(ld_image_32f, nir_type_float32); 1537ec681f3SmrgM_LOAD(ld_image_16f, nir_type_float16); 1547ec681f3SmrgM_LOAD(ld_image_32u, nir_type_uint32); 1557ec681f3SmrgM_LOAD(ld_image_32i, nir_type_int32); 1567ec681f3SmrgM_STORE(st_image_32f, nir_type_float32); 1577ec681f3SmrgM_STORE(st_image_16f, nir_type_float16); 1587ec681f3SmrgM_STORE(st_image_32u, nir_type_uint32); 1597ec681f3SmrgM_STORE(st_image_32i, nir_type_int32); 1607ec681f3SmrgM_LOAD(lea_image, nir_type_uint64); 1617ec681f3Smrg 1627ec681f3Smrg#define M_IMAGE(op) \ 1637ec681f3Smrgstatic midgard_instruction \ 1647ec681f3Smrgop ## _image(nir_alu_type type, unsigned val, unsigned address) \ 1657ec681f3Smrg{ \ 1667ec681f3Smrg switch (type) { \ 1677ec681f3Smrg case nir_type_float32: \ 1687ec681f3Smrg return m_ ## op ## _image_32f(val, address); \ 1697ec681f3Smrg case nir_type_float16: \ 1707ec681f3Smrg return m_ ## op ## _image_16f(val, address); \ 1717ec681f3Smrg case nir_type_uint32: \ 1727ec681f3Smrg return m_ ## op ## _image_32u(val, address); \ 1737ec681f3Smrg case nir_type_int32: \ 1747ec681f3Smrg return m_ ## op ## _image_32i(val, address); \ 1757ec681f3Smrg default: \ 1767ec681f3Smrg unreachable("Invalid image type"); \ 1777ec681f3Smrg } \ 1787ec681f3Smrg} 1797ec681f3Smrg 1807ec681f3SmrgM_IMAGE(ld); 1817ec681f3SmrgM_IMAGE(st); 1827ec681f3Smrg 1837ec681f3Smrgstatic midgard_instruction 1847ec681f3Smrgv_branch(bool conditional, bool invert) 1857ec681f3Smrg{ 1867ec681f3Smrg midgard_instruction ins = { 1877ec681f3Smrg .type = TAG_ALU_4, 1887ec681f3Smrg .unit = ALU_ENAB_BRANCH, 1897ec681f3Smrg .compact_branch = true, 1907ec681f3Smrg .branch = { 1917ec681f3Smrg .conditional = conditional, 1927ec681f3Smrg .invert_conditional = invert 1937ec681f3Smrg }, 1947ec681f3Smrg .dest = ~0, 1957ec681f3Smrg .src = { ~0, ~0, ~0, ~0 }, 1967ec681f3Smrg }; 1977ec681f3Smrg 1987ec681f3Smrg return ins; 1997ec681f3Smrg} 2007ec681f3Smrg 2017ec681f3Smrgstatic void 2027ec681f3Smrgattach_constants(compiler_context *ctx, 
midgard_instruction *ins, void *constants, int name) 2037ec681f3Smrg{ 2047ec681f3Smrg ins->has_constants = true; 2057ec681f3Smrg memcpy(&ins->constants, constants, 16); 2067ec681f3Smrg} 2077ec681f3Smrg 2087ec681f3Smrgstatic int 2097ec681f3Smrgglsl_type_size(const struct glsl_type *type, bool bindless) 2107ec681f3Smrg{ 2117ec681f3Smrg return glsl_count_attribute_slots(type, false); 2127ec681f3Smrg} 2137ec681f3Smrg 2147ec681f3Smrg/* Lower fdot2 to a vector multiplication followed by channel addition */ 2157ec681f3Smrgstatic bool 2167ec681f3Smrgmidgard_nir_lower_fdot2_instr(nir_builder *b, nir_instr *instr, void *data) 2177ec681f3Smrg{ 2187ec681f3Smrg if (instr->type != nir_instr_type_alu) 2197ec681f3Smrg return false; 2207ec681f3Smrg 2217ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 2227ec681f3Smrg if (alu->op != nir_op_fdot2) 2237ec681f3Smrg return false; 2247ec681f3Smrg 2257ec681f3Smrg b->cursor = nir_before_instr(&alu->instr); 2267ec681f3Smrg 2277ec681f3Smrg nir_ssa_def *src0 = nir_ssa_for_alu_src(b, alu, 0); 2287ec681f3Smrg nir_ssa_def *src1 = nir_ssa_for_alu_src(b, alu, 1); 2297ec681f3Smrg 2307ec681f3Smrg nir_ssa_def *product = nir_fmul(b, src0, src1); 2317ec681f3Smrg 2327ec681f3Smrg nir_ssa_def *sum = nir_fadd(b, 2337ec681f3Smrg nir_channel(b, product, 0), 2347ec681f3Smrg nir_channel(b, product, 1)); 2357ec681f3Smrg 2367ec681f3Smrg /* Replace the fdot2 with this sum */ 2377ec681f3Smrg nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, sum); 2387ec681f3Smrg 2397ec681f3Smrg return true; 2407ec681f3Smrg} 2417ec681f3Smrg 2427ec681f3Smrgstatic bool 2437ec681f3Smrgmidgard_nir_lower_fdot2(nir_shader *shader) 2447ec681f3Smrg{ 2457ec681f3Smrg return nir_shader_instructions_pass(shader, 2467ec681f3Smrg midgard_nir_lower_fdot2_instr, 2477ec681f3Smrg nir_metadata_block_index | nir_metadata_dominance, 2487ec681f3Smrg NULL); 2497ec681f3Smrg} 2507ec681f3Smrg 2517ec681f3Smrgstatic bool 2527ec681f3Smrgmdg_is_64(const nir_instr *instr, const void *_unused) 
2537ec681f3Smrg{ 2547ec681f3Smrg const nir_alu_instr *alu = nir_instr_as_alu(instr); 2557ec681f3Smrg 2567ec681f3Smrg if (nir_dest_bit_size(alu->dest.dest) == 64) 2577ec681f3Smrg return true; 2587ec681f3Smrg 2597ec681f3Smrg switch (alu->op) { 2607ec681f3Smrg case nir_op_umul_high: 2617ec681f3Smrg case nir_op_imul_high: 2627ec681f3Smrg return true; 2637ec681f3Smrg default: 2647ec681f3Smrg return false; 2657ec681f3Smrg } 2667ec681f3Smrg} 2677ec681f3Smrg 2687ec681f3Smrg/* Only vectorize int64 up to vec2 */ 2697ec681f3Smrgstatic bool 2707ec681f3Smrgmidgard_vectorize_filter(const nir_instr *instr, void *data) 2717ec681f3Smrg{ 2727ec681f3Smrg if (instr->type != nir_instr_type_alu) 2737ec681f3Smrg return true; 2747ec681f3Smrg 2757ec681f3Smrg const nir_alu_instr *alu = nir_instr_as_alu(instr); 2767ec681f3Smrg 2777ec681f3Smrg unsigned num_components = alu->dest.dest.ssa.num_components; 2787ec681f3Smrg 2797ec681f3Smrg int src_bit_size = nir_src_bit_size(alu->src[0].src); 2807ec681f3Smrg int dst_bit_size = nir_dest_bit_size(alu->dest.dest); 2817ec681f3Smrg 2827ec681f3Smrg if (src_bit_size == 64 || dst_bit_size == 64) { 2837ec681f3Smrg if (num_components > 1) 2847ec681f3Smrg return false; 2857ec681f3Smrg } 2867ec681f3Smrg 2877ec681f3Smrg return true; 2887ec681f3Smrg} 2897ec681f3Smrg 2907ec681f3Smrg 2917ec681f3Smrg/* Flushes undefined values to zero */ 2927ec681f3Smrg 2937ec681f3Smrgstatic void 2947ec681f3Smrgoptimise_nir(nir_shader *nir, unsigned quirks, bool is_blend) 2957ec681f3Smrg{ 2967ec681f3Smrg bool progress; 2977ec681f3Smrg unsigned lower_flrp = 2987ec681f3Smrg (nir->options->lower_flrp16 ? 16 : 0) | 2997ec681f3Smrg (nir->options->lower_flrp32 ? 32 : 0) | 3007ec681f3Smrg (nir->options->lower_flrp64 ? 
64 : 0); 3017ec681f3Smrg 3027ec681f3Smrg NIR_PASS(progress, nir, nir_lower_regs_to_ssa); 3037ec681f3Smrg nir_lower_idiv_options idiv_options = { 3047ec681f3Smrg .imprecise_32bit_lowering = true, 3057ec681f3Smrg .allow_fp16 = true, 3067ec681f3Smrg }; 3077ec681f3Smrg NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); 3087ec681f3Smrg 3097ec681f3Smrg nir_lower_tex_options lower_tex_options = { 3107ec681f3Smrg .lower_txs_lod = true, 3117ec681f3Smrg .lower_txp = ~0, 3127ec681f3Smrg .lower_tg4_broadcom_swizzle = true, 3137ec681f3Smrg /* TODO: we have native gradient.. */ 3147ec681f3Smrg .lower_txd = true, 3157ec681f3Smrg }; 3167ec681f3Smrg 3177ec681f3Smrg NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options); 3187ec681f3Smrg 3197ec681f3Smrg /* Must lower fdot2 after tex is lowered */ 3207ec681f3Smrg NIR_PASS(progress, nir, midgard_nir_lower_fdot2); 3217ec681f3Smrg 3227ec681f3Smrg /* T720 is broken. */ 3237ec681f3Smrg 3247ec681f3Smrg if (quirks & MIDGARD_BROKEN_LOD) 3257ec681f3Smrg NIR_PASS_V(nir, midgard_nir_lod_errata); 3267ec681f3Smrg 3277ec681f3Smrg /* Midgard image ops coordinates are 16-bit instead of 32-bit */ 3287ec681f3Smrg NIR_PASS(progress, nir, midgard_nir_lower_image_bitsize); 3297ec681f3Smrg NIR_PASS(progress, nir, midgard_nir_lower_helper_writes); 3307ec681f3Smrg NIR_PASS(progress, nir, pan_lower_helper_invocation); 3317ec681f3Smrg NIR_PASS(progress, nir, pan_lower_sample_pos); 3327ec681f3Smrg 3337ec681f3Smrg NIR_PASS(progress, nir, midgard_nir_lower_algebraic_early); 3347ec681f3Smrg 3357ec681f3Smrg do { 3367ec681f3Smrg progress = false; 3377ec681f3Smrg 3387ec681f3Smrg NIR_PASS(progress, nir, nir_lower_var_copies); 3397ec681f3Smrg NIR_PASS(progress, nir, nir_lower_vars_to_ssa); 3407ec681f3Smrg 3417ec681f3Smrg NIR_PASS(progress, nir, nir_copy_prop); 3427ec681f3Smrg NIR_PASS(progress, nir, nir_opt_remove_phis); 3437ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dce); 3447ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dead_cf); 3457ec681f3Smrg 
NIR_PASS(progress, nir, nir_opt_cse); 3467ec681f3Smrg NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true); 3477ec681f3Smrg NIR_PASS(progress, nir, nir_opt_algebraic); 3487ec681f3Smrg NIR_PASS(progress, nir, nir_opt_constant_folding); 3497ec681f3Smrg 3507ec681f3Smrg if (lower_flrp != 0) { 3517ec681f3Smrg bool lower_flrp_progress = false; 3527ec681f3Smrg NIR_PASS(lower_flrp_progress, 3537ec681f3Smrg nir, 3547ec681f3Smrg nir_lower_flrp, 3557ec681f3Smrg lower_flrp, 3567ec681f3Smrg false /* always_precise */); 3577ec681f3Smrg if (lower_flrp_progress) { 3587ec681f3Smrg NIR_PASS(progress, nir, 3597ec681f3Smrg nir_opt_constant_folding); 3607ec681f3Smrg progress = true; 3617ec681f3Smrg } 3627ec681f3Smrg 3637ec681f3Smrg /* Nothing should rematerialize any flrps, so we only 3647ec681f3Smrg * need to do this lowering once. 3657ec681f3Smrg */ 3667ec681f3Smrg lower_flrp = 0; 3677ec681f3Smrg } 3687ec681f3Smrg 3697ec681f3Smrg NIR_PASS(progress, nir, nir_opt_undef); 3707ec681f3Smrg NIR_PASS(progress, nir, nir_lower_undef_to_zero); 3717ec681f3Smrg 3727ec681f3Smrg NIR_PASS(progress, nir, nir_opt_loop_unroll); 3737ec681f3Smrg 3747ec681f3Smrg NIR_PASS(progress, nir, nir_opt_vectorize, 3757ec681f3Smrg midgard_vectorize_filter, NULL); 3767ec681f3Smrg } while (progress); 3777ec681f3Smrg 3787ec681f3Smrg NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_is_64, NULL); 3797ec681f3Smrg 3807ec681f3Smrg /* Run after opts so it can hit more */ 3817ec681f3Smrg if (!is_blend) 3827ec681f3Smrg NIR_PASS(progress, nir, nir_fuse_io_16); 3837ec681f3Smrg 3847ec681f3Smrg /* Must be run at the end to prevent creation of fsin/fcos ops */ 3857ec681f3Smrg NIR_PASS(progress, nir, midgard_nir_scale_trig); 3867ec681f3Smrg 3877ec681f3Smrg do { 3887ec681f3Smrg progress = false; 3897ec681f3Smrg 3907ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dce); 3917ec681f3Smrg NIR_PASS(progress, nir, nir_opt_algebraic); 3927ec681f3Smrg NIR_PASS(progress, nir, nir_opt_constant_folding); 3937ec681f3Smrg NIR_PASS(progress, 
nir, nir_copy_prop); 3947ec681f3Smrg } while (progress); 3957ec681f3Smrg 3967ec681f3Smrg NIR_PASS(progress, nir, nir_opt_algebraic_late); 3977ec681f3Smrg NIR_PASS(progress, nir, nir_opt_algebraic_distribute_src_mods); 3987ec681f3Smrg 3997ec681f3Smrg /* We implement booleans as 32-bit 0/~0 */ 4007ec681f3Smrg NIR_PASS(progress, nir, nir_lower_bool_to_int32); 4017ec681f3Smrg 4027ec681f3Smrg /* Now that booleans are lowered, we can run out late opts */ 4037ec681f3Smrg NIR_PASS(progress, nir, midgard_nir_lower_algebraic_late); 4047ec681f3Smrg NIR_PASS(progress, nir, midgard_nir_cancel_inot); 4057ec681f3Smrg 4067ec681f3Smrg NIR_PASS(progress, nir, nir_copy_prop); 4077ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dce); 4087ec681f3Smrg 4097ec681f3Smrg /* Backend scheduler is purely local, so do some global optimizations 4107ec681f3Smrg * to reduce register pressure. */ 4117ec681f3Smrg nir_move_options move_all = 4127ec681f3Smrg nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | 4137ec681f3Smrg nir_move_comparisons | nir_move_copies | nir_move_load_ssbo; 4147ec681f3Smrg 4157ec681f3Smrg NIR_PASS_V(nir, nir_opt_sink, move_all); 4167ec681f3Smrg NIR_PASS_V(nir, nir_opt_move, move_all); 4177ec681f3Smrg 4187ec681f3Smrg /* Take us out of SSA */ 4197ec681f3Smrg NIR_PASS(progress, nir, nir_lower_locals_to_regs); 4207ec681f3Smrg NIR_PASS(progress, nir, nir_convert_from_ssa, true); 4217ec681f3Smrg 4227ec681f3Smrg /* We are a vector architecture; write combine where possible */ 4237ec681f3Smrg NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest); 4247ec681f3Smrg NIR_PASS(progress, nir, nir_lower_vec_to_movs, NULL, NULL); 4257ec681f3Smrg 4267ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dce); 4277ec681f3Smrg} 4287ec681f3Smrg 4297ec681f3Smrg/* Do not actually emit a load; instead, cache the constant for inlining */ 4307ec681f3Smrg 4317ec681f3Smrgstatic void 4327ec681f3Smrgemit_load_const(compiler_context *ctx, nir_load_const_instr *instr) 4337ec681f3Smrg{ 4347ec681f3Smrg 
nir_ssa_def def = instr->def; 4357ec681f3Smrg 4367ec681f3Smrg midgard_constants *consts = rzalloc(ctx, midgard_constants); 4377ec681f3Smrg 4387ec681f3Smrg assert(instr->def.num_components * instr->def.bit_size <= sizeof(*consts) * 8); 4397ec681f3Smrg 4407ec681f3Smrg#define RAW_CONST_COPY(bits) \ 4417ec681f3Smrg nir_const_value_to_array(consts->u##bits, instr->value, \ 4427ec681f3Smrg instr->def.num_components, u##bits) 4437ec681f3Smrg 4447ec681f3Smrg switch (instr->def.bit_size) { 4457ec681f3Smrg case 64: 4467ec681f3Smrg RAW_CONST_COPY(64); 4477ec681f3Smrg break; 4487ec681f3Smrg case 32: 4497ec681f3Smrg RAW_CONST_COPY(32); 4507ec681f3Smrg break; 4517ec681f3Smrg case 16: 4527ec681f3Smrg RAW_CONST_COPY(16); 4537ec681f3Smrg break; 4547ec681f3Smrg case 8: 4557ec681f3Smrg RAW_CONST_COPY(8); 4567ec681f3Smrg break; 4577ec681f3Smrg default: 4587ec681f3Smrg unreachable("Invalid bit_size for load_const instruction\n"); 4597ec681f3Smrg } 4607ec681f3Smrg 4617ec681f3Smrg /* Shifted for SSA, +1 for off-by-one */ 4627ec681f3Smrg _mesa_hash_table_u64_insert(ctx->ssa_constants, (def.index << 1) + 1, consts); 4637ec681f3Smrg} 4647ec681f3Smrg 4657ec681f3Smrg/* Normally constants are embedded implicitly, but for I/O and such we have to 4667ec681f3Smrg * explicitly emit a move with the constant source */ 4677ec681f3Smrg 4687ec681f3Smrgstatic void 4697ec681f3Smrgemit_explicit_constant(compiler_context *ctx, unsigned node, unsigned to) 4707ec681f3Smrg{ 4717ec681f3Smrg void *constant_value = _mesa_hash_table_u64_search(ctx->ssa_constants, node + 1); 4727ec681f3Smrg 4737ec681f3Smrg if (constant_value) { 4747ec681f3Smrg midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), to); 4757ec681f3Smrg attach_constants(ctx, &ins, constant_value, node + 1); 4767ec681f3Smrg emit_mir_instruction(ctx, ins); 4777ec681f3Smrg } 4787ec681f3Smrg} 4797ec681f3Smrg 4807ec681f3Smrgstatic bool 4817ec681f3Smrgnir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components) 4827ec681f3Smrg{ 
4837ec681f3Smrg unsigned comp = src->swizzle[0]; 4847ec681f3Smrg 4857ec681f3Smrg for (unsigned c = 1; c < nr_components; ++c) { 4867ec681f3Smrg if (src->swizzle[c] != comp) 4877ec681f3Smrg return true; 4887ec681f3Smrg } 4897ec681f3Smrg 4907ec681f3Smrg return false; 4917ec681f3Smrg} 4927ec681f3Smrg 4937ec681f3Smrg#define ATOMIC_CASE_IMPL(ctx, instr, nir, op, is_shared) \ 4947ec681f3Smrg case nir_intrinsic_##nir: \ 4957ec681f3Smrg emit_atomic(ctx, instr, is_shared, midgard_op_##op, ~0); \ 4967ec681f3Smrg break; 4977ec681f3Smrg 4987ec681f3Smrg#define ATOMIC_CASE(ctx, instr, nir, op) \ 4997ec681f3Smrg ATOMIC_CASE_IMPL(ctx, instr, shared_atomic_##nir, atomic_##op, true); \ 5007ec681f3Smrg ATOMIC_CASE_IMPL(ctx, instr, global_atomic_##nir, atomic_##op, false); 5017ec681f3Smrg 5027ec681f3Smrg#define IMAGE_ATOMIC_CASE(ctx, instr, nir, op) \ 5037ec681f3Smrg case nir_intrinsic_image_atomic_##nir: { \ 5047ec681f3Smrg midgard_instruction ins = emit_image_op(ctx, instr, true); \ 5057ec681f3Smrg emit_atomic(ctx, instr, false, midgard_op_atomic_##op, ins.dest); \ 5067ec681f3Smrg break; \ 5077ec681f3Smrg } 5087ec681f3Smrg 5097ec681f3Smrg#define ALU_CASE(nir, _op) \ 5107ec681f3Smrg case nir_op_##nir: \ 5117ec681f3Smrg op = midgard_alu_op_##_op; \ 5127ec681f3Smrg assert(src_bitsize == dst_bitsize); \ 5137ec681f3Smrg break; 5147ec681f3Smrg 5157ec681f3Smrg#define ALU_CASE_RTZ(nir, _op) \ 5167ec681f3Smrg case nir_op_##nir: \ 5177ec681f3Smrg op = midgard_alu_op_##_op; \ 5187ec681f3Smrg roundmode = MIDGARD_RTZ; \ 5197ec681f3Smrg break; 5207ec681f3Smrg 5217ec681f3Smrg#define ALU_CHECK_CMP() \ 5227ec681f3Smrg assert(src_bitsize == 16 || src_bitsize == 32 || src_bitsize == 64); \ 5237ec681f3Smrg assert(dst_bitsize == 16 || dst_bitsize == 32); \ 5247ec681f3Smrg 5257ec681f3Smrg#define ALU_CASE_BCAST(nir, _op, count) \ 5267ec681f3Smrg case nir_op_##nir: \ 5277ec681f3Smrg op = midgard_alu_op_##_op; \ 5287ec681f3Smrg broadcast_swizzle = count; \ 5297ec681f3Smrg ALU_CHECK_CMP(); \ 5307ec681f3Smrg 
break; 5317ec681f3Smrg 5327ec681f3Smrg#define ALU_CASE_CMP(nir, _op) \ 5337ec681f3Smrg case nir_op_##nir: \ 5347ec681f3Smrg op = midgard_alu_op_##_op; \ 5357ec681f3Smrg ALU_CHECK_CMP(); \ 5367ec681f3Smrg break; 5377ec681f3Smrg 5387ec681f3Smrg/* Compare mir_lower_invert */ 5397ec681f3Smrgstatic bool 5407ec681f3Smrgnir_accepts_inot(nir_op op, unsigned src) 5417ec681f3Smrg{ 5427ec681f3Smrg switch (op) { 5437ec681f3Smrg case nir_op_ior: 5447ec681f3Smrg case nir_op_iand: /* TODO: b2f16 */ 5457ec681f3Smrg case nir_op_ixor: 5467ec681f3Smrg return true; 5477ec681f3Smrg case nir_op_b32csel: 5487ec681f3Smrg /* Only the condition */ 5497ec681f3Smrg return (src == 0); 5507ec681f3Smrg default: 5517ec681f3Smrg return false; 5527ec681f3Smrg } 5537ec681f3Smrg} 5547ec681f3Smrg 5557ec681f3Smrgstatic bool 5567ec681f3Smrgmir_accept_dest_mod(compiler_context *ctx, nir_dest **dest, nir_op op) 5577ec681f3Smrg{ 5587ec681f3Smrg if (pan_has_dest_mod(dest, op)) { 5597ec681f3Smrg assert((*dest)->is_ssa); 5607ec681f3Smrg BITSET_SET(ctx->already_emitted, (*dest)->ssa.index); 5617ec681f3Smrg return true; 5627ec681f3Smrg } 5637ec681f3Smrg 5647ec681f3Smrg return false; 5657ec681f3Smrg} 5667ec681f3Smrg 5677ec681f3Smrg/* Look for floating point mods. We have the mods clamp_m1_1, clamp_0_1, 5687ec681f3Smrg * and clamp_0_inf. We also have the relations (note 3 * 2 = 6 cases): 5697ec681f3Smrg * 5707ec681f3Smrg * clamp_0_1(clamp_0_inf(x)) = clamp_m1_1(x) 5717ec681f3Smrg * clamp_0_1(clamp_m1_1(x)) = clamp_m1_1(x) 5727ec681f3Smrg * clamp_0_inf(clamp_0_1(x)) = clamp_m1_1(x) 5737ec681f3Smrg * clamp_0_inf(clamp_m1_1(x)) = clamp_m1_1(x) 5747ec681f3Smrg * clamp_m1_1(clamp_0_1(x)) = clamp_m1_1(x) 5757ec681f3Smrg * clamp_m1_1(clamp_0_inf(x)) = clamp_m1_1(x) 5767ec681f3Smrg * 5777ec681f3Smrg * So by cases any composition of output modifiers is equivalent to 5787ec681f3Smrg * clamp_m1_1 alone. 
5797ec681f3Smrg */ 5807ec681f3Smrgstatic unsigned 5817ec681f3Smrgmir_determine_float_outmod(compiler_context *ctx, nir_dest **dest, unsigned prior_outmod) 5827ec681f3Smrg{ 5837ec681f3Smrg bool clamp_0_inf = mir_accept_dest_mod(ctx, dest, nir_op_fclamp_pos_mali); 5847ec681f3Smrg bool clamp_0_1 = mir_accept_dest_mod(ctx, dest, nir_op_fsat); 5857ec681f3Smrg bool clamp_m1_1 = mir_accept_dest_mod(ctx, dest, nir_op_fsat_signed_mali); 5867ec681f3Smrg bool prior = (prior_outmod != midgard_outmod_none); 5877ec681f3Smrg int count = (int) prior + (int) clamp_0_inf + (int) clamp_0_1 + (int) clamp_m1_1; 5887ec681f3Smrg 5897ec681f3Smrg return ((count > 1) || clamp_0_1) ? midgard_outmod_clamp_0_1 : 5907ec681f3Smrg clamp_0_inf ? midgard_outmod_clamp_0_inf : 5917ec681f3Smrg clamp_m1_1 ? midgard_outmod_clamp_m1_1 : 5927ec681f3Smrg prior_outmod; 5937ec681f3Smrg} 5947ec681f3Smrg 5957ec681f3Smrgstatic void 5967ec681f3Smrgmir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, unsigned to, bool *abs, bool *neg, bool *not, enum midgard_roundmode *roundmode, bool is_int, unsigned bcast_count) 5977ec681f3Smrg{ 5987ec681f3Smrg nir_alu_src src = instr->src[i]; 5997ec681f3Smrg 6007ec681f3Smrg if (!is_int) { 6017ec681f3Smrg if (pan_has_source_mod(&src, nir_op_fneg)) 6027ec681f3Smrg *neg = !(*neg); 6037ec681f3Smrg 6047ec681f3Smrg if (pan_has_source_mod(&src, nir_op_fabs)) 6057ec681f3Smrg *abs = true; 6067ec681f3Smrg } 6077ec681f3Smrg 6087ec681f3Smrg if (nir_accepts_inot(instr->op, i) && pan_has_source_mod(&src, nir_op_inot)) 6097ec681f3Smrg *not = true; 6107ec681f3Smrg 6117ec681f3Smrg if (roundmode) { 6127ec681f3Smrg if (pan_has_source_mod(&src, nir_op_fround_even)) 6137ec681f3Smrg *roundmode = MIDGARD_RTE; 6147ec681f3Smrg 6157ec681f3Smrg if (pan_has_source_mod(&src, nir_op_ftrunc)) 6167ec681f3Smrg *roundmode = MIDGARD_RTZ; 6177ec681f3Smrg 6187ec681f3Smrg if (pan_has_source_mod(&src, nir_op_ffloor)) 6197ec681f3Smrg *roundmode = MIDGARD_RTN; 6207ec681f3Smrg 6217ec681f3Smrg if 
(pan_has_source_mod(&src, nir_op_fceil)) 6227ec681f3Smrg *roundmode = MIDGARD_RTP; 6237ec681f3Smrg } 6247ec681f3Smrg 6257ec681f3Smrg unsigned bits = nir_src_bit_size(src.src); 6267ec681f3Smrg 6277ec681f3Smrg ins->src[to] = nir_src_index(NULL, &src.src); 6287ec681f3Smrg ins->src_types[to] = nir_op_infos[instr->op].input_types[i] | bits; 6297ec681f3Smrg 6307ec681f3Smrg for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) { 6317ec681f3Smrg ins->swizzle[to][c] = src.swizzle[ 6327ec681f3Smrg (!bcast_count || c < bcast_count) ? c : 6337ec681f3Smrg (bcast_count - 1)]; 6347ec681f3Smrg } 6357ec681f3Smrg} 6367ec681f3Smrg 6377ec681f3Smrg/* Midgard features both fcsel and icsel, depending on whether you want int or 6387ec681f3Smrg * float modifiers. NIR's csel is typeless, so we want a heuristic to guess if 6397ec681f3Smrg * we should emit an int or float csel depending on what modifiers could be 6407ec681f3Smrg * placed. In the absense of modifiers, this is probably arbitrary. */ 6417ec681f3Smrg 6427ec681f3Smrgstatic bool 6437ec681f3Smrgmir_is_bcsel_float(nir_alu_instr *instr) 6447ec681f3Smrg{ 6457ec681f3Smrg nir_op intmods[] = { 6467ec681f3Smrg nir_op_i2i8, nir_op_i2i16, 6477ec681f3Smrg nir_op_i2i32, nir_op_i2i64 6487ec681f3Smrg }; 6497ec681f3Smrg 6507ec681f3Smrg nir_op floatmods[] = { 6517ec681f3Smrg nir_op_fabs, nir_op_fneg, 6527ec681f3Smrg nir_op_f2f16, nir_op_f2f32, 6537ec681f3Smrg nir_op_f2f64 6547ec681f3Smrg }; 6557ec681f3Smrg 6567ec681f3Smrg nir_op floatdestmods[] = { 6577ec681f3Smrg nir_op_fsat, nir_op_fsat_signed_mali, nir_op_fclamp_pos_mali, 6587ec681f3Smrg nir_op_f2f16, nir_op_f2f32 6597ec681f3Smrg }; 6607ec681f3Smrg 6617ec681f3Smrg signed score = 0; 6627ec681f3Smrg 6637ec681f3Smrg for (unsigned i = 1; i < 3; ++i) { 6647ec681f3Smrg nir_alu_src s = instr->src[i]; 6657ec681f3Smrg for (unsigned q = 0; q < ARRAY_SIZE(intmods); ++q) { 6667ec681f3Smrg if (pan_has_source_mod(&s, intmods[q])) 6677ec681f3Smrg score--; 6687ec681f3Smrg } 6697ec681f3Smrg } 6707ec681f3Smrg 
        for (unsigned i = 1; i < 3; ++i) {
                nir_alu_src s = instr->src[i];
                for (unsigned q = 0; q < ARRAY_SIZE(floatmods); ++q) {
                        if (pan_has_source_mod(&s, floatmods[q]))
                                score++;
                }
        }

        for (unsigned q = 0; q < ARRAY_SIZE(floatdestmods); ++q) {
                nir_dest *dest = &instr->dest.dest;
                if (pan_has_dest_mod(&dest, floatdestmods[q]))
                        score++;
        }

        return (score > 0);
}

/* Translate a single NIR ALU instruction into MIR and emit it. Handles the
 * NIR->Midgard opcode mapping, argument reordering (>= lowered to <= with
 * flipped sources, csel condition moved last), emulated ops that need a late
 * constant fixup (b2f/b2i via iand), and scalarization of VLUT-unit ops. */

static void
emit_alu(compiler_context *ctx, nir_alu_instr *instr)
{
        nir_dest *dest = &instr->dest.dest;

        /* Skip instructions already emitted by an earlier fusing pass */
        if (dest->is_ssa && BITSET_TEST(ctx->already_emitted, dest->ssa.index))
                return;

        /* Derivatives end up emitted on the texture pipe, not the ALUs. This
         * is handled elsewhere */

        if (instr->op == nir_op_fddx || instr->op == nir_op_fddy) {
                midgard_emit_derivatives(ctx, instr);
                return;
        }

        bool is_ssa = dest->is_ssa;

        unsigned nr_components = nir_dest_num_components(*dest);
        unsigned nr_inputs = nir_op_infos[instr->op].num_inputs;
        unsigned op = 0;

        /* Number of components valid to check for the instruction (the rest
         * will be forced to the last), or 0 to use as-is. Relevant as
         * ball-type instructions have a channel count in NIR but are all vec4
         * in Midgard */

        unsigned broadcast_swizzle = 0;

        /* Should we swap arguments? */
        bool flip_src12 = false;

        ASSERTED unsigned src_bitsize = nir_src_bit_size(instr->src[0].src);
        ASSERTED unsigned dst_bitsize = nir_dest_bit_size(*dest);

        enum midgard_roundmode roundmode = MIDGARD_RTE;

        switch (instr->op) {
                ALU_CASE(fadd, fadd);
                ALU_CASE(fmul, fmul);
                ALU_CASE(fmin, fmin);
                ALU_CASE(fmax, fmax);
                ALU_CASE(imin, imin);
                ALU_CASE(imax, imax);
                ALU_CASE(umin, umin);
                ALU_CASE(umax, umax);
                ALU_CASE(ffloor, ffloor);
                ALU_CASE(fround_even, froundeven);
                ALU_CASE(ftrunc, ftrunc);
                ALU_CASE(fceil, fceil);
                ALU_CASE(fdot3, fdot3);
                ALU_CASE(fdot4, fdot4);
                ALU_CASE(iadd, iadd);
                ALU_CASE(isub, isub);
                ALU_CASE(iadd_sat, iaddsat);
                ALU_CASE(isub_sat, isubsat);
                ALU_CASE(uadd_sat, uaddsat);
                ALU_CASE(usub_sat, usubsat);
                ALU_CASE(imul, imul);
                ALU_CASE(imul_high, imul);
                ALU_CASE(umul_high, imul);
                ALU_CASE(uclz, iclz);

                /* Zero shoved as second-arg */
                ALU_CASE(iabs, iabsdiff);

                ALU_CASE(uabs_isub, iabsdiff);
                ALU_CASE(uabs_usub, uabsdiff);

                ALU_CASE(mov, imov);

                ALU_CASE_CMP(feq32, feq);
                ALU_CASE_CMP(fneu32, fne);
                ALU_CASE_CMP(flt32, flt);
                ALU_CASE_CMP(ieq32, ieq);
                ALU_CASE_CMP(ine32, ine);
                ALU_CASE_CMP(ilt32, ilt);
                ALU_CASE_CMP(ult32, ult);

                /* We don't have a native b2f32 instruction. Instead, like many
                 * GPUs, we exploit booleans as 0/~0 for false/true, and
                 * correspondingly AND
                 * by 1.0 to do the type conversion. For the moment, prime us
                 * to emit:
                 *
                 * iand [whatever], #0
                 *
                 * At the end of emit_alu (as MIR), we'll fix-up the constant
                 */

                ALU_CASE_CMP(b2f32, iand);
                ALU_CASE_CMP(b2f16, iand);
                ALU_CASE_CMP(b2i32, iand);

                /* Likewise, we don't have a dedicated f2b32 instruction, but
                 * we can do a "not equal to 0.0" test. */

                ALU_CASE_CMP(f2b32, fne);
                ALU_CASE_CMP(i2b32, ine);

                ALU_CASE(frcp, frcp);
                ALU_CASE(frsq, frsqrt);
                ALU_CASE(fsqrt, fsqrt);
                ALU_CASE(fexp2, fexp2);
                ALU_CASE(flog2, flog2);

                ALU_CASE_RTZ(f2i64, f2i_rte);
                ALU_CASE_RTZ(f2u64, f2u_rte);
                ALU_CASE_RTZ(i2f64, i2f_rte);
                ALU_CASE_RTZ(u2f64, u2f_rte);

                ALU_CASE_RTZ(f2i32, f2i_rte);
                ALU_CASE_RTZ(f2u32, f2u_rte);
                ALU_CASE_RTZ(i2f32, i2f_rte);
                ALU_CASE_RTZ(u2f32, u2f_rte);

                ALU_CASE_RTZ(f2i8, f2i_rte);
                ALU_CASE_RTZ(f2u8, f2u_rte);

                ALU_CASE_RTZ(f2i16, f2i_rte);
                ALU_CASE_RTZ(f2u16, f2u_rte);
                ALU_CASE_RTZ(i2f16, i2f_rte);
                ALU_CASE_RTZ(u2f16, u2f_rte);

                ALU_CASE(fsin, fsinpi);
                ALU_CASE(fcos, fcospi);

                /* We'll get 0 in the second arg, so:
                 * ~a = ~(a | 0) = nor(a, 0) */
                ALU_CASE(inot, inor);

                ALU_CASE(iand, iand);
                ALU_CASE(ior, ior);
                ALU_CASE(ixor, ixor);
                ALU_CASE(ishl, ishl);
                ALU_CASE(ishr, iasr);
                ALU_CASE(ushr, ilsr);

                ALU_CASE_BCAST(b32all_fequal2, fball_eq, 2);
                ALU_CASE_BCAST(b32all_fequal3, fball_eq, 3);
                ALU_CASE_CMP(b32all_fequal4, fball_eq);

                ALU_CASE_BCAST(b32any_fnequal2, fbany_neq, 2);
                ALU_CASE_BCAST(b32any_fnequal3, fbany_neq, 3);
                ALU_CASE_CMP(b32any_fnequal4, fbany_neq);

                ALU_CASE_BCAST(b32all_iequal2, iball_eq, 2);
                ALU_CASE_BCAST(b32all_iequal3, iball_eq, 3);
                ALU_CASE_CMP(b32all_iequal4, iball_eq);

                ALU_CASE_BCAST(b32any_inequal2, ibany_neq, 2);
                ALU_CASE_BCAST(b32any_inequal3, ibany_neq, 3);
                ALU_CASE_CMP(b32any_inequal4, ibany_neq);

                /* Source mods will be shoved in later */
                ALU_CASE(fabs, fmov);
                ALU_CASE(fneg, fmov);
                ALU_CASE(fsat, fmov);
                ALU_CASE(fsat_signed_mali, fmov);
                ALU_CASE(fclamp_pos_mali, fmov);

        /* For size conversion, we use a move. Ideally though we would squash
         * these ops together; maybe that has to happen after in NIR as part of
         * propagation...? An earlier algebraic pass ensured we step down by
         * only / exactly one size. If stepping down, we use a dest override to
         * reduce the size; if stepping up, we use a larger-sized move with a
         * half source and a sign/zero-extension modifier */

        case nir_op_i2i8:
        case nir_op_i2i16:
        case nir_op_i2i32:
        case nir_op_i2i64:
        case nir_op_u2u8:
        case nir_op_u2u16:
        case nir_op_u2u32:
        case nir_op_u2u64:
        case nir_op_f2f16:
        case nir_op_f2f32:
        case nir_op_f2f64: {
                if (instr->op == nir_op_f2f16 || instr->op == nir_op_f2f32 ||
                    instr->op == nir_op_f2f64)
                        op = midgard_alu_op_fmov;
                else
                        op = midgard_alu_op_imov;

                break;
        }

        /* For greater-or-equal, we lower to less-or-equal and flip the
         * arguments */

        case nir_op_fge:
        case nir_op_fge32:
        case nir_op_ige32:
        case nir_op_uge32: {
                op =
                        instr->op == nir_op_fge   ? midgard_alu_op_fle :
                        instr->op == nir_op_fge32 ? midgard_alu_op_fle :
                        instr->op == nir_op_ige32 ? midgard_alu_op_ile :
                        instr->op == nir_op_uge32 ? midgard_alu_op_ule :
                        0;

                flip_src12 = true;
                ALU_CHECK_CMP();
                break;
        }

        case nir_op_b32csel: {
                /* Use the vector (_v) variant when the condition swizzle is
                 * not a simple per-component select */
                bool mixed = nir_is_non_scalar_swizzle(&instr->src[0], nr_components);
                bool is_float = mir_is_bcsel_float(instr);
                op = is_float ?
                     (mixed ? midgard_alu_op_fcsel_v : midgard_alu_op_fcsel) :
                     (mixed ? midgard_alu_op_icsel_v : midgard_alu_op_icsel);

                break;
        }

        case nir_op_unpack_32_2x16:
        case nir_op_unpack_32_4x8:
        case nir_op_pack_32_2x16:
        case nir_op_pack_32_4x8: {
                /* Pure bit movement; mask/dest_type fixed up below */
                op = midgard_alu_op_imov;
                break;
        }

        default:
                DBG("Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
                assert(0);
                return;
        }

        /* Promote imov to fmov if it might help inline a constant */
        if (op == midgard_alu_op_imov && nir_src_is_const(instr->src[0].src)
                        && nir_src_bit_size(instr->src[0].src) == 32
                        && nir_is_same_comp_swizzle(instr->src[0].swizzle,
                                nir_src_num_components(instr->src[0].src))) {
                op = midgard_alu_op_fmov;
        }

        /* Midgard can perform certain modifiers on output of an ALU op */

        unsigned outmod = 0;
        bool is_int = midgard_is_integer_op(op);

        if (instr->op == nir_op_umul_high || instr->op == nir_op_imul_high) {
                outmod = midgard_outmod_keephi;
        } else if (midgard_is_integer_out_op(op)) {
                outmod = midgard_outmod_keeplo;
        } else if (instr->op == nir_op_fsat) {
                outmod = midgard_outmod_clamp_0_1;
        } else if (instr->op == nir_op_fsat_signed_mali) {
                outmod = midgard_outmod_clamp_m1_1;
        } else if (instr->op == nir_op_fclamp_pos_mali) {
                outmod = midgard_outmod_clamp_0_inf;
        }

        /* Fetch unit, quirks, etc information */
        unsigned opcode_props = alu_opcode_props[op].props;
        bool quirk_flipped_r24 = opcode_props & QUIRK_FLIPPED_R24;

        /* Float ops may fuse a downstream clamp into the outmod; this can
         * also retarget `dest` (passed by reference) */
        if (!midgard_is_integer_out_op(op)) {
                outmod = mir_determine_float_outmod(ctx, &dest, outmod);
        }

        midgard_instruction ins = {
                .type = TAG_ALU_4,
                .dest = nir_dest_index(dest),
                .dest_type = nir_op_infos[instr->op].output_type
                        | nir_dest_bit_size(*dest),
                .roundmode = roundmode,
        };

        enum midgard_roundmode *roundptr = (opcode_props & MIDGARD_ROUNDS) ?
                &ins.roundmode : NULL;

        for (unsigned i = nr_inputs; i < ARRAY_SIZE(ins.src); ++i)
                ins.src[i] = ~0;

        if (quirk_flipped_r24) {
                /* Quirked unary ops read their operand from the second source
                 * slot, so route NIR src 0 into MIR src 1 */
                ins.src[0] = ~0;
                mir_copy_src(&ins, instr, 0, 1, &ins.src_abs[1], &ins.src_neg[1], &ins.src_invert[1], roundptr, is_int, broadcast_swizzle);
        } else {
                for (unsigned i = 0; i < nr_inputs; ++i) {
                        unsigned to = i;

                        if (instr->op == nir_op_b32csel) {
                                /* The condition is the first argument; move
                                 * the other arguments up one to be a binary
                                 * instruction for Midgard with the condition
                                 * last */

                                if (i == 0)
                                        to = 2;
                                else if (flip_src12)
                                        to = 2 - i;
                                else
                                        to = i - 1;
                        } else if (flip_src12) {
                                to = 1 - to;
                        }

                        mir_copy_src(&ins, instr, i, to, &ins.src_abs[to], &ins.src_neg[to], &ins.src_invert[to], roundptr, is_int, broadcast_swizzle);

                        /* (!c) ? a : b = c ? b : a */
                        if (instr->op == nir_op_b32csel && ins.src_invert[2]) {
                                ins.src_invert[2] = false;
                                flip_src12 ^= true;
                        }
                }
        }

        if (instr->op == nir_op_fneg || instr->op == nir_op_fabs) {
                /* Lowered to move */
                if (instr->op == nir_op_fneg)
                        ins.src_neg[1] ^= true;

                if (instr->op == nir_op_fabs)
                        ins.src_abs[1] = true;
        }

        ins.mask = mask_of(nr_components);

        /* Apply writemask if non-SSA, keeping in mind that we can't write to
         * components that don't exist. Note modifier => SSA => !reg => no
         * writemask, so we don't have to worry about writemasks here.*/

        if (!is_ssa)
                ins.mask &= instr->dest.write_mask;

        ins.op = op;
        ins.outmod = outmod;

        /* Late fixup for emulated instructions */

        if (instr->op == nir_op_b2f32 || instr->op == nir_op_b2i32) {
                /* Presently, our second argument is an inline #0 constant.
                 * Switch over to an embedded 1.0 constant (that can't fit
                 * inline, since we're 32-bit, not 16-bit like the inline
                 * constants) */

                ins.has_inline_constant = false;
                ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
                ins.src_types[1] = nir_type_float32;
                ins.has_constants = true;

                if (instr->op == nir_op_b2f32)
                        ins.constants.f32[0] = 1.0f;
                else
                        ins.constants.i32[0] = 1;

                for (unsigned c = 0; c < 16; ++c)
                        ins.swizzle[1][c] = 0;
        } else if (instr->op == nir_op_b2f16) {
                ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
                ins.src_types[1] = nir_type_float16;
                ins.has_constants = true;
                ins.constants.i16[0] = _mesa_float_to_half(1.0);

                for (unsigned c = 0; c < 16; ++c)
                        ins.swizzle[1][c] = 0;
        } else if (nr_inputs == 1 && !quirk_flipped_r24) {
                /* Lots of instructions need a 0 plonked in */
                ins.has_inline_constant = false;
                ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
                ins.src_types[1] = ins.src_types[0];
                ins.has_constants = true;
                ins.constants.u32[0] = 0;

                for (unsigned c = 0; c < 16; ++c)
                        ins.swizzle[1][c] = 0;
        } else if (instr->op == nir_op_pack_32_2x16) {
                ins.dest_type = nir_type_uint16;
                ins.mask = mask_of(nr_components * 2);
                ins.is_pack = true;
        } else if (instr->op == nir_op_pack_32_4x8) {
                ins.dest_type = nir_type_uint8;
                ins.mask = mask_of(nr_components * 4);
                ins.is_pack = true;
        } else if (instr->op == nir_op_unpack_32_2x16) {
                ins.dest_type = nir_type_uint32;
                ins.mask = mask_of(nr_components >> 1);
                ins.is_pack = true;
        } else if (instr->op == nir_op_unpack_32_4x8) {
                ins.dest_type = nir_type_uint32;
                ins.mask = mask_of(nr_components >> 2);
                ins.is_pack = true;
        }

        if ((opcode_props & UNITS_ALL) == UNIT_VLUT) {
                /* To avoid duplicating the lookup tables (probably), true LUT
                 * instructions can only operate as if they were scalars. Lower
                 * them here by changing the component. */

                unsigned orig_mask = ins.mask;

                unsigned swizzle_back[MIR_VEC_COMPONENTS];
                memcpy(&swizzle_back, ins.swizzle[0], sizeof(swizzle_back));

                midgard_instruction ins_split[MIR_VEC_COMPONENTS];
                unsigned ins_count = 0;

                for (int i = 0; i < nr_components; ++i) {
                        /* Mask the associated component, dropping the
                         * instruction if needed */

                        ins.mask = 1 << i;
                        ins.mask &= orig_mask;

                        /* Components sourcing the same input can share one
                         * scalar instruction; merge masks instead of
                         * duplicating */
                        for (unsigned j = 0; j < ins_count; ++j) {
                                if (swizzle_back[i] == ins_split[j].swizzle[0][0]) {
                                        ins_split[j].mask |= ins.mask;
                                        ins.mask = 0;
                                        break;
                                }
                        }

                        if (!ins.mask)
                                continue;

                        for (unsigned j = 0; j < MIR_VEC_COMPONENTS; ++j)
                                ins.swizzle[0][j] = swizzle_back[i]; /* Pull from the correct component */

                        ins_split[ins_count] = ins;

                        ++ins_count;
                }

                for (unsigned i = 0; i < ins_count; ++i) {
                        emit_mir_instruction(ctx, ins_split[i]);
                }
        } else {
                emit_mir_instruction(ctx, ins);
        }
}

#undef ALU_CASE

/* Derive a load/store instruction's mask and dest_type from the NIR
 * intrinsic: component count for reads, the write mask (at 32-bit) for
 * stores, normalized through a byte mask. */

static void
mir_set_intr_mask(nir_instr *instr, midgard_instruction *ins, bool is_read)
{
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
        unsigned nir_mask = 0;
        unsigned dsize = 0;

        if (is_read) {
                nir_mask = mask_of(nir_intrinsic_dest_components(intr));
                dsize = nir_dest_bit_size(intr->dest);
        } else {
                nir_mask = nir_intrinsic_write_mask(intr);
                dsize = 32;
        }

        /* Once we have the NIR mask, we need to normalize to work in 32-bit space */
        unsigned bytemask = pan_to_bytemask(dsize, nir_mask);
        ins->dest_type = nir_type_uint | dsize;
        mir_set_bytemask(ins, bytemask);
}

/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
 * optimized) versions of UBO #0. Returns the emitted instruction so the
 * caller may adjust it (e.g. the mask). */

static midgard_instruction *
emit_ubo_read(
        compiler_context *ctx,
        nir_instr *instr,
        unsigned dest,
        unsigned offset,
        nir_src *indirect_offset,
        unsigned indirect_shift,
        unsigned index,
        unsigned nr_comps)
{
        midgard_instruction ins;

        unsigned dest_size = (instr->type == nir_instr_type_intrinsic) ?
                nir_dest_bit_size(nir_instr_as_intrinsic(instr)->dest) : 32;

        unsigned bitsize = dest_size * nr_comps;

        /* Pick the smallest intrinsic to avoid out-of-bounds reads */
        if (bitsize <= 32)
                ins = m_ld_ubo_32(dest, 0);
        else if (bitsize <= 64)
                ins = m_ld_ubo_64(dest, 0);
        else if (bitsize <= 128)
                ins = m_ld_ubo_128(dest, 0);
        else
                unreachable("Invalid UBO read size");

        ins.constants.u32[0] = offset;

        if (instr->type == nir_instr_type_intrinsic)
                mir_set_intr_mask(instr, &ins, true);

        if (indirect_offset) {
                ins.src[2] = nir_src_index(ctx, indirect_offset);
                ins.src_types[2] = nir_type_uint32;
                ins.load_store.index_shift = indirect_shift;

                /* X component for the whole swizzle to prevent register
                 * pressure from ballooning from the extra components */
                for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i)
                        ins.swizzle[2][i] = 0;
        } else {
                ins.load_store.index_reg = REGISTER_LDST_ZERO;
        }

        if (indirect_offset && indirect_offset->is_ssa && !indirect_shift)
                mir_set_ubo_offset(&ins, indirect_offset, offset);

        midgard_pack_ubo_index_imm(&ins.load_store, index);

        return emit_mir_instruction(ctx, ins);
}

/* Globals are like UBOs if you squint.
And shared memory is like globals if
 * you squint even harder */

/* Emit a load (is_read) or store to global/shared memory for the given
 * intrinsic; `seg` selects the memory segment via mir_set_offset. */

static void
emit_global(
        compiler_context *ctx,
        nir_instr *instr,
        bool is_read,
        unsigned srcdest,
        nir_src *offset,
        unsigned seg)
{
        midgard_instruction ins;

        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
        if (is_read) {
                /* Pick the smallest load wide enough for all components */
                unsigned bitsize = nir_dest_bit_size(intr->dest) *
                        nir_dest_num_components(intr->dest);

                if (bitsize <= 32)
                        ins = m_ld_32(srcdest, 0);
                else if (bitsize <= 64)
                        ins = m_ld_64(srcdest, 0);
                else if (bitsize <= 128)
                        ins = m_ld_128(srcdest, 0);
                else
                        unreachable("Invalid global read size");
        } else {
                unsigned bitsize = nir_src_bit_size(intr->src[0]) *
                        nir_src_num_components(intr->src[0]);

                if (bitsize <= 32)
                        ins = m_st_32(srcdest, 0);
                else if (bitsize <= 64)
                        ins = m_st_64(srcdest, 0);
                else if (bitsize <= 128)
                        ins = m_st_128(srcdest, 0);
                else
                        unreachable("Invalid global store size");
        }

        mir_set_offset(ctx, &ins, offset, seg);
        mir_set_intr_mask(instr, &ins, is_read);

        /* Set a valid swizzle for masked out components */
        assert(ins.mask);
        unsigned first_component = __builtin_ffs(ins.mask) - 1;

        for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) {
                if (!(ins.mask & (1 << i)))
                        ins.swizzle[0][i] = first_component;
        }

        emit_mir_instruction(ctx, ins);
}

/* If is_shared is off, the only other possible value are globals, since
 * SSBO's are being lowered to globals through a NIR pass.
 * `image_direct_address` should be ~0 when instr is not an image_atomic
 * and the destination register of a lea_image op when it is an image_atomic. */
static void
emit_atomic(
        compiler_context *ctx,
        nir_intrinsic_instr *instr,
        bool is_shared,
        midgard_load_store_op op,
        unsigned image_direct_address)
{
        /* Only the signed min/max atomics operate on signed data */
        nir_alu_type type =
                (op == midgard_op_atomic_imin || op == midgard_op_atomic_imax) ?
                nir_type_int : nir_type_uint;

        bool is_image = image_direct_address != ~0;

        unsigned dest = nir_dest_index(&instr->dest);
        /* Image atomics carry the image index/coords first, shifting the
         * value (and compare value) sources up */
        unsigned val_src = is_image ? 3 : 1;
        unsigned val = nir_src_index(ctx, &instr->src[val_src]);
        unsigned bitsize = nir_src_bit_size(instr->src[val_src]);
        emit_explicit_constant(ctx, val, val);

        midgard_instruction ins = {
                .type = TAG_LOAD_STORE_4,
                .mask = 0xF,
                .dest = dest,
                .src = { ~0, ~0, ~0, val },
                .src_types = { 0, 0, 0, type | bitsize },
                .op = op
        };

        nir_src *src_offset = nir_get_io_offset_src(instr);

        if (op == midgard_op_atomic_cmpxchg) {
                unsigned xchg_val_src = is_image ? 4 : 2;
                unsigned xchg_val = nir_src_index(ctx, &instr->src[xchg_val_src]);
                emit_explicit_constant(ctx, xchg_val, xchg_val);

                ins.src[2] = val;
                ins.src_types[2] = type | bitsize;
                ins.src[3] = xchg_val;

                if (is_shared) {
                        ins.load_store.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR;
                        ins.load_store.arg_comp = COMPONENT_Z;
                        ins.load_store.bitsize_toggle = true;
                } else {
                        /* 64-bit address occupies two components */
                        for(unsigned i = 0; i < 2; ++i)
                                ins.swizzle[1][i] = i;

                        ins.src[1] = is_image ? image_direct_address :
                                nir_src_index(ctx, src_offset);
                        ins.src_types[1] = nir_type_uint64;
                }
        } else if (is_image) {
                for(unsigned i = 0; i < 2; ++i)
                        ins.swizzle[2][i] = i;

                ins.src[2] = image_direct_address;
                ins.src_types[2] = nir_type_uint64;

                ins.load_store.arg_reg = REGISTER_LDST_ZERO;
                ins.load_store.bitsize_toggle = true;
                ins.load_store.index_format = midgard_index_address_u64;
        } else
                mir_set_offset(ctx, &ins, src_offset, is_shared ? LDST_SHARED : LDST_GLOBAL);

        mir_set_intr_mask(&instr->instr, &ins, true);

        emit_mir_instruction(ctx, ins);
}

/* Emit a varying load of `nr_comp` components starting at `component`,
 * choosing the opcode from the destination type. */

static void
emit_varying_read(
        compiler_context *ctx,
        unsigned dest, unsigned offset,
        unsigned nr_comp, unsigned component,
        nir_src *indirect_offset, nir_alu_type type, bool flat)
{
        /* XXX: Half-floats?
 */
        /* TODO: swizzle, mask */

        midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset));
        ins.mask = mask_of(nr_comp);
        ins.dest_type = type;

        if (type == nir_type_float16) {
                /* Ensure we are aligned so we can pack it later */
                ins.mask = mask_of(ALIGN_POT(nr_comp, 2));
        }

        /* Start reading at `component`, clamping at the last channel */
        for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i)
                ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W);


        midgard_varying_params p = {
                .flat_shading = flat,
                .perspective_correction = 1,
                .interpolate_sample = true,
        };
        midgard_pack_varying_params(&ins.load_store, p);

        if (indirect_offset) {
                ins.src[2] = nir_src_index(ctx, indirect_offset);
                ins.src_types[2] = nir_type_uint32;
        } else
                ins.load_store.index_reg = REGISTER_LDST_ZERO;

        ins.load_store.arg_reg = REGISTER_LDST_ZERO;
        ins.load_store.index_format = midgard_index_address_u32;

        /* Use the type appropriate load */
        switch (type) {
        case nir_type_uint32:
        case nir_type_bool32:
                ins.op = midgard_op_ld_vary_32u;
                break;
        case nir_type_int32:
                ins.op = midgard_op_ld_vary_32i;
                break;
        case nir_type_float32:
                ins.op = midgard_op_ld_vary_32;
                break;
        case nir_type_float16:
                ins.op = midgard_op_ld_vary_16;
                break;
        default:
                unreachable("Attempted to load unknown type");
                break;
        }

        emit_mir_instruction(ctx, ins);
}


/* If `is_atomic` is true, we emit a `lea_image` since midgard doesn't have special
 * image_atomic opcodes. The caller can then use that address to emit a normal atomic opcode. */
static midgard_instruction
emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
{
        enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
        /* In vertex shaders, images are allocated after the vertex
         * attributes in the descriptor table */
        unsigned nr_attr = ctx->stage == MESA_SHADER_VERTEX ?
                util_bitcount64(ctx->nir->info.inputs_read) : 0;
        unsigned nr_dim = glsl_get_sampler_dim_coordinate_components(dim);
        bool is_array = nir_intrinsic_image_array(instr);
        bool is_store = instr->intrinsic == nir_intrinsic_image_store;

        /* TODO: MSAA */
        assert(dim != GLSL_SAMPLER_DIM_MS && "MSAA'd images not supported");

        unsigned coord_reg = nir_src_index(ctx, &instr->src[1]);
        emit_explicit_constant(ctx, coord_reg, coord_reg);

        nir_src *index = &instr->src[0];
        bool is_direct = nir_src_is_const(*index);

        /* For image opcodes, address is used as an index into the attribute descriptor */
        unsigned address = nr_attr;
        if (is_direct)
                address += nir_src_as_uint(*index);

        midgard_instruction ins;
        if (is_store) { /* emit st_image_* */
                unsigned val = nir_src_index(ctx, &instr->src[3]);
                emit_explicit_constant(ctx, val, val);

                nir_alu_type type = nir_intrinsic_src_type(instr);
                ins = st_image(type, val, PACK_LDST_ATTRIB_OFS(address));
                nir_alu_type base_type = nir_alu_type_get_base_type(type);
                ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]);
        } else if (is_atomic) { /* emit lea_image */
                unsigned dest = make_compiler_temp_reg(ctx);
                ins = m_lea_image(dest, PACK_LDST_ATTRIB_OFS(address));
                ins.mask = mask_of(2); /* 64-bit memory address */
        } else { /* emit ld_image_* */
                nir_alu_type type = nir_intrinsic_dest_type(instr);
                ins = ld_image(type, nir_dest_index(&instr->dest), PACK_LDST_ATTRIB_OFS(address));
                ins.mask = mask_of(nir_intrinsic_dest_components(instr));
                ins.dest_type = type;
        }

        /* Coord reg */
        ins.src[1] = coord_reg;
        ins.src_types[1] = nir_type_uint16;
        if (nr_dim == 3 || is_array) {
                ins.load_store.bitsize_toggle = true;
        }

        /* Image index reg */
        if (!is_direct) {
                ins.src[2] = nir_src_index(ctx, index);
                ins.src_types[2] = nir_type_uint32;
        } else
                ins.load_store.index_reg = REGISTER_LDST_ZERO;

        emit_mir_instruction(ctx, ins);

        return ins;
}

/* Emit a vertex attribute load of `nr_comp` components, with the opcode
 * selected from the attribute's base type. */

static void
emit_attr_read(
        compiler_context *ctx,
        unsigned dest, unsigned offset,
        unsigned nr_comp, nir_alu_type t)
{
        midgard_instruction ins = m_ld_attr_32(dest, PACK_LDST_ATTRIB_OFS(offset));
        ins.load_store.arg_reg = REGISTER_LDST_ZERO;
        ins.load_store.index_reg = REGISTER_LDST_ZERO;
        ins.mask = mask_of(nr_comp);

        /* Use the type appropriate load */
        switch (t) {
        case nir_type_uint:
        case nir_type_bool:
                ins.op = midgard_op_ld_attr_32u;
                break;
        case nir_type_int:
                ins.op = midgard_op_ld_attr_32i;
                break;
        case nir_type_float:
                ins.op = midgard_op_ld_attr_32;
                break;
        default:
                unreachable("Attempted to load unknown type");
                break;
        }

        emit_mir_instruction(ctx, ins);
}

/* Read a driver sysval for `instr` through the dedicated sysval UBO. The
 * sysval slot is looked up (or allocated) in ctx->sysval_to_id; the read is
 * always a direct UBO load. */

static void
emit_sysval_read(compiler_context *ctx, nir_instr *instr,
                unsigned nr_components, unsigned offset)
{
        nir_dest nir_dest;

        /* Figure out which uniform this is */
        unsigned sysval_ubo =
                MAX2(ctx->inputs->sysval_ubo, ctx->nir->info.num_ubos);
        int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
        unsigned dest = nir_dest_index(&nir_dest);
        unsigned uniform =
                pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval);

        /* Emit the read itself -- this is never indirect */
        midgard_instruction *ins =
                emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0,
                              sysval_ubo, nr_components);

        ins->mask = mask_of(nr_components);
}

/* Map a compute builtin intrinsic to the fixed load/store register that
 * holds it on Midgard. */

static unsigned
compute_builtin_arg(nir_intrinsic_op op)
{
        switch (op) {
        case nir_intrinsic_load_workgroup_id:
                return
REGISTER_LDST_GROUP_ID; 15197ec681f3Smrg case nir_intrinsic_load_local_invocation_id: 15207ec681f3Smrg return REGISTER_LDST_LOCAL_THREAD_ID; 15217ec681f3Smrg case nir_intrinsic_load_global_invocation_id: 15227ec681f3Smrg case nir_intrinsic_load_global_invocation_id_zero_base: 15237ec681f3Smrg return REGISTER_LDST_GLOBAL_THREAD_ID; 15247ec681f3Smrg default: 15257ec681f3Smrg unreachable("Invalid compute paramater loaded"); 15267ec681f3Smrg } 15277ec681f3Smrg} 15287ec681f3Smrg 15297ec681f3Smrgstatic void 15307ec681f3Smrgemit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigned src_s, 15317ec681f3Smrg enum midgard_rt_id rt, unsigned sample_iter) 15327ec681f3Smrg{ 15337ec681f3Smrg assert(rt < ARRAY_SIZE(ctx->writeout_branch)); 15347ec681f3Smrg assert(sample_iter < ARRAY_SIZE(ctx->writeout_branch[0])); 15357ec681f3Smrg 15367ec681f3Smrg midgard_instruction *br = ctx->writeout_branch[rt][sample_iter]; 15377ec681f3Smrg 15387ec681f3Smrg assert(!br); 15397ec681f3Smrg 15407ec681f3Smrg emit_explicit_constant(ctx, src, src); 15417ec681f3Smrg 15427ec681f3Smrg struct midgard_instruction ins = 15437ec681f3Smrg v_branch(false, false); 15447ec681f3Smrg 15457ec681f3Smrg bool depth_only = (rt == MIDGARD_ZS_RT); 15467ec681f3Smrg 15477ec681f3Smrg ins.writeout = depth_only ? 
0 : PAN_WRITEOUT_C; 15487ec681f3Smrg 15497ec681f3Smrg /* Add dependencies */ 15507ec681f3Smrg ins.src[0] = src; 15517ec681f3Smrg ins.src_types[0] = nir_type_uint32; 15527ec681f3Smrg 15537ec681f3Smrg if (depth_only) 15547ec681f3Smrg ins.constants.u32[0] = 0xFF; 15557ec681f3Smrg else 15567ec681f3Smrg ins.constants.u32[0] = ((rt - MIDGARD_COLOR_RT0) << 8) | sample_iter; 15577ec681f3Smrg 15587ec681f3Smrg for (int i = 0; i < 4; ++i) 15597ec681f3Smrg ins.swizzle[0][i] = i; 15607ec681f3Smrg 15617ec681f3Smrg if (~src_z) { 15627ec681f3Smrg emit_explicit_constant(ctx, src_z, src_z); 15637ec681f3Smrg ins.src[2] = src_z; 15647ec681f3Smrg ins.src_types[2] = nir_type_uint32; 15657ec681f3Smrg ins.writeout |= PAN_WRITEOUT_Z; 15667ec681f3Smrg } 15677ec681f3Smrg if (~src_s) { 15687ec681f3Smrg emit_explicit_constant(ctx, src_s, src_s); 15697ec681f3Smrg ins.src[3] = src_s; 15707ec681f3Smrg ins.src_types[3] = nir_type_uint32; 15717ec681f3Smrg ins.writeout |= PAN_WRITEOUT_S; 15727ec681f3Smrg } 15737ec681f3Smrg 15747ec681f3Smrg /* Emit the branch */ 15757ec681f3Smrg br = emit_mir_instruction(ctx, ins); 15767ec681f3Smrg schedule_barrier(ctx); 15777ec681f3Smrg ctx->writeout_branch[rt][sample_iter] = br; 15787ec681f3Smrg 15797ec681f3Smrg /* Push our current location = current block count - 1 = where we'll 15807ec681f3Smrg * jump to. 
Maybe a bit too clever for my own good */ 15817ec681f3Smrg 15827ec681f3Smrg br->branch.target_block = ctx->block_count - 1; 15837ec681f3Smrg} 15847ec681f3Smrg 15857ec681f3Smrgstatic void 15867ec681f3Smrgemit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) 15877ec681f3Smrg{ 15887ec681f3Smrg unsigned reg = nir_dest_index(&instr->dest); 15897ec681f3Smrg midgard_instruction ins = m_ldst_mov(reg, 0); 15907ec681f3Smrg ins.mask = mask_of(3); 15917ec681f3Smrg ins.swizzle[0][3] = COMPONENT_X; /* xyzx */ 15927ec681f3Smrg ins.load_store.arg_reg = compute_builtin_arg(instr->intrinsic); 15937ec681f3Smrg emit_mir_instruction(ctx, ins); 15947ec681f3Smrg} 15957ec681f3Smrg 15967ec681f3Smrgstatic unsigned 15977ec681f3Smrgvertex_builtin_arg(nir_intrinsic_op op) 15987ec681f3Smrg{ 15997ec681f3Smrg switch (op) { 16007ec681f3Smrg case nir_intrinsic_load_vertex_id_zero_base: 16017ec681f3Smrg return PAN_VERTEX_ID; 16027ec681f3Smrg case nir_intrinsic_load_instance_id: 16037ec681f3Smrg return PAN_INSTANCE_ID; 16047ec681f3Smrg default: 16057ec681f3Smrg unreachable("Invalid vertex builtin"); 16067ec681f3Smrg } 16077ec681f3Smrg} 16087ec681f3Smrg 16097ec681f3Smrgstatic void 16107ec681f3Smrgemit_vertex_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) 16117ec681f3Smrg{ 16127ec681f3Smrg unsigned reg = nir_dest_index(&instr->dest); 16137ec681f3Smrg emit_attr_read(ctx, reg, vertex_builtin_arg(instr->intrinsic), 1, nir_type_int); 16147ec681f3Smrg} 16157ec681f3Smrg 16167ec681f3Smrgstatic void 16177ec681f3Smrgemit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx) 16187ec681f3Smrg{ 16197ec681f3Smrg unsigned reg = nir_dest_index(&instr->dest); 16207ec681f3Smrg 16217ec681f3Smrg midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); 16227ec681f3Smrg ld.op = midgard_op_ld_special_32u; 16237ec681f3Smrg ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(idx); 16247ec681f3Smrg ld.load_store.index_reg = REGISTER_LDST_ZERO; 16257ec681f3Smrg 16267ec681f3Smrg for 
(int i = 0; i < 4; ++i) 16277ec681f3Smrg ld.swizzle[0][i] = COMPONENT_X; 16287ec681f3Smrg 16297ec681f3Smrg emit_mir_instruction(ctx, ld); 16307ec681f3Smrg} 16317ec681f3Smrg 16327ec681f3Smrgstatic void 16337ec681f3Smrgemit_control_barrier(compiler_context *ctx) 16347ec681f3Smrg{ 16357ec681f3Smrg midgard_instruction ins = { 16367ec681f3Smrg .type = TAG_TEXTURE_4, 16377ec681f3Smrg .dest = ~0, 16387ec681f3Smrg .src = { ~0, ~0, ~0, ~0 }, 16397ec681f3Smrg .op = midgard_tex_op_barrier, 16407ec681f3Smrg }; 16417ec681f3Smrg 16427ec681f3Smrg emit_mir_instruction(ctx, ins); 16437ec681f3Smrg} 16447ec681f3Smrg 16457ec681f3Smrgstatic unsigned 16467ec681f3Smrgmir_get_branch_cond(nir_src *src, bool *invert) 16477ec681f3Smrg{ 16487ec681f3Smrg /* Wrap it. No swizzle since it's a scalar */ 16497ec681f3Smrg 16507ec681f3Smrg nir_alu_src alu = { 16517ec681f3Smrg .src = *src 16527ec681f3Smrg }; 16537ec681f3Smrg 16547ec681f3Smrg *invert = pan_has_source_mod(&alu, nir_op_inot); 16557ec681f3Smrg return nir_src_index(NULL, &alu.src); 16567ec681f3Smrg} 16577ec681f3Smrg 16587ec681f3Smrgstatic uint8_t 16597ec681f3Smrgoutput_load_rt_addr(compiler_context *ctx, nir_intrinsic_instr *instr) 16607ec681f3Smrg{ 16617ec681f3Smrg if (ctx->inputs->is_blend) 16627ec681f3Smrg return MIDGARD_COLOR_RT0 + ctx->inputs->blend.rt; 16637ec681f3Smrg 16647ec681f3Smrg const nir_variable *var; 16657ec681f3Smrg var = nir_find_variable_with_driver_location(ctx->nir, nir_var_shader_out, nir_intrinsic_base(instr)); 16667ec681f3Smrg assert(var); 16677ec681f3Smrg 16687ec681f3Smrg unsigned loc = var->data.location; 16697ec681f3Smrg 16707ec681f3Smrg if (loc >= FRAG_RESULT_DATA0) 16717ec681f3Smrg return loc - FRAG_RESULT_DATA0; 16727ec681f3Smrg 16737ec681f3Smrg if (loc == FRAG_RESULT_DEPTH) 16747ec681f3Smrg return 0x1F; 16757ec681f3Smrg if (loc == FRAG_RESULT_STENCIL) 16767ec681f3Smrg return 0x1E; 16777ec681f3Smrg 16787ec681f3Smrg unreachable("Invalid RT to load from"); 16797ec681f3Smrg} 16807ec681f3Smrg 
/* Main NIR intrinsic dispatcher: lowers one nir_intrinsic_instr to MIR.
 * Loads/stores fan out to the emit_* helpers above; sysval-backed intrinsics
 * go through emit_sysval_read; atomics are expanded by the (IMAGE_)ATOMIC_CASE
 * macros, which generate their own case labels. */

static void
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
{
        unsigned offset = 0, reg;

        switch (instr->intrinsic) {
        case nir_intrinsic_discard_if:
        case nir_intrinsic_discard: {
                bool conditional = instr->intrinsic == nir_intrinsic_discard_if;
                struct midgard_instruction discard = v_branch(conditional, false);
                discard.branch.target_type = TARGET_DISCARD;

                if (conditional) {
                        discard.src[0] = mir_get_branch_cond(&instr->src[0],
                                        &discard.branch.invert_conditional);
                        discard.src_types[0] = nir_type_uint32;
                }

                emit_mir_instruction(ctx, discard);
                schedule_barrier(ctx);

                break;
        }

        case nir_intrinsic_image_load:
        case nir_intrinsic_image_store:
                emit_image_op(ctx, instr, false);
                break;

        case nir_intrinsic_image_size: {
                unsigned nr_comp = nir_intrinsic_dest_components(instr);
                emit_sysval_read(ctx, &instr->instr, nr_comp, 0);
                break;
        }

        case nir_intrinsic_load_ubo:
        case nir_intrinsic_load_global:
        case nir_intrinsic_load_global_constant:
        case nir_intrinsic_load_shared:
        case nir_intrinsic_load_scratch:
        case nir_intrinsic_load_input:
        case nir_intrinsic_load_kernel_input:
        case nir_intrinsic_load_interpolated_input: {
                /* Classify which flavour of load this is; exactly one flag
                 * is set per intrinsic */
                bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo;
                bool is_global = instr->intrinsic == nir_intrinsic_load_global ||
                        instr->intrinsic == nir_intrinsic_load_global_constant;
                bool is_shared = instr->intrinsic == nir_intrinsic_load_shared;
                bool is_scratch = instr->intrinsic == nir_intrinsic_load_scratch;
                bool is_flat = instr->intrinsic == nir_intrinsic_load_input;
                bool is_kernel = instr->intrinsic == nir_intrinsic_load_kernel_input;
                bool is_interp = instr->intrinsic == nir_intrinsic_load_interpolated_input;

                /* Get the base type of the intrinsic */
                /* TODO: Infer type? Does it matter? */
                nir_alu_type t =
                        (is_interp) ? nir_type_float :
                        (is_flat) ? nir_intrinsic_dest_type(instr) :
                        nir_type_uint;

                t = nir_alu_type_get_base_type(t);

                /* UBO/global/scratch addresses carry no base; everything else
                 * starts from the intrinsic's base slot */
                if (!(is_ubo || is_global || is_scratch)) {
                        offset = nir_intrinsic_base(instr);
                }

                unsigned nr_comp = nir_intrinsic_dest_components(instr);

                nir_src *src_offset = nir_get_io_offset_src(instr);

                /* Constant offsets fold into `offset`; otherwise pass the
                 * source through as an indirect offset */
                bool direct = nir_src_is_const(*src_offset);
                nir_src *indirect_offset = direct ? NULL : src_offset;

                if (direct)
                        offset += nir_src_as_uint(*src_offset);

                /* We may need to apply a fractional offset */
                int component = (is_flat || is_interp) ?
                        nir_intrinsic_component(instr) : 0;
                reg = nir_dest_index(&instr->dest);

                if (is_kernel) {
                        emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, 0, nr_comp);
                } else if (is_ubo) {
                        nir_src index = instr->src[0];

                        /* TODO: Is indirect block number possible? */
                        assert(nir_src_is_const(index));

                        uint32_t uindex = nir_src_as_uint(index);
                        emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex, nr_comp);
                } else if (is_global || is_shared || is_scratch) {
                        unsigned seg = is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH);
                        emit_global(ctx, &instr->instr, true, reg, src_offset, seg);
                } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend) {
                        emit_varying_read(ctx, reg, offset, nr_comp, component, indirect_offset, t | nir_dest_bit_size(instr->dest), is_flat);
                } else if (ctx->inputs->is_blend) {
                        /* ctx->blend_input will be precoloured to r0/r2, where
                         * the input is preloaded */

                        unsigned *input = offset ? &ctx->blend_src1 : &ctx->blend_input;

                        if (*input == ~0)
                                *input = reg;
                        else
                                emit_mir_instruction(ctx, v_mov(*input, reg));
                } else if (ctx->stage == MESA_SHADER_VERTEX) {
                        emit_attr_read(ctx, reg, offset, nr_comp, t);
                } else {
                        DBG("Unknown load\n");
                        assert(0);
                }

                break;
        }

        /* Handled together with load_interpolated_input */
        case nir_intrinsic_load_barycentric_pixel:
        case nir_intrinsic_load_barycentric_centroid:
        case nir_intrinsic_load_barycentric_sample:
                break;

        /* Reads 128-bit value raw off the tilebuffer during blending, tasty */

        case nir_intrinsic_load_raw_output_pan: {
                reg = nir_dest_index(&instr->dest);

                /* T720 and below use different blend opcodes with slightly
                 * different semantics than T760 and up */

                midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);

                unsigned target = output_load_rt_addr(ctx, instr);
                ld.load_store.index_comp = target & 0x3;
                ld.load_store.index_reg = target >> 2;

                /* src[0] is the sample index: inline it when constant, else
                 * feed it through a register */
                if (nir_src_is_const(instr->src[0])) {
                        unsigned sample = nir_src_as_uint(instr->src[0]);
                        ld.load_store.arg_comp = sample & 0x3;
                        ld.load_store.arg_reg = sample >> 2;
                } else {
                        /* Enable sample index via register. */
                        ld.load_store.signed_offset |= 1;
                        ld.src[1] = nir_src_index(ctx, &instr->src[0]);
                        ld.src_types[1] = nir_type_int32;
                }

                if (ctx->quirks & MIDGARD_OLD_BLEND) {
                        ld.op = midgard_op_ld_special_32u;
                        ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(16);
                        ld.load_store.index_reg = REGISTER_LDST_ZERO;
                }

                emit_mir_instruction(ctx, ld);
                break;
        }

        case nir_intrinsic_load_output: {
                reg = nir_dest_index(&instr->dest);

                unsigned bits = nir_dest_bit_size(instr->dest);

                midgard_instruction ld;
                if (bits == 16)
                        ld = m_ld_tilebuffer_16f(reg, 0);
                else
                        ld = m_ld_tilebuffer_32f(reg, 0);

                unsigned index = output_load_rt_addr(ctx, instr);
                ld.load_store.index_comp = index & 0x3;
                ld.load_store.index_reg = index >> 2;

                for (unsigned c = 4; c < 16; ++c)
                        ld.swizzle[0][c] = 0;

                if (ctx->quirks & MIDGARD_OLD_BLEND) {
                        if (bits == 16)
                                ld.op = midgard_op_ld_special_16f;
                        else
                                ld.op = midgard_op_ld_special_32f;
                        ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(1);
                        ld.load_store.index_reg = REGISTER_LDST_ZERO;
                }

                emit_mir_instruction(ctx, ld);
                break;
        }

        case nir_intrinsic_store_output:
        case nir_intrinsic_store_combined_output_pan:
                assert(nir_src_is_const(instr->src[1]) && "no indirect outputs");

                offset = nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[1]);

                reg = nir_src_index(ctx, &instr->src[0]);

                if (ctx->stage == MESA_SHADER_FRAGMENT) {
                        bool combined = instr->intrinsic ==
                                nir_intrinsic_store_combined_output_pan;

                        const nir_variable *var;
                        var = nir_find_variable_with_driver_location(ctx->nir, nir_var_shader_out,
                                 nir_intrinsic_base(instr));
                        assert(var);

                        /* Dual-source blend writeout is done by leaving the
                         * value in r2 for the blend shader to use. */
                        if (var->data.index) {
                                if (instr->src[0].is_ssa) {
                                        emit_explicit_constant(ctx, reg, reg);

                                        unsigned out = make_compiler_temp(ctx);

                                        midgard_instruction ins = v_mov(reg, out);
                                        emit_mir_instruction(ctx, ins);

                                        ctx->blend_src1 = out;
                                } else {
                                        ctx->blend_src1 = reg;
                                }

                                break;
                        }

                        enum midgard_rt_id rt;
                        if (var->data.location >= FRAG_RESULT_DATA0)
                                rt = MIDGARD_COLOR_RT0 + var->data.location -
                                     FRAG_RESULT_DATA0;
                        else if (combined)
                                rt = MIDGARD_ZS_RT;
                        else
                                unreachable("bad rt");

                        /* For combined stores, nir_intrinsic_component carries
                         * the PAN_WRITEOUT_* mask selecting Z and/or S sources */
                        unsigned reg_z = ~0, reg_s = ~0;
                        if (combined) {
                                unsigned writeout = nir_intrinsic_component(instr);
                                if (writeout & PAN_WRITEOUT_Z)
                                        reg_z = nir_src_index(ctx, &instr->src[2]);
                                if (writeout & PAN_WRITEOUT_S)
                                        reg_s = nir_src_index(ctx, &instr->src[3]);
                        }

                        emit_fragment_store(ctx, reg, reg_z, reg_s, rt, 0);
                } else if (ctx->stage == MESA_SHADER_VERTEX) {
                        assert(instr->intrinsic == nir_intrinsic_store_output);

                        /* We should have been vectorized, though we don't
                         * currently check that st_vary is emitted only once
                         * per slot (this is relevant, since there's not a mask
                         * parameter available on the store [set to 0 by the
                         * blob]). We do respect the component by adjusting the
                         * swizzle. If this is a constant source, we'll need to
                         * emit that explicitly. */

                        emit_explicit_constant(ctx, reg, reg);

                        unsigned dst_component = nir_intrinsic_component(instr);
                        unsigned nr_comp = nir_src_num_components(instr->src[0]);

                        midgard_instruction st = m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset));
                        st.load_store.arg_reg = REGISTER_LDST_ZERO;
                        st.load_store.index_format = midgard_index_address_u32;
                        st.load_store.index_reg = REGISTER_LDST_ZERO;

                        switch (nir_alu_type_get_base_type(nir_intrinsic_src_type(instr))) {
                        case nir_type_uint:
                        case nir_type_bool:
                                st.op = midgard_op_st_vary_32u;
                                break;
                        case nir_type_int:
                                st.op = midgard_op_st_vary_32i;
                                break;
                        case nir_type_float:
                                st.op = midgard_op_st_vary_32;
                                break;
                        default:
                                unreachable("Attempted to store unknown type");
                                break;
                        }

                        /* nir_intrinsic_component(store_intr) encodes the
                         * destination component start. Source component offset
                         * adjustment is taken care of in
                         * install_registers_instr(), when offset_swizzle() is
                         * called.
                         */
                        unsigned src_component = COMPONENT_X;

                        assert(nr_comp > 0);
                        for (unsigned i = 0; i < ARRAY_SIZE(st.swizzle); ++i) {
                                st.swizzle[0][i] = src_component;
                                if (i >= dst_component && i < dst_component + nr_comp - 1)
                                        src_component++;
                        }

                        emit_mir_instruction(ctx, st);
                } else {
                        DBG("Unknown store\n");
                        assert(0);
                }

                break;

        /* Special case of store_output for lowered blend shaders */
        case nir_intrinsic_store_raw_output_pan:
                assert (ctx->stage == MESA_SHADER_FRAGMENT);
                reg = nir_src_index(ctx, &instr->src[0]);
                /* One writeout branch per sample iteration */
                for (unsigned s = 0; s < ctx->blend_sample_iterations; s++)
                        emit_fragment_store(ctx, reg, ~0, ~0,
                                            ctx->inputs->blend.rt + MIDGARD_COLOR_RT0,
                                            s);
                break;

        case nir_intrinsic_store_global:
        case nir_intrinsic_store_shared:
        case nir_intrinsic_store_scratch:
                reg = nir_src_index(ctx, &instr->src[0]);
                emit_explicit_constant(ctx, reg, reg);

                unsigned seg;
                if (instr->intrinsic == nir_intrinsic_store_global)
                        seg = LDST_GLOBAL;
                else if (instr->intrinsic == nir_intrinsic_store_shared)
                        seg = LDST_SHARED;
                else
                        seg = LDST_SCRATCH;

                emit_global(ctx, &instr->instr, false, reg, &instr->src[1], seg);
                break;

        /* Sysval-backed builtins: the second argument of emit_sysval_read is
         * the component count, the third a byte offset into the sysval */
        case nir_intrinsic_load_first_vertex:
        case nir_intrinsic_load_ssbo_address:
        case nir_intrinsic_load_work_dim:
                emit_sysval_read(ctx, &instr->instr, 1, 0);
                break;

        case nir_intrinsic_load_base_vertex:
                emit_sysval_read(ctx, &instr->instr, 1, 4);
                break;

        case nir_intrinsic_load_base_instance:
                emit_sysval_read(ctx, &instr->instr, 1, 8);
                break;

        case nir_intrinsic_load_sample_positions_pan:
                emit_sysval_read(ctx, &instr->instr, 2, 0);
                break;

        case nir_intrinsic_get_ssbo_size:
                emit_sysval_read(ctx, &instr->instr, 1, 8);
                break;

        case nir_intrinsic_load_viewport_scale:
        case nir_intrinsic_load_viewport_offset:
        case nir_intrinsic_load_num_workgroups:
        case nir_intrinsic_load_sampler_lod_parameters_pan:
        case nir_intrinsic_load_workgroup_size:
                emit_sysval_read(ctx, &instr->instr, 3, 0);
                break;

        case nir_intrinsic_load_blend_const_color_rgba:
                emit_sysval_read(ctx, &instr->instr, 4, 0);
                break;

        case nir_intrinsic_load_workgroup_id:
        case nir_intrinsic_load_local_invocation_id:
        case nir_intrinsic_load_global_invocation_id:
        case nir_intrinsic_load_global_invocation_id_zero_base:
                emit_compute_builtin(ctx, instr);
                break;

        case nir_intrinsic_load_vertex_id_zero_base:
        case nir_intrinsic_load_instance_id:
                emit_vertex_builtin(ctx, instr);
                break;

        case nir_intrinsic_load_sample_mask_in:
                emit_special(ctx, instr, 96);
                break;

        case nir_intrinsic_load_sample_id:
                emit_special(ctx, instr, 97);
                break;

        /* Midgard doesn't seem to want special handling */
        case nir_intrinsic_memory_barrier:
        case nir_intrinsic_memory_barrier_buffer:
        case nir_intrinsic_memory_barrier_image:
        case nir_intrinsic_memory_barrier_shared:
        case nir_intrinsic_group_memory_barrier:
                break;

        case nir_intrinsic_control_barrier:
                schedule_barrier(ctx);
                emit_control_barrier(ctx);
                schedule_barrier(ctx);
                break;

        /* Each macro expands to the case label(s) and emission code for one
         * atomic op (buffer/shared vs. image variants) */
        ATOMIC_CASE(ctx, instr, add, add);
        ATOMIC_CASE(ctx, instr, and, and);
        ATOMIC_CASE(ctx, instr, comp_swap, cmpxchg);
        ATOMIC_CASE(ctx, instr, exchange, xchg);
        ATOMIC_CASE(ctx, instr, imax, imax);
        ATOMIC_CASE(ctx, instr, imin, imin);
        ATOMIC_CASE(ctx, instr, or, or);
        ATOMIC_CASE(ctx, instr, umax, umax);
        ATOMIC_CASE(ctx, instr, umin, umin);
        ATOMIC_CASE(ctx, instr, xor, xor);

        IMAGE_ATOMIC_CASE(ctx, instr, add, add);
        IMAGE_ATOMIC_CASE(ctx, instr, and, and);
        IMAGE_ATOMIC_CASE(ctx, instr, comp_swap, cmpxchg);
        IMAGE_ATOMIC_CASE(ctx, instr, exchange, xchg);
        IMAGE_ATOMIC_CASE(ctx, instr, imax, imax);
        IMAGE_ATOMIC_CASE(ctx, instr, imin, imin);
        IMAGE_ATOMIC_CASE(ctx, instr, or, or);
        IMAGE_ATOMIC_CASE(ctx, instr, umax, umax);
        IMAGE_ATOMIC_CASE(ctx, instr, umin, umin);
        IMAGE_ATOMIC_CASE(ctx, instr, xor, xor);

        default:
                fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name);
                assert(0);
                break;
        }
}

/* Returns dimension with 0 special casing cubemaps */
static unsigned
midgard_tex_format(enum glsl_sampler_dim dim)
{
        switch (dim) {
        case GLSL_SAMPLER_DIM_1D:
        case GLSL_SAMPLER_DIM_BUF:
                return 1;

        case GLSL_SAMPLER_DIM_2D:
        case GLSL_SAMPLER_DIM_MS:
        case GLSL_SAMPLER_DIM_EXTERNAL:
        case GLSL_SAMPLER_DIM_RECT:
                return 2;

        case GLSL_SAMPLER_DIM_3D:
                return 3;

        case GLSL_SAMPLER_DIM_CUBE:
                return 0;

        default:
                DBG("Unknown sampler dim type\n");
                assert(0);
                return 0;
        }
}

/* Tries to attach an explicit LOD or bias as a constant.
Returns whether this 21367ec681f3Smrg * was successful */ 21377ec681f3Smrg 21387ec681f3Smrgstatic bool 21397ec681f3Smrgpan_attach_constant_bias( 21407ec681f3Smrg compiler_context *ctx, 21417ec681f3Smrg nir_src lod, 21427ec681f3Smrg midgard_texture_word *word) 21437ec681f3Smrg{ 21447ec681f3Smrg /* To attach as constant, it has to *be* constant */ 21457ec681f3Smrg 21467ec681f3Smrg if (!nir_src_is_const(lod)) 21477ec681f3Smrg return false; 21487ec681f3Smrg 21497ec681f3Smrg float f = nir_src_as_float(lod); 21507ec681f3Smrg 21517ec681f3Smrg /* Break into fixed-point */ 21527ec681f3Smrg signed lod_int = f; 21537ec681f3Smrg float lod_frac = f - lod_int; 21547ec681f3Smrg 21557ec681f3Smrg /* Carry over negative fractions */ 21567ec681f3Smrg if (lod_frac < 0.0) { 21577ec681f3Smrg lod_int--; 21587ec681f3Smrg lod_frac += 1.0; 21597ec681f3Smrg } 21607ec681f3Smrg 21617ec681f3Smrg /* Encode */ 21627ec681f3Smrg word->bias = float_to_ubyte(lod_frac); 21637ec681f3Smrg word->bias_int = lod_int; 21647ec681f3Smrg 21657ec681f3Smrg return true; 21667ec681f3Smrg} 21677ec681f3Smrg 21687ec681f3Smrgstatic enum mali_texture_mode 21697ec681f3Smrgmdg_texture_mode(nir_tex_instr *instr) 21707ec681f3Smrg{ 21717ec681f3Smrg if (instr->op == nir_texop_tg4 && instr->is_shadow) 21727ec681f3Smrg return TEXTURE_GATHER_SHADOW; 21737ec681f3Smrg else if (instr->op == nir_texop_tg4) 21747ec681f3Smrg return TEXTURE_GATHER_X + instr->component; 21757ec681f3Smrg else if (instr->is_shadow) 21767ec681f3Smrg return TEXTURE_SHADOW; 21777ec681f3Smrg else 21787ec681f3Smrg return TEXTURE_NORMAL; 21797ec681f3Smrg} 21807ec681f3Smrg 21817ec681f3Smrgstatic void 21827ec681f3Smrgset_tex_coord(compiler_context *ctx, nir_tex_instr *instr, 21837ec681f3Smrg midgard_instruction *ins) 21847ec681f3Smrg{ 21857ec681f3Smrg int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord); 21867ec681f3Smrg 21877ec681f3Smrg assert(coord_idx >= 0); 21887ec681f3Smrg 21897ec681f3Smrg int comparator_idx = nir_tex_instr_src_index(instr, 
nir_tex_src_comparator); 21907ec681f3Smrg int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); 21917ec681f3Smrg assert(comparator_idx < 0 || ms_idx < 0); 21927ec681f3Smrg int ms_or_comparator_idx = ms_idx >= 0 ? ms_idx : comparator_idx; 21937ec681f3Smrg 21947ec681f3Smrg unsigned coords = nir_src_index(ctx, &instr->src[coord_idx].src); 21957ec681f3Smrg 21967ec681f3Smrg emit_explicit_constant(ctx, coords, coords); 21977ec681f3Smrg 21987ec681f3Smrg ins->src_types[1] = nir_tex_instr_src_type(instr, coord_idx) | 21997ec681f3Smrg nir_src_bit_size(instr->src[coord_idx].src); 22007ec681f3Smrg 22017ec681f3Smrg unsigned nr_comps = instr->coord_components; 22027ec681f3Smrg unsigned written_mask = 0, write_mask = 0; 22037ec681f3Smrg 22047ec681f3Smrg /* Initialize all components to coord.x which is expected to always be 22057ec681f3Smrg * present. Swizzle is updated below based on the texture dimension 22067ec681f3Smrg * and extra attributes that are packed in the coordinate argument. 22077ec681f3Smrg */ 22087ec681f3Smrg for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) 22097ec681f3Smrg ins->swizzle[1][c] = COMPONENT_X; 22107ec681f3Smrg 22117ec681f3Smrg /* Shadow ref value is part of the coordinates if there's no comparator 22127ec681f3Smrg * source, in that case it's always placed in the last component. 22137ec681f3Smrg * Midgard wants the ref value in coord.z. 22147ec681f3Smrg */ 22157ec681f3Smrg if (instr->is_shadow && comparator_idx < 0) { 22167ec681f3Smrg ins->swizzle[1][COMPONENT_Z] = --nr_comps; 22177ec681f3Smrg write_mask |= 1 << COMPONENT_Z; 22187ec681f3Smrg } 22197ec681f3Smrg 22207ec681f3Smrg /* The array index is the last component if there's no shadow ref value 22217ec681f3Smrg * or second last if there's one. We already decremented the number of 22227ec681f3Smrg * components to account for the shadow ref value above. 22237ec681f3Smrg * Midgard wants the array index in coord.w. 
22247ec681f3Smrg */ 22257ec681f3Smrg if (instr->is_array) { 22267ec681f3Smrg ins->swizzle[1][COMPONENT_W] = --nr_comps; 22277ec681f3Smrg write_mask |= 1 << COMPONENT_W; 22287ec681f3Smrg } 22297ec681f3Smrg 22307ec681f3Smrg if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { 22317ec681f3Smrg /* texelFetch is undefined on samplerCube */ 22327ec681f3Smrg assert(ins->op != midgard_tex_op_fetch); 22337ec681f3Smrg 22347ec681f3Smrg ins->src[1] = make_compiler_temp_reg(ctx); 22357ec681f3Smrg 22367ec681f3Smrg /* For cubemaps, we use a special ld/st op to select the face 22377ec681f3Smrg * and copy the xy into the texture register 22387ec681f3Smrg */ 22397ec681f3Smrg midgard_instruction ld = m_ld_cubemap_coords(ins->src[1], 0); 22407ec681f3Smrg ld.src[1] = coords; 22417ec681f3Smrg ld.src_types[1] = ins->src_types[1]; 22427ec681f3Smrg ld.mask = 0x3; /* xy */ 22437ec681f3Smrg ld.load_store.bitsize_toggle = true; 22447ec681f3Smrg ld.swizzle[1][3] = COMPONENT_X; 22457ec681f3Smrg emit_mir_instruction(ctx, ld); 22467ec681f3Smrg 22477ec681f3Smrg /* We packed cube coordiates (X,Y,Z) into (X,Y), update the 22487ec681f3Smrg * written mask accordingly and decrement the number of 22497ec681f3Smrg * components 22507ec681f3Smrg */ 22517ec681f3Smrg nr_comps--; 22527ec681f3Smrg written_mask |= 3; 22537ec681f3Smrg } 22547ec681f3Smrg 22557ec681f3Smrg /* Now flag tex coord components that have not been written yet */ 22567ec681f3Smrg write_mask |= mask_of(nr_comps) & ~written_mask; 22577ec681f3Smrg for (unsigned c = 0; c < nr_comps; c++) 22587ec681f3Smrg ins->swizzle[1][c] = c; 22597ec681f3Smrg 22607ec681f3Smrg /* Sample index and shadow ref are expected in coord.z */ 22617ec681f3Smrg if (ms_or_comparator_idx >= 0) { 22627ec681f3Smrg assert(!((write_mask | written_mask) & (1 << COMPONENT_Z))); 22637ec681f3Smrg 22647ec681f3Smrg unsigned sample_or_ref = 22657ec681f3Smrg nir_src_index(ctx, &instr->src[ms_or_comparator_idx].src); 22667ec681f3Smrg 22677ec681f3Smrg emit_explicit_constant(ctx, 
sample_or_ref, sample_or_ref); 22687ec681f3Smrg 22697ec681f3Smrg if (ins->src[1] == ~0) 22707ec681f3Smrg ins->src[1] = make_compiler_temp_reg(ctx); 22717ec681f3Smrg 22727ec681f3Smrg midgard_instruction mov = v_mov(sample_or_ref, ins->src[1]); 22737ec681f3Smrg 22747ec681f3Smrg for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) 22757ec681f3Smrg mov.swizzle[1][c] = COMPONENT_X; 22767ec681f3Smrg 22777ec681f3Smrg mov.mask = 1 << COMPONENT_Z; 22787ec681f3Smrg written_mask |= 1 << COMPONENT_Z; 22797ec681f3Smrg ins->swizzle[1][COMPONENT_Z] = COMPONENT_Z; 22807ec681f3Smrg emit_mir_instruction(ctx, mov); 22817ec681f3Smrg } 22827ec681f3Smrg 22837ec681f3Smrg /* Texelfetch coordinates uses all four elements (xyz/index) regardless 22847ec681f3Smrg * of texture dimensionality, which means it's necessary to zero the 22857ec681f3Smrg * unused components to keep everything happy. 22867ec681f3Smrg */ 22877ec681f3Smrg if (ins->op == midgard_tex_op_fetch && 22887ec681f3Smrg (written_mask | write_mask) != 0xF) { 22897ec681f3Smrg if (ins->src[1] == ~0) 22907ec681f3Smrg ins->src[1] = make_compiler_temp_reg(ctx); 22917ec681f3Smrg 22927ec681f3Smrg /* mov index.zw, #0, or generalized */ 22937ec681f3Smrg midgard_instruction mov = 22947ec681f3Smrg v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), ins->src[1]); 22957ec681f3Smrg mov.has_constants = true; 22967ec681f3Smrg mov.mask = (written_mask | write_mask) ^ 0xF; 22977ec681f3Smrg emit_mir_instruction(ctx, mov); 22987ec681f3Smrg for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) { 22997ec681f3Smrg if (mov.mask & (1 << c)) 23007ec681f3Smrg ins->swizzle[1][c] = c; 23017ec681f3Smrg } 23027ec681f3Smrg } 23037ec681f3Smrg 23047ec681f3Smrg if (ins->src[1] == ~0) { 23057ec681f3Smrg /* No temporary reg created, use the src coords directly */ 23067ec681f3Smrg ins->src[1] = coords; 23077ec681f3Smrg } else if (write_mask) { 23087ec681f3Smrg /* Move the remaining coordinates to the temporary reg */ 23097ec681f3Smrg midgard_instruction mov = v_mov(coords, 
                                     ins->src[1]);

                /* Copy each still-needed coordinate lane through the
                 * temporary; lanes outside write_mask read a don't-care X
                 * component. The texture op then reads identity lanes from
                 * the temporary. */
                for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) {
                        if ((1 << c) & write_mask) {
                                mov.swizzle[1][c] = ins->swizzle[1][c];
                                ins->swizzle[1][c] = c;
                        } else {
                                mov.swizzle[1][c] = COMPONENT_X;
                        }
                }

                mov.mask = write_mask;
                emit_mir_instruction(ctx, mov);
        }
}

/* Emit a native Midgard texture instruction for the given NIR texture
 * instruction: builds the MIR texture op (destination, float outmod,
 * coordinate/LOD/bias/offset sources) and emits it. midgard_texop selects
 * the hardware opcode (normal/gradient/fetch). */

static void
emit_texop_native(compiler_context *ctx, nir_tex_instr *instr,
                  unsigned midgard_texop)
{
        /* TODO */
        //assert (!instr->sampler);

        nir_dest *dest = &instr->dest;

        int texture_index = instr->texture_index;
        int sampler_index = instr->sampler_index;

        nir_alu_type dest_base = nir_alu_type_get_base_type(instr->dest_type);

        /* texture instructions support float outmods */
        unsigned outmod = midgard_outmod_none;
        if (dest_base == nir_type_float) {
                outmod = mir_determine_float_outmod(ctx, &dest, 0);
        }

        midgard_instruction ins = {
                .type = TAG_TEXTURE_4,
                .mask = 0xF,
                .dest = nir_dest_index(dest),
                .src = { ~0, ~0, ~0, ~0 },
                .dest_type = instr->dest_type,
                .swizzle = SWIZZLE_IDENTITY_4,
                .outmod = outmod,
                .op = midgard_texop,
                .texture = {
                        .format = midgard_tex_format(instr->sampler_dim),
                        .texture_handle = texture_index,
                        .sampler_handle = sampler_index,
                        .mode = mdg_texture_mode(instr)
                }
        };

        /* Old-style shadow lookups broadcast the single result across all
         * four destination components (except for tg4) */
        if (instr->is_shadow && !instr->is_new_style_shadow && instr->op != nir_texop_tg4)
                for (int i = 0; i < 4; ++i)
                        ins.swizzle[0][i] = COMPONENT_X;

        for (unsigned i = 0; i < instr->num_srcs; ++i) {
                int index = nir_src_index(ctx, &instr->src[i].src);
                unsigned sz = nir_src_bit_size(instr->src[i].src);
                nir_alu_type T = nir_tex_instr_src_type(instr, i) | sz;

                switch (instr->src[i].src_type) {
                case nir_tex_src_coord:
                        set_tex_coord(ctx, instr, &ins);
                        break;

                case nir_tex_src_bias:
                case nir_tex_src_lod: {
                        /* Try as a constant if we can */

                        bool is_txf = midgard_texop == midgard_tex_op_fetch;
                        if (!is_txf && pan_attach_constant_bias(ctx, instr->src[i].src, &ins.texture))
                                break;

                        ins.texture.lod_register = true;
                        ins.src[2] = index;
                        ins.src_types[2] = T;

                        /* LOD/bias is scalar: replicate the X component */
                        for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c)
                                ins.swizzle[2][c] = COMPONENT_X;

                        emit_explicit_constant(ctx, index, index);

                        break;
                };

                case nir_tex_src_offset: {
                        ins.texture.offset_register = true;
                        ins.src[3] = index;
                        ins.src_types[3] = T;

                        /* Offsets use at most xyz; lanes past Z read
                         * component 0 */
                        for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c)
                                ins.swizzle[3][c] = (c > COMPONENT_Z) ? 0 : c;

                        emit_explicit_constant(ctx, index, index);
                        break;
                };

                case nir_tex_src_comparator:
                case nir_tex_src_ms_index:
                        /* Nothing to do, handled in set_tex_coord() */
                        break;

                default: {
                        fprintf(stderr, "Unknown texture source type: %d\n", instr->src[i].src_type);
                        assert(0);
                }
                }
        }

        emit_mir_instruction(ctx, ins);
}

/* Dispatch a NIR texture opcode to the corresponding native Midgard texture
 * op. txs is not a texture instruction on Midgard; it is handled as a
 * sysval read instead. */

static void
emit_tex(compiler_context *ctx, nir_tex_instr *instr)
{
        switch (instr->op) {
        case nir_texop_tex:
        case nir_texop_txb:
                emit_texop_native(ctx, instr, midgard_tex_op_normal);
                break;
        case nir_texop_txl:
        case nir_texop_tg4:
                emit_texop_native(ctx, instr, midgard_tex_op_gradient);
                break;
        case nir_texop_txf:
        case nir_texop_txf_ms:
                emit_texop_native(ctx, instr, midgard_tex_op_fetch);
                break;
        case nir_texop_txs:
                emit_sysval_read(ctx, &instr->instr, 4, 0);
                break;
        default: {
                fprintf(stderr, "Unhandled texture op: %d\n", instr->op);
                assert(0);
        }
        }
}

/* Emit MIR for a NIR jump. Only loop breaks are handled; the break target
 * block is fixed up later when the enclosing loop is finished (emit_loop). */

static void
emit_jump(compiler_context *ctx, nir_jump_instr *instr)
{
        switch (instr->type) {
        case nir_jump_break: {
                /* Emit a branch out of the loop */
                struct midgard_instruction br = v_branch(false, false);
                br.branch.target_type = TARGET_BREAK;
                br.branch.target_break = ctx->current_loop_depth;
                emit_mir_instruction(ctx, br);
                break;
        }

        default:
                DBG("Unknown jump type %d\n", instr->type);
                break;
        }
}

/* Emit MIR for a single NIR instruction, dispatching on the instruction
 * type. Undefs are spurious and emit nothing. */

static void
emit_instr(compiler_context *ctx, struct nir_instr *instr)
{
        switch (instr->type) {
        case nir_instr_type_load_const:
                emit_load_const(ctx, nir_instr_as_load_const(instr));
                break;

        case nir_instr_type_intrinsic:
                emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                break;

        case nir_instr_type_alu:
                emit_alu(ctx, nir_instr_as_alu(instr));
                break;

        case nir_instr_type_tex:
                emit_tex(ctx, nir_instr_as_tex(instr));
                break;

        case nir_instr_type_jump:
                emit_jump(ctx, nir_instr_as_jump(instr));
                break;

        case nir_instr_type_ssa_undef:
                /* Spurious */
                break;

        default:
                DBG("Unhandled instruction type\n");
                break;
        }
}


/* ALU instructions can inline or embed constants, which decreases register
 * pressure and saves space.
 */

/* If source `idx` of `alu` has a constant recorded in ctx->ssa_constants,
 * attach it as the instruction's embedded constant and redirect the source
 * to the constant register. Expands over `ctx` and `alu` in scope. */
#define CONDITIONAL_ATTACH(idx) { \
        void *entry = _mesa_hash_table_u64_search(ctx->ssa_constants, alu->src[idx] + 1); \
\
        if (entry) { \
                attach_constants(ctx, alu, entry, alu->src[idx] + 1); \
                alu->src[idx] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); \
        } \
}

/* Fold previously-recorded constant values into the ALU instructions of a
 * block as embedded constants, emitting an extra move when an instruction
 * consumes two distinct constants. */
static void
inline_alu_constants(compiler_context *ctx, midgard_block *block)
{
        mir_foreach_instr_in_block(block, alu) {
                /* Other instructions cannot inline constants */
                if (alu->type != TAG_ALU_4) continue;
                if (alu->compact_branch) continue;

                /* If there is already a constant here, we can do nothing */
                if (alu->has_constants) continue;

                CONDITIONAL_ATTACH(0);

                if (!alu->has_constants) {
                        CONDITIONAL_ATTACH(1)
                } else if (!alu->inline_constant) {
                        /* Corner case: _two_ vec4 constants, for instance with a
                         * csel. For this case, we can only use a constant
                         * register for one, we'll have to emit a move for the
                         * other.
                         */

                        void *entry = _mesa_hash_table_u64_search(ctx->ssa_constants, alu->src[1] + 1);
                        unsigned scratch = make_compiler_temp(ctx);

                        if (entry) {
                                midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), scratch);
                                attach_constants(ctx, &ins, entry, alu->src[1] + 1);

                                /* Set the source */
                                alu->src[1] = scratch;

                                /* Inject us -before- the last instruction which set r31 */
                                mir_insert_instruction_before(ctx, mir_prev_op(alu), ins);
                        }
                }
        }
}

/* Compute the widest bit size an ALU instruction operates at, taking the
 * maximum over all live sources and the destination, then widening for
 * opcode-specific constraints (32-bit LUT ops, keephi doubling). */
unsigned
max_bitsize_for_alu(midgard_instruction *ins)
{
        unsigned max_bitsize = 0;
        for (int i = 0; i < MIR_SRC_COUNT; i++) {
                if (ins->src[i] == ~0) continue;
                unsigned src_bitsize = nir_alu_type_get_type_size(ins->src_types[i]);
                max_bitsize = MAX2(src_bitsize, max_bitsize);
        }
        unsigned dst_bitsize = nir_alu_type_get_type_size(ins->dest_type);
        max_bitsize = MAX2(dst_bitsize, max_bitsize);

        /* We don't have fp16 LUTs, so we'll want to emit code like:
         *
         * vlut.fsinr hr0, hr0
         *
         * where both input and output are 16-bit but the operation is carried
         * out in 32-bit
         */

        switch (ins->op) {
        case midgard_alu_op_fsqrt:
        case midgard_alu_op_frcp:
        case midgard_alu_op_frsqrt:
        case midgard_alu_op_fsinpi:
        case midgard_alu_op_fcospi:
        case midgard_alu_op_fexp2:
        case midgard_alu_op_flog2:
                max_bitsize = MAX2(max_bitsize, 32);
                break;

        default:
                break;
        }

        /* High implies computing at a higher bitsize, e.g umul_high of 32-bit
         * requires computing at 64-bit */
        if (midgard_is_integer_out_op(ins->op) && ins->outmod == midgard_outmod_keephi) {
                max_bitsize *= 2;
                assert(max_bitsize <= 64);
        }

        return max_bitsize;
}

/* Map a computation bit size onto the Midgard register access mode used to
 * encode it. */
midgard_reg_mode
reg_mode_for_bitsize(unsigned bitsize)
{
        switch (bitsize) {
        /* use 16 pipe for 8 since we don't support vec16 yet */
        case 8:
        case 16:
                return midgard_reg_mode_16;
        case 32:
                return midgard_reg_mode_32;
        case 64:
                return midgard_reg_mode_64;
        default:
                unreachable("invalid bit size");
        }
}

/* Midgard supports two types of constants, embedded constants (128-bit) and
 * inline constants (16-bit).
Sometimes, especially with scalar ops, embedded
 * constants can be demoted to inline constants, for space savings and
 * sometimes a performance boost */

/* Demote 128-bit embedded constants to 16-bit inline constants where legal:
 * the instruction must read a single scalar constant value across all used
 * components, with no lossy narrowing (checked exactly for both integer and
 * float cases) and no abs/neg modifier on the constant source. */
static void
embedded_to_inline_constant(compiler_context *ctx, midgard_block *block)
{
        mir_foreach_instr_in_block(block, ins) {
                if (!ins->has_constants) continue;
                if (ins->has_inline_constant) continue;

                unsigned max_bitsize = max_bitsize_for_alu(ins);

                /* We can inline 32-bit (sometimes) or 16-bit (usually) */
                bool is_16 = max_bitsize == 16;
                bool is_32 = max_bitsize == 32;

                if (!(is_16 || is_32))
                        continue;

                /* src1 cannot be an inline constant due to encoding
                 * restrictions. So, if possible we try to flip the arguments
                 * in that case */

                int op = ins->op;

                if (ins->src[0] == SSA_FIXED_REGISTER(REGISTER_CONSTANT) &&
                    alu_opcode_props[op].props & OP_COMMUTES) {
                        mir_flip(ins);
                }

                if (ins->src[1] == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) {
                        /* Component is from the swizzle. Take a nonzero component */
                        assert(ins->mask);
                        unsigned first_comp = ffs(ins->mask) - 1;
                        unsigned component = ins->swizzle[1][first_comp];

                        /* Scale constant appropriately, if we can legally */
                        int16_t scaled_constant = 0;

                        if (is_16) {
                                scaled_constant = ins->constants.u16[component];
                        } else if (midgard_is_integer_op(op)) {
                                scaled_constant = ins->constants.u32[component];

                                /* Constant overflow after resize */
                                if (scaled_constant != ins->constants.u32[component])
                                        continue;
                        } else {
                                float original = ins->constants.f32[component];
                                scaled_constant = _mesa_float_to_half(original);

                                /* Check for loss of precision. If this is
                                 * mediump, we don't care, but for a highp
                                 * shader, we need to pay attention. NIR
                                 * doesn't yet tell us which mode we're in!
                                 * Practically this prevents most constants
                                 * from being inlined, sadly. */

                                float fp32 = _mesa_half_to_float(scaled_constant);

                                if (fp32 != original)
                                        continue;
                        }

                        /* Should've been const folded */
                        if (ins->src_abs[1] || ins->src_neg[1])
                                continue;

                        /* Make sure that the constant is not itself a vector
                         * by checking if all accessed values are the same. */

                        const midgard_constants *cons = &ins->constants;
                        uint32_t value = is_16 ? cons->u16[component] : cons->u32[component];

                        bool is_vector = false;
                        unsigned mask = effective_writemask(ins->op, ins->mask);

                        for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) {
                                /* We only care if this component is actually used */
                                if (!(mask & (1 << c)))
                                        continue;

                                uint32_t test = is_16 ?
                                        cons->u16[ins->swizzle[1][c]] :
                                        cons->u32[ins->swizzle[1][c]];

                                if (test != value) {
                                        is_vector = true;
                                        break;
                                }
                        }

                        if (is_vector)
                                continue;

                        /* Get rid of the embedded constant */
                        ins->has_constants = false;
                        ins->src[1] = ~0;
                        ins->has_inline_constant = true;
                        ins->inline_constant = scaled_constant;
                }
        }
}

/* Dead code elimination for branches at the end of a block - only one branch
 * per block is legal semantically */

static void
midgard_cull_dead_branch(compiler_context *ctx, midgard_block *block)
{
        bool branched = false;

        mir_foreach_instr_in_block_safe(block, ins) {
                if (!midgard_is_branch_unit(ins->unit)) continue;

                /* Everything after the first branch is unreachable */
                if (branched)
                        mir_remove_instruction(ins);

                branched = true;
        }
}

/* We want to force the invert on AND/OR to the second slot to legalize into
 * iandnot/iornot.
The relevant patterns are for AND (and OR respectively)
 *
 * ~a & #b = ~a & ~(#~b)
 * ~a & b = b & ~a
 */

/* Move a source invert from slot 0 to slot 1 on iand/ior so the instruction
 * can be encoded as iandnot/iornot (only src1 may carry the invert). */
static void
midgard_legalize_invert(compiler_context *ctx, midgard_block *block)
{
        mir_foreach_instr_in_block(block, ins) {
                if (ins->type != TAG_ALU_4) continue;

                if (ins->op != midgard_alu_op_iand &&
                    ins->op != midgard_alu_op_ior) continue;

                if (ins->src_invert[1] || !ins->src_invert[0]) continue;

                if (ins->has_inline_constant) {
                        /* ~(#~a) = ~(~#a) = a, so valid, and forces both
                         * inverts on */
                        ins->inline_constant = ~ins->inline_constant;
                        ins->src_invert[1] = true;
                } else {
                        /* Flip to the right invert order. Note
                         * has_inline_constant false by assumption on the
                         * branch, so flipping makes sense.
                         */
                        mir_flip(ins);
                }
        }
}

/* Emit the writeout epilogue block for render target rt / sample iteration
 * sample_iter, replaying the recorded writeout branch's constants and source
 * types. Returns the index of the epilogue block for the caller to branch
 * to. */
static unsigned
emit_fragment_epilogue(compiler_context *ctx, unsigned rt, unsigned sample_iter)
{
        /* Loop to ourselves */
        midgard_instruction *br = ctx->writeout_branch[rt][sample_iter];
        struct midgard_instruction ins = v_branch(false, false);
        ins.writeout = br->writeout;
        ins.branch.target_block = ctx->block_count - 1;
        ins.constants.u32[0] = br->constants.u32[0];
        memcpy(&ins.src_types, &br->src_types, sizeof(ins.src_types));
        emit_mir_instruction(ctx, ins);

        ctx->current_block->epilogue = true;
        schedule_barrier(ctx);
        return ins.branch.target_block;
}

/* Start a new MIR block (reusing ctx->after_block if one was prepared),
 * link it into the block list, and make it the current emission target. */
static midgard_block *
emit_block_init(compiler_context *ctx)
{
        midgard_block *this_block = ctx->after_block;
        ctx->after_block = NULL;

        if (!this_block)
                this_block = create_empty_block(ctx);

        list_addtail(&this_block->base.link, &ctx->blocks);

        this_block->scheduled = false;
        ++ctx->block_count;

        /* Set up current block */
        list_inithead(&this_block->base.instructions);
        ctx->current_block = this_block;

        return this_block;
}

/* Emit MIR for every instruction in a NIR block, in order, into a fresh MIR
 * block. */
static midgard_block *
emit_block(compiler_context *ctx, nir_block *block)
{
        midgard_block *this_block = emit_block_init(ctx);

        nir_foreach_instr(instr, block) {
                emit_instr(ctx, instr);
                ++ctx->instruction_count;
        }

        return this_block;
}

static midgard_block *emit_cf_list(struct compiler_context *ctx, struct exec_list *list);

/* Emit MIR for a NIR if/else construct: a conditional branch over the then
 * block, the two sub-lists, and an unconditional jump past the else block
 * (elided when the else list emits nothing). Branch targets are patched
 * after both sub-lists are emitted, then CFG successors are wired up. */
static void
emit_if(struct compiler_context *ctx, nir_if *nif)
{
        midgard_block *before_block = ctx->current_block;

        /* Speculatively emit the branch, but we can't fill it in until later */
        bool inv = false;
        EMIT(branch, true, true);
        midgard_instruction *then_branch = mir_last_in_block(ctx->current_block);
        then_branch->src[0] = mir_get_branch_cond(&nif->condition, &inv);
        then_branch->src_types[0] = nir_type_uint32;
        then_branch->branch.invert_conditional = !inv;

        /* Emit the two subblocks. */
        midgard_block *then_block = emit_cf_list(ctx, &nif->then_list);
        midgard_block *end_then_block = ctx->current_block;

        /* Emit a jump from the end of the then block to the end of the else */
        EMIT(branch, false, false);
        midgard_instruction *then_exit = mir_last_in_block(ctx->current_block);

        /* Emit second block, and check if it's empty */

        int else_idx = ctx->block_count;
        int count_in = ctx->instruction_count;
        midgard_block *else_block = emit_cf_list(ctx, &nif->else_list);
        midgard_block *end_else_block = ctx->current_block;
        int after_else_idx = ctx->block_count;

        /* Now that we have the subblocks emitted, fix up the branches */

        assert(then_block);
        assert(else_block);

        if (ctx->instruction_count == count_in) {
                /* The else block is empty, so don't emit an exit jump */
                mir_remove_instruction(then_exit);
                then_branch->branch.target_block = after_else_idx;
        } else {
                then_branch->branch.target_block = else_idx;
                then_exit->branch.target_block = after_else_idx;
        }

        /* Wire up the successors */

        ctx->after_block = create_empty_block(ctx);

        pan_block_add_successor(&before_block->base, &then_block->base);
        pan_block_add_successor(&before_block->base, &else_block->base);

        pan_block_add_successor(&end_then_block->base, &ctx->after_block->base);
        pan_block_add_successor(&end_else_block->base, &ctx->after_block->base);
}
/* Emit MIR for a NIR loop: the body, a back-edge branch, and then rewrite
 * every TARGET_BREAK branch emitted at this loop depth into a goto targeting
 * the block following the loop. Loop depth is tracked recursively via
 * ctx->current_loop_depth. */
static void
emit_loop(struct compiler_context *ctx, nir_loop *nloop)
{
        /* Remember where we are */
        midgard_block *start_block = ctx->current_block;

        /* Allocate a loop number, growing the current inner loop depth */
        int loop_idx = ++ctx->current_loop_depth;

        /* Get index from before the body so we can loop back later */
        int start_idx = ctx->block_count;

        /* Emit the body itself */
        midgard_block *loop_block = emit_cf_list(ctx, &nloop->body);

        /* Branch back to loop back */
        struct midgard_instruction br_back = v_branch(false, false);
        br_back.branch.target_block = start_idx;
        emit_mir_instruction(ctx, br_back);

        /* Mark down that branch in the graph. */
        pan_block_add_successor(&start_block->base, &loop_block->base);
        pan_block_add_successor(&ctx->current_block->base, &loop_block->base);

        /* Find the index of the block about to follow us (note: we don't add
         * one; blocks are 0-indexed so we get a fencepost problem) */
        int break_block_idx = ctx->block_count;

        /* Fix up the break statements we emitted to point to the right place,
         * now that we can allocate a block number for them */
        ctx->after_block = create_empty_block(ctx);

        mir_foreach_block_from(ctx, start_block, _block) {
                mir_foreach_instr_in_block(((midgard_block *) _block), ins) {
                        if (ins->type != TAG_ALU_4) continue;
                        if (!ins->compact_branch) continue;

                        /* We found a branch -- check the type to see if we need to do anything */
                        if (ins->branch.target_type != TARGET_BREAK) continue;

                        /* It's a break! Check if it's our break */
                        if (ins->branch.target_break != loop_idx) continue;

                        /* Okay, cool, we're breaking out of this loop.
                         * Rewrite from a break to a goto */

                        ins->branch.target_type = TARGET_GOTO;
                        ins->branch.target_block = break_block_idx;

                        pan_block_add_successor(_block, &ctx->after_block->base);
                }
        }

        /* Now that we've finished emitting the loop, free up the depth again
         * so we play nice with recursion amid nested loops */
        --ctx->current_loop_depth;

        /* Dump loop stats */
        ++ctx->loop_count;
}

/* Emit MIR for a NIR control-flow list, dispatching on node type. Returns
 * the first MIR block emitted (used by callers to wire up CFG edges). */
static midgard_block *
emit_cf_list(struct compiler_context *ctx, struct exec_list *list)
{
        midgard_block *start_block = NULL;

        foreach_list_typed(nir_cf_node, node, node, list) {
                switch (node->type) {
                case nir_cf_node_block: {
                        midgard_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                        if (!start_block)
                                start_block = block;

                        break;
                }

                case nir_cf_node_if:
                        emit_if(ctx, nir_cf_node_as_if(node));
                        break;

                case nir_cf_node_loop:
                        emit_loop(ctx, nir_cf_node_as_loop(node));
                        break;

                case nir_cf_node_function:
                        assert(0);
                        break;
                }
        }

        return start_block;
}

/* Due to lookahead, we need to report the first tag executed in the command
 * stream and in branch targets. An initial block might be empty, so iterate
 * until we find one that 'works' */

unsigned
midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx)
{
        midgard_block *initial_block = mir_get_block(ctx, block_idx);

        mir_foreach_block_from(ctx, initial_block, _v) {
                midgard_block *v = (midgard_block *) _v;
                if (v->quadword_count) {
                        midgard_bundle *initial_bundle =
                                util_dynarray_element(&v->bundles, midgard_bundle, 0);

                        return initial_bundle->tag;
                }
        }

        /* Default to a tag 1 which will break from the shader, in case we jump
         * to the exit block (i.e. `return` in a compute shader) */

        return 1;
}

/* For each fragment writeout instruction, generate a writeout loop to
 * associate with it */

static void
mir_add_writeout_loops(compiler_context *ctx)
{
        for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) {
                for (unsigned s = 0; s < MIDGARD_MAX_SAMPLE_ITER; ++s) {
                        midgard_instruction *br = ctx->writeout_branch[rt][s];
                        if (!br) continue;

                        unsigned popped = br->branch.target_block;
                        pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base),
                                                &ctx->current_block->base);
                        br->branch.target_block = emit_fragment_epilogue(ctx, rt, s);
                        br->branch.target_type = TARGET_GOTO;

                        /* If we have more RTs, we'll need to restore back after our
                         * loop terminates */
30167ec681f3Smrg midgard_instruction *next_br = NULL; 30177ec681f3Smrg 30187ec681f3Smrg if ((s + 1) < MIDGARD_MAX_SAMPLE_ITER) 30197ec681f3Smrg next_br = ctx->writeout_branch[rt][s + 1]; 30207ec681f3Smrg 30217ec681f3Smrg if (!next_br && (rt + 1) < ARRAY_SIZE(ctx->writeout_branch)) 30227ec681f3Smrg next_br = ctx->writeout_branch[rt + 1][0]; 30237ec681f3Smrg 30247ec681f3Smrg if (next_br) { 30257ec681f3Smrg midgard_instruction uncond = v_branch(false, false); 30267ec681f3Smrg uncond.branch.target_block = popped; 30277ec681f3Smrg uncond.branch.target_type = TARGET_GOTO; 30287ec681f3Smrg emit_mir_instruction(ctx, uncond); 30297ec681f3Smrg pan_block_add_successor(&ctx->current_block->base, 30307ec681f3Smrg &(mir_get_block(ctx, popped)->base)); 30317ec681f3Smrg schedule_barrier(ctx); 30327ec681f3Smrg } else { 30337ec681f3Smrg /* We're last, so we can terminate here */ 30347ec681f3Smrg br->last_writeout = true; 30357ec681f3Smrg } 30367ec681f3Smrg } 30377ec681f3Smrg } 30387ec681f3Smrg} 30397ec681f3Smrg 30407ec681f3Smrgvoid 30417ec681f3Smrgmidgard_compile_shader_nir(nir_shader *nir, 30427ec681f3Smrg const struct panfrost_compile_inputs *inputs, 30437ec681f3Smrg struct util_dynarray *binary, 30447ec681f3Smrg struct pan_shader_info *info) 30457ec681f3Smrg{ 30467ec681f3Smrg midgard_debug = debug_get_option_midgard_debug(); 30477ec681f3Smrg 30487ec681f3Smrg /* TODO: Bound against what? 
*/ 30497ec681f3Smrg compiler_context *ctx = rzalloc(NULL, compiler_context); 30507ec681f3Smrg ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx); 30517ec681f3Smrg 30527ec681f3Smrg ctx->inputs = inputs; 30537ec681f3Smrg ctx->nir = nir; 30547ec681f3Smrg ctx->info = info; 30557ec681f3Smrg ctx->stage = nir->info.stage; 30567ec681f3Smrg 30577ec681f3Smrg if (inputs->is_blend) { 30587ec681f3Smrg unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1); 30597ec681f3Smrg const struct util_format_description *desc = 30607ec681f3Smrg util_format_description(inputs->rt_formats[inputs->blend.rt]); 30617ec681f3Smrg 30627ec681f3Smrg /* We have to split writeout in 128 bit chunks */ 30637ec681f3Smrg ctx->blend_sample_iterations = 30647ec681f3Smrg DIV_ROUND_UP(desc->block.bits * nr_samples, 128); 30657ec681f3Smrg } 30667ec681f3Smrg ctx->blend_input = ~0; 30677ec681f3Smrg ctx->blend_src1 = ~0; 30687ec681f3Smrg ctx->quirks = midgard_get_quirks(inputs->gpu_id); 30697ec681f3Smrg 30707ec681f3Smrg /* Initialize at a global (not block) level hash tables */ 30717ec681f3Smrg 30727ec681f3Smrg ctx->ssa_constants = _mesa_hash_table_u64_create(ctx); 30737ec681f3Smrg 30747ec681f3Smrg /* Lower gl_Position pre-optimisation, but after lowering vars to ssa 30757ec681f3Smrg * (so we don't accidentally duplicate the epilogue since mesa/st has 30767ec681f3Smrg * messed with our I/O quite a bit already) */ 30777ec681f3Smrg 30787ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_ssa); 30797ec681f3Smrg 30807ec681f3Smrg if (ctx->stage == MESA_SHADER_VERTEX) { 30817ec681f3Smrg NIR_PASS_V(nir, nir_lower_viewport_transform); 30827ec681f3Smrg NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0); 30837ec681f3Smrg } 30847ec681f3Smrg 30857ec681f3Smrg NIR_PASS_V(nir, nir_lower_var_copies); 30867ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_ssa); 30877ec681f3Smrg NIR_PASS_V(nir, nir_split_var_copies); 30887ec681f3Smrg NIR_PASS_V(nir, nir_lower_var_copies); 30897ec681f3Smrg NIR_PASS_V(nir, 
nir_lower_global_vars_to_local); 30907ec681f3Smrg NIR_PASS_V(nir, nir_lower_var_copies); 30917ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_ssa); 30927ec681f3Smrg 30937ec681f3Smrg unsigned pan_quirks = panfrost_get_quirks(inputs->gpu_id, 0); 30947ec681f3Smrg NIR_PASS_V(nir, pan_lower_framebuffer, 30957ec681f3Smrg inputs->rt_formats, inputs->raw_fmt_mask, 30967ec681f3Smrg inputs->is_blend, pan_quirks); 30977ec681f3Smrg 30987ec681f3Smrg NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 30997ec681f3Smrg glsl_type_size, 0); 31007ec681f3Smrg NIR_PASS_V(nir, nir_lower_ssbo); 31017ec681f3Smrg NIR_PASS_V(nir, pan_nir_lower_zs_store); 31027ec681f3Smrg 31037ec681f3Smrg NIR_PASS_V(nir, pan_nir_lower_64bit_intrin); 31047ec681f3Smrg 31057ec681f3Smrg /* Optimisation passes */ 31067ec681f3Smrg 31077ec681f3Smrg optimise_nir(nir, ctx->quirks, inputs->is_blend); 31087ec681f3Smrg 31097ec681f3Smrg NIR_PASS_V(nir, pan_nir_reorder_writeout); 31107ec681f3Smrg 31117ec681f3Smrg if ((midgard_debug & MIDGARD_DBG_SHADERS) && 31127ec681f3Smrg ((midgard_debug & MIDGARD_DBG_INTERNAL) || !nir->info.internal)) { 31137ec681f3Smrg nir_print_shader(nir, stdout); 31147ec681f3Smrg } 31157ec681f3Smrg 31167ec681f3Smrg info->tls_size = nir->scratch_size; 31177ec681f3Smrg 31187ec681f3Smrg nir_foreach_function(func, nir) { 31197ec681f3Smrg if (!func->impl) 31207ec681f3Smrg continue; 31217ec681f3Smrg 31227ec681f3Smrg list_inithead(&ctx->blocks); 31237ec681f3Smrg ctx->block_count = 0; 31247ec681f3Smrg ctx->func = func; 31257ec681f3Smrg ctx->already_emitted = calloc(BITSET_WORDS(func->impl->ssa_alloc), sizeof(BITSET_WORD)); 31267ec681f3Smrg 31277ec681f3Smrg if (nir->info.outputs_read && !inputs->is_blend) { 31287ec681f3Smrg emit_block_init(ctx); 31297ec681f3Smrg 31307ec681f3Smrg struct midgard_instruction wait = v_branch(false, false); 31317ec681f3Smrg wait.branch.target_type = TARGET_TILEBUF_WAIT; 31327ec681f3Smrg 31337ec681f3Smrg emit_mir_instruction(ctx, wait); 31347ec681f3Smrg 
31357ec681f3Smrg ++ctx->instruction_count; 31367ec681f3Smrg } 31377ec681f3Smrg 31387ec681f3Smrg emit_cf_list(ctx, &func->impl->body); 31397ec681f3Smrg free(ctx->already_emitted); 31407ec681f3Smrg break; /* TODO: Multi-function shaders */ 31417ec681f3Smrg } 31427ec681f3Smrg 31437ec681f3Smrg /* Per-block lowering before opts */ 31447ec681f3Smrg 31457ec681f3Smrg mir_foreach_block(ctx, _block) { 31467ec681f3Smrg midgard_block *block = (midgard_block *) _block; 31477ec681f3Smrg inline_alu_constants(ctx, block); 31487ec681f3Smrg embedded_to_inline_constant(ctx, block); 31497ec681f3Smrg } 31507ec681f3Smrg /* MIR-level optimizations */ 31517ec681f3Smrg 31527ec681f3Smrg bool progress = false; 31537ec681f3Smrg 31547ec681f3Smrg do { 31557ec681f3Smrg progress = false; 31567ec681f3Smrg progress |= midgard_opt_dead_code_eliminate(ctx); 31577ec681f3Smrg 31587ec681f3Smrg mir_foreach_block(ctx, _block) { 31597ec681f3Smrg midgard_block *block = (midgard_block *) _block; 31607ec681f3Smrg progress |= midgard_opt_copy_prop(ctx, block); 31617ec681f3Smrg progress |= midgard_opt_combine_projection(ctx, block); 31627ec681f3Smrg progress |= midgard_opt_varying_projection(ctx, block); 31637ec681f3Smrg } 31647ec681f3Smrg } while (progress); 31657ec681f3Smrg 31667ec681f3Smrg mir_foreach_block(ctx, _block) { 31677ec681f3Smrg midgard_block *block = (midgard_block *) _block; 31687ec681f3Smrg midgard_lower_derivatives(ctx, block); 31697ec681f3Smrg midgard_legalize_invert(ctx, block); 31707ec681f3Smrg midgard_cull_dead_branch(ctx, block); 31717ec681f3Smrg } 31727ec681f3Smrg 31737ec681f3Smrg if (ctx->stage == MESA_SHADER_FRAGMENT) 31747ec681f3Smrg mir_add_writeout_loops(ctx); 31757ec681f3Smrg 31767ec681f3Smrg /* Analyze now that the code is known but before scheduling creates 31777ec681f3Smrg * pipeline registers which are harder to track */ 31787ec681f3Smrg mir_analyze_helper_requirements(ctx); 31797ec681f3Smrg 31807ec681f3Smrg /* Schedule! 
*/ 31817ec681f3Smrg midgard_schedule_program(ctx); 31827ec681f3Smrg mir_ra(ctx); 31837ec681f3Smrg 31847ec681f3Smrg /* Analyze after scheduling since this is order-dependent */ 31857ec681f3Smrg mir_analyze_helper_terminate(ctx); 31867ec681f3Smrg 31877ec681f3Smrg /* Emit flat binary from the instruction arrays. Iterate each block in 31887ec681f3Smrg * sequence. Save instruction boundaries such that lookahead tags can 31897ec681f3Smrg * be assigned easily */ 31907ec681f3Smrg 31917ec681f3Smrg /* Cache _all_ bundles in source order for lookahead across failed branches */ 31927ec681f3Smrg 31937ec681f3Smrg int bundle_count = 0; 31947ec681f3Smrg mir_foreach_block(ctx, _block) { 31957ec681f3Smrg midgard_block *block = (midgard_block *) _block; 31967ec681f3Smrg bundle_count += block->bundles.size / sizeof(midgard_bundle); 31977ec681f3Smrg } 31987ec681f3Smrg midgard_bundle **source_order_bundles = malloc(sizeof(midgard_bundle *) * bundle_count); 31997ec681f3Smrg int bundle_idx = 0; 32007ec681f3Smrg mir_foreach_block(ctx, _block) { 32017ec681f3Smrg midgard_block *block = (midgard_block *) _block; 32027ec681f3Smrg util_dynarray_foreach(&block->bundles, midgard_bundle, bundle) { 32037ec681f3Smrg source_order_bundles[bundle_idx++] = bundle; 32047ec681f3Smrg } 32057ec681f3Smrg } 32067ec681f3Smrg 32077ec681f3Smrg int current_bundle = 0; 32087ec681f3Smrg 32097ec681f3Smrg /* Midgard prefetches instruction types, so during emission we 32107ec681f3Smrg * need to lookahead. Unless this is the last instruction, in 32117ec681f3Smrg * which we return 1. 
*/ 32127ec681f3Smrg 32137ec681f3Smrg mir_foreach_block(ctx, _block) { 32147ec681f3Smrg midgard_block *block = (midgard_block *) _block; 32157ec681f3Smrg mir_foreach_bundle_in_block(block, bundle) { 32167ec681f3Smrg int lookahead = 1; 32177ec681f3Smrg 32187ec681f3Smrg if (!bundle->last_writeout && (current_bundle + 1 < bundle_count)) 32197ec681f3Smrg lookahead = source_order_bundles[current_bundle + 1]->tag; 32207ec681f3Smrg 32217ec681f3Smrg emit_binary_bundle(ctx, block, bundle, binary, lookahead); 32227ec681f3Smrg ++current_bundle; 32237ec681f3Smrg } 32247ec681f3Smrg 32257ec681f3Smrg /* TODO: Free deeper */ 32267ec681f3Smrg //util_dynarray_fini(&block->instructions); 32277ec681f3Smrg } 32287ec681f3Smrg 32297ec681f3Smrg free(source_order_bundles); 32307ec681f3Smrg 32317ec681f3Smrg /* Report the very first tag executed */ 32327ec681f3Smrg info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0); 32337ec681f3Smrg 32347ec681f3Smrg info->ubo_mask = ctx->ubo_mask & BITSET_MASK(ctx->nir->info.num_ubos); 32357ec681f3Smrg 32367ec681f3Smrg if ((midgard_debug & MIDGARD_DBG_SHADERS) && 32377ec681f3Smrg ((midgard_debug & MIDGARD_DBG_INTERNAL) || !nir->info.internal)) { 32387ec681f3Smrg disassemble_midgard(stdout, binary->data, 32397ec681f3Smrg binary->size, inputs->gpu_id, 32407ec681f3Smrg midgard_debug & MIDGARD_DBG_VERBOSE); 32417ec681f3Smrg fflush(stdout); 32427ec681f3Smrg } 32437ec681f3Smrg 32447ec681f3Smrg /* A shader ending on a 16MB boundary causes INSTR_INVALID_PC faults, 32457ec681f3Smrg * workaround by adding some padding to the end of the shader. (The 32467ec681f3Smrg * kernel makes sure shader BOs can't cross 16MB boundaries.) 
*/ 32477ec681f3Smrg if (binary->size) 32487ec681f3Smrg memset(util_dynarray_grow(binary, uint8_t, 16), 0, 16); 32497ec681f3Smrg 32507ec681f3Smrg if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->shaderdb) && 32517ec681f3Smrg !nir->info.internal) { 32527ec681f3Smrg unsigned nr_bundles = 0, nr_ins = 0; 32537ec681f3Smrg 32547ec681f3Smrg /* Count instructions and bundles */ 32557ec681f3Smrg 32567ec681f3Smrg mir_foreach_block(ctx, _block) { 32577ec681f3Smrg midgard_block *block = (midgard_block *) _block; 32587ec681f3Smrg nr_bundles += util_dynarray_num_elements( 32597ec681f3Smrg &block->bundles, midgard_bundle); 32607ec681f3Smrg 32617ec681f3Smrg mir_foreach_bundle_in_block(block, bun) 32627ec681f3Smrg nr_ins += bun->instruction_count; 32637ec681f3Smrg } 32647ec681f3Smrg 32657ec681f3Smrg /* Calculate thread count. There are certain cutoffs by 32667ec681f3Smrg * register count for thread count */ 32677ec681f3Smrg 32687ec681f3Smrg unsigned nr_registers = info->work_reg_count; 32697ec681f3Smrg 32707ec681f3Smrg unsigned nr_threads = 32717ec681f3Smrg (nr_registers <= 4) ? 4 : 32727ec681f3Smrg (nr_registers <= 8) ? 2 : 32737ec681f3Smrg 1; 32747ec681f3Smrg 32757ec681f3Smrg /* Dump stats */ 32767ec681f3Smrg 32777ec681f3Smrg fprintf(stderr, "%s - %s shader: " 32787ec681f3Smrg "%u inst, %u bundles, %u quadwords, " 32797ec681f3Smrg "%u registers, %u threads, %u loops, " 32807ec681f3Smrg "%u:%u spills:fills\n", 32817ec681f3Smrg ctx->nir->info.label ?: "", 32827ec681f3Smrg ctx->inputs->is_blend ? "PAN_SHADER_BLEND" : 32837ec681f3Smrg gl_shader_stage_name(ctx->stage), 32847ec681f3Smrg nr_ins, nr_bundles, ctx->quadword_count, 32857ec681f3Smrg nr_registers, nr_threads, 32867ec681f3Smrg ctx->loop_count, 32877ec681f3Smrg ctx->spills, ctx->fills); 32887ec681f3Smrg } 32897ec681f3Smrg 32907ec681f3Smrg _mesa_hash_table_u64_destroy(ctx->ssa_constants); 32917ec681f3Smrg _mesa_hash_table_u64_destroy(ctx->sysval_to_id); 32927ec681f3Smrg 32937ec681f3Smrg ralloc_free(ctx); 32947ec681f3Smrg} 3295