17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2020 Collabora Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg * Authors (Collabora): 247ec681f3Smrg * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 257ec681f3Smrg */ 267ec681f3Smrg 277ec681f3Smrg#ifndef __BIFROST_COMPILER_H 287ec681f3Smrg#define __BIFROST_COMPILER_H 297ec681f3Smrg 307ec681f3Smrg#include "bifrost.h" 317ec681f3Smrg#include "bi_opcodes.h" 327ec681f3Smrg#include "compiler/nir/nir.h" 337ec681f3Smrg#include "panfrost/util/pan_ir.h" 347ec681f3Smrg#include "util/u_math.h" 357ec681f3Smrg#include "util/half_float.h" 367ec681f3Smrg 377ec681f3Smrg/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly. 387ec681f3Smrg * To express widen, use the correpsonding replicated form, i.e. H01 = identity 397ec681f3Smrg * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also 407ec681f3Smrg * use the replicated form (interpretation is governed by the opcode). For 417ec681f3Smrg * 8-bit lanes with two channels, use replicated forms for replicated forms 427ec681f3Smrg * (TODO: what about others?). For 8-bit lanes with four channels using 437ec681f3Smrg * matching form (TODO: what about others?). 447ec681f3Smrg */ 457ec681f3Smrg 467ec681f3Smrgenum bi_swizzle { 477ec681f3Smrg /* 16-bit swizzle ordering deliberate for fast compute */ 487ec681f3Smrg BI_SWIZZLE_H00 = 0, /* = B0101 */ 497ec681f3Smrg BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */ 507ec681f3Smrg BI_SWIZZLE_H10 = 2, /* = B2301 */ 517ec681f3Smrg BI_SWIZZLE_H11 = 3, /* = B2323 */ 527ec681f3Smrg 537ec681f3Smrg /* replication order should be maintained for fast compute */ 547ec681f3Smrg BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */ 557ec681f3Smrg BI_SWIZZLE_B1111 = 5, 567ec681f3Smrg BI_SWIZZLE_B2222 = 6, 577ec681f3Smrg BI_SWIZZLE_B3333 = 7, 587ec681f3Smrg 597ec681f3Smrg /* totally special for explicit pattern matching */ 607ec681f3Smrg BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */ 617ec681f3Smrg BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */ 627ec681f3Smrg BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */ 637ec681f3Smrg BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */ 647ec681f3Smrg 657ec681f3Smrg BI_SWIZZLE_B0022 = 12, /* for b02 lanes */ 667ec681f3Smrg}; 677ec681f3Smrg 687ec681f3Smrg/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant 697ec681f3Smrg * folding and Valhall constant optimization. */ 707ec681f3Smrg 717ec681f3Smrgstatic inline uint32_t 727ec681f3Smrgbi_apply_swizzle(uint32_t value, enum bi_swizzle swz) 737ec681f3Smrg{ 747ec681f3Smrg const uint16_t *h = (const uint16_t *) &value; 757ec681f3Smrg const uint8_t *b = (const uint8_t *) &value; 767ec681f3Smrg 777ec681f3Smrg#define H(h0, h1) (h[h0] | (h[h1] << 16)) 787ec681f3Smrg#define B(b0, b1, b2, b3) (b[b0] | (b[b1] << 8) | (b[b2] << 16) | (b[b3] << 24)) 797ec681f3Smrg 807ec681f3Smrg switch (swz) { 817ec681f3Smrg case BI_SWIZZLE_H00: return H(0, 0); 827ec681f3Smrg case BI_SWIZZLE_H01: return H(0, 1); 837ec681f3Smrg case BI_SWIZZLE_H10: return H(1, 0); 847ec681f3Smrg case BI_SWIZZLE_H11: return H(1, 1); 857ec681f3Smrg case BI_SWIZZLE_B0000: return B(0, 0, 0, 0); 867ec681f3Smrg case BI_SWIZZLE_B1111: return B(1, 1, 1, 1); 877ec681f3Smrg case BI_SWIZZLE_B2222: return B(2, 2, 2, 2); 887ec681f3Smrg case BI_SWIZZLE_B3333: return B(3, 3, 3, 3); 897ec681f3Smrg case BI_SWIZZLE_B0011: return B(0, 0, 1, 1); 907ec681f3Smrg case BI_SWIZZLE_B2233: return B(2, 2, 3, 3); 917ec681f3Smrg case BI_SWIZZLE_B1032: return B(1, 0, 3, 2); 927ec681f3Smrg case BI_SWIZZLE_B3210: return B(3, 2, 1, 0); 937ec681f3Smrg case BI_SWIZZLE_B0022: return B(0, 0, 2, 2); 947ec681f3Smrg } 957ec681f3Smrg 967ec681f3Smrg#undef H 977ec681f3Smrg#undef B 987ec681f3Smrg 997ec681f3Smrg unreachable("Invalid swizzle"); 1007ec681f3Smrg} 1017ec681f3Smrg 1027ec681f3Smrgenum bi_index_type { 1037ec681f3Smrg BI_INDEX_NULL = 0, 1047ec681f3Smrg BI_INDEX_NORMAL = 1, 1057ec681f3Smrg BI_INDEX_REGISTER = 2, 1067ec681f3Smrg BI_INDEX_CONSTANT = 3, 1077ec681f3Smrg BI_INDEX_PASS = 4, 1087ec681f3Smrg BI_INDEX_FAU = 5 1097ec681f3Smrg}; 1107ec681f3Smrg 1117ec681f3Smrgtypedef struct { 1127ec681f3Smrg uint32_t value; 1137ec681f3Smrg 1147ec681f3Smrg /* modifiers, should only be set if applicable for a given instruction. 1157ec681f3Smrg * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where 1167ec681f3Smrg * applicable, neg plays the role of not */ 1177ec681f3Smrg bool abs : 1; 1187ec681f3Smrg bool neg : 1; 1197ec681f3Smrg 1207ec681f3Smrg /* The last use of a value, should be purged from the register cache. 1217ec681f3Smrg * Set by liveness analysis. */ 1227ec681f3Smrg bool discard : 1; 1237ec681f3Smrg 1247ec681f3Smrg /* For a source, the swizzle. For a destination, acts a bit like a 1257ec681f3Smrg * write mask. Identity for the full 32-bit, H00 for only caring about 1267ec681f3Smrg * the lower half, other values unused. */ 1277ec681f3Smrg enum bi_swizzle swizzle : 4; 1287ec681f3Smrg uint32_t offset : 2; 1297ec681f3Smrg bool reg : 1; 1307ec681f3Smrg enum bi_index_type type : 3; 1317ec681f3Smrg} bi_index; 1327ec681f3Smrg 1337ec681f3Smrgstatic inline bi_index 1347ec681f3Smrgbi_get_index(unsigned value, bool is_reg, unsigned offset) 1357ec681f3Smrg{ 1367ec681f3Smrg return (bi_index) { 1377ec681f3Smrg .type = BI_INDEX_NORMAL, 1387ec681f3Smrg .value = value, 1397ec681f3Smrg .swizzle = BI_SWIZZLE_H01, 1407ec681f3Smrg .offset = offset, 1417ec681f3Smrg .reg = is_reg, 1427ec681f3Smrg }; 1437ec681f3Smrg} 1447ec681f3Smrg 1457ec681f3Smrgstatic inline bi_index 1467ec681f3Smrgbi_register(unsigned reg) 1477ec681f3Smrg{ 1487ec681f3Smrg assert(reg < 64); 1497ec681f3Smrg 1507ec681f3Smrg return (bi_index) { 1517ec681f3Smrg .type = BI_INDEX_REGISTER, 1527ec681f3Smrg .swizzle = BI_SWIZZLE_H01, 1537ec681f3Smrg .value = reg 1547ec681f3Smrg }; 1557ec681f3Smrg} 1567ec681f3Smrg 1577ec681f3Smrgstatic inline bi_index 1587ec681f3Smrgbi_imm_u32(uint32_t imm) 1597ec681f3Smrg{ 1607ec681f3Smrg return (bi_index) { 1617ec681f3Smrg .type = BI_INDEX_CONSTANT, 1627ec681f3Smrg .swizzle = BI_SWIZZLE_H01, 1637ec681f3Smrg .value = imm 1647ec681f3Smrg }; 1657ec681f3Smrg} 1667ec681f3Smrg 1677ec681f3Smrgstatic inline bi_index 1687ec681f3Smrgbi_imm_f32(float imm) 1697ec681f3Smrg{ 1707ec681f3Smrg return bi_imm_u32(fui(imm)); 1717ec681f3Smrg} 1727ec681f3Smrg 1737ec681f3Smrgstatic inline bi_index 1747ec681f3Smrgbi_null() 1757ec681f3Smrg{ 1767ec681f3Smrg return (bi_index) { .type = BI_INDEX_NULL }; 1777ec681f3Smrg} 1787ec681f3Smrg 1797ec681f3Smrgstatic inline bi_index 1807ec681f3Smrgbi_zero() 1817ec681f3Smrg{ 1827ec681f3Smrg return bi_imm_u32(0); 1837ec681f3Smrg} 1847ec681f3Smrg 1857ec681f3Smrgstatic inline bi_index 1867ec681f3Smrgbi_passthrough(enum bifrost_packed_src value) 1877ec681f3Smrg{ 1887ec681f3Smrg return (bi_index) { 1897ec681f3Smrg .type = BI_INDEX_PASS, 1907ec681f3Smrg .swizzle = BI_SWIZZLE_H01, 1917ec681f3Smrg .value = value 1927ec681f3Smrg }; 1937ec681f3Smrg} 1947ec681f3Smrg 1957ec681f3Smrg/* Read back power-efficent garbage, TODO maybe merge with null? */ 1967ec681f3Smrgstatic inline bi_index 1977ec681f3Smrgbi_dontcare() 1987ec681f3Smrg{ 1997ec681f3Smrg return bi_passthrough(BIFROST_SRC_FAU_HI); 2007ec681f3Smrg} 2017ec681f3Smrg 2027ec681f3Smrg/* Extracts a word from a vectored index */ 2037ec681f3Smrgstatic inline bi_index 2047ec681f3Smrgbi_word(bi_index idx, unsigned component) 2057ec681f3Smrg{ 2067ec681f3Smrg idx.offset += component; 2077ec681f3Smrg return idx; 2087ec681f3Smrg} 2097ec681f3Smrg 2107ec681f3Smrg/* Helps construct swizzles */ 2117ec681f3Smrgstatic inline bi_index 2127ec681f3Smrgbi_swz_16(bi_index idx, bool x, bool y) 2137ec681f3Smrg{ 2147ec681f3Smrg assert(idx.swizzle == BI_SWIZZLE_H01); 2157ec681f3Smrg idx.swizzle = BI_SWIZZLE_H00 | (x << 1) | y; 2167ec681f3Smrg return idx; 2177ec681f3Smrg} 2187ec681f3Smrg 2197ec681f3Smrgstatic inline bi_index 2207ec681f3Smrgbi_half(bi_index idx, bool upper) 2217ec681f3Smrg{ 2227ec681f3Smrg return bi_swz_16(idx, upper, upper); 2237ec681f3Smrg} 2247ec681f3Smrg 2257ec681f3Smrgstatic inline bi_index 2267ec681f3Smrgbi_byte(bi_index idx, unsigned lane) 2277ec681f3Smrg{ 2287ec681f3Smrg assert(idx.swizzle == BI_SWIZZLE_H01); 2297ec681f3Smrg assert(lane < 4); 2307ec681f3Smrg idx.swizzle = BI_SWIZZLE_B0000 + lane; 2317ec681f3Smrg return idx; 2327ec681f3Smrg} 2337ec681f3Smrg 2347ec681f3Smrgstatic inline bi_index 2357ec681f3Smrgbi_abs(bi_index idx) 2367ec681f3Smrg{ 2377ec681f3Smrg idx.abs = true; 2387ec681f3Smrg return idx; 2397ec681f3Smrg} 2407ec681f3Smrg 2417ec681f3Smrgstatic inline bi_index 2427ec681f3Smrgbi_neg(bi_index idx) 2437ec681f3Smrg{ 2447ec681f3Smrg idx.neg ^= true; 2457ec681f3Smrg return idx; 2467ec681f3Smrg} 2477ec681f3Smrg 2487ec681f3Smrgstatic inline bi_index 2497ec681f3Smrgbi_discard(bi_index idx) 2507ec681f3Smrg{ 2517ec681f3Smrg idx.discard = true; 2527ec681f3Smrg return idx; 2537ec681f3Smrg} 2547ec681f3Smrg 2557ec681f3Smrg/* Additive identity in IEEE 754 arithmetic */ 2567ec681f3Smrgstatic inline bi_index 2577ec681f3Smrgbi_negzero() 2587ec681f3Smrg{ 2597ec681f3Smrg return bi_neg(bi_zero()); 2607ec681f3Smrg} 2617ec681f3Smrg 2627ec681f3Smrg/* Replaces an index, preserving any modifiers */ 2637ec681f3Smrg 2647ec681f3Smrgstatic inline bi_index 2657ec681f3Smrgbi_replace_index(bi_index old, bi_index replacement) 2667ec681f3Smrg{ 2677ec681f3Smrg replacement.abs = old.abs; 2687ec681f3Smrg replacement.neg = old.neg; 2697ec681f3Smrg replacement.swizzle = old.swizzle; 2707ec681f3Smrg return replacement; 2717ec681f3Smrg} 2727ec681f3Smrg 2737ec681f3Smrg/* Remove any modifiers. This has the property: 2747ec681f3Smrg * 2757ec681f3Smrg * replace_index(x, strip_index(x)) = x 2767ec681f3Smrg * 2777ec681f3Smrg * This ensures it is suitable to use when lowering sources to moves */ 2787ec681f3Smrg 2797ec681f3Smrgstatic inline bi_index 2807ec681f3Smrgbi_strip_index(bi_index index) 2817ec681f3Smrg{ 2827ec681f3Smrg index.abs = index.neg = false; 2837ec681f3Smrg index.swizzle = BI_SWIZZLE_H01; 2847ec681f3Smrg return index; 2857ec681f3Smrg} 2867ec681f3Smrg 2877ec681f3Smrg/* For bitwise instructions */ 2887ec681f3Smrg#define bi_not(x) bi_neg(x) 2897ec681f3Smrg 2907ec681f3Smrgstatic inline bi_index 2917ec681f3Smrgbi_imm_u8(uint8_t imm) 2927ec681f3Smrg{ 2937ec681f3Smrg return bi_byte(bi_imm_u32(imm), 0); 2947ec681f3Smrg} 2957ec681f3Smrg 2967ec681f3Smrgstatic inline bi_index 2977ec681f3Smrgbi_imm_u16(uint16_t imm) 2987ec681f3Smrg{ 2997ec681f3Smrg return bi_half(bi_imm_u32(imm), false); 3007ec681f3Smrg} 3017ec681f3Smrg 3027ec681f3Smrgstatic inline bi_index 3037ec681f3Smrgbi_imm_uintN(uint32_t imm, unsigned sz) 3047ec681f3Smrg{ 3057ec681f3Smrg assert(sz == 8 || sz == 16 || sz == 32); 3067ec681f3Smrg return (sz == 8) ? bi_imm_u8(imm) : 3077ec681f3Smrg (sz == 16) ? bi_imm_u16(imm) : 3087ec681f3Smrg bi_imm_u32(imm); 3097ec681f3Smrg} 3107ec681f3Smrg 3117ec681f3Smrgstatic inline bi_index 3127ec681f3Smrgbi_imm_f16(float imm) 3137ec681f3Smrg{ 3147ec681f3Smrg return bi_imm_u16(_mesa_float_to_half(imm)); 3157ec681f3Smrg} 3167ec681f3Smrg 3177ec681f3Smrgstatic inline bool 3187ec681f3Smrgbi_is_null(bi_index idx) 3197ec681f3Smrg{ 3207ec681f3Smrg return idx.type == BI_INDEX_NULL; 3217ec681f3Smrg} 3227ec681f3Smrg 3237ec681f3Smrgstatic inline bool 3247ec681f3Smrgbi_is_ssa(bi_index idx) 3257ec681f3Smrg{ 3267ec681f3Smrg return idx.type == BI_INDEX_NORMAL && !idx.reg; 3277ec681f3Smrg} 3287ec681f3Smrg 3297ec681f3Smrg/* Compares equivalence as references. Does not compare offsets, swizzles, or 3307ec681f3Smrg * modifiers. In other words, this forms bi_index equivalence classes by 3317ec681f3Smrg * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */ 3327ec681f3Smrg 3337ec681f3Smrgstatic inline bool 3347ec681f3Smrgbi_is_equiv(bi_index left, bi_index right) 3357ec681f3Smrg{ 3367ec681f3Smrg return (left.type == right.type) && 3377ec681f3Smrg (left.reg == right.reg) && 3387ec681f3Smrg (left.value == right.value); 3397ec681f3Smrg} 3407ec681f3Smrg 3417ec681f3Smrg/* A stronger equivalence relation that requires the indices access the 3427ec681f3Smrg * same offset, useful for RA/scheduling to see what registers will 3437ec681f3Smrg * correspond to */ 3447ec681f3Smrg 3457ec681f3Smrgstatic inline bool 3467ec681f3Smrgbi_is_word_equiv(bi_index left, bi_index right) 3477ec681f3Smrg{ 3487ec681f3Smrg return bi_is_equiv(left, right) && left.offset == right.offset; 3497ec681f3Smrg} 3507ec681f3Smrg 3517ec681f3Smrg#define BI_MAX_DESTS 2 3527ec681f3Smrg#define BI_MAX_SRCS 4 3537ec681f3Smrg 3547ec681f3Smrgtypedef struct { 3557ec681f3Smrg /* Must be first */ 3567ec681f3Smrg struct list_head link; 3577ec681f3Smrg 3587ec681f3Smrg enum bi_opcode op; 3597ec681f3Smrg 3607ec681f3Smrg /* Data flow */ 3617ec681f3Smrg bi_index dest[BI_MAX_DESTS]; 3627ec681f3Smrg bi_index src[BI_MAX_SRCS]; 3637ec681f3Smrg 3647ec681f3Smrg /* For a branch */ 3657ec681f3Smrg struct bi_block *branch_target; 3667ec681f3Smrg 3677ec681f3Smrg /* These don't fit neatly with anything else.. */ 3687ec681f3Smrg enum bi_register_format register_format; 3697ec681f3Smrg enum bi_vecsize vecsize; 3707ec681f3Smrg 3717ec681f3Smrg /* Can we spill the value written here? Used to prevent 3727ec681f3Smrg * useless double fills */ 3737ec681f3Smrg bool no_spill; 3747ec681f3Smrg 3757ec681f3Smrg /* Override table, inducing a DTSEL_IMM pair if nonzero */ 3767ec681f3Smrg enum bi_table table; 3777ec681f3Smrg 3787ec681f3Smrg /* Everything after this MUST NOT be accessed directly, since 3797ec681f3Smrg * interpretation depends on opcodes */ 3807ec681f3Smrg 3817ec681f3Smrg /* Destination modifiers */ 3827ec681f3Smrg union { 3837ec681f3Smrg enum bi_clamp clamp; 3847ec681f3Smrg bool saturate; 3857ec681f3Smrg bool not_result; 3867ec681f3Smrg unsigned dest_mod; 3877ec681f3Smrg }; 3887ec681f3Smrg 3897ec681f3Smrg /* Immediates. All seen alone in an instruction, except for varying/texture 3907ec681f3Smrg * which are specified jointly for VARTEX */ 3917ec681f3Smrg union { 3927ec681f3Smrg uint32_t shift; 3937ec681f3Smrg uint32_t fill; 3947ec681f3Smrg uint32_t index; 3957ec681f3Smrg uint32_t attribute_index; 3967ec681f3Smrg int32_t branch_offset; 3977ec681f3Smrg 3987ec681f3Smrg struct { 3997ec681f3Smrg uint32_t varying_index; 4007ec681f3Smrg uint32_t sampler_index; 4017ec681f3Smrg uint32_t texture_index; 4027ec681f3Smrg }; 4037ec681f3Smrg 4047ec681f3Smrg /* TEXC, ATOM_CX: # of staging registers used */ 4057ec681f3Smrg uint32_t sr_count; 4067ec681f3Smrg }; 4077ec681f3Smrg 4087ec681f3Smrg /* Modifiers specific to particular instructions are thrown in a union */ 4097ec681f3Smrg union { 4107ec681f3Smrg enum bi_adj adj; /* FEXP_TABLE.u4 */ 4117ec681f3Smrg enum bi_atom_opc atom_opc; /* atomics */ 4127ec681f3Smrg enum bi_func func; /* FPOW_SC_DET */ 4137ec681f3Smrg enum bi_function function; /* LD_VAR_FLAT */ 4147ec681f3Smrg enum bi_mux mux; /* MUX */ 4157ec681f3Smrg enum bi_sem sem; /* FMAX, FMIN */ 4167ec681f3Smrg enum bi_source source; /* LD_GCLK */ 4177ec681f3Smrg bool scale; /* VN_ASST2, FSINCOS_OFFSET */ 4187ec681f3Smrg bool offset; /* FSIN_TABLE, FOCS_TABLE */ 4197ec681f3Smrg bool mask; /* CLZ */ 4207ec681f3Smrg bool threads; /* IMULD, IMOV_FMA */ 4217ec681f3Smrg bool combine; /* BRANCHC */ 4227ec681f3Smrg bool format; /* LEA_TEX */ 4237ec681f3Smrg 4247ec681f3Smrg struct { 4257ec681f3Smrg enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */ 4267ec681f3Smrg enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */ 4277ec681f3Smrg }; 4287ec681f3Smrg 4297ec681f3Smrg struct { 4307ec681f3Smrg enum bi_result_type result_type; /* FCMP, ICMP */ 4317ec681f3Smrg enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */ 4327ec681f3Smrg }; 4337ec681f3Smrg 4347ec681f3Smrg struct { 4357ec681f3Smrg enum bi_stack_mode stack_mode; /* JUMP_EX */ 4367ec681f3Smrg bool test_mode; 4377ec681f3Smrg }; 4387ec681f3Smrg 4397ec681f3Smrg struct { 4407ec681f3Smrg enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */ 4417ec681f3Smrg bool preserve_null; /* SEG_ADD, SEG_SUB */ 4427ec681f3Smrg enum bi_extend extend; /* LOAD, IMUL */ 4437ec681f3Smrg }; 4447ec681f3Smrg 4457ec681f3Smrg struct { 4467ec681f3Smrg enum bi_sample sample; /* VAR_TEX, LD_VAR */ 4477ec681f3Smrg enum bi_update update; /* VAR_TEX, LD_VAR */ 4487ec681f3Smrg enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */ 4497ec681f3Smrg bool skip; /* VAR_TEX, TEXS, TEXC */ 4507ec681f3Smrg bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */ 4517ec681f3Smrg }; 4527ec681f3Smrg 4537ec681f3Smrg /* Maximum size, for hashing */ 4547ec681f3Smrg unsigned flags[5]; 4557ec681f3Smrg 4567ec681f3Smrg struct { 4577ec681f3Smrg enum bi_subgroup subgroup; /* WMASK, CLPER */ 4587ec681f3Smrg enum bi_inactive_result inactive_result; /* CLPER */ 4597ec681f3Smrg enum bi_lane_op lane_op; /* CLPER */ 4607ec681f3Smrg }; 4617ec681f3Smrg 4627ec681f3Smrg struct { 4637ec681f3Smrg bool z; /* ZS_EMIT */ 4647ec681f3Smrg bool stencil; /* ZS_EMIT */ 4657ec681f3Smrg }; 4667ec681f3Smrg 4677ec681f3Smrg struct { 4687ec681f3Smrg bool h; /* VN_ASST1.f16 */ 4697ec681f3Smrg bool l; /* VN_ASST1.f16 */ 4707ec681f3Smrg }; 4717ec681f3Smrg 4727ec681f3Smrg struct { 4737ec681f3Smrg bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */ 4747ec681f3Smrg bool result_word; 4757ec681f3Smrg }; 4767ec681f3Smrg 4777ec681f3Smrg struct { 4787ec681f3Smrg bool sqrt; /* FREXPM */ 4797ec681f3Smrg bool log; /* FREXPM */ 4807ec681f3Smrg }; 4817ec681f3Smrg 4827ec681f3Smrg struct { 4837ec681f3Smrg enum bi_mode mode; /* FLOG_TABLE */ 4847ec681f3Smrg enum bi_precision precision; /* FLOG_TABLE */ 4857ec681f3Smrg bool divzero; /* FRSQ_APPROX, FRSQ */ 4867ec681f3Smrg }; 4877ec681f3Smrg }; 4887ec681f3Smrg} bi_instr; 4897ec681f3Smrg 4907ec681f3Smrg/* Represents the assignment of slots for a given bi_tuple */ 4917ec681f3Smrg 4927ec681f3Smrgtypedef struct { 4937ec681f3Smrg /* Register to assign to each slot */ 4947ec681f3Smrg unsigned slot[4]; 4957ec681f3Smrg 4967ec681f3Smrg /* Read slots can be disabled */ 4977ec681f3Smrg bool enabled[2]; 4987ec681f3Smrg 4997ec681f3Smrg /* Configuration for slots 2/3 */ 5007ec681f3Smrg struct bifrost_reg_ctrl_23 slot23; 5017ec681f3Smrg 5027ec681f3Smrg /* Fast-Access-Uniform RAM index */ 5037ec681f3Smrg uint8_t fau_idx; 5047ec681f3Smrg 5057ec681f3Smrg /* Whether writes are actually for the last instruction */ 5067ec681f3Smrg bool first_instruction; 5077ec681f3Smrg} bi_registers; 5087ec681f3Smrg 5097ec681f3Smrg/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled, 5107ec681f3Smrg * leave it NULL; the emitter will fill in a nop. Instructions reference 5117ec681f3Smrg * registers via slots which are assigned per tuple. 5127ec681f3Smrg */ 5137ec681f3Smrg 5147ec681f3Smrgtypedef struct { 5157ec681f3Smrg uint8_t fau_idx; 5167ec681f3Smrg bi_registers regs; 5177ec681f3Smrg bi_instr *fma; 5187ec681f3Smrg bi_instr *add; 5197ec681f3Smrg} bi_tuple; 5207ec681f3Smrg 5217ec681f3Smrgstruct bi_block; 5227ec681f3Smrg 5237ec681f3Smrgtypedef struct { 5247ec681f3Smrg struct list_head link; 5257ec681f3Smrg 5267ec681f3Smrg /* Link back up for branch calculations */ 5277ec681f3Smrg struct bi_block *block; 5287ec681f3Smrg 5297ec681f3Smrg /* Architectural limit of 8 tuples/clause */ 5307ec681f3Smrg unsigned tuple_count; 5317ec681f3Smrg bi_tuple tuples[8]; 5327ec681f3Smrg 5337ec681f3Smrg /* For scoreboarding -- the clause ID (this is not globally unique!) 5347ec681f3Smrg * and its dependencies in terms of other clauses, computed during 5357ec681f3Smrg * scheduling and used when emitting code. Dependencies expressed as a 5367ec681f3Smrg * bitfield matching the hardware, except shifted by a clause (the 5377ec681f3Smrg * shift back to the ISA's off-by-one encoding is worked out when 5387ec681f3Smrg * emitting clauses) */ 5397ec681f3Smrg unsigned scoreboard_id; 5407ec681f3Smrg uint8_t dependencies; 5417ec681f3Smrg 5427ec681f3Smrg /* See ISA header for description */ 5437ec681f3Smrg enum bifrost_flow flow_control; 5447ec681f3Smrg 5457ec681f3Smrg /* Can we prefetch the next clause? Usually it makes sense, except for 5467ec681f3Smrg * clauses ending in unconditional branches */ 5477ec681f3Smrg bool next_clause_prefetch; 5487ec681f3Smrg 5497ec681f3Smrg /* Assigned data register */ 5507ec681f3Smrg unsigned staging_register; 5517ec681f3Smrg 5527ec681f3Smrg /* Corresponds to the usual bit but shifted by a clause */ 5537ec681f3Smrg bool staging_barrier; 5547ec681f3Smrg 5557ec681f3Smrg /* Constants read by this clause. ISA limit. Must satisfy: 5567ec681f3Smrg * 5577ec681f3Smrg * constant_count + tuple_count <= 13 5587ec681f3Smrg * 5597ec681f3Smrg * Also implicitly constant_count <= tuple_count since a tuple only 5607ec681f3Smrg * reads a single constant. 5617ec681f3Smrg */ 5627ec681f3Smrg uint64_t constants[8]; 5637ec681f3Smrg unsigned constant_count; 5647ec681f3Smrg 5657ec681f3Smrg /* Index of a constant to be PC-relative */ 5667ec681f3Smrg unsigned pcrel_idx; 5677ec681f3Smrg 5687ec681f3Smrg /* Branches encode a constant offset relative to the program counter 5697ec681f3Smrg * with some magic flags. By convention, if there is a branch, its 5707ec681f3Smrg * constant will be last. Set this flag to indicate this is required. 5717ec681f3Smrg */ 5727ec681f3Smrg bool branch_constant; 5737ec681f3Smrg 5747ec681f3Smrg /* Unique in a clause */ 5757ec681f3Smrg enum bifrost_message_type message_type; 5767ec681f3Smrg bi_instr *message; 5777ec681f3Smrg 5787ec681f3Smrg /* Discard helper threads */ 5797ec681f3Smrg bool td; 5807ec681f3Smrg} bi_clause; 5817ec681f3Smrg 5827ec681f3Smrgtypedef struct bi_block { 5837ec681f3Smrg /* Link to next block. Must be first for mir_get_block */ 5847ec681f3Smrg struct list_head link; 5857ec681f3Smrg 5867ec681f3Smrg /* List of instructions emitted for the current block */ 5877ec681f3Smrg struct list_head instructions; 5887ec681f3Smrg 5897ec681f3Smrg /* Index of the block in source order */ 5907ec681f3Smrg unsigned name; 5917ec681f3Smrg 5927ec681f3Smrg /* Control flow graph */ 5937ec681f3Smrg struct bi_block *successors[2]; 5947ec681f3Smrg struct set *predecessors; 5957ec681f3Smrg bool unconditional_jumps; 5967ec681f3Smrg 5977ec681f3Smrg /* Per 32-bit word live masks for the block indexed by node */ 5987ec681f3Smrg uint8_t *live_in; 5997ec681f3Smrg uint8_t *live_out; 6007ec681f3Smrg 6017ec681f3Smrg /* If true, uses clauses; if false, uses instructions */ 6027ec681f3Smrg bool scheduled; 6037ec681f3Smrg struct list_head clauses; /* list of bi_clause */ 6047ec681f3Smrg 6057ec681f3Smrg /* Post-RA liveness */ 6067ec681f3Smrg uint64_t reg_live_in, reg_live_out; 6077ec681f3Smrg 6087ec681f3Smrg /* Flags available for pass-internal use */ 6097ec681f3Smrg uint8_t pass_flags; 6107ec681f3Smrg} bi_block; 6117ec681f3Smrg 6127ec681f3Smrgtypedef struct { 6137ec681f3Smrg const struct panfrost_compile_inputs *inputs; 6147ec681f3Smrg nir_shader *nir; 6157ec681f3Smrg struct pan_shader_info *info; 6167ec681f3Smrg gl_shader_stage stage; 6177ec681f3Smrg struct list_head blocks; /* list of bi_block */ 6187ec681f3Smrg struct hash_table_u64 *sysval_to_id; 6197ec681f3Smrg uint32_t quirks; 6207ec681f3Smrg unsigned arch; 6217ec681f3Smrg 6227ec681f3Smrg /* During NIR->BIR */ 6237ec681f3Smrg bi_block *current_block; 6247ec681f3Smrg bi_block *after_block; 6257ec681f3Smrg bi_block *break_block; 6267ec681f3Smrg bi_block *continue_block; 6277ec681f3Smrg bool emitted_atest; 6287ec681f3Smrg 6297ec681f3Smrg /* For creating temporaries */ 6307ec681f3Smrg unsigned ssa_alloc; 6317ec681f3Smrg unsigned reg_alloc; 6327ec681f3Smrg 6337ec681f3Smrg /* Analysis results */ 6347ec681f3Smrg bool has_liveness; 6357ec681f3Smrg 6367ec681f3Smrg /* Mask of UBOs that need to be uploaded */ 6377ec681f3Smrg uint32_t ubo_mask; 6387ec681f3Smrg 6397ec681f3Smrg /* Stats for shader-db */ 6407ec681f3Smrg unsigned instruction_count; 6417ec681f3Smrg unsigned loop_count; 6427ec681f3Smrg unsigned spills; 6437ec681f3Smrg unsigned fills; 6447ec681f3Smrg} bi_context; 6457ec681f3Smrg 6467ec681f3Smrgstatic inline void 6477ec681f3Smrgbi_remove_instruction(bi_instr *ins) 6487ec681f3Smrg{ 6497ec681f3Smrg list_del(&ins->link); 6507ec681f3Smrg} 6517ec681f3Smrg 6527ec681f3Smrgenum bir_fau { 6537ec681f3Smrg BIR_FAU_ZERO = 0, 6547ec681f3Smrg BIR_FAU_LANE_ID = 1, 6557ec681f3Smrg BIR_FAU_WARP_ID = 2, 6567ec681f3Smrg BIR_FAU_CORE_ID = 3, 6577ec681f3Smrg BIR_FAU_FB_EXTENT = 4, 6587ec681f3Smrg BIR_FAU_ATEST_PARAM = 5, 6597ec681f3Smrg BIR_FAU_SAMPLE_POS_ARRAY = 6, 6607ec681f3Smrg BIR_FAU_BLEND_0 = 8, 6617ec681f3Smrg /* blend descs 1 - 7 */ 6627ec681f3Smrg BIR_FAU_TYPE_MASK = 15, 6637ec681f3Smrg 6647ec681f3Smrg /* Valhall only */ 6657ec681f3Smrg BIR_FAU_TLS_PTR = 16, 6667ec681f3Smrg BIR_FAU_WLS_PTR = 17, 6677ec681f3Smrg BIR_FAU_PROGRAM_COUNTER = 18, 6687ec681f3Smrg 6697ec681f3Smrg BIR_FAU_UNIFORM = (1 << 7), 6707ec681f3Smrg /* Look up table on Valhall */ 6717ec681f3Smrg BIR_FAU_IMMEDIATE = (1 << 8), 6727ec681f3Smrg 6737ec681f3Smrg}; 6747ec681f3Smrg 6757ec681f3Smrgstatic inline bi_index 6767ec681f3Smrgbi_fau(enum bir_fau value, bool hi) 6777ec681f3Smrg{ 6787ec681f3Smrg return (bi_index) { 6797ec681f3Smrg .type = BI_INDEX_FAU, 6807ec681f3Smrg .value = value, 6817ec681f3Smrg .swizzle = BI_SWIZZLE_H01, 6827ec681f3Smrg .offset = hi ? 1 : 0 6837ec681f3Smrg }; 6847ec681f3Smrg} 6857ec681f3Smrg 6867ec681f3Smrgstatic inline unsigned 6877ec681f3Smrgbi_max_temp(bi_context *ctx) 6887ec681f3Smrg{ 6897ec681f3Smrg return (MAX2(ctx->reg_alloc, ctx->ssa_alloc) + 2) << 1; 6907ec681f3Smrg} 6917ec681f3Smrg 6927ec681f3Smrgstatic inline bi_index 6937ec681f3Smrgbi_temp(bi_context *ctx) 6947ec681f3Smrg{ 6957ec681f3Smrg return bi_get_index(ctx->ssa_alloc++, false, 0); 6967ec681f3Smrg} 6977ec681f3Smrg 6987ec681f3Smrgstatic inline bi_index 6997ec681f3Smrgbi_temp_reg(bi_context *ctx) 7007ec681f3Smrg{ 7017ec681f3Smrg return bi_get_index(ctx->reg_alloc++, true, 0); 7027ec681f3Smrg} 7037ec681f3Smrg 7047ec681f3Smrg/* NIR booleans are 1-bit (0/1). For now, backend IR booleans are N-bit 7057ec681f3Smrg * (0/~0) where N depends on the context. This requires us to sign-extend 7067ec681f3Smrg * when converting constants from NIR to the backend IR. 7077ec681f3Smrg */ 7087ec681f3Smrgstatic inline uint32_t 7097ec681f3Smrgbi_extend_constant(uint32_t constant, unsigned bit_size) 7107ec681f3Smrg{ 7117ec681f3Smrg if (bit_size == 1 && constant != 0) 7127ec681f3Smrg return ~0; 7137ec681f3Smrg else 7147ec681f3Smrg return constant; 7157ec681f3Smrg} 7167ec681f3Smrg 7177ec681f3Smrg/* Inline constants automatically, will be lowered out by bi_lower_fau where a 7187ec681f3Smrg * constant is not allowed. load_const_to_scalar gaurantees that this makes 7197ec681f3Smrg * sense */ 7207ec681f3Smrg 7217ec681f3Smrgstatic inline bi_index 7227ec681f3Smrgbi_src_index(nir_src *src) 7237ec681f3Smrg{ 7247ec681f3Smrg if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) { 7257ec681f3Smrg uint32_t v = nir_src_as_uint(*src); 7267ec681f3Smrg 7277ec681f3Smrg return bi_imm_u32(bi_extend_constant(v, nir_src_bit_size(*src))); 7287ec681f3Smrg } else if (src->is_ssa) { 7297ec681f3Smrg return bi_get_index(src->ssa->index, false, 0); 7307ec681f3Smrg } else { 7317ec681f3Smrg assert(!src->reg.indirect); 7327ec681f3Smrg return bi_get_index(src->reg.reg->index, true, 0); 7337ec681f3Smrg } 7347ec681f3Smrg} 7357ec681f3Smrg 7367ec681f3Smrgstatic inline bi_index 7377ec681f3Smrgbi_dest_index(nir_dest *dst) 7387ec681f3Smrg{ 7397ec681f3Smrg if (dst->is_ssa) 7407ec681f3Smrg return bi_get_index(dst->ssa.index, false, 0); 7417ec681f3Smrg else { 7427ec681f3Smrg assert(!dst->reg.indirect); 7437ec681f3Smrg return bi_get_index(dst->reg.reg->index, true, 0); 7447ec681f3Smrg } 7457ec681f3Smrg} 7467ec681f3Smrg 7477ec681f3Smrgstatic inline unsigned 7487ec681f3Smrgbi_get_node(bi_index index) 7497ec681f3Smrg{ 7507ec681f3Smrg if (bi_is_null(index) || index.type != BI_INDEX_NORMAL) 7517ec681f3Smrg return ~0; 7527ec681f3Smrg else 7537ec681f3Smrg return (index.value << 1) | index.reg; 7547ec681f3Smrg} 7557ec681f3Smrg 7567ec681f3Smrgstatic inline bi_index 7577ec681f3Smrgbi_node_to_index(unsigned node, unsigned node_count) 7587ec681f3Smrg{ 7597ec681f3Smrg assert(node < node_count); 7607ec681f3Smrg assert(node_count < ~0); 7617ec681f3Smrg 7627ec681f3Smrg return bi_get_index(node >> 1, node & PAN_IS_REG, 0); 7637ec681f3Smrg} 7647ec681f3Smrg 7657ec681f3Smrg/* Iterators for Bifrost IR */ 7667ec681f3Smrg 7677ec681f3Smrg#define bi_foreach_block(ctx, v) \ 7687ec681f3Smrg list_for_each_entry(bi_block, v, &ctx->blocks, link) 7697ec681f3Smrg 7707ec681f3Smrg#define bi_foreach_block_rev(ctx, v) \ 7717ec681f3Smrg list_for_each_entry_rev(bi_block, v, &ctx->blocks, link) 7727ec681f3Smrg 7737ec681f3Smrg#define bi_foreach_block_from(ctx, from, v) \ 7747ec681f3Smrg list_for_each_entry_from(bi_block, v, from, &ctx->blocks, link) 7757ec681f3Smrg 7767ec681f3Smrg#define bi_foreach_block_from_rev(ctx, from, v) \ 7777ec681f3Smrg list_for_each_entry_from_rev(bi_block, v, from, &ctx->blocks, link) 7787ec681f3Smrg 7797ec681f3Smrg#define bi_foreach_instr_in_block(block, v) \ 7807ec681f3Smrg list_for_each_entry(bi_instr, v, &(block)->instructions, link) 7817ec681f3Smrg 7827ec681f3Smrg#define bi_foreach_instr_in_block_rev(block, v) \ 7837ec681f3Smrg list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link) 7847ec681f3Smrg 7857ec681f3Smrg#define bi_foreach_instr_in_block_safe(block, v) \ 7867ec681f3Smrg list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link) 7877ec681f3Smrg 7887ec681f3Smrg#define bi_foreach_instr_in_block_safe_rev(block, v) \ 7897ec681f3Smrg list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link) 7907ec681f3Smrg 7917ec681f3Smrg#define bi_foreach_instr_in_block_from(block, v, from) \ 7927ec681f3Smrg list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link) 7937ec681f3Smrg 7947ec681f3Smrg#define bi_foreach_instr_in_block_from_rev(block, v, from) \ 7957ec681f3Smrg list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link) 7967ec681f3Smrg 7977ec681f3Smrg#define bi_foreach_clause_in_block(block, v) \ 7987ec681f3Smrg list_for_each_entry(bi_clause, v, &(block)->clauses, link) 7997ec681f3Smrg 8007ec681f3Smrg#define bi_foreach_clause_in_block_rev(block, v) \ 8017ec681f3Smrg list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link) 8027ec681f3Smrg 8037ec681f3Smrg#define bi_foreach_clause_in_block_safe(block, v) \ 8047ec681f3Smrg list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link) 8057ec681f3Smrg 8067ec681f3Smrg#define bi_foreach_clause_in_block_from(block, v, from) \ 8077ec681f3Smrg list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link) 8087ec681f3Smrg 8097ec681f3Smrg#define bi_foreach_clause_in_block_from_rev(block, v, from) \ 8107ec681f3Smrg list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link) 8117ec681f3Smrg 8127ec681f3Smrg#define bi_foreach_instr_global(ctx, v) \ 8137ec681f3Smrg bi_foreach_block(ctx, v_block) \ 8147ec681f3Smrg bi_foreach_instr_in_block(v_block, v) 8157ec681f3Smrg 8167ec681f3Smrg#define bi_foreach_instr_global_rev(ctx, v) \ 8177ec681f3Smrg bi_foreach_block_rev(ctx, v_block) \ 8187ec681f3Smrg bi_foreach_instr_in_block_rev(v_block, v) 8197ec681f3Smrg 8207ec681f3Smrg#define bi_foreach_instr_global_safe(ctx, v) \ 8217ec681f3Smrg bi_foreach_block(ctx, v_block) \ 8227ec681f3Smrg bi_foreach_instr_in_block_safe(v_block, v) 8237ec681f3Smrg 8247ec681f3Smrg#define bi_foreach_instr_global_rev_safe(ctx, v) \ 8257ec681f3Smrg bi_foreach_block_rev(ctx, v_block) \ 8267ec681f3Smrg bi_foreach_instr_in_block_rev_safe(v_block, v) 8277ec681f3Smrg 8287ec681f3Smrg#define bi_foreach_instr_in_tuple(tuple, v) \ 8297ec681f3Smrg for (bi_instr *v = (tuple)->fma ?: (tuple)->add; \ 8307ec681f3Smrg v != NULL; \ 8317ec681f3Smrg v = (v == (tuple)->add) ? NULL : (tuple)->add) 8327ec681f3Smrg 8337ec681f3Smrg#define bi_foreach_successor(blk, v) \ 8347ec681f3Smrg bi_block *v; \ 8357ec681f3Smrg bi_block **_v; \ 8367ec681f3Smrg for (_v = &blk->successors[0], \ 8377ec681f3Smrg v = *_v; \ 8387ec681f3Smrg v != NULL && _v < &blk->successors[2]; \ 8397ec681f3Smrg _v++, v = *_v) \ 8407ec681f3Smrg 8417ec681f3Smrg/* Based on set_foreach, expanded with automatic type casts */ 8427ec681f3Smrg 8437ec681f3Smrg#define bi_foreach_predecessor(blk, v) \ 8447ec681f3Smrg struct set_entry *_entry_##v; \ 8457ec681f3Smrg bi_block *v; \ 8467ec681f3Smrg for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \ 8477ec681f3Smrg v = (bi_block *) (_entry_##v ? _entry_##v->key : NULL); \ 8487ec681f3Smrg _entry_##v != NULL; \ 8497ec681f3Smrg _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \ 8507ec681f3Smrg v = (bi_block *) (_entry_##v ? _entry_##v->key : NULL)) 8517ec681f3Smrg 8527ec681f3Smrg#define bi_foreach_src(ins, v) \ 8537ec681f3Smrg for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v) 8547ec681f3Smrg 8557ec681f3Smrg#define bi_foreach_dest(ins, v) \ 8567ec681f3Smrg for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v) 8577ec681f3Smrg 8587ec681f3Smrg#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \ 8597ec681f3Smrg bi_foreach_instr_in_tuple(tuple, ins) \ 8607ec681f3Smrg bi_foreach_src(ins, s) 8617ec681f3Smrg 8627ec681f3Smrgstatic inline bi_instr * 8637ec681f3Smrgbi_prev_op(bi_instr *ins) 8647ec681f3Smrg{ 8657ec681f3Smrg return list_last_entry(&(ins->link), bi_instr, link); 8667ec681f3Smrg} 8677ec681f3Smrg 8687ec681f3Smrgstatic inline bi_instr * 8697ec681f3Smrgbi_next_op(bi_instr *ins) 8707ec681f3Smrg{ 8717ec681f3Smrg return list_first_entry(&(ins->link), bi_instr, link); 8727ec681f3Smrg} 8737ec681f3Smrg 8747ec681f3Smrgstatic inline bi_block * 8757ec681f3Smrgbi_next_block(bi_block *block) 8767ec681f3Smrg{ 8777ec681f3Smrg return list_first_entry(&(block->link), bi_block, link); 8787ec681f3Smrg} 8797ec681f3Smrg 8807ec681f3Smrgstatic inline bi_block * 8817ec681f3Smrgbi_entry_block(bi_context *ctx) 8827ec681f3Smrg{ 8837ec681f3Smrg return list_first_entry(&ctx->blocks, bi_block, link); 8847ec681f3Smrg} 8857ec681f3Smrg 8867ec681f3Smrg/* BIR manipulation */ 8877ec681f3Smrg 8887ec681f3Smrgbool bi_has_arg(const bi_instr *ins, bi_index arg); 8897ec681f3Smrgunsigned bi_count_read_registers(const bi_instr *ins, unsigned src); 8907ec681f3Smrgunsigned bi_count_write_registers(const bi_instr *ins, unsigned dest); 8917ec681f3Smrgbool bi_is_regfmt_16(enum bi_register_format fmt); 8927ec681f3Smrgunsigned bi_writemask(const bi_instr *ins, unsigned dest); 8937ec681f3Smrgbi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause); 8947ec681f3Smrgbool bi_side_effects(enum bi_opcode op); 8957ec681f3Smrgbool bi_reconverge_branches(bi_block *block); 8967ec681f3Smrg 8977ec681f3Smrgvoid bi_print_instr(const bi_instr *I, FILE *fp); 8987ec681f3Smrgvoid bi_print_slots(bi_registers *regs, FILE *fp); 8997ec681f3Smrgvoid bi_print_tuple(bi_tuple *tuple, FILE *fp); 9007ec681f3Smrgvoid bi_print_clause(bi_clause *clause, FILE *fp); 9017ec681f3Smrgvoid bi_print_block(bi_block *block, FILE *fp); 9027ec681f3Smrgvoid bi_print_shader(bi_context *ctx, FILE *fp); 9037ec681f3Smrg 9047ec681f3Smrg/* BIR passes */ 9057ec681f3Smrg 9067ec681f3Smrgvoid bi_analyze_helper_terminate(bi_context *ctx); 9077ec681f3Smrgvoid bi_analyze_helper_requirements(bi_context *ctx); 9087ec681f3Smrgvoid bi_opt_copy_prop(bi_context *ctx); 9097ec681f3Smrgvoid bi_opt_cse(bi_context *ctx); 9107ec681f3Smrgvoid bi_opt_mod_prop_forward(bi_context *ctx); 9117ec681f3Smrgvoid bi_opt_mod_prop_backward(bi_context *ctx); 9127ec681f3Smrgvoid bi_opt_dead_code_eliminate(bi_context *ctx); 9137ec681f3Smrgvoid bi_opt_dce_post_ra(bi_context *ctx); 9147ec681f3Smrgvoid bi_opt_push_ubo(bi_context *ctx); 9157ec681f3Smrgvoid bi_lower_swizzle(bi_context *ctx); 9167ec681f3Smrgvoid bi_lower_fau(bi_context *ctx); 9177ec681f3Smrgvoid bi_assign_scoreboard(bi_context *ctx); 9187ec681f3Smrgvoid bi_register_allocate(bi_context *ctx); 9197ec681f3Smrg 9207ec681f3Smrgvoid bi_lower_opt_instruction(bi_instr *I); 9217ec681f3Smrg 9227ec681f3Smrgvoid bi_schedule(bi_context *ctx); 9237ec681f3Smrgbool bi_can_fma(bi_instr *ins); 9247ec681f3Smrgbool bi_can_add(bi_instr *ins); 9257ec681f3Smrgbool bi_must_message(bi_instr *ins); 9267ec681f3Smrgbool bi_reads_zero(bi_instr *ins); 9277ec681f3Smrgbool bi_reads_temps(bi_instr *ins, unsigned src); 9287ec681f3Smrgbool bi_reads_t(bi_instr *ins, unsigned src); 9297ec681f3Smrg 9307ec681f3Smrg#ifndef NDEBUG 9317ec681f3Smrgbool bi_validate_initialization(bi_context *ctx); 9327ec681f3Smrgvoid bi_validate(bi_context *ctx, const char *after_str); 9337ec681f3Smrg#else 9347ec681f3Smrgstatic inline bool bi_validate_initialization(UNUSED bi_context *ctx) { return true; } 9357ec681f3Smrgstatic inline void bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) { return; } 9367ec681f3Smrg#endif 9377ec681f3Smrg 9387ec681f3Smrguint32_t bi_fold_constant(bi_instr *I, bool *unsupported); 9397ec681f3Smrgvoid bi_opt_constant_fold(bi_context *ctx); 9407ec681f3Smrg 9417ec681f3Smrg/* Liveness */ 9427ec681f3Smrg 9437ec681f3Smrgvoid bi_compute_liveness(bi_context *ctx); 9447ec681f3Smrgvoid bi_liveness_ins_update(uint8_t *live, bi_instr *ins, unsigned max); 9457ec681f3Smrgvoid bi_invalidate_liveness(bi_context *ctx); 9467ec681f3Smrg 9477ec681f3Smrgvoid bi_postra_liveness(bi_context *ctx); 9487ec681f3Smrguint64_t bi_postra_liveness_ins(uint64_t live, bi_instr *ins); 9497ec681f3Smrg 9507ec681f3Smrg/* Layout */ 9517ec681f3Smrg 9527ec681f3Smrgsigned bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target); 9537ec681f3Smrgbool bi_ec0_packed(unsigned tuple_count); 9547ec681f3Smrg 9557ec681f3Smrg/* Check if there are no more instructions starting with a given block, this 9567ec681f3Smrg * needs to recurse in case a shader ends with multiple empty blocks */ 9577ec681f3Smrg 9587ec681f3Smrgstatic inline bool 9597ec681f3Smrgbi_is_terminal_block(bi_block *block) 9607ec681f3Smrg{ 9617ec681f3Smrg return (block == NULL) || 9627ec681f3Smrg (list_is_empty(&block->instructions) && 9637ec681f3Smrg bi_is_terminal_block(block->successors[0]) && 9647ec681f3Smrg bi_is_terminal_block(block->successors[1])); 9657ec681f3Smrg} 9667ec681f3Smrg 9677ec681f3Smrg/* Code emit */ 9687ec681f3Smrg 9697ec681f3Smrg/* Returns the size of the final clause */ 9707ec681f3Smrgunsigned bi_pack(bi_context *ctx, struct util_dynarray *emission); 9717ec681f3Smrg 9727ec681f3Smrgstruct bi_packed_tuple { 9737ec681f3Smrg uint64_t lo; 9747ec681f3Smrg uint64_t hi; 9757ec681f3Smrg}; 9767ec681f3Smrg 9777ec681f3Smrguint8_t bi_pack_literal(enum bi_clause_subword literal); 9787ec681f3Smrg 9797ec681f3Smrguint8_t 9807ec681f3Smrgbi_pack_upper(enum bi_clause_subword upper, 9817ec681f3Smrg struct bi_packed_tuple *tuples, 9827ec681f3Smrg ASSERTED unsigned tuple_count); 9837ec681f3Smrguint64_t 9847ec681f3Smrgbi_pack_tuple_bits(enum bi_clause_subword idx, 9857ec681f3Smrg struct bi_packed_tuple *tuples, 9867ec681f3Smrg ASSERTED unsigned tuple_count, 9877ec681f3Smrg unsigned offset, unsigned nbits); 9887ec681f3Smrg 9897ec681f3Smrguint8_t 9907ec681f3Smrgbi_pack_sync(enum bi_clause_subword t1, 9917ec681f3Smrg enum bi_clause_subword t2, 9927ec681f3Smrg enum bi_clause_subword t3, 9937ec681f3Smrg struct bi_packed_tuple *tuples, 9947ec681f3Smrg ASSERTED unsigned tuple_count, 9957ec681f3Smrg bool z); 9967ec681f3Smrg 9977ec681f3Smrgvoid 9987ec681f3Smrgbi_pack_format(struct util_dynarray *emission, 9997ec681f3Smrg unsigned index, 10007ec681f3Smrg struct bi_packed_tuple *tuples, 10017ec681f3Smrg ASSERTED unsigned tuple_count, 10027ec681f3Smrg uint64_t header, uint64_t ec0, 10037ec681f3Smrg unsigned m0, bool z); 10047ec681f3Smrg 10057ec681f3Smrgunsigned bi_pack_fma(bi_instr *I, 10067ec681f3Smrg enum bifrost_packed_src src0, 10077ec681f3Smrg enum bifrost_packed_src src1, 10087ec681f3Smrg enum bifrost_packed_src src2, 10097ec681f3Smrg enum bifrost_packed_src src3); 10107ec681f3Smrgunsigned bi_pack_add(bi_instr *I, 10117ec681f3Smrg enum bifrost_packed_src src0, 10127ec681f3Smrg enum bifrost_packed_src src1, 10137ec681f3Smrg enum bifrost_packed_src src2, 10147ec681f3Smrg enum bifrost_packed_src src3); 10157ec681f3Smrg 10167ec681f3Smrg/* Like in NIR, for use with the builder */ 10177ec681f3Smrg 10187ec681f3Smrgenum bi_cursor_option { 10197ec681f3Smrg bi_cursor_after_block, 10207ec681f3Smrg bi_cursor_before_instr, 10217ec681f3Smrg bi_cursor_after_instr 10227ec681f3Smrg}; 10237ec681f3Smrg 10247ec681f3Smrgtypedef struct { 10257ec681f3Smrg enum bi_cursor_option option; 10267ec681f3Smrg 10277ec681f3Smrg union { 10287ec681f3Smrg bi_block *block; 10297ec681f3Smrg bi_instr *instr; 10307ec681f3Smrg }; 10317ec681f3Smrg} bi_cursor; 10327ec681f3Smrg 10337ec681f3Smrgstatic inline bi_cursor 10347ec681f3Smrgbi_after_block(bi_block *block) 10357ec681f3Smrg{ 10367ec681f3Smrg return (bi_cursor) { 10377ec681f3Smrg .option = bi_cursor_after_block, 10387ec681f3Smrg .block = block 10397ec681f3Smrg }; 10407ec681f3Smrg} 10417ec681f3Smrg 10427ec681f3Smrgstatic inline bi_cursor 10437ec681f3Smrgbi_before_instr(bi_instr *instr) 10447ec681f3Smrg{ 10457ec681f3Smrg return (bi_cursor) { 10467ec681f3Smrg .option = bi_cursor_before_instr, 10477ec681f3Smrg .instr = instr 10487ec681f3Smrg }; 10497ec681f3Smrg} 10507ec681f3Smrg 10517ec681f3Smrgstatic inline bi_cursor 10527ec681f3Smrgbi_after_instr(bi_instr *instr) 10537ec681f3Smrg{ 10547ec681f3Smrg return (bi_cursor) { 10557ec681f3Smrg .option = bi_cursor_after_instr, 10567ec681f3Smrg .instr = instr 10577ec681f3Smrg }; 10587ec681f3Smrg} 10597ec681f3Smrg 10607ec681f3Smrg/* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause, 10617ec681f3Smrg * in which case there must exist a nonempty penultimate tuple */ 10627ec681f3Smrg 10637ec681f3SmrgATTRIBUTE_RETURNS_NONNULL static inline bi_instr * 10647ec681f3Smrgbi_first_instr_in_tuple(bi_tuple *tuple) 10657ec681f3Smrg{ 10667ec681f3Smrg bi_instr *instr = tuple->fma ?: tuple->add; 10677ec681f3Smrg assert(instr != NULL); 10687ec681f3Smrg return instr; 10697ec681f3Smrg} 10707ec681f3Smrg 10717ec681f3SmrgATTRIBUTE_RETURNS_NONNULL static inline bi_instr * 10727ec681f3Smrgbi_first_instr_in_clause(bi_clause *clause) 10737ec681f3Smrg{ 10747ec681f3Smrg return bi_first_instr_in_tuple(&clause->tuples[0]); 10757ec681f3Smrg} 10767ec681f3Smrg 10777ec681f3SmrgATTRIBUTE_RETURNS_NONNULL static inline bi_instr * 10787ec681f3Smrgbi_last_instr_in_clause(bi_clause *clause) 10797ec681f3Smrg{ 10807ec681f3Smrg bi_tuple tuple = clause->tuples[clause->tuple_count - 1]; 10817ec681f3Smrg bi_instr *instr = tuple.add ?: tuple.fma; 10827ec681f3Smrg 10837ec681f3Smrg if (!instr) { 10847ec681f3Smrg assert(clause->tuple_count >= 2); 10857ec681f3Smrg tuple = clause->tuples[clause->tuple_count - 2]; 10867ec681f3Smrg instr = tuple.add ?: tuple.fma; 10877ec681f3Smrg } 10887ec681f3Smrg 10897ec681f3Smrg assert(instr != NULL); 10907ec681f3Smrg return instr; 10917ec681f3Smrg} 10927ec681f3Smrg 10937ec681f3Smrg/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start 10947ec681f3Smrg * (end) of the clause and adding a condition for the clause boundary */ 10957ec681f3Smrg 10967ec681f3Smrg#define bi_foreach_instr_in_clause(block, clause, pos) \ 10977ec681f3Smrg for (bi_instr *pos = LIST_ENTRY(bi_instr, bi_first_instr_in_clause(clause), link); \ 10987ec681f3Smrg (&pos->link != &(block)->instructions) \ 10997ec681f3Smrg && (pos != bi_next_op(bi_last_instr_in_clause(clause))); \ 11007ec681f3Smrg pos = LIST_ENTRY(bi_instr, pos->link.next, link)) 11017ec681f3Smrg 11027ec681f3Smrg#define bi_foreach_instr_in_clause_rev(block, clause, pos) \ 11037ec681f3Smrg for (bi_instr *pos = LIST_ENTRY(bi_instr, bi_last_instr_in_clause(clause), link); \ 11047ec681f3Smrg (&pos->link != &(block)->instructions) \ 11057ec681f3Smrg && pos != bi_prev_op(bi_first_instr_in_clause(clause)); \ 11067ec681f3Smrg pos = LIST_ENTRY(bi_instr, pos->link.prev, link)) 11077ec681f3Smrg 11087ec681f3Smrgstatic inline bi_cursor 11097ec681f3Smrgbi_before_clause(bi_clause *clause) 11107ec681f3Smrg{ 11117ec681f3Smrg return bi_before_instr(bi_first_instr_in_clause(clause)); 11127ec681f3Smrg} 11137ec681f3Smrg 11147ec681f3Smrgstatic inline bi_cursor 11157ec681f3Smrgbi_before_tuple(bi_tuple *tuple) 11167ec681f3Smrg{ 11177ec681f3Smrg return bi_before_instr(bi_first_instr_in_tuple(tuple)); 11187ec681f3Smrg} 11197ec681f3Smrg 11207ec681f3Smrgstatic inline bi_cursor 11217ec681f3Smrgbi_after_clause(bi_clause *clause) 11227ec681f3Smrg{ 11237ec681f3Smrg return bi_after_instr(bi_last_instr_in_clause(clause)); 11247ec681f3Smrg} 11257ec681f3Smrg 11267ec681f3Smrg/* IR builder in terms of cursor infrastructure */ 11277ec681f3Smrg 11287ec681f3Smrgtypedef struct { 11297ec681f3Smrg bi_context *shader; 11307ec681f3Smrg bi_cursor cursor; 11317ec681f3Smrg} bi_builder; 11327ec681f3Smrg 11337ec681f3Smrgstatic inline bi_builder 11347ec681f3Smrgbi_init_builder(bi_context *ctx, bi_cursor cursor) 11357ec681f3Smrg{ 11367ec681f3Smrg return (bi_builder) { 11377ec681f3Smrg .shader = ctx, 11387ec681f3Smrg .cursor = cursor 11397ec681f3Smrg }; 11407ec681f3Smrg} 11417ec681f3Smrg 11427ec681f3Smrg/* Insert an instruction at the cursor and move the cursor */ 11437ec681f3Smrg 11447ec681f3Smrgstatic inline void 11457ec681f3Smrgbi_builder_insert(bi_cursor *cursor, bi_instr *I) 11467ec681f3Smrg{ 11477ec681f3Smrg switch (cursor->option) { 11487ec681f3Smrg case bi_cursor_after_instr: 11497ec681f3Smrg list_add(&I->link, &cursor->instr->link); 11507ec681f3Smrg cursor->instr = I; 11517ec681f3Smrg return; 11527ec681f3Smrg 11537ec681f3Smrg case bi_cursor_after_block: 11547ec681f3Smrg list_addtail(&I->link, &cursor->block->instructions); 11557ec681f3Smrg cursor->option = bi_cursor_after_instr; 11567ec681f3Smrg cursor->instr = I; 11577ec681f3Smrg return; 11587ec681f3Smrg 11597ec681f3Smrg case bi_cursor_before_instr: 11607ec681f3Smrg list_addtail(&I->link, &cursor->instr->link); 11617ec681f3Smrg cursor->option = bi_cursor_after_instr; 11627ec681f3Smrg cursor->instr = I; 11637ec681f3Smrg return; 11647ec681f3Smrg } 11657ec681f3Smrg 11667ec681f3Smrg unreachable("Invalid cursor option"); 11677ec681f3Smrg} 11687ec681f3Smrg 11697ec681f3Smrgstatic inline unsigned 11707ec681f3Smrgbi_word_node(bi_index idx) 11717ec681f3Smrg{ 11727ec681f3Smrg assert(idx.type == BI_INDEX_NORMAL && !idx.reg); 11737ec681f3Smrg return (idx.value << 2) | idx.offset; 11747ec681f3Smrg} 11757ec681f3Smrg 11767ec681f3Smrg/* NIR passes */ 11777ec681f3Smrg 11787ec681f3Smrgbool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes); 11797ec681f3Smrg 11807ec681f3Smrg#endif 1181