17e102996Smaya/* 27e102996Smaya * Copyright (c) 2013 Rob Clark <robdclark@gmail.com> 37e102996Smaya * 47e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a 57e102996Smaya * copy of this software and associated documentation files (the "Software"), 67e102996Smaya * to deal in the Software without restriction, including without limitation 77e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the 97e102996Smaya * Software is furnished to do so, subject to the following conditions: 107e102996Smaya * 117e102996Smaya * The above copyright notice and this permission notice (including the next 127e102996Smaya * paragraph) shall be included in all copies or substantial portions of the 137e102996Smaya * Software. 147e102996Smaya * 157e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207e102996Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217e102996Smaya * SOFTWARE. 227e102996Smaya */ 237e102996Smaya 247e102996Smaya#ifndef IR3_H_ 257e102996Smaya#define IR3_H_ 267e102996Smaya 277e102996Smaya#include <stdbool.h> 287ec681f3Smrg#include <stdint.h> 297e102996Smaya 307e102996Smaya#include "compiler/shader_enums.h" 317e102996Smaya 327e102996Smaya#include "util/bitscan.h" 337e102996Smaya#include "util/list.h" 347ec681f3Smrg#include "util/set.h" 357e102996Smaya#include "util/u_debug.h" 367e102996Smaya 377e102996Smaya#include "instr-a3xx.h" 387e102996Smaya 397e102996Smaya/* low level intermediate representation of an adreno shader program */ 407e102996Smaya 417e102996Smayastruct ir3_compiler; 427e102996Smayastruct ir3; 437e102996Smayastruct ir3_instruction; 447e102996Smayastruct ir3_block; 457e102996Smaya 467e102996Smayastruct ir3_info { 477ec681f3Smrg void *data; /* used internally in ir3 assembler */ 487ec681f3Smrg /* Size in bytes of the shader binary, including NIR constants and 497ec681f3Smrg * padding 507ec681f3Smrg */ 517ec681f3Smrg uint32_t size; 527ec681f3Smrg /* byte offset from start of the shader to the NIR constant data. */ 537ec681f3Smrg uint32_t constant_data_offset; 547ec681f3Smrg /* Size in dwords of the instructions. */ 557ec681f3Smrg uint16_t sizedwords; 567ec681f3Smrg uint16_t instrs_count; /* expanded to account for rpt's */ 577ec681f3Smrg uint16_t nops_count; /* # of nop instructions, including nopN */ 587ec681f3Smrg uint16_t mov_count; 597ec681f3Smrg uint16_t cov_count; 607ec681f3Smrg uint16_t stp_count; 617ec681f3Smrg uint16_t ldp_count; 627ec681f3Smrg /* NOTE: max_reg, etc, does not include registers not touched 637ec681f3Smrg * by the shader (ie. vertex fetched via VFD_DECODE but not 647ec681f3Smrg * touched by shader) 657ec681f3Smrg */ 667ec681f3Smrg int8_t max_reg; /* highest GPR # used by shader */ 677ec681f3Smrg int8_t max_half_reg; 687ec681f3Smrg int16_t max_const; 697ec681f3Smrg /* This is the maximum # of waves that can executed at once in one core, 707ec681f3Smrg * assuming that they are all executing this shader. 717ec681f3Smrg */ 727ec681f3Smrg int8_t max_waves; 737ec681f3Smrg bool double_threadsize; 747ec681f3Smrg bool multi_dword_ldp_stp; 757ec681f3Smrg 767ec681f3Smrg /* number of sync bits: */ 777ec681f3Smrg uint16_t ss, sy; 787ec681f3Smrg 797ec681f3Smrg /* estimate of number of cycles stalled on (ss) */ 807ec681f3Smrg uint16_t sstall; 817ec681f3Smrg 827ec681f3Smrg uint16_t last_baryf; /* instruction # of last varying fetch */ 837ec681f3Smrg 847ec681f3Smrg /* Number of instructions of a given category: */ 857ec681f3Smrg uint16_t instrs_per_cat[8]; 867ec681f3Smrg}; 877ec681f3Smrg 887ec681f3Smrgstruct ir3_merge_set { 897ec681f3Smrg uint16_t preferred_reg; 907ec681f3Smrg uint16_t size; 917ec681f3Smrg uint16_t alignment; 927ec681f3Smrg 937ec681f3Smrg unsigned interval_start; 947ec681f3Smrg unsigned spill_slot; 957ec681f3Smrg 967ec681f3Smrg unsigned regs_count; 977ec681f3Smrg struct ir3_register **regs; 987e102996Smaya}; 997e102996Smaya 1007e102996Smayastruct ir3_register { 1017ec681f3Smrg enum { 1027ec681f3Smrg IR3_REG_CONST = 0x001, 1037ec681f3Smrg IR3_REG_IMMED = 0x002, 1047ec681f3Smrg IR3_REG_HALF = 0x004, 1057ec681f3Smrg /* Shared registers have the same value for all threads when read. 1067ec681f3Smrg * They can only be written when one thread is active (that is, inside 1077ec681f3Smrg * a "getone" block). 1087ec681f3Smrg */ 1097ec681f3Smrg IR3_REG_SHARED = 0x008, 1107ec681f3Smrg IR3_REG_RELATIV = 0x010, 1117ec681f3Smrg IR3_REG_R = 0x020, 1127ec681f3Smrg /* Most instructions, it seems, can do float abs/neg but not 1137ec681f3Smrg * integer. The CP pass needs to know what is intended (int or 1147ec681f3Smrg * float) in order to do the right thing. For this reason the 1157ec681f3Smrg * abs/neg flags are split out into float and int variants. In 1167ec681f3Smrg * addition, .b (bitwise) operations, the negate is actually a 1177ec681f3Smrg * bitwise not, so split that out into a new flag to make it 1187ec681f3Smrg * more clear. 1197ec681f3Smrg */ 1207ec681f3Smrg IR3_REG_FNEG = 0x040, 1217ec681f3Smrg IR3_REG_FABS = 0x080, 1227ec681f3Smrg IR3_REG_SNEG = 0x100, 1237ec681f3Smrg IR3_REG_SABS = 0x200, 1247ec681f3Smrg IR3_REG_BNOT = 0x400, 1257ec681f3Smrg /* (ei) flag, end-input? Set on last bary, presumably to signal 1267ec681f3Smrg * that the shader needs no more input: 1277ec681f3Smrg */ 1287ec681f3Smrg IR3_REG_EI = 0x2000, 1297ec681f3Smrg /* meta-flags, for intermediate stages of IR, ie. 1307ec681f3Smrg * before register assignment is done: 1317ec681f3Smrg */ 1327ec681f3Smrg IR3_REG_SSA = 0x4000, /* 'def' is ptr to assigning destination */ 1337ec681f3Smrg IR3_REG_ARRAY = 0x8000, 1347ec681f3Smrg 1357ec681f3Smrg /* Set on a use whenever the SSA value becomes dead after the current 1367ec681f3Smrg * instruction. 1377ec681f3Smrg */ 1387ec681f3Smrg IR3_REG_KILL = 0x10000, 1397ec681f3Smrg 1407ec681f3Smrg /* Similar to IR3_REG_KILL, except that if there are multiple uses of the 1417ec681f3Smrg * same SSA value in a single instruction, this is only set on the first 1427ec681f3Smrg * use. 1437ec681f3Smrg */ 1447ec681f3Smrg IR3_REG_FIRST_KILL = 0x20000, 1457ec681f3Smrg 1467ec681f3Smrg /* Set when a destination doesn't have any uses and is dead immediately 1477ec681f3Smrg * after the instruction. This can happen even after optimizations for 1487ec681f3Smrg * corner cases such as destinations of atomic instructions. 1497ec681f3Smrg */ 1507ec681f3Smrg IR3_REG_UNUSED = 0x40000, 1517ec681f3Smrg } flags; 1527ec681f3Smrg 1537ec681f3Smrg unsigned name; 1547ec681f3Smrg 1557ec681f3Smrg /* used for cat5 instructions, but also for internal/IR level 1567ec681f3Smrg * tracking of what registers are read/written by an instruction. 1577ec681f3Smrg * wrmask may be a bad name since it is used to represent both 1587ec681f3Smrg * src and dst that touch multiple adjacent registers. 1597ec681f3Smrg */ 1607ec681f3Smrg unsigned wrmask : 16; /* up to vec16 */ 1617ec681f3Smrg 1627ec681f3Smrg /* for relative addressing, 32bits for array size is too small, 1637ec681f3Smrg * but otoh we don't need to deal with disjoint sets, so instead 1647ec681f3Smrg * use a simple size field (number of scalar components). 1657ec681f3Smrg * 1667ec681f3Smrg * Note the size field isn't important for relative const (since 1677ec681f3Smrg * we don't have to do register allocation for constants). 1687ec681f3Smrg */ 1697ec681f3Smrg unsigned size : 16; 1707ec681f3Smrg 1717ec681f3Smrg /* normal registers: 1727ec681f3Smrg * the component is in the low two bits of the reg #, so 1737ec681f3Smrg * rN.x becomes: (N << 2) | x 1747ec681f3Smrg */ 1757ec681f3Smrg uint16_t num; 1767ec681f3Smrg union { 1777ec681f3Smrg /* immediate: */ 1787ec681f3Smrg int32_t iim_val; 1797ec681f3Smrg uint32_t uim_val; 1807ec681f3Smrg float fim_val; 1817ec681f3Smrg /* relative: */ 1827ec681f3Smrg struct { 1837ec681f3Smrg uint16_t id; 1847ec681f3Smrg int16_t offset; 1857ec681f3Smrg uint16_t base; 1867ec681f3Smrg } array; 1877ec681f3Smrg }; 1887ec681f3Smrg 1897ec681f3Smrg /* For IR3_REG_DEST, pointer back to the instruction containing this 1907ec681f3Smrg * register. 1917ec681f3Smrg */ 1927ec681f3Smrg struct ir3_instruction *instr; 1937ec681f3Smrg 1947ec681f3Smrg /* For IR3_REG_SSA, src registers contain ptr back to assigning 1957ec681f3Smrg * instruction. 1967ec681f3Smrg * 1977ec681f3Smrg * For IR3_REG_ARRAY, the pointer is back to the last dependent 1987ec681f3Smrg * array access (although the net effect is the same, it points 1997ec681f3Smrg * back to a previous instruction that we depend on). 2007ec681f3Smrg */ 2017ec681f3Smrg struct ir3_register *def; 2027ec681f3Smrg 2037ec681f3Smrg /* Pointer to another register in the instruction that must share the same 2047ec681f3Smrg * physical register. Each destination can be tied with one source, and 2057ec681f3Smrg * they must have "tied" pointing to each other. 2067ec681f3Smrg */ 2077ec681f3Smrg struct ir3_register *tied; 2087ec681f3Smrg 2097ec681f3Smrg unsigned spill_slot, next_use; 2107ec681f3Smrg 2117ec681f3Smrg unsigned merge_set_offset; 2127ec681f3Smrg struct ir3_merge_set *merge_set; 2137ec681f3Smrg unsigned interval_start, interval_end; 2147e102996Smaya}; 2157e102996Smaya 2167e102996Smaya/* 2177e102996Smaya * Stupid/simple growable array implementation: 2187e102996Smaya */ 2197ec681f3Smrg#define DECLARE_ARRAY(type, name) \ 2207ec681f3Smrg unsigned name##_count, name##_sz; \ 2217ec681f3Smrg type *name; 2227ec681f3Smrg 2237ec681f3Smrg#define array_insert(ctx, arr, ...) \ 2247ec681f3Smrg do { \ 2257ec681f3Smrg if (arr##_count == arr##_sz) { \ 2267ec681f3Smrg arr##_sz = MAX2(2 * arr##_sz, 16); \ 2277ec681f3Smrg arr = reralloc_size(ctx, arr, arr##_sz * sizeof(arr[0])); \ 2287ec681f3Smrg } \ 2297ec681f3Smrg arr[arr##_count++] = __VA_ARGS__; \ 2307ec681f3Smrg } while (0) 2317e102996Smaya 2327e102996Smayastruct ir3_instruction { 2337ec681f3Smrg struct ir3_block *block; 2347ec681f3Smrg opc_t opc; 2357ec681f3Smrg enum { 2367ec681f3Smrg /* (sy) flag is set on first instruction, and after sample 2377ec681f3Smrg * instructions (probably just on RAW hazard). 2387ec681f3Smrg */ 2397ec681f3Smrg IR3_INSTR_SY = 0x001, 2407ec681f3Smrg /* (ss) flag is set on first instruction, and first instruction 2417ec681f3Smrg * to depend on the result of "long" instructions (RAW hazard): 2427ec681f3Smrg * 2437ec681f3Smrg * rcp, rsq, log2, exp2, sin, cos, sqrt 2447ec681f3Smrg * 2457ec681f3Smrg * It seems to synchronize until all in-flight instructions are 2467ec681f3Smrg * completed, for example: 2477ec681f3Smrg * 2487ec681f3Smrg * rsq hr1.w, hr1.w 2497ec681f3Smrg * add.f hr2.z, (neg)hr2.z, hc0.y 2507ec681f3Smrg * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y 2517ec681f3Smrg * rsq hr2.x, hr2.x 2527ec681f3Smrg * (rpt1)nop 2537ec681f3Smrg * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w 2547ec681f3Smrg * nop 2557ec681f3Smrg * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w 2567ec681f3Smrg * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w 2577ec681f3Smrg * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x 2587ec681f3Smrg * 2597ec681f3Smrg * The last mul.f does not have (ss) set, presumably because the 2607ec681f3Smrg * (ss) on the previous instruction does the job. 2617ec681f3Smrg * 2627ec681f3Smrg * The blob driver also seems to set it on WAR hazards, although 2637ec681f3Smrg * not really clear if this is needed or just blob compiler being 2647ec681f3Smrg * sloppy. So far I haven't found a case where removing the (ss) 2657ec681f3Smrg * causes problems for WAR hazard, but I could just be getting 2667ec681f3Smrg * lucky: 2677ec681f3Smrg * 2687ec681f3Smrg * rcp r1.y, r3.y 2697ec681f3Smrg * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z 2707ec681f3Smrg * 2717ec681f3Smrg */ 2727ec681f3Smrg IR3_INSTR_SS = 0x002, 2737ec681f3Smrg /* (jp) flag is set on jump targets: 2747ec681f3Smrg */ 2757ec681f3Smrg IR3_INSTR_JP = 0x004, 2767ec681f3Smrg IR3_INSTR_UL = 0x008, 2777ec681f3Smrg IR3_INSTR_3D = 0x010, 2787ec681f3Smrg IR3_INSTR_A = 0x020, 2797ec681f3Smrg IR3_INSTR_O = 0x040, 2807ec681f3Smrg IR3_INSTR_P = 0x080, 2817ec681f3Smrg IR3_INSTR_S = 0x100, 2827ec681f3Smrg IR3_INSTR_S2EN = 0x200, 2837ec681f3Smrg IR3_INSTR_G = 0x400, 2847ec681f3Smrg IR3_INSTR_SAT = 0x800, 2857ec681f3Smrg /* (cat5/cat6) Bindless */ 2867ec681f3Smrg IR3_INSTR_B = 0x1000, 2877ec681f3Smrg /* (cat5/cat6) nonuniform */ 2887ec681f3Smrg IR3_INSTR_NONUNIF = 0x02000, 2897ec681f3Smrg /* (cat5-only) Get some parts of the encoding from a1.x */ 2907ec681f3Smrg IR3_INSTR_A1EN = 0x04000, 2917ec681f3Smrg /* meta-flags, for intermediate stages of IR, ie. 2927ec681f3Smrg * before register assignment is done: 2937ec681f3Smrg */ 2947ec681f3Smrg IR3_INSTR_MARK = 0x08000, 2957ec681f3Smrg IR3_INSTR_UNUSED = 0x10000, 2967ec681f3Smrg } flags; 2977ec681f3Smrg uint8_t repeat; 2987ec681f3Smrg uint8_t nop; 2997e102996Smaya#ifdef DEBUG 3007ec681f3Smrg unsigned srcs_max, dsts_max; 3017e102996Smaya#endif 3027ec681f3Smrg unsigned srcs_count, dsts_count; 3037ec681f3Smrg struct ir3_register **dsts; 3047ec681f3Smrg struct ir3_register **srcs; 3057ec681f3Smrg union { 3067ec681f3Smrg struct { 3077ec681f3Smrg char inv1, inv2; 3087ec681f3Smrg char comp1, comp2; 3097ec681f3Smrg int immed; 3107ec681f3Smrg struct ir3_block *target; 3117ec681f3Smrg const char *target_label; 3127ec681f3Smrg brtype_t brtype; 3137ec681f3Smrg unsigned idx; /* for brac.N */ 3147ec681f3Smrg } cat0; 3157ec681f3Smrg struct { 3167ec681f3Smrg type_t src_type, dst_type; 3177ec681f3Smrg round_t round; 3187ec681f3Smrg } cat1; 3197ec681f3Smrg struct { 3207ec681f3Smrg enum { 3217ec681f3Smrg IR3_COND_LT = 0, 3227ec681f3Smrg IR3_COND_LE = 1, 3237ec681f3Smrg IR3_COND_GT = 2, 3247ec681f3Smrg IR3_COND_GE = 3, 3257ec681f3Smrg IR3_COND_EQ = 4, 3267ec681f3Smrg IR3_COND_NE = 5, 3277ec681f3Smrg } condition; 3287ec681f3Smrg } cat2; 3297ec681f3Smrg struct { 3307ec681f3Smrg unsigned samp, tex; 3317ec681f3Smrg unsigned tex_base : 3; 3327ec681f3Smrg type_t type; 3337ec681f3Smrg } cat5; 3347ec681f3Smrg struct { 3357ec681f3Smrg type_t type; 3367ec681f3Smrg /* TODO remove dst_offset and handle as a ir3_register 3377ec681f3Smrg * which might be IMMED, similar to how src_offset is 3387ec681f3Smrg * handled. 3397ec681f3Smrg */ 3407ec681f3Smrg int dst_offset; 3417ec681f3Smrg int iim_val : 3; /* for ldgb/stgb, # of components */ 3427ec681f3Smrg unsigned d : 3; /* for ldc, component offset */ 3437ec681f3Smrg bool typed : 1; 3447ec681f3Smrg unsigned base : 3; 3457ec681f3Smrg } cat6; 3467ec681f3Smrg struct { 3477ec681f3Smrg unsigned w : 1; /* write */ 3487ec681f3Smrg unsigned r : 1; /* read */ 3497ec681f3Smrg unsigned l : 1; /* local */ 3507ec681f3Smrg unsigned g : 1; /* global */ 3517ec681f3Smrg } cat7; 3527ec681f3Smrg /* for meta-instructions, just used to hold extra data 3537ec681f3Smrg * before instruction scheduling, etc 3547ec681f3Smrg */ 3557ec681f3Smrg struct { 3567ec681f3Smrg int off; /* component/offset */ 3577ec681f3Smrg } split; 3587ec681f3Smrg struct { 3597ec681f3Smrg /* Per-source index back to the entry in the 3607ec681f3Smrg * ir3_shader_variant::outputs table. 3617ec681f3Smrg */ 3627ec681f3Smrg unsigned *outidxs; 3637ec681f3Smrg } end; 3647ec681f3Smrg struct { 3657ec681f3Smrg /* used to temporarily hold reference to nir_phi_instr 3667ec681f3Smrg * until we resolve the phi srcs 3677ec681f3Smrg */ 3687ec681f3Smrg void *nphi; 3697ec681f3Smrg } phi; 3707ec681f3Smrg struct { 3717ec681f3Smrg unsigned samp, tex; 3727ec681f3Smrg unsigned input_offset; 3737ec681f3Smrg unsigned samp_base : 3; 3747ec681f3Smrg unsigned tex_base : 3; 3757ec681f3Smrg } prefetch; 3767ec681f3Smrg struct { 3777ec681f3Smrg /* maps back to entry in ir3_shader_variant::inputs table: */ 3787ec681f3Smrg int inidx; 3797ec681f3Smrg /* for sysvals, identifies the sysval type. Mostly so we can 3807ec681f3Smrg * identify the special cases where a sysval should not be DCE'd 3817ec681f3Smrg * (currently, just pre-fs texture fetch) 3827ec681f3Smrg */ 3837ec681f3Smrg gl_system_value sysval; 3847ec681f3Smrg } input; 3857ec681f3Smrg }; 3867ec681f3Smrg 3877ec681f3Smrg /* For assigning jump offsets, we need instruction's position: */ 3887ec681f3Smrg uint32_t ip; 3897ec681f3Smrg 3907ec681f3Smrg /* used for per-pass extra instruction data. 3917ec681f3Smrg * 3927ec681f3Smrg * TODO we should remove the per-pass data like this and 'use_count' 3937ec681f3Smrg * and do something similar to what RA does w/ ir3_ra_instr_data.. 3947ec681f3Smrg * ie. use the ir3_count_instructions pass, and then use instr->ip 3957ec681f3Smrg * to index into a table of pass-private data. 3967ec681f3Smrg */ 3977ec681f3Smrg void *data; 3987ec681f3Smrg 3997ec681f3Smrg /** 4007ec681f3Smrg * Valid if pass calls ir3_find_ssa_uses().. see foreach_ssa_use() 4017ec681f3Smrg */ 4027ec681f3Smrg struct set *uses; 4037ec681f3Smrg 4047ec681f3Smrg int use_count; /* currently just updated/used by cp */ 4057ec681f3Smrg 4067ec681f3Smrg /* an instruction can reference at most one address register amongst 4077ec681f3Smrg * it's src/dst registers. Beyond that, you need to insert mov's. 4087ec681f3Smrg * 4097ec681f3Smrg * NOTE: do not write this directly, use ir3_instr_set_address() 4107ec681f3Smrg */ 4117ec681f3Smrg struct ir3_register *address; 4127ec681f3Smrg 4137ec681f3Smrg /* Tracking for additional dependent instructions. Used to handle 4147ec681f3Smrg * barriers, WAR hazards for arrays/SSBOs/etc. 4157ec681f3Smrg */ 4167ec681f3Smrg DECLARE_ARRAY(struct ir3_instruction *, deps); 4177ec681f3Smrg 4187ec681f3Smrg /* 4197ec681f3Smrg * From PoV of instruction scheduling, not execution (ie. ignores global/ 4207ec681f3Smrg * local distinction): 4217ec681f3Smrg * shared image atomic SSBO everything 4227ec681f3Smrg * barrier()/ - R/W R/W R/W R/W X 4237ec681f3Smrg * groupMemoryBarrier() 4247ec681f3Smrg * memoryBarrier() 4257ec681f3Smrg * (but only images declared coherent?) 4267ec681f3Smrg * memoryBarrierAtomic() - R/W 4277ec681f3Smrg * memoryBarrierBuffer() - R/W 4287ec681f3Smrg * memoryBarrierImage() - R/W 4297ec681f3Smrg * memoryBarrierShared() - R/W 4307ec681f3Smrg * 4317ec681f3Smrg * TODO I think for SSBO/image/shared, in cases where we can determine 4327ec681f3Smrg * which variable is accessed, we don't need to care about accesses to 4337ec681f3Smrg * different variables (unless declared coherent??) 4347ec681f3Smrg */ 4357ec681f3Smrg enum { 4367ec681f3Smrg IR3_BARRIER_EVERYTHING = 1 << 0, 4377ec681f3Smrg IR3_BARRIER_SHARED_R = 1 << 1, 4387ec681f3Smrg IR3_BARRIER_SHARED_W = 1 << 2, 4397ec681f3Smrg IR3_BARRIER_IMAGE_R = 1 << 3, 4407ec681f3Smrg IR3_BARRIER_IMAGE_W = 1 << 4, 4417ec681f3Smrg IR3_BARRIER_BUFFER_R = 1 << 5, 4427ec681f3Smrg IR3_BARRIER_BUFFER_W = 1 << 6, 4437ec681f3Smrg IR3_BARRIER_ARRAY_R = 1 << 7, 4447ec681f3Smrg IR3_BARRIER_ARRAY_W = 1 << 8, 4457ec681f3Smrg IR3_BARRIER_PRIVATE_R = 1 << 9, 4467ec681f3Smrg IR3_BARRIER_PRIVATE_W = 1 << 10, 4477ec681f3Smrg } barrier_class, 4487ec681f3Smrg barrier_conflict; 4497ec681f3Smrg 4507ec681f3Smrg /* Entry in ir3_block's instruction list: */ 4517ec681f3Smrg struct list_head node; 4527ec681f3Smrg 4537ec681f3Smrg uint32_t serialno; 4547ec681f3Smrg 4557ec681f3Smrg // TODO only computerator/assembler: 4567ec681f3Smrg int line; 4577ec681f3Smrg}; 4587ec681f3Smrg 4597ec681f3Smrgstruct ir3 { 4607ec681f3Smrg struct ir3_compiler *compiler; 4617ec681f3Smrg gl_shader_stage type; 4627ec681f3Smrg 4637ec681f3Smrg DECLARE_ARRAY(struct ir3_instruction *, inputs); 4647ec681f3Smrg 4657ec681f3Smrg /* Track bary.f (and ldlv) instructions.. this is needed in 4667ec681f3Smrg * scheduling to ensure that all varying fetches happen before 4677ec681f3Smrg * any potential kill instructions. The hw gets grumpy if all 4687ec681f3Smrg * threads in a group are killed before the last bary.f gets 4697ec681f3Smrg * a chance to signal end of input (ei). 4707ec681f3Smrg */ 4717ec681f3Smrg DECLARE_ARRAY(struct ir3_instruction *, baryfs); 4727ec681f3Smrg 4737ec681f3Smrg /* Track all indirect instructions (read and write). To avoid 4747ec681f3Smrg * deadlock scenario where an address register gets scheduled, 4757ec681f3Smrg * but other dependent src instructions cannot be scheduled due 4767ec681f3Smrg * to dependency on a *different* address register value, the 4777ec681f3Smrg * scheduler needs to ensure that all dependencies other than 4787ec681f3Smrg * the instruction other than the address register are scheduled 4797ec681f3Smrg * before the one that writes the address register. Having a 4807ec681f3Smrg * convenient list of instructions that reference some address 4817ec681f3Smrg * register simplifies this. 4827ec681f3Smrg */ 4837ec681f3Smrg DECLARE_ARRAY(struct ir3_instruction *, a0_users); 4847ec681f3Smrg 4857ec681f3Smrg /* same for a1.x: */ 4867ec681f3Smrg DECLARE_ARRAY(struct ir3_instruction *, a1_users); 4877ec681f3Smrg 4887ec681f3Smrg /* and same for instructions that consume predicate register: */ 4897ec681f3Smrg DECLARE_ARRAY(struct ir3_instruction *, predicates); 4907ec681f3Smrg 4917ec681f3Smrg /* Track texture sample instructions which need texture state 4927ec681f3Smrg * patched in (for astc-srgb workaround): 4937ec681f3Smrg */ 4947ec681f3Smrg DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); 4957ec681f3Smrg 4967ec681f3Smrg /* List of blocks: */ 4977ec681f3Smrg struct list_head block_list; 4987ec681f3Smrg 4997ec681f3Smrg /* List of ir3_array's: */ 5007ec681f3Smrg struct list_head array_list; 5017e102996Smaya 5027e102996Smaya#ifdef DEBUG 5037ec681f3Smrg unsigned block_count; 5047e102996Smaya#endif 5057ec681f3Smrg unsigned instr_count; 5067e102996Smaya}; 5077e102996Smaya 5087ec681f3Smrgstruct ir3_array { 5097ec681f3Smrg struct list_head node; 5107ec681f3Smrg unsigned length; 5117ec681f3Smrg unsigned id; 5127e102996Smaya 5137ec681f3Smrg struct nir_register *r; 5147e102996Smaya 5157ec681f3Smrg /* To avoid array write's from getting DCE'd, keep track of the 5167ec681f3Smrg * most recent write. Any array access depends on the most 5177ec681f3Smrg * recent write. This way, nothing depends on writes after the 5187ec681f3Smrg * last read. But all the writes that happen before that have 5197ec681f3Smrg * something depending on them 5207ec681f3Smrg */ 5217ec681f3Smrg struct ir3_register *last_write; 5227e102996Smaya 5237ec681f3Smrg /* extra stuff used in RA pass: */ 5247ec681f3Smrg unsigned base; /* base vreg name */ 5257ec681f3Smrg unsigned reg; /* base physical reg */ 5267ec681f3Smrg uint16_t start_ip, end_ip; 5277e102996Smaya 5287ec681f3Smrg /* Indicates if half-precision */ 5297ec681f3Smrg bool half; 5307e102996Smaya 5317ec681f3Smrg bool unused; 5327e102996Smaya}; 5337e102996Smaya 5347ec681f3Smrgstruct ir3_array *ir3_lookup_array(struct ir3 *ir, unsigned id); 5357e102996Smaya 5367ec681f3Smrgenum ir3_branch_type { 5377ec681f3Smrg IR3_BRANCH_COND, /* condition */ 5387ec681f3Smrg IR3_BRANCH_ANY, /* subgroupAny(condition) */ 5397ec681f3Smrg IR3_BRANCH_ALL, /* subgroupAll(condition) */ 5407ec681f3Smrg IR3_BRANCH_GETONE, /* subgroupElect() */ 5417ec681f3Smrg}; 5427e102996Smaya 5437e102996Smayastruct ir3_block { 5447ec681f3Smrg struct list_head node; 5457ec681f3Smrg struct ir3 *shader; 5467ec681f3Smrg 5477ec681f3Smrg const struct nir_block *nblock; 5487ec681f3Smrg 5497ec681f3Smrg struct list_head instr_list; /* list of ir3_instruction */ 5507ec681f3Smrg 5517ec681f3Smrg /* The actual branch condition, if there are two successors */ 5527ec681f3Smrg enum ir3_branch_type brtype; 5537e102996Smaya 5547ec681f3Smrg /* each block has either one or two successors.. in case of two 5557ec681f3Smrg * successors, 'condition' decides which one to follow. A block preceding 5567ec681f3Smrg * an if/else has two successors. 5577ec681f3Smrg * 5587ec681f3Smrg * In some cases the path that the machine actually takes through the 5597ec681f3Smrg * program may not match the per-thread view of the CFG. In particular 5607ec681f3Smrg * this is the case for if/else, where the machine jumps from the end of 5617ec681f3Smrg * the if to the beginning of the else and switches active lanes. While 5627ec681f3Smrg * most things only care about the per-thread view, we need to use the 5637ec681f3Smrg * "physical" view when allocating shared registers. "successors" contains 5647ec681f3Smrg * the per-thread successors, and "physical_successors" contains the 5657ec681f3Smrg * physical successors which includes the fallthrough edge from the if to 5667ec681f3Smrg * the else. 5677ec681f3Smrg */ 5687ec681f3Smrg struct ir3_instruction *condition; 5697ec681f3Smrg struct ir3_block *successors[2]; 5707ec681f3Smrg struct ir3_block *physical_successors[2]; 5717e102996Smaya 5727ec681f3Smrg DECLARE_ARRAY(struct ir3_block *, predecessors); 5737ec681f3Smrg DECLARE_ARRAY(struct ir3_block *, physical_predecessors); 5747e102996Smaya 5757ec681f3Smrg uint16_t start_ip, end_ip; 5767e102996Smaya 5777ec681f3Smrg /* Track instructions which do not write a register but other- 5787ec681f3Smrg * wise must not be discarded (such as kill, stg, etc) 5797ec681f3Smrg */ 5807ec681f3Smrg DECLARE_ARRAY(struct ir3_instruction *, keeps); 5817e102996Smaya 5827ec681f3Smrg /* used for per-pass extra block data. Mainly used right 5837ec681f3Smrg * now in RA step to track livein/liveout. 5847ec681f3Smrg */ 5857ec681f3Smrg void *data; 5867e102996Smaya 5877ec681f3Smrg uint32_t index; 5887e102996Smaya 5897ec681f3Smrg struct ir3_block *imm_dom; 5907ec681f3Smrg DECLARE_ARRAY(struct ir3_block *, dom_children); 5917ec681f3Smrg 5927ec681f3Smrg uint32_t dom_pre_index; 5937ec681f3Smrg uint32_t dom_post_index; 5947ec681f3Smrg 5957ec681f3Smrg uint32_t loop_id; 5967ec681f3Smrg uint32_t loop_depth; 5977e102996Smaya 5987e102996Smaya#ifdef DEBUG 5997ec681f3Smrg uint32_t serialno; 6007e102996Smaya#endif 6017e102996Smaya}; 6027e102996Smaya 6037e102996Smayastatic inline uint32_t 6047e102996Smayablock_id(struct ir3_block *block) 6057e102996Smaya{ 6067e102996Smaya#ifdef DEBUG 6077ec681f3Smrg return block->serialno; 6087e102996Smaya#else 6097ec681f3Smrg return (uint32_t)(unsigned long)block; 6107e102996Smaya#endif 6117e102996Smaya} 6127e102996Smaya 6137ec681f3Smrgstatic inline struct ir3_block * 6147ec681f3Smrgir3_start_block(struct ir3 *ir) 6157ec681f3Smrg{ 6167ec681f3Smrg return list_first_entry(&ir->block_list, struct ir3_block, node); 6177ec681f3Smrg} 6187ec681f3Smrg 6197ec681f3Smrgvoid ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred); 6207ec681f3Smrgvoid ir3_block_add_physical_predecessor(struct ir3_block *block, 6217ec681f3Smrg struct ir3_block *pred); 6227ec681f3Smrgvoid ir3_block_remove_predecessor(struct ir3_block *block, 6237ec681f3Smrg struct ir3_block *pred); 6247ec681f3Smrgvoid ir3_block_remove_physical_predecessor(struct ir3_block *block, 6257ec681f3Smrg struct ir3_block *pred); 6267ec681f3Smrgunsigned ir3_block_get_pred_index(struct ir3_block *block, 6277ec681f3Smrg struct ir3_block *pred); 6287ec681f3Smrg 6297ec681f3Smrgvoid ir3_calc_dominance(struct ir3 *ir); 6307ec681f3Smrgbool ir3_block_dominates(struct ir3_block *a, struct ir3_block *b); 6317ec681f3Smrg 6327ec681f3Smrgstruct ir3_shader_variant; 6337ec681f3Smrg 6347ec681f3Smrgstruct ir3 *ir3_create(struct ir3_compiler *compiler, 6357ec681f3Smrg struct ir3_shader_variant *v); 6367e102996Smayavoid ir3_destroy(struct ir3 *shader); 6377e102996Smaya 6387ec681f3Smrgvoid ir3_collect_info(struct ir3_shader_variant *v); 6397ec681f3Smrgvoid *ir3_alloc(struct ir3 *shader, int sz); 6407ec681f3Smrg 6417ec681f3Smrgunsigned ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler, 6427ec681f3Smrg unsigned reg_count, 6437ec681f3Smrg bool double_threadsize); 6447e102996Smaya 6457ec681f3Smrgunsigned ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v, 6467ec681f3Smrg bool double_threadsize); 6477ec681f3Smrg 6487ec681f3Smrgbool ir3_should_double_threadsize(struct ir3_shader_variant *v, 6497ec681f3Smrg unsigned regs_count); 6507ec681f3Smrg 6517ec681f3Smrgstruct ir3_block *ir3_block_create(struct ir3 *shader); 6527ec681f3Smrg 6537ec681f3Smrgstruct ir3_instruction *ir3_instr_create(struct ir3_block *block, opc_t opc, 6547ec681f3Smrg int ndst, int nsrc); 6557ec681f3Smrgstruct ir3_instruction *ir3_instr_clone(struct ir3_instruction *instr); 6567ec681f3Smrgvoid ir3_instr_add_dep(struct ir3_instruction *instr, 6577ec681f3Smrg struct ir3_instruction *dep); 6587e102996Smayaconst char *ir3_instr_name(struct ir3_instruction *instr); 6597e102996Smaya 6607ec681f3Smrgstruct ir3_register *ir3_src_create(struct ir3_instruction *instr, int num, 6617ec681f3Smrg int flags); 6627ec681f3Smrgstruct ir3_register *ir3_dst_create(struct ir3_instruction *instr, int num, 6637ec681f3Smrg int flags); 6647ec681f3Smrgstruct ir3_register *ir3_reg_clone(struct ir3 *shader, 6657ec681f3Smrg struct ir3_register *reg); 6667ec681f3Smrg 6677ec681f3Smrgstatic inline void 6687ec681f3Smrgir3_reg_tie(struct ir3_register *dst, struct ir3_register *src) 6697ec681f3Smrg{ 6707ec681f3Smrg assert(!dst->tied && !src->tied); 6717ec681f3Smrg dst->tied = src; 6727ec681f3Smrg src->tied = dst; 6737ec681f3Smrg} 6747ec681f3Smrg 6757ec681f3Smrgvoid ir3_reg_set_last_array(struct ir3_instruction *instr, 6767ec681f3Smrg struct ir3_register *reg, 6777ec681f3Smrg struct ir3_register *last_write); 6787e102996Smaya 6797e102996Smayavoid ir3_instr_set_address(struct ir3_instruction *instr, 6807ec681f3Smrg struct ir3_instruction *addr); 6817e102996Smaya 6827ec681f3Smrgstatic inline bool 6837ec681f3Smrgir3_instr_check_mark(struct ir3_instruction *instr) 6847e102996Smaya{ 6857ec681f3Smrg if (instr->flags & IR3_INSTR_MARK) 6867ec681f3Smrg return true; /* already visited */ 6877ec681f3Smrg instr->flags |= IR3_INSTR_MARK; 6887ec681f3Smrg return false; 6897e102996Smaya} 6907e102996Smaya 6917e102996Smayavoid ir3_block_clear_mark(struct ir3_block *block); 6927e102996Smayavoid ir3_clear_mark(struct ir3 *shader); 6937e102996Smaya 6947e102996Smayaunsigned ir3_count_instructions(struct ir3 *ir); 6957ec681f3Smrgunsigned ir3_count_instructions_ra(struct ir3 *ir); 6967e102996Smaya 6977ec681f3Smrg/** 6987ec681f3Smrg * Move 'instr' to just before 'after' 6997ec681f3Smrg */ 7007ec681f3Smrgstatic inline void 7017ec681f3Smrgir3_instr_move_before(struct ir3_instruction *instr, 7027ec681f3Smrg struct ir3_instruction *after) 7037e102996Smaya{ 7047ec681f3Smrg list_delinit(&instr->node); 7057ec681f3Smrg list_addtail(&instr->node, &after->node); 7067e102996Smaya} 7077e102996Smaya 7087ec681f3Smrg/** 7097ec681f3Smrg * Move 'instr' to just after 'before': 7107ec681f3Smrg */ 7117ec681f3Smrgstatic inline void 7127ec681f3Smrgir3_instr_move_after(struct ir3_instruction *instr, 7137ec681f3Smrg struct ir3_instruction *before) 7147ec681f3Smrg{ 7157ec681f3Smrg list_delinit(&instr->node); 7167ec681f3Smrg list_add(&instr->node, &before->node); 7177ec681f3Smrg} 7187e102996Smaya 7197ec681f3Smrg/** 7207ec681f3Smrg * Move 'instr' to the beginning of the block: 7217e102996Smaya */ 7227ec681f3Smrgstatic inline void 7237ec681f3Smrgir3_instr_move_before_block(struct ir3_instruction *instr, 7247ec681f3Smrg struct ir3_block *block) 7257e102996Smaya{ 7267ec681f3Smrg list_delinit(&instr->node); 7277ec681f3Smrg list_add(&instr->node, &block->instr_list); 7287e102996Smaya} 7297e102996Smaya 7307ec681f3Smrgvoid ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps); 7317ec681f3Smrg 7327ec681f3Smrgvoid ir3_set_dst_type(struct ir3_instruction *instr, bool half); 7337ec681f3Smrgvoid ir3_fixup_src_type(struct ir3_instruction *instr); 7347ec681f3Smrg 7357ec681f3Smrgint ir3_flut(struct ir3_register *src_reg); 7367ec681f3Smrg 7377ec681f3Smrgbool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags); 7387ec681f3Smrg 7397ec681f3Smrgbool ir3_valid_immediate(struct ir3_instruction *instr, int32_t immed); 7407ec681f3Smrg 7417ec681f3Smrg#include "util/set.h" 7427ec681f3Smrg#define foreach_ssa_use(__use, __instr) \ 7437ec681f3Smrg for (struct ir3_instruction *__use = (void *)~0; __use && (__instr)->uses; \ 7447ec681f3Smrg __use = NULL) \ 7457ec681f3Smrg set_foreach ((__instr)->uses, __entry) \ 7467ec681f3Smrg if ((__use = (void *)__entry->key)) 7477ec681f3Smrg 7487ec681f3Smrgstatic inline uint32_t 7497ec681f3Smrgreg_num(const struct ir3_register *reg) 7507e102996Smaya{ 7517ec681f3Smrg return reg->num >> 2; 7527e102996Smaya} 7537e102996Smaya 7547ec681f3Smrgstatic inline uint32_t 7557ec681f3Smrgreg_comp(const struct ir3_register *reg) 7567e102996Smaya{ 7577ec681f3Smrg return reg->num & 0x3; 7587e102996Smaya} 7597e102996Smaya 7607ec681f3Smrgstatic inline bool 7617ec681f3Smrgis_flow(struct ir3_instruction *instr) 7627e102996Smaya{ 7637ec681f3Smrg return (opc_cat(instr->opc) == 0); 7647e102996Smaya} 7657e102996Smaya 7667ec681f3Smrgstatic inline bool 7677ec681f3Smrgis_kill_or_demote(struct ir3_instruction *instr) 7687e102996Smaya{ 7697ec681f3Smrg return instr->opc == OPC_KILL || instr->opc == OPC_DEMOTE; 7707e102996Smaya} 7717e102996Smaya 7727ec681f3Smrgstatic inline bool 7737ec681f3Smrgis_nop(struct ir3_instruction *instr) 7747ec681f3Smrg{ 7757ec681f3Smrg return instr->opc == OPC_NOP; 7767ec681f3Smrg} 7777ec681f3Smrg 7787ec681f3Smrgstatic inline bool 7797ec681f3Smrgis_same_type_reg(struct ir3_register *dst, struct ir3_register *src) 7807e102996Smaya{ 7817ec681f3Smrg unsigned dst_type = (dst->flags & IR3_REG_HALF); 7827ec681f3Smrg unsigned src_type = (src->flags & IR3_REG_HALF); 7837ec681f3Smrg 7847ec681f3Smrg /* Treat shared->normal copies as same-type, because they can generally be 7857ec681f3Smrg * folded, but not normal->shared copies. 7867ec681f3Smrg */ 7877ec681f3Smrg if (dst_type != src_type || 7887ec681f3Smrg ((dst->flags & IR3_REG_SHARED) && !(src->flags & IR3_REG_SHARED))) 7897ec681f3Smrg return false; 7907ec681f3Smrg else 7917ec681f3Smrg return true; 7927e102996Smaya} 7937e102996Smaya 7947e102996Smaya/* Is it a non-transformative (ie. not type changing) mov? This can 7957e102996Smaya * also include absneg.s/absneg.f, which for the most part can be 7967e102996Smaya * treated as a mov (single src argument). 7977e102996Smaya */ 7987ec681f3Smrgstatic inline bool 7997ec681f3Smrgis_same_type_mov(struct ir3_instruction *instr) 8007e102996Smaya{ 8017ec681f3Smrg struct ir3_register *dst; 8027ec681f3Smrg 8037ec681f3Smrg switch (instr->opc) { 8047ec681f3Smrg case OPC_MOV: 8057ec681f3Smrg if (instr->cat1.src_type != instr->cat1.dst_type) 8067ec681f3Smrg return false; 8077ec681f3Smrg /* If the type of dest reg and src reg are different, 8087ec681f3Smrg * it shouldn't be considered as same type mov 8097ec681f3Smrg */ 8107ec681f3Smrg if (!is_same_type_reg(instr->dsts[0], instr->srcs[0])) 8117ec681f3Smrg return false; 8127ec681f3Smrg break; 8137ec681f3Smrg case OPC_ABSNEG_F: 8147ec681f3Smrg case OPC_ABSNEG_S: 8157ec681f3Smrg if (instr->flags & IR3_INSTR_SAT) 8167ec681f3Smrg return false; 8177ec681f3Smrg /* If the type of dest reg and src reg are different, 8187ec681f3Smrg * it shouldn't be considered as same type mov 8197ec681f3Smrg */ 8207ec681f3Smrg if (!is_same_type_reg(instr->dsts[0], instr->srcs[0])) 8217ec681f3Smrg return false; 8227ec681f3Smrg break; 8237ec681f3Smrg case OPC_META_PHI: 8247ec681f3Smrg return instr->srcs_count == 1; 8257ec681f3Smrg default: 8267ec681f3Smrg return false; 8277ec681f3Smrg } 8287ec681f3Smrg 8297ec681f3Smrg dst = instr->dsts[0]; 8307ec681f3Smrg 8317ec681f3Smrg /* mov's that write to a0 or p0.x are special: */ 8327ec681f3Smrg if (dst->num == regid(REG_P0, 0)) 8337ec681f3Smrg return false; 8347ec681f3Smrg if (reg_num(dst) == REG_A0) 8357ec681f3Smrg return false; 8367ec681f3Smrg 8377ec681f3Smrg if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) 8387ec681f3Smrg return false; 8397ec681f3Smrg 8407ec681f3Smrg return true; 8417ec681f3Smrg} 8427ec681f3Smrg 8437ec681f3Smrg/* A move from const, which changes size but not type, can also be 8447ec681f3Smrg * folded into dest instruction in some cases. 8457ec681f3Smrg */ 8467ec681f3Smrgstatic inline bool 8477ec681f3Smrgis_const_mov(struct ir3_instruction *instr) 8487ec681f3Smrg{ 8497ec681f3Smrg if (instr->opc != OPC_MOV) 8507ec681f3Smrg return false; 8517ec681f3Smrg 8527ec681f3Smrg if (!(instr->srcs[0]->flags & IR3_REG_CONST)) 8537ec681f3Smrg return false; 8547e102996Smaya 8557ec681f3Smrg type_t src_type = instr->cat1.src_type; 8567ec681f3Smrg type_t dst_type = instr->cat1.dst_type; 8577e102996Smaya 8587ec681f3Smrg return (type_float(src_type) && type_float(dst_type)) || 8597ec681f3Smrg (type_uint(src_type) && type_uint(dst_type)) || 8607ec681f3Smrg (type_sint(src_type) && type_sint(dst_type)); 8617ec681f3Smrg} 8627e102996Smaya 8637ec681f3Smrgstatic inline bool 8647ec681f3Smrgis_alu(struct ir3_instruction *instr) 8657ec681f3Smrg{ 8667ec681f3Smrg return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); 8677ec681f3Smrg} 8687e102996Smaya 8697ec681f3Smrgstatic inline bool 8707ec681f3Smrgis_sfu(struct ir3_instruction *instr) 8717ec681f3Smrg{ 8727ec681f3Smrg return (opc_cat(instr->opc) == 4); 8737ec681f3Smrg} 8747e102996Smaya 8757ec681f3Smrgstatic inline bool 8767ec681f3Smrgis_tex(struct ir3_instruction *instr) 8777ec681f3Smrg{ 8787ec681f3Smrg return (opc_cat(instr->opc) == 5); 8797e102996Smaya} 8807e102996Smaya 8817ec681f3Smrgstatic inline bool 8827ec681f3Smrgis_tex_or_prefetch(struct ir3_instruction *instr) 8837e102996Smaya{ 8847ec681f3Smrg return is_tex(instr) || (instr->opc == OPC_META_TEX_PREFETCH); 8857e102996Smaya} 8867e102996Smaya 8877ec681f3Smrgstatic inline bool 8887ec681f3Smrgis_mem(struct ir3_instruction *instr) 8897e102996Smaya{ 8907ec681f3Smrg return (opc_cat(instr->opc) == 6); 8917e102996Smaya} 8927e102996Smaya 8937ec681f3Smrgstatic inline bool 8947ec681f3Smrgis_barrier(struct ir3_instruction *instr) 8957e102996Smaya{ 8967ec681f3Smrg return (opc_cat(instr->opc) == 7); 8977e102996Smaya} 8987e102996Smaya 8997ec681f3Smrgstatic inline bool 9007ec681f3Smrgis_half(struct ir3_instruction *instr) 9017e102996Smaya{ 9027ec681f3Smrg return !!(instr->dsts[0]->flags & IR3_REG_HALF); 9037e102996Smaya} 9047e102996Smaya 9057ec681f3Smrgstatic inline bool 9067ec681f3Smrgis_shared(struct ir3_instruction *instr) 9077e102996Smaya{ 9087ec681f3Smrg return !!(instr->dsts[0]->flags & IR3_REG_SHARED); 9097e102996Smaya} 9107e102996Smaya 9117e102996Smayastatic inline bool 9127e102996Smayais_store(struct ir3_instruction *instr) 9137e102996Smaya{ 9147ec681f3Smrg /* these instructions, the "destination" register is 9157ec681f3Smrg * actually a source, the address to store to. 9167ec681f3Smrg */ 9177ec681f3Smrg switch (instr->opc) { 9187ec681f3Smrg case OPC_STG: 9197ec681f3Smrg case OPC_STG_A: 9207ec681f3Smrg case OPC_STGB: 9217ec681f3Smrg case OPC_STIB: 9227ec681f3Smrg case OPC_STP: 9237ec681f3Smrg case OPC_STL: 9247ec681f3Smrg case OPC_STLW: 9257ec681f3Smrg case OPC_L2G: 9267ec681f3Smrg case OPC_G2L: 9277ec681f3Smrg return true; 9287ec681f3Smrg default: 9297ec681f3Smrg return false; 9307ec681f3Smrg } 9317e102996Smaya} 9327e102996Smaya 9337ec681f3Smrgstatic inline bool 9347ec681f3Smrgis_load(struct ir3_instruction *instr) 9357ec681f3Smrg{ 9367ec681f3Smrg switch (instr->opc) { 9377ec681f3Smrg case OPC_LDG: 9387ec681f3Smrg case OPC_LDG_A: 9397ec681f3Smrg case OPC_LDGB: 9407ec681f3Smrg case OPC_LDIB: 9417ec681f3Smrg case OPC_LDL: 9427ec681f3Smrg case OPC_LDP: 9437ec681f3Smrg case OPC_L2G: 9447ec681f3Smrg case OPC_LDLW: 9457ec681f3Smrg case OPC_LDC: 9467ec681f3Smrg case OPC_LDLV: 9477ec681f3Smrg /* probably some others too.. */ 9487ec681f3Smrg return true; 9497ec681f3Smrg default: 9507ec681f3Smrg return false; 9517ec681f3Smrg } 9527ec681f3Smrg} 9537ec681f3Smrg 9547ec681f3Smrgstatic inline bool 9557ec681f3Smrgis_input(struct ir3_instruction *instr) 9567ec681f3Smrg{ 9577ec681f3Smrg /* in some cases, ldlv is used to fetch varying without 9587ec681f3Smrg * interpolation.. fortunately inloc is the first src 9597ec681f3Smrg * register in either case 9607ec681f3Smrg */ 9617ec681f3Smrg switch (instr->opc) { 9627ec681f3Smrg case OPC_LDLV: 9637ec681f3Smrg case OPC_BARY_F: 9647ec681f3Smrg return true; 9657ec681f3Smrg default: 9667ec681f3Smrg return false; 9677ec681f3Smrg } 9687ec681f3Smrg} 9697ec681f3Smrg 9707ec681f3Smrgstatic inline bool 9717ec681f3Smrgis_bool(struct ir3_instruction *instr) 9727e102996Smaya{ 9737ec681f3Smrg switch (instr->opc) { 9747ec681f3Smrg case OPC_CMPS_F: 9757ec681f3Smrg case OPC_CMPS_S: 9767ec681f3Smrg case OPC_CMPS_U: 9777ec681f3Smrg return true; 9787ec681f3Smrg default: 9797ec681f3Smrg return false; 9807ec681f3Smrg } 9817e102996Smaya} 9827e102996Smaya 9837ec681f3Smrgstatic inline opc_t 9847ec681f3Smrgcat3_half_opc(opc_t opc) 9857e102996Smaya{ 9867ec681f3Smrg switch (opc) { 9877ec681f3Smrg case OPC_MAD_F32: 9887ec681f3Smrg return OPC_MAD_F16; 9897ec681f3Smrg case OPC_SEL_B32: 9907ec681f3Smrg return OPC_SEL_B16; 9917ec681f3Smrg case OPC_SEL_S32: 9927ec681f3Smrg return OPC_SEL_S16; 9937ec681f3Smrg case OPC_SEL_F32: 9947ec681f3Smrg return OPC_SEL_F16; 9957ec681f3Smrg case OPC_SAD_S32: 9967ec681f3Smrg return OPC_SAD_S16; 9977ec681f3Smrg default: 9987ec681f3Smrg return opc; 9997ec681f3Smrg } 10007e102996Smaya} 10017e102996Smaya 10027ec681f3Smrgstatic inline opc_t 10037ec681f3Smrgcat3_full_opc(opc_t opc) 10047e102996Smaya{ 10057ec681f3Smrg switch (opc) { 10067ec681f3Smrg case OPC_MAD_F16: 10077ec681f3Smrg return OPC_MAD_F32; 10087ec681f3Smrg case OPC_SEL_B16: 10097ec681f3Smrg return OPC_SEL_B32; 10107ec681f3Smrg case OPC_SEL_S16: 10117ec681f3Smrg return OPC_SEL_S32; 10127ec681f3Smrg case OPC_SEL_F16: 10137ec681f3Smrg return OPC_SEL_F32; 10147ec681f3Smrg case OPC_SAD_S16: 10157ec681f3Smrg return OPC_SAD_S32; 10167ec681f3Smrg default: 10177ec681f3Smrg return opc; 10187ec681f3Smrg } 10197e102996Smaya} 10207e102996Smaya 10217ec681f3Smrgstatic inline opc_t 10227ec681f3Smrgcat4_half_opc(opc_t opc) 10237e102996Smaya{ 10247ec681f3Smrg switch (opc) { 10257ec681f3Smrg case OPC_RSQ: 10267ec681f3Smrg return OPC_HRSQ; 10277ec681f3Smrg case OPC_LOG2: 10287ec681f3Smrg return OPC_HLOG2; 10297ec681f3Smrg case OPC_EXP2: 10307ec681f3Smrg return OPC_HEXP2; 10317ec681f3Smrg default: 10327ec681f3Smrg return opc; 10337ec681f3Smrg } 10347e102996Smaya} 10357e102996Smaya 10367ec681f3Smrgstatic inline opc_t 10377ec681f3Smrgcat4_full_opc(opc_t opc) 10387e102996Smaya{ 10397ec681f3Smrg switch (opc) { 10407ec681f3Smrg case OPC_HRSQ: 10417ec681f3Smrg return OPC_RSQ; 10427ec681f3Smrg case OPC_HLOG2: 10437ec681f3Smrg return OPC_LOG2; 10447ec681f3Smrg case OPC_HEXP2: 10457ec681f3Smrg return OPC_EXP2; 10467ec681f3Smrg default: 10477ec681f3Smrg return opc; 10487ec681f3Smrg } 10497ec681f3Smrg} 10507e102996Smaya 10517ec681f3Smrgstatic inline bool 10527ec681f3Smrgis_meta(struct ir3_instruction *instr) 10537ec681f3Smrg{ 10547ec681f3Smrg return (opc_cat(instr->opc) == -1); 10557e102996Smaya} 10567e102996Smaya 10577ec681f3Smrgstatic inline unsigned 10587ec681f3Smrgreg_elems(const struct ir3_register *reg) 10597e102996Smaya{ 10607ec681f3Smrg if (reg->flags & IR3_REG_ARRAY) 10617ec681f3Smrg return reg->size; 10627ec681f3Smrg else 10637ec681f3Smrg return util_last_bit(reg->wrmask); 10647e102996Smaya} 10657e102996Smaya 10667ec681f3Smrgstatic inline unsigned 10677ec681f3Smrgreg_elem_size(const struct ir3_register *reg) 10687ec681f3Smrg{ 10697ec681f3Smrg return (reg->flags & IR3_REG_HALF) ? 1 : 2; 10707ec681f3Smrg} 10717ec681f3Smrg 10727ec681f3Smrgstatic inline unsigned 10737ec681f3Smrgreg_size(const struct ir3_register *reg) 10747ec681f3Smrg{ 10757ec681f3Smrg return reg_elems(reg) * reg_elem_size(reg); 10767ec681f3Smrg} 10777ec681f3Smrg 10787ec681f3Smrgstatic inline unsigned 10797ec681f3Smrgdest_regs(struct ir3_instruction *instr) 10807ec681f3Smrg{ 10817ec681f3Smrg if (instr->dsts_count == 0) 10827ec681f3Smrg return 0; 10837ec681f3Smrg 10847ec681f3Smrg debug_assert(instr->dsts_count == 1); 10857ec681f3Smrg return util_last_bit(instr->dsts[0]->wrmask); 10867ec681f3Smrg} 10877ec681f3Smrg 10887ec681f3Smrg/* is dst a normal temp register: */ 10897ec681f3Smrgstatic inline bool 10907ec681f3Smrgis_dest_gpr(struct ir3_register *dst) 10917ec681f3Smrg{ 10927ec681f3Smrg if (dst->wrmask == 0) 10937ec681f3Smrg return false; 10947ec681f3Smrg if ((reg_num(dst) == REG_A0) || (dst->num == regid(REG_P0, 0))) 10957ec681f3Smrg return false; 10967ec681f3Smrg return true; 10977ec681f3Smrg} 10987ec681f3Smrg 10997ec681f3Smrgstatic inline bool 11007ec681f3Smrgwrites_gpr(struct ir3_instruction *instr) 11017e102996Smaya{ 11027ec681f3Smrg if (dest_regs(instr) == 0) 11037ec681f3Smrg return false; 11047ec681f3Smrg return is_dest_gpr(instr->dsts[0]); 11057ec681f3Smrg} 11067ec681f3Smrg 11077ec681f3Smrgstatic inline bool 11087ec681f3Smrgwrites_addr0(struct ir3_instruction *instr) 11097ec681f3Smrg{ 11107ec681f3Smrg /* Note: only the first dest can write to a0.x */ 11117ec681f3Smrg if (instr->dsts_count > 0) { 11127ec681f3Smrg struct ir3_register *dst = instr->dsts[0]; 11137ec681f3Smrg return dst->num == regid(REG_A0, 0); 11147ec681f3Smrg } 11157ec681f3Smrg return false; 11167ec681f3Smrg} 11177ec681f3Smrg 11187ec681f3Smrgstatic inline bool 11197ec681f3Smrgwrites_addr1(struct ir3_instruction *instr) 11207ec681f3Smrg{ 11217ec681f3Smrg /* Note: only the first dest can write to a1.x */ 11227ec681f3Smrg if (instr->dsts_count > 0) { 11237ec681f3Smrg struct ir3_register *dst = instr->dsts[0]; 11247ec681f3Smrg return dst->num == regid(REG_A0, 1); 11257ec681f3Smrg } 11267ec681f3Smrg return false; 11277ec681f3Smrg} 11287ec681f3Smrg 11297ec681f3Smrgstatic inline bool 11307ec681f3Smrgwrites_pred(struct ir3_instruction *instr) 11317ec681f3Smrg{ 11327ec681f3Smrg /* Note: only the first dest can write to p0.x */ 11337ec681f3Smrg if (instr->dsts_count > 0) { 11347ec681f3Smrg struct ir3_register *dst = instr->dsts[0]; 11357ec681f3Smrg return reg_num(dst) == REG_P0; 11367ec681f3Smrg } 11377ec681f3Smrg return false; 11387ec681f3Smrg} 11397ec681f3Smrg 11407ec681f3Smrg/* Is it something other than a normal register. Shared regs, p0, and a0/a1 11417ec681f3Smrg * are considered special here. Special registers are always accessed with one 11427ec681f3Smrg * size and never alias normal registers, even though a naive calculation 11437ec681f3Smrg * would sometimes make it seem like e.g. r30.z aliases a0.x. 11447ec681f3Smrg */ 11457ec681f3Smrgstatic inline bool 11467ec681f3Smrgis_reg_special(const struct ir3_register *reg) 11477ec681f3Smrg{ 11487ec681f3Smrg return (reg->flags & IR3_REG_SHARED) || (reg_num(reg) == REG_A0) || 11497ec681f3Smrg (reg_num(reg) == REG_P0); 11507ec681f3Smrg} 11517ec681f3Smrg 11527ec681f3Smrg/* Same as above but in cases where we don't have a register. r48.x and above 11537ec681f3Smrg * are shared/special. 11547ec681f3Smrg */ 11557ec681f3Smrgstatic inline bool 11567ec681f3Smrgis_reg_num_special(unsigned num) 11577ec681f3Smrg{ 11587ec681f3Smrg return num >= 48 * 4; 11597e102996Smaya} 11607e102996Smaya 11617e102996Smaya/* returns defining instruction for reg */ 11627e102996Smaya/* TODO better name */ 11637ec681f3Smrgstatic inline struct ir3_instruction * 11647ec681f3Smrgssa(struct ir3_register *reg) 11657e102996Smaya{ 11667ec681f3Smrg if ((reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) && reg->def) 11677ec681f3Smrg return reg->def->instr; 11687ec681f3Smrg return NULL; 11697e102996Smaya} 11707e102996Smaya 11717ec681f3Smrgstatic inline bool 11727ec681f3Smrgconflicts(struct ir3_register *a, struct ir3_register *b) 11737ec681f3Smrg{ 11747ec681f3Smrg return (a && b) && (a->def != b->def); 11757ec681f3Smrg} 11767ec681f3Smrg 11777ec681f3Smrgstatic inline bool 11787ec681f3Smrgreg_gpr(struct ir3_register *r) 11797e102996Smaya{ 11807ec681f3Smrg if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) 11817ec681f3Smrg return false; 11827ec681f3Smrg if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) 11837ec681f3Smrg return false; 11847ec681f3Smrg return true; 11857e102996Smaya} 11867e102996Smaya 11877ec681f3Smrgstatic inline type_t 11887ec681f3Smrghalf_type(type_t type) 11897e102996Smaya{ 11907ec681f3Smrg switch (type) { 11917ec681f3Smrg case TYPE_F32: 11927ec681f3Smrg return TYPE_F16; 11937ec681f3Smrg case TYPE_U32: 11947ec681f3Smrg return TYPE_U16; 11957ec681f3Smrg case TYPE_S32: 11967ec681f3Smrg return TYPE_S16; 11977ec681f3Smrg case TYPE_F16: 11987ec681f3Smrg case TYPE_U16: 11997ec681f3Smrg case TYPE_S16: 12007ec681f3Smrg return type; 12017ec681f3Smrg default: 12027ec681f3Smrg assert(0); 12037ec681f3Smrg return ~0; 12047ec681f3Smrg } 12057e102996Smaya} 12067e102996Smaya 12077ec681f3Smrgstatic inline type_t 12087ec681f3Smrgfull_type(type_t type) 12097e102996Smaya{ 12107ec681f3Smrg switch (type) { 12117ec681f3Smrg case TYPE_F16: 12127ec681f3Smrg return TYPE_F32; 12137ec681f3Smrg case TYPE_U16: 12147ec681f3Smrg return TYPE_U32; 12157ec681f3Smrg case TYPE_S16: 12167ec681f3Smrg return TYPE_S32; 12177ec681f3Smrg case TYPE_F32: 12187ec681f3Smrg case TYPE_U32: 12197ec681f3Smrg case TYPE_S32: 12207ec681f3Smrg return type; 12217ec681f3Smrg default: 12227ec681f3Smrg assert(0); 12237ec681f3Smrg return ~0; 12247ec681f3Smrg } 12257e102996Smaya} 12267e102996Smaya 12277e102996Smaya/* some cat2 instructions (ie. those which are not float) can embed an 12287e102996Smaya * immediate: 12297e102996Smaya */ 12307ec681f3Smrgstatic inline bool 12317ec681f3Smrgir3_cat2_int(opc_t opc) 12327ec681f3Smrg{ 12337ec681f3Smrg switch (opc) { 12347ec681f3Smrg case OPC_ADD_U: 12357ec681f3Smrg case OPC_ADD_S: 12367ec681f3Smrg case OPC_SUB_U: 12377ec681f3Smrg case OPC_SUB_S: 12387ec681f3Smrg case OPC_CMPS_U: 12397ec681f3Smrg case OPC_CMPS_S: 12407ec681f3Smrg case OPC_MIN_U: 12417ec681f3Smrg case OPC_MIN_S: 12427ec681f3Smrg case OPC_MAX_U: 12437ec681f3Smrg case OPC_MAX_S: 12447ec681f3Smrg case OPC_CMPV_U: 12457ec681f3Smrg case OPC_CMPV_S: 12467ec681f3Smrg case OPC_MUL_U24: 12477ec681f3Smrg case OPC_MUL_S24: 12487ec681f3Smrg case OPC_MULL_U: 12497ec681f3Smrg case OPC_CLZ_S: 12507ec681f3Smrg case OPC_ABSNEG_S: 12517ec681f3Smrg case OPC_AND_B: 12527ec681f3Smrg case OPC_OR_B: 12537ec681f3Smrg case OPC_NOT_B: 12547ec681f3Smrg case OPC_XOR_B: 12557ec681f3Smrg case OPC_BFREV_B: 12567ec681f3Smrg case OPC_CLZ_B: 12577ec681f3Smrg case OPC_SHL_B: 12587ec681f3Smrg case OPC_SHR_B: 12597ec681f3Smrg case OPC_ASHR_B: 12607ec681f3Smrg case OPC_MGEN_B: 12617ec681f3Smrg case OPC_GETBIT_B: 12627ec681f3Smrg case OPC_CBITS_B: 12637ec681f3Smrg case OPC_BARY_F: 12647ec681f3Smrg return true; 12657ec681f3Smrg 12667ec681f3Smrg default: 12677ec681f3Smrg return false; 12687ec681f3Smrg } 12697e102996Smaya} 12707e102996Smaya 12717e102996Smaya/* map cat2 instruction to valid abs/neg flags: */ 12727ec681f3Smrgstatic inline unsigned 12737ec681f3Smrgir3_cat2_absneg(opc_t opc) 12747ec681f3Smrg{ 12757ec681f3Smrg switch (opc) { 12767ec681f3Smrg case OPC_ADD_F: 12777ec681f3Smrg case OPC_MIN_F: 12787ec681f3Smrg case OPC_MAX_F: 12797ec681f3Smrg case OPC_MUL_F: 12807ec681f3Smrg case OPC_SIGN_F: 12817ec681f3Smrg case OPC_CMPS_F: 12827ec681f3Smrg case OPC_ABSNEG_F: 12837ec681f3Smrg case OPC_CMPV_F: 12847ec681f3Smrg case OPC_FLOOR_F: 12857ec681f3Smrg case OPC_CEIL_F: 12867ec681f3Smrg case OPC_RNDNE_F: 12877ec681f3Smrg case OPC_RNDAZ_F: 12887ec681f3Smrg case OPC_TRUNC_F: 12897ec681f3Smrg case OPC_BARY_F: 12907ec681f3Smrg return IR3_REG_FABS | IR3_REG_FNEG; 12917ec681f3Smrg 12927ec681f3Smrg case OPC_ADD_U: 12937ec681f3Smrg case OPC_ADD_S: 12947ec681f3Smrg case OPC_SUB_U: 12957ec681f3Smrg case OPC_SUB_S: 12967ec681f3Smrg case OPC_CMPS_U: 12977ec681f3Smrg case OPC_CMPS_S: 12987ec681f3Smrg case OPC_MIN_U: 12997ec681f3Smrg case OPC_MIN_S: 13007ec681f3Smrg case OPC_MAX_U: 13017ec681f3Smrg case OPC_MAX_S: 13027ec681f3Smrg case OPC_CMPV_U: 13037ec681f3Smrg case OPC_CMPV_S: 13047ec681f3Smrg case OPC_MUL_U24: 13057ec681f3Smrg case OPC_MUL_S24: 13067ec681f3Smrg case OPC_MULL_U: 13077ec681f3Smrg case OPC_CLZ_S: 13087ec681f3Smrg return 0; 13097ec681f3Smrg 13107ec681f3Smrg case OPC_ABSNEG_S: 13117ec681f3Smrg return IR3_REG_SABS | IR3_REG_SNEG; 13127ec681f3Smrg 13137ec681f3Smrg case OPC_AND_B: 13147ec681f3Smrg case OPC_OR_B: 13157ec681f3Smrg case OPC_NOT_B: 13167ec681f3Smrg case OPC_XOR_B: 13177ec681f3Smrg case OPC_BFREV_B: 13187ec681f3Smrg case OPC_CLZ_B: 13197ec681f3Smrg case OPC_SHL_B: 13207ec681f3Smrg case OPC_SHR_B: 13217ec681f3Smrg case OPC_ASHR_B: 13227ec681f3Smrg case OPC_MGEN_B: 13237ec681f3Smrg case OPC_GETBIT_B: 13247ec681f3Smrg case OPC_CBITS_B: 13257ec681f3Smrg return IR3_REG_BNOT; 13267ec681f3Smrg 13277ec681f3Smrg default: 13287ec681f3Smrg return 0; 13297ec681f3Smrg } 13307e102996Smaya} 13317e102996Smaya 13327e102996Smaya/* map cat3 instructions to valid abs/neg flags: */ 13337ec681f3Smrgstatic inline unsigned 13347ec681f3Smrgir3_cat3_absneg(opc_t opc) 13357ec681f3Smrg{ 13367ec681f3Smrg switch (opc) { 13377ec681f3Smrg case OPC_MAD_F16: 13387ec681f3Smrg case OPC_MAD_F32: 13397ec681f3Smrg case OPC_SEL_F16: 13407ec681f3Smrg case OPC_SEL_F32: 13417ec681f3Smrg return IR3_REG_FNEG; 13427ec681f3Smrg 13437ec681f3Smrg case OPC_MAD_U16: 13447ec681f3Smrg case OPC_MADSH_U16: 13457ec681f3Smrg case OPC_MAD_S16: 13467ec681f3Smrg case OPC_MADSH_M16: 13477ec681f3Smrg case OPC_MAD_U24: 13487ec681f3Smrg case OPC_MAD_S24: 13497ec681f3Smrg case OPC_SEL_S16: 13507ec681f3Smrg case OPC_SEL_S32: 13517ec681f3Smrg case OPC_SAD_S16: 13527ec681f3Smrg case OPC_SAD_S32: 13537ec681f3Smrg /* neg *may* work on 3rd src.. */ 13547ec681f3Smrg 13557ec681f3Smrg case OPC_SEL_B16: 13567ec681f3Smrg case OPC_SEL_B32: 13577ec681f3Smrg 13587ec681f3Smrg case OPC_SHLG_B16: 13597ec681f3Smrg 13607ec681f3Smrg default: 13617ec681f3Smrg return 0; 13627ec681f3Smrg } 13637ec681f3Smrg} 13647ec681f3Smrg 13657ec681f3Smrg/* Return the type (float, int, or uint) the op uses when converting from the 13667ec681f3Smrg * internal result of the op (which is assumed to be the same size as the 13677ec681f3Smrg * sources) to the destination when they are not the same size. If F32 it does 13687ec681f3Smrg * a floating-point conversion, if U32 it does a truncation/zero-extension, if 13697ec681f3Smrg * S32 it does a truncation/sign-extension. "can_fold" will be false if it 13707ec681f3Smrg * doesn't do anything sensible or is unknown. 13717ec681f3Smrg */ 13727ec681f3Smrgstatic inline type_t 13737ec681f3Smrgir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold) 13747ec681f3Smrg{ 13757ec681f3Smrg *can_fold = true; 13767ec681f3Smrg switch (instr->opc) { 13777ec681f3Smrg case OPC_ADD_F: 13787ec681f3Smrg case OPC_MUL_F: 13797ec681f3Smrg case OPC_BARY_F: 13807ec681f3Smrg case OPC_MAD_F32: 13817ec681f3Smrg case OPC_MAD_F16: 13827ec681f3Smrg return TYPE_F32; 13837ec681f3Smrg 13847ec681f3Smrg case OPC_ADD_U: 13857ec681f3Smrg case OPC_SUB_U: 13867ec681f3Smrg case OPC_MIN_U: 13877ec681f3Smrg case OPC_MAX_U: 13887ec681f3Smrg case OPC_AND_B: 13897ec681f3Smrg case OPC_OR_B: 13907ec681f3Smrg case OPC_NOT_B: 13917ec681f3Smrg case OPC_XOR_B: 13927ec681f3Smrg case OPC_MUL_U24: 13937ec681f3Smrg case OPC_MULL_U: 13947ec681f3Smrg case OPC_SHL_B: 13957ec681f3Smrg case OPC_SHR_B: 13967ec681f3Smrg case OPC_ASHR_B: 13977ec681f3Smrg case OPC_MAD_U24: 13987ec681f3Smrg /* Comparison ops zero-extend/truncate their results, so consider them as 13997ec681f3Smrg * unsigned here. 14007ec681f3Smrg */ 14017ec681f3Smrg case OPC_CMPS_F: 14027ec681f3Smrg case OPC_CMPV_F: 14037ec681f3Smrg case OPC_CMPS_U: 14047ec681f3Smrg case OPC_CMPS_S: 14057ec681f3Smrg return TYPE_U32; 14067ec681f3Smrg 14077ec681f3Smrg case OPC_ADD_S: 14087ec681f3Smrg case OPC_SUB_S: 14097ec681f3Smrg case OPC_MIN_S: 14107ec681f3Smrg case OPC_MAX_S: 14117ec681f3Smrg case OPC_ABSNEG_S: 14127ec681f3Smrg case OPC_MUL_S24: 14137ec681f3Smrg case OPC_MAD_S24: 14147ec681f3Smrg return TYPE_S32; 14157ec681f3Smrg 14167ec681f3Smrg /* We assume that any move->move folding that could be done was done by 14177ec681f3Smrg * NIR. 14187ec681f3Smrg */ 14197ec681f3Smrg case OPC_MOV: 14207ec681f3Smrg default: 14217ec681f3Smrg *can_fold = false; 14227ec681f3Smrg return TYPE_U32; 14237ec681f3Smrg } 14247ec681f3Smrg} 14257ec681f3Smrg 14267ec681f3Smrg/* Return the src and dst types for the conversion which is already folded 14277ec681f3Smrg * into the op. We can assume that instr has folded in a conversion from 14287ec681f3Smrg * ir3_output_conv_src_type() to ir3_output_conv_dst_type(). Only makes sense 14297ec681f3Smrg * to call if ir3_output_conv_type() returns can_fold = true. 14307ec681f3Smrg */ 14317ec681f3Smrgstatic inline type_t 14327ec681f3Smrgir3_output_conv_src_type(struct ir3_instruction *instr, type_t base_type) 14337ec681f3Smrg{ 14347ec681f3Smrg switch (instr->opc) { 14357ec681f3Smrg case OPC_CMPS_F: 14367ec681f3Smrg case OPC_CMPV_F: 14377ec681f3Smrg case OPC_CMPS_U: 14387ec681f3Smrg case OPC_CMPS_S: 14397ec681f3Smrg /* Comparisons only return 0/1 and the size of the comparison sources 14407ec681f3Smrg * is irrelevant, never consider them as having an output conversion 14417ec681f3Smrg * by returning a type with the dest size here: 14427ec681f3Smrg */ 14437ec681f3Smrg return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type) 14447ec681f3Smrg : full_type(base_type); 14457ec681f3Smrg 14467ec681f3Smrg case OPC_BARY_F: 14477ec681f3Smrg /* bary.f doesn't have an explicit source, but we can assume here that 14487ec681f3Smrg * the varying data it reads is in fp32. 14497ec681f3Smrg * 14507ec681f3Smrg * This may be fp16 on older gen's depending on some register 14517ec681f3Smrg * settings, but it's probably not worth plumbing that through for a 14527ec681f3Smrg * small improvement that NIR would hopefully handle for us anyway. 14537ec681f3Smrg */ 14547ec681f3Smrg return TYPE_F32; 14557ec681f3Smrg 14567ec681f3Smrg default: 14577ec681f3Smrg return (instr->srcs[0]->flags & IR3_REG_HALF) ? half_type(base_type) 14587ec681f3Smrg : full_type(base_type); 14597ec681f3Smrg } 14607ec681f3Smrg} 14617ec681f3Smrg 14627ec681f3Smrgstatic inline type_t 14637ec681f3Smrgir3_output_conv_dst_type(struct ir3_instruction *instr, type_t base_type) 14647ec681f3Smrg{ 14657ec681f3Smrg return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type) 14667ec681f3Smrg : full_type(base_type); 14677ec681f3Smrg} 14687ec681f3Smrg 14697ec681f3Smrg/* Some instructions have signed/unsigned variants which are identical except 14707ec681f3Smrg * for whether the folded conversion sign-extends or zero-extends, and we can 14717ec681f3Smrg * fold in a mismatching move by rewriting the opcode. Return the opcode to 14727ec681f3Smrg * switch signedness, and whether one exists. 14737ec681f3Smrg */ 14747ec681f3Smrgstatic inline opc_t 14757ec681f3Smrgir3_try_swap_signedness(opc_t opc, bool *can_swap) 14767ec681f3Smrg{ 14777ec681f3Smrg switch (opc) { 14787ec681f3Smrg#define PAIR(u, s) \ 14797ec681f3Smrg case OPC_##u: \ 14807ec681f3Smrg return OPC_##s; \ 14817ec681f3Smrg case OPC_##s: \ 14827ec681f3Smrg return OPC_##u; 14837ec681f3Smrg PAIR(ADD_U, ADD_S) 14847ec681f3Smrg PAIR(SUB_U, SUB_S) 14857ec681f3Smrg /* Note: these are only identical when the sources are half, but that's 14867ec681f3Smrg * the only case we call this function for anyway. 14877ec681f3Smrg */ 14887ec681f3Smrg PAIR(MUL_U24, MUL_S24) 14897ec681f3Smrg 14907ec681f3Smrg default: 14917ec681f3Smrg *can_swap = false; 14927ec681f3Smrg return opc; 14937ec681f3Smrg } 14947e102996Smaya} 14957e102996Smaya 14967e102996Smaya#define MASK(n) ((1 << (n)) - 1) 14977e102996Smaya 14987e102996Smaya/* iterator for an instructions's sources (reg), also returns src #: */ 14997ec681f3Smrg#define foreach_src_n(__srcreg, __n, __instr) \ 15007ec681f3Smrg if ((__instr)->srcs_count) \ 15017ec681f3Smrg for (struct ir3_register *__srcreg = (void *)~0; __srcreg; \ 15027ec681f3Smrg __srcreg = NULL) \ 15037ec681f3Smrg for (unsigned __cnt = (__instr)->srcs_count, __n = 0; __n < __cnt; \ 15047ec681f3Smrg __n++) \ 15057ec681f3Smrg if ((__srcreg = (__instr)->srcs[__n])) 15067e102996Smaya 15077e102996Smaya/* iterator for an instructions's sources (reg): */ 15087ec681f3Smrg#define foreach_src(__srcreg, __instr) foreach_src_n (__srcreg, __i, __instr) 15097ec681f3Smrg 15107ec681f3Smrg/* iterator for an instructions's destinations (reg), also returns dst #: */ 15117ec681f3Smrg#define foreach_dst_n(__dstreg, __n, __instr) \ 15127ec681f3Smrg if ((__instr)->dsts_count) \ 15137ec681f3Smrg for (struct ir3_register *__dstreg = (void *)~0; __dstreg; \ 15147ec681f3Smrg __dstreg = NULL) \ 15157ec681f3Smrg for (unsigned __cnt = (__instr)->dsts_count, __n = 0; __n < __cnt; \ 15167ec681f3Smrg __n++) \ 15177ec681f3Smrg if ((__dstreg = (__instr)->dsts[__n])) 15187ec681f3Smrg 15197ec681f3Smrg/* iterator for an instructions's destinations (reg): */ 15207ec681f3Smrg#define foreach_dst(__dstreg, __instr) foreach_dst_n (__dstreg, __i, __instr) 15217ec681f3Smrg 15227ec681f3Smrgstatic inline unsigned 15237ec681f3Smrg__ssa_src_cnt(struct ir3_instruction *instr) 15247e102996Smaya{ 15257ec681f3Smrg return instr->srcs_count + instr->deps_count; 15267e102996Smaya} 15277e102996Smaya 15287ec681f3Smrgstatic inline bool 15297ec681f3Smrg__is_false_dep(struct ir3_instruction *instr, unsigned n) 15307e102996Smaya{ 15317ec681f3Smrg if (n >= instr->srcs_count) 15327ec681f3Smrg return true; 15337ec681f3Smrg return false; 15347e102996Smaya} 15357e102996Smaya 15367ec681f3Smrgstatic inline struct ir3_instruction ** 15377ec681f3Smrg__ssa_srcp_n(struct ir3_instruction *instr, unsigned n) 15387e102996Smaya{ 15397ec681f3Smrg if (__is_false_dep(instr, n)) 15407ec681f3Smrg return &instr->deps[n - instr->srcs_count]; 15417ec681f3Smrg if (ssa(instr->srcs[n])) 15427ec681f3Smrg return &instr->srcs[n]->def->instr; 15437ec681f3Smrg return NULL; 15447e102996Smaya} 15457e102996Smaya 15467ec681f3Smrg#define foreach_ssa_srcp_n(__srcp, __n, __instr) \ 15477ec681f3Smrg for (struct ir3_instruction **__srcp = (void *)~0; __srcp; __srcp = NULL) \ 15487ec681f3Smrg for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; \ 15497ec681f3Smrg __n++) \ 15507ec681f3Smrg if ((__srcp = __ssa_srcp_n(__instr, __n))) 15517ec681f3Smrg 15527ec681f3Smrg#define foreach_ssa_srcp(__srcp, __instr) \ 15537ec681f3Smrg foreach_ssa_srcp_n (__srcp, __i, __instr) 15547e102996Smaya 15557e102996Smaya/* iterator for an instruction's SSA sources (instr), also returns src #: */ 15567ec681f3Smrg#define foreach_ssa_src_n(__srcinst, __n, __instr) \ 15577ec681f3Smrg for (struct ir3_instruction *__srcinst = (void *)~0; __srcinst; \ 15587ec681f3Smrg __srcinst = NULL) \ 15597ec681f3Smrg foreach_ssa_srcp_n (__srcp, __n, __instr) \ 15607ec681f3Smrg if ((__srcinst = *__srcp)) 15617e102996Smaya 15627e102996Smaya/* iterator for an instruction's SSA sources (instr): */ 15637ec681f3Smrg#define foreach_ssa_src(__srcinst, __instr) \ 15647ec681f3Smrg foreach_ssa_src_n (__srcinst, __i, __instr) 15657ec681f3Smrg 15667ec681f3Smrg/* iterators for shader inputs: */ 15677ec681f3Smrg#define foreach_input_n(__ininstr, __cnt, __ir) \ 15687ec681f3Smrg for (struct ir3_instruction *__ininstr = (void *)~0; __ininstr; \ 15697ec681f3Smrg __ininstr = NULL) \ 15707ec681f3Smrg for (unsigned __cnt = 0; __cnt < (__ir)->inputs_count; __cnt++) \ 15717ec681f3Smrg if ((__ininstr = (__ir)->inputs[__cnt])) 15727ec681f3Smrg#define foreach_input(__ininstr, __ir) foreach_input_n (__ininstr, __i, __ir) 15737ec681f3Smrg 15747ec681f3Smrg/* iterators for instructions: */ 15757ec681f3Smrg#define foreach_instr(__instr, __list) \ 15767ec681f3Smrg list_for_each_entry (struct ir3_instruction, __instr, __list, node) 15777ec681f3Smrg#define foreach_instr_rev(__instr, __list) \ 15787ec681f3Smrg list_for_each_entry_rev (struct ir3_instruction, __instr, __list, node) 15797ec681f3Smrg#define foreach_instr_safe(__instr, __list) \ 15807ec681f3Smrg list_for_each_entry_safe (struct ir3_instruction, __instr, __list, node) 15817ec681f3Smrg#define foreach_instr_from_safe(__instr, __start, __list) \ 15827ec681f3Smrg list_for_each_entry_from_safe(struct ir3_instruction, __instr, __start, \ 15837ec681f3Smrg __list, node) 15847ec681f3Smrg 15857ec681f3Smrg/* iterators for blocks: */ 15867ec681f3Smrg#define foreach_block(__block, __list) \ 15877ec681f3Smrg list_for_each_entry (struct ir3_block, __block, __list, node) 15887ec681f3Smrg#define foreach_block_safe(__block, __list) \ 15897ec681f3Smrg list_for_each_entry_safe (struct ir3_block, __block, __list, node) 15907ec681f3Smrg#define foreach_block_rev(__block, __list) \ 15917ec681f3Smrg list_for_each_entry_rev (struct ir3_block, __block, __list, node) 15927ec681f3Smrg 15937ec681f3Smrg/* iterators for arrays: */ 15947ec681f3Smrg#define foreach_array(__array, __list) \ 15957ec681f3Smrg list_for_each_entry (struct ir3_array, __array, __list, node) 15967ec681f3Smrg#define foreach_array_safe(__array, __list) \ 15977ec681f3Smrg list_for_each_entry_safe (struct ir3_array, __array, __list, node) 15987ec681f3Smrg 15997ec681f3Smrg#define IR3_PASS(ir, pass, ...) \ 16007ec681f3Smrg ({ \ 16017ec681f3Smrg bool progress = pass(ir, ##__VA_ARGS__); \ 16027ec681f3Smrg if (progress) { \ 16037ec681f3Smrg ir3_debug_print(ir, "AFTER: " #pass); \ 16047ec681f3Smrg ir3_validate(ir); \ 16057ec681f3Smrg } \ 16067ec681f3Smrg progress; \ 16077ec681f3Smrg }) 16087ec681f3Smrg 16097ec681f3Smrg/* validate: */ 16107ec681f3Smrgvoid ir3_validate(struct ir3 *ir); 16117e102996Smaya 16127e102996Smaya/* dump: */ 16137e102996Smayavoid ir3_print(struct ir3 *ir); 16147e102996Smayavoid ir3_print_instr(struct ir3_instruction *instr); 16157e102996Smaya 16167ec681f3Smrgstruct log_stream; 16177ec681f3Smrgvoid ir3_print_instr_stream(struct log_stream *stream, struct ir3_instruction *instr); 16187ec681f3Smrg 16197ec681f3Smrg/* delay calculation: */ 16207e102996Smayaint ir3_delayslots(struct ir3_instruction *assigner, 16217ec681f3Smrg struct ir3_instruction *consumer, unsigned n, bool soft); 16227ec681f3Smrgunsigned ir3_delay_calc_prera(struct ir3_block *block, 16237ec681f3Smrg struct ir3_instruction *instr); 16247ec681f3Smrgunsigned ir3_delay_calc_postra(struct ir3_block *block, 16257ec681f3Smrg struct ir3_instruction *instr, bool soft, 16267ec681f3Smrg bool mergedregs); 16277ec681f3Smrgunsigned ir3_delay_calc_exact(struct ir3_block *block, 16287ec681f3Smrg struct ir3_instruction *instr, bool mergedregs); 16297ec681f3Smrgvoid ir3_remove_nops(struct ir3 *ir); 16307ec681f3Smrg 16317ec681f3Smrg/* unreachable block elimination: */ 16327ec681f3Smrgbool ir3_remove_unreachable(struct ir3 *ir); 16337ec681f3Smrg 16347ec681f3Smrg/* dead code elimination: */ 16357ec681f3Smrgstruct ir3_shader_variant; 16367ec681f3Smrgbool ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so); 16377ec681f3Smrg 16387ec681f3Smrg/* fp16 conversion folding */ 16397ec681f3Smrgbool ir3_cf(struct ir3 *ir); 16407e102996Smaya 16417e102996Smaya/* copy-propagate: */ 16427ec681f3Smrgbool ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); 16437ec681f3Smrgbool ir3_cp_postsched(struct ir3 *ir); 16447e102996Smaya 16457ec681f3Smrg/* common subexpression elimination: */ 16467ec681f3Smrgbool ir3_cse(struct ir3 *ir); 16477e102996Smaya 16487ec681f3Smrg/* Make arrays SSA */ 16497ec681f3Smrgbool ir3_array_to_ssa(struct ir3 *ir); 16507e102996Smaya 16517e102996Smaya/* scheduling: */ 16527ec681f3Smrgbool ir3_sched_add_deps(struct ir3 *ir); 16537e102996Smayaint ir3_sched(struct ir3 *ir); 16547e102996Smaya 16557ec681f3Smrgstruct ir3_context; 16567ec681f3Smrgbool ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v); 16577e102996Smaya 16587e102996Smaya/* register assignment: */ 16597ec681f3Smrgint ir3_ra(struct ir3_shader_variant *v); 16607ec681f3Smrg 16617ec681f3Smrg/* lower subgroup ops: */ 16627ec681f3Smrgbool ir3_lower_subgroups(struct ir3 *ir); 16637e102996Smaya 16647e102996Smaya/* legalize: */ 16657ec681f3Smrgbool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary); 16667ec681f3Smrg 16677ec681f3Smrgstatic inline bool 16687ec681f3Smrgir3_has_latency_to_hide(struct ir3 *ir) 16697ec681f3Smrg{ 16707ec681f3Smrg /* VS/GS/TCS/TESS co-exist with frag shader invocations, but we don't 16717ec681f3Smrg * know the nature of the fragment shader. Just assume it will have 16727ec681f3Smrg * latency to hide: 16737ec681f3Smrg */ 16747ec681f3Smrg if (ir->type != MESA_SHADER_FRAGMENT) 16757ec681f3Smrg return true; 16767ec681f3Smrg 16777ec681f3Smrg foreach_block (block, &ir->block_list) { 16787ec681f3Smrg foreach_instr (instr, &block->instr_list) { 16797ec681f3Smrg if (is_tex_or_prefetch(instr)) 16807ec681f3Smrg return true; 16817ec681f3Smrg 16827ec681f3Smrg if (is_load(instr)) { 16837ec681f3Smrg switch (instr->opc) { 16847ec681f3Smrg case OPC_LDLV: 16857ec681f3Smrg case OPC_LDL: 16867ec681f3Smrg case OPC_LDLW: 16877ec681f3Smrg break; 16887ec681f3Smrg default: 16897ec681f3Smrg return true; 16907ec681f3Smrg } 16917ec681f3Smrg } 16927ec681f3Smrg } 16937ec681f3Smrg } 16947ec681f3Smrg 16957ec681f3Smrg return false; 16967ec681f3Smrg} 16977e102996Smaya 16987e102996Smaya/* ************************************************************************* */ 16997e102996Smaya/* instruction helpers */ 17007e102996Smaya 17017ec681f3Smrg/* creates SSA src of correct type (ie. half vs full precision) */ 17027ec681f3Smrgstatic inline struct ir3_register * 17037ec681f3Smrg__ssa_src(struct ir3_instruction *instr, struct ir3_instruction *src, 17047ec681f3Smrg unsigned flags) 17057ec681f3Smrg{ 17067ec681f3Smrg struct ir3_register *reg; 17077ec681f3Smrg if (src->dsts[0]->flags & IR3_REG_HALF) 17087ec681f3Smrg flags |= IR3_REG_HALF; 17097ec681f3Smrg reg = ir3_src_create(instr, INVALID_REG, IR3_REG_SSA | flags); 17107ec681f3Smrg reg->def = src->dsts[0]; 17117ec681f3Smrg reg->wrmask = src->dsts[0]->wrmask; 17127ec681f3Smrg return reg; 17137ec681f3Smrg} 17147ec681f3Smrg 17157ec681f3Smrgstatic inline struct ir3_register * 17167ec681f3Smrg__ssa_dst(struct ir3_instruction *instr) 17177ec681f3Smrg{ 17187ec681f3Smrg struct ir3_register *reg = ir3_dst_create(instr, INVALID_REG, IR3_REG_SSA); 17197ec681f3Smrg reg->instr = instr; 17207ec681f3Smrg return reg; 17217ec681f3Smrg} 17227ec681f3Smrg 17237e102996Smayastatic inline struct ir3_instruction * 17247e102996Smayacreate_immed_typed(struct ir3_block *block, uint32_t val, type_t type) 17257e102996Smaya{ 17267ec681f3Smrg struct ir3_instruction *mov; 17277ec681f3Smrg unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0; 17287e102996Smaya 17297ec681f3Smrg mov = ir3_instr_create(block, OPC_MOV, 1, 1); 17307ec681f3Smrg mov->cat1.src_type = type; 17317ec681f3Smrg mov->cat1.dst_type = type; 17327ec681f3Smrg __ssa_dst(mov)->flags |= flags; 17337ec681f3Smrg ir3_src_create(mov, 0, IR3_REG_IMMED | flags)->uim_val = val; 17347e102996Smaya 17357ec681f3Smrg return mov; 17367e102996Smaya} 17377e102996Smaya 17387e102996Smayastatic inline struct ir3_instruction * 17397e102996Smayacreate_immed(struct ir3_block *block, uint32_t val) 17407e102996Smaya{ 17417ec681f3Smrg return create_immed_typed(block, val, TYPE_U32); 17427e102996Smaya} 17437e102996Smaya 17447e102996Smayastatic inline struct ir3_instruction * 17457ec681f3Smrgcreate_uniform_typed(struct ir3_block *block, unsigned n, type_t type) 17467e102996Smaya{ 17477ec681f3Smrg struct ir3_instruction *mov; 17487ec681f3Smrg unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0; 17497e102996Smaya 17507ec681f3Smrg mov = ir3_instr_create(block, OPC_MOV, 1, 1); 17517ec681f3Smrg mov->cat1.src_type = type; 17527ec681f3Smrg mov->cat1.dst_type = type; 17537ec681f3Smrg __ssa_dst(mov)->flags |= flags; 17547ec681f3Smrg ir3_src_create(mov, n, IR3_REG_CONST | flags); 17557e102996Smaya 17567ec681f3Smrg return mov; 17577e102996Smaya} 17587e102996Smaya 17597e102996Smayastatic inline struct ir3_instruction * 17607ec681f3Smrgcreate_uniform(struct ir3_block *block, unsigned n) 17617e102996Smaya{ 17627ec681f3Smrg return create_uniform_typed(block, n, TYPE_F32); 17637ec681f3Smrg} 17647e102996Smaya 17657ec681f3Smrgstatic inline struct ir3_instruction * 17667ec681f3Smrgcreate_uniform_indirect(struct ir3_block *block, int n, type_t type, 17677ec681f3Smrg struct ir3_instruction *address) 17687ec681f3Smrg{ 17697ec681f3Smrg struct ir3_instruction *mov; 17707e102996Smaya 17717ec681f3Smrg mov = ir3_instr_create(block, OPC_MOV, 1, 1); 17727ec681f3Smrg mov->cat1.src_type = type; 17737ec681f3Smrg mov->cat1.dst_type = type; 17747ec681f3Smrg __ssa_dst(mov); 17757ec681f3Smrg ir3_src_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n; 17767e102996Smaya 17777ec681f3Smrg ir3_instr_set_address(mov, address); 17787ec681f3Smrg 17797ec681f3Smrg return mov; 17807e102996Smaya} 17817e102996Smaya 17827ec681f3Smrgstatic inline struct ir3_instruction * 17837ec681f3Smrgir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) 17847e102996Smaya{ 17857ec681f3Smrg struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1); 17867ec681f3Smrg unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0; 17877ec681f3Smrg 17887ec681f3Smrg __ssa_dst(instr)->flags |= flags; 17897ec681f3Smrg if (src->dsts[0]->flags & IR3_REG_ARRAY) { 17907ec681f3Smrg struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY); 17917ec681f3Smrg src_reg->array = src->dsts[0]->array; 17927ec681f3Smrg } else { 17937ec681f3Smrg __ssa_src(instr, src, src->dsts[0]->flags & IR3_REG_SHARED); 17947ec681f3Smrg } 17957ec681f3Smrg debug_assert(!(src->dsts[0]->flags & IR3_REG_RELATIV)); 17967ec681f3Smrg instr->cat1.src_type = type; 17977ec681f3Smrg instr->cat1.dst_type = type; 17987ec681f3Smrg return instr; 17997e102996Smaya} 18007e102996Smaya 18017e102996Smayastatic inline struct ir3_instruction * 18027ec681f3Smrgir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type, 18037ec681f3Smrg type_t dst_type) 18047e102996Smaya{ 18057ec681f3Smrg struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1); 18067ec681f3Smrg unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0; 18077ec681f3Smrg unsigned src_flags = (type_size(src_type) < 32) ? IR3_REG_HALF : 0; 18087ec681f3Smrg 18097ec681f3Smrg debug_assert((src->dsts[0]->flags & IR3_REG_HALF) == src_flags); 18107ec681f3Smrg 18117ec681f3Smrg __ssa_dst(instr)->flags |= dst_flags; 18127ec681f3Smrg __ssa_src(instr, src, 0); 18137ec681f3Smrg instr->cat1.src_type = src_type; 18147ec681f3Smrg instr->cat1.dst_type = dst_type; 18157ec681f3Smrg debug_assert(!(src->dsts[0]->flags & IR3_REG_ARRAY)); 18167ec681f3Smrg return instr; 18177ec681f3Smrg} 18187ec681f3Smrg 18197ec681f3Smrgstatic inline struct ir3_instruction * 18207ec681f3Smrgir3_MOVMSK(struct ir3_block *block, unsigned components) 18217ec681f3Smrg{ 18227ec681f3Smrg struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOVMSK, 1, 0); 18237ec681f3Smrg 18247ec681f3Smrg struct ir3_register *dst = __ssa_dst(instr); 18257ec681f3Smrg dst->flags |= IR3_REG_SHARED; 18267ec681f3Smrg dst->wrmask = (1 << components) - 1; 18277ec681f3Smrg instr->repeat = components - 1; 18287ec681f3Smrg return instr; 18297e102996Smaya} 18307e102996Smaya 18317e102996Smayastatic inline struct ir3_instruction * 18327ec681f3Smrgir3_BALLOT_MACRO(struct ir3_block *block, struct ir3_instruction *src, 18337ec681f3Smrg unsigned components) 18347e102996Smaya{ 18357ec681f3Smrg struct ir3_instruction *instr = 18367ec681f3Smrg ir3_instr_create(block, OPC_BALLOT_MACRO, 1, 1); 18377e102996Smaya 18387ec681f3Smrg struct ir3_register *dst = __ssa_dst(instr); 18397ec681f3Smrg dst->flags |= IR3_REG_SHARED; 18407ec681f3Smrg dst->wrmask = (1 << components) - 1; 18417e102996Smaya 18427ec681f3Smrg __ssa_src(instr, src, 0); 18437ec681f3Smrg 18447ec681f3Smrg return instr; 18457e102996Smaya} 18467e102996Smaya 18477e102996Smayastatic inline struct ir3_instruction * 18487e102996Smayair3_NOP(struct ir3_block *block) 18497e102996Smaya{ 18507ec681f3Smrg return ir3_instr_create(block, OPC_NOP, 0, 0); 18517e102996Smaya} 18527e102996Smaya 18537e102996Smaya#define IR3_INSTR_0 0 18547e102996Smaya 18557ec681f3Smrg/* clang-format off */ 18567ec681f3Smrg#define __INSTR0(flag, name, opc) \ 18577ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name(struct ir3_block *block) \ 18587ec681f3Smrg{ \ 18597ec681f3Smrg struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 0); \ 18607ec681f3Smrg instr->flags |= flag; \ 18617ec681f3Smrg return instr; \ 18627ec681f3Smrg} 18637ec681f3Smrg/* clang-format on */ 18647ec681f3Smrg#define INSTR0F(f, name) __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name) 18657ec681f3Smrg#define INSTR0(name) __INSTR0(0, name, OPC_##name) 18667ec681f3Smrg 18677ec681f3Smrg/* clang-format off */ 18687ec681f3Smrg#define __INSTR1(flag, dst_count, name, opc) \ 18697ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name( \ 18707ec681f3Smrg struct ir3_block *block, struct ir3_instruction *a, unsigned aflags) \ 18717ec681f3Smrg{ \ 18727ec681f3Smrg struct ir3_instruction *instr = \ 18737ec681f3Smrg ir3_instr_create(block, opc, dst_count, 1); \ 18747ec681f3Smrg for (unsigned i = 0; i < dst_count; i++) \ 18757ec681f3Smrg __ssa_dst(instr); \ 18767ec681f3Smrg __ssa_src(instr, a, aflags); \ 18777ec681f3Smrg instr->flags |= flag; \ 18787ec681f3Smrg return instr; \ 18797ec681f3Smrg} 18807ec681f3Smrg/* clang-format on */ 18817ec681f3Smrg#define INSTR1F(f, name) __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 18827ec681f3Smrg#define INSTR1(name) __INSTR1(0, 1, name, OPC_##name) 18837ec681f3Smrg#define INSTR1NODST(name) __INSTR1(0, 0, name, OPC_##name) 18847ec681f3Smrg 18857ec681f3Smrg/* clang-format off */ 18867ec681f3Smrg#define __INSTR2(flag, name, opc) \ 18877ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name( \ 18887ec681f3Smrg struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 18897ec681f3Smrg struct ir3_instruction *b, unsigned bflags) \ 18907ec681f3Smrg{ \ 18917ec681f3Smrg struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 2); \ 18927ec681f3Smrg __ssa_dst(instr); \ 18937ec681f3Smrg __ssa_src(instr, a, aflags); \ 18947ec681f3Smrg __ssa_src(instr, b, bflags); \ 18957ec681f3Smrg instr->flags |= flag; \ 18967ec681f3Smrg return instr; \ 18977ec681f3Smrg} 18987ec681f3Smrg/* clang-format on */ 18997ec681f3Smrg#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, name##_##f, OPC_##name) 19007ec681f3Smrg#define INSTR2(name) __INSTR2(0, name, OPC_##name) 19017ec681f3Smrg 19027ec681f3Smrg/* clang-format off */ 19037ec681f3Smrg#define __INSTR3(flag, dst_count, name, opc) \ 19047ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name( \ 19057ec681f3Smrg struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 19067ec681f3Smrg struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \ 19077ec681f3Smrg unsigned cflags) \ 19087ec681f3Smrg{ \ 19097ec681f3Smrg struct ir3_instruction *instr = \ 19107ec681f3Smrg ir3_instr_create(block, opc, dst_count, 3); \ 19117ec681f3Smrg for (unsigned i = 0; i < dst_count; i++) \ 19127ec681f3Smrg __ssa_dst(instr); \ 19137ec681f3Smrg __ssa_src(instr, a, aflags); \ 19147ec681f3Smrg __ssa_src(instr, b, bflags); \ 19157ec681f3Smrg __ssa_src(instr, c, cflags); \ 19167ec681f3Smrg instr->flags |= flag; \ 19177ec681f3Smrg return instr; \ 19187ec681f3Smrg} 19197ec681f3Smrg/* clang-format on */ 19207ec681f3Smrg#define INSTR3F(f, name) __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 19217ec681f3Smrg#define INSTR3(name) __INSTR3(0, 1, name, OPC_##name) 19227ec681f3Smrg#define INSTR3NODST(name) __INSTR3(0, 0, name, OPC_##name) 19237ec681f3Smrg 19247ec681f3Smrg/* clang-format off */ 19257ec681f3Smrg#define __INSTR4(flag, dst_count, name, opc) \ 19267ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name( \ 19277ec681f3Smrg struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 19287ec681f3Smrg struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \ 19297ec681f3Smrg unsigned cflags, struct ir3_instruction *d, unsigned dflags) \ 19307ec681f3Smrg{ \ 19317ec681f3Smrg struct ir3_instruction *instr = \ 19327ec681f3Smrg ir3_instr_create(block, opc, dst_count, 4); \ 19337ec681f3Smrg for (unsigned i = 0; i < dst_count; i++) \ 19347ec681f3Smrg __ssa_dst(instr); \ 19357ec681f3Smrg __ssa_src(instr, a, aflags); \ 19367ec681f3Smrg __ssa_src(instr, b, bflags); \ 19377ec681f3Smrg __ssa_src(instr, c, cflags); \ 19387ec681f3Smrg __ssa_src(instr, d, dflags); \ 19397ec681f3Smrg instr->flags |= flag; \ 19407ec681f3Smrg return instr; \ 19417ec681f3Smrg} 19427ec681f3Smrg/* clang-format on */ 19437ec681f3Smrg#define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 19447ec681f3Smrg#define INSTR4(name) __INSTR4(0, 1, name, OPC_##name) 19457ec681f3Smrg#define INSTR4NODST(name) __INSTR4(0, 0, name, OPC_##name) 19467ec681f3Smrg 19477ec681f3Smrg/* clang-format off */ 19487ec681f3Smrg#define __INSTR5(flag, name, opc) \ 19497ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name( \ 19507ec681f3Smrg struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 19517ec681f3Smrg struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \ 19527ec681f3Smrg unsigned cflags, struct ir3_instruction *d, unsigned dflags, \ 19537ec681f3Smrg struct ir3_instruction *e, unsigned eflags) \ 19547ec681f3Smrg{ \ 19557ec681f3Smrg struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 5); \ 19567ec681f3Smrg __ssa_dst(instr); \ 19577ec681f3Smrg __ssa_src(instr, a, aflags); \ 19587ec681f3Smrg __ssa_src(instr, b, bflags); \ 19597ec681f3Smrg __ssa_src(instr, c, cflags); \ 19607ec681f3Smrg __ssa_src(instr, d, dflags); \ 19617ec681f3Smrg __ssa_src(instr, e, eflags); \ 19627ec681f3Smrg instr->flags |= flag; \ 19637ec681f3Smrg return instr; \ 19647ec681f3Smrg} 19657ec681f3Smrg/* clang-format on */ 19667ec681f3Smrg#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name) 19677ec681f3Smrg#define INSTR5(name) __INSTR5(0, name, OPC_##name) 19687ec681f3Smrg 19697ec681f3Smrg/* clang-format off */ 19707ec681f3Smrg#define __INSTR6(flag, dst_count, name, opc) \ 19717ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name( \ 19727ec681f3Smrg struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 19737ec681f3Smrg struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \ 19747ec681f3Smrg unsigned cflags, struct ir3_instruction *d, unsigned dflags, \ 19757ec681f3Smrg struct ir3_instruction *e, unsigned eflags, struct ir3_instruction *f, \ 19767ec681f3Smrg unsigned fflags) \ 19777ec681f3Smrg{ \ 19787ec681f3Smrg struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 6); \ 19797ec681f3Smrg for (unsigned i = 0; i < dst_count; i++) \ 19807ec681f3Smrg __ssa_dst(instr); \ 19817ec681f3Smrg __ssa_src(instr, a, aflags); \ 19827ec681f3Smrg __ssa_src(instr, b, bflags); \ 19837ec681f3Smrg __ssa_src(instr, c, cflags); \ 19847ec681f3Smrg __ssa_src(instr, d, dflags); \ 19857ec681f3Smrg __ssa_src(instr, e, eflags); \ 19867ec681f3Smrg __ssa_src(instr, f, fflags); \ 19877ec681f3Smrg instr->flags |= flag; \ 19887ec681f3Smrg return instr; \ 19897ec681f3Smrg} 19907ec681f3Smrg/* clang-format on */ 19917ec681f3Smrg#define INSTR6F(f, name) __INSTR6(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 19927ec681f3Smrg#define INSTR6(name) __INSTR6(0, 1, name, OPC_##name) 19937ec681f3Smrg#define INSTR6NODST(name) __INSTR6(0, 0, name, OPC_##name) 19947e102996Smaya 19957e102996Smaya/* cat0 instructions: */ 19967ec681f3SmrgINSTR1NODST(B) 19977e102996SmayaINSTR0(JUMP) 19987ec681f3SmrgINSTR1NODST(KILL) 19997ec681f3SmrgINSTR1NODST(DEMOTE) 20007e102996SmayaINSTR0(END) 20017ec681f3SmrgINSTR0(CHSH) 20027ec681f3SmrgINSTR0(CHMASK) 20037ec681f3SmrgINSTR1NODST(PREDT) 20047ec681f3SmrgINSTR0(PREDF) 20057ec681f3SmrgINSTR0(PREDE) 20067ec681f3SmrgINSTR0(GETONE) 20077ec681f3Smrg 20087ec681f3Smrg/* cat1 macros */ 20097ec681f3SmrgINSTR1(ANY_MACRO) 20107ec681f3SmrgINSTR1(ALL_MACRO) 20117ec681f3SmrgINSTR1(READ_FIRST_MACRO) 20127ec681f3SmrgINSTR2(READ_COND_MACRO) 20137ec681f3Smrg 20147ec681f3Smrgstatic inline struct ir3_instruction * 20157ec681f3Smrgir3_ELECT_MACRO(struct ir3_block *block) 20167ec681f3Smrg{ 20177ec681f3Smrg struct ir3_instruction *instr = 20187ec681f3Smrg ir3_instr_create(block, OPC_ELECT_MACRO, 1, 0); 20197ec681f3Smrg __ssa_dst(instr); 20207ec681f3Smrg return instr; 20217ec681f3Smrg} 20227e102996Smaya 20237e102996Smaya/* cat2 instructions, most 2 src but some 1 src: */ 20247e102996SmayaINSTR2(ADD_F) 20257e102996SmayaINSTR2(MIN_F) 20267e102996SmayaINSTR2(MAX_F) 20277e102996SmayaINSTR2(MUL_F) 20287e102996SmayaINSTR1(SIGN_F) 20297e102996SmayaINSTR2(CMPS_F) 20307e102996SmayaINSTR1(ABSNEG_F) 20317e102996SmayaINSTR2(CMPV_F) 20327e102996SmayaINSTR1(FLOOR_F) 20337e102996SmayaINSTR1(CEIL_F) 20347e102996SmayaINSTR1(RNDNE_F) 20357e102996SmayaINSTR1(RNDAZ_F) 20367e102996SmayaINSTR1(TRUNC_F) 20377e102996SmayaINSTR2(ADD_U) 20387e102996SmayaINSTR2(ADD_S) 20397e102996SmayaINSTR2(SUB_U) 20407e102996SmayaINSTR2(SUB_S) 20417e102996SmayaINSTR2(CMPS_U) 20427e102996SmayaINSTR2(CMPS_S) 20437e102996SmayaINSTR2(MIN_U) 20447e102996SmayaINSTR2(MIN_S) 20457e102996SmayaINSTR2(MAX_U) 20467e102996SmayaINSTR2(MAX_S) 20477e102996SmayaINSTR1(ABSNEG_S) 20487e102996SmayaINSTR2(AND_B) 20497e102996SmayaINSTR2(OR_B) 20507e102996SmayaINSTR1(NOT_B) 20517e102996SmayaINSTR2(XOR_B) 20527e102996SmayaINSTR2(CMPV_U) 20537e102996SmayaINSTR2(CMPV_S) 20547ec681f3SmrgINSTR2(MUL_U24) 20557ec681f3SmrgINSTR2(MUL_S24) 20567e102996SmayaINSTR2(MULL_U) 20577e102996SmayaINSTR1(BFREV_B) 20587e102996SmayaINSTR1(CLZ_S) 20597e102996SmayaINSTR1(CLZ_B) 20607e102996SmayaINSTR2(SHL_B) 20617e102996SmayaINSTR2(SHR_B) 20627e102996SmayaINSTR2(ASHR_B) 20637e102996SmayaINSTR2(BARY_F) 20647e102996SmayaINSTR2(MGEN_B) 20657e102996SmayaINSTR2(GETBIT_B) 20667e102996SmayaINSTR1(SETRM) 20677e102996SmayaINSTR1(CBITS_B) 20687e102996SmayaINSTR2(SHB) 20697e102996SmayaINSTR2(MSAD) 20707e102996Smaya 20717e102996Smaya/* cat3 instructions: */ 20727e102996SmayaINSTR3(MAD_U16) 20737e102996SmayaINSTR3(MADSH_U16) 20747e102996SmayaINSTR3(MAD_S16) 20757e102996SmayaINSTR3(MADSH_M16) 20767e102996SmayaINSTR3(MAD_U24) 20777e102996SmayaINSTR3(MAD_S24) 20787e102996SmayaINSTR3(MAD_F16) 20797e102996SmayaINSTR3(MAD_F32) 20807ec681f3Smrg/* NOTE: SEL_B32 checks for zero vs nonzero */ 20817e102996SmayaINSTR3(SEL_B16) 20827e102996SmayaINSTR3(SEL_B32) 20837e102996SmayaINSTR3(SEL_S16) 20847e102996SmayaINSTR3(SEL_S32) 20857e102996SmayaINSTR3(SEL_F16) 20867e102996SmayaINSTR3(SEL_F32) 20877e102996SmayaINSTR3(SAD_S16) 20887e102996SmayaINSTR3(SAD_S32) 20897e102996Smaya 20907e102996Smaya/* cat4 instructions: */ 20917e102996SmayaINSTR1(RCP) 20927e102996SmayaINSTR1(RSQ) 20937ec681f3SmrgINSTR1(HRSQ) 20947e102996SmayaINSTR1(LOG2) 20957ec681f3SmrgINSTR1(HLOG2) 20967e102996SmayaINSTR1(EXP2) 20977ec681f3SmrgINSTR1(HEXP2) 20987e102996SmayaINSTR1(SIN) 20997e102996SmayaINSTR1(COS) 21007e102996SmayaINSTR1(SQRT) 21017e102996Smaya 21027e102996Smaya/* cat5 instructions: */ 21037e102996SmayaINSTR1(DSX) 21047ec681f3SmrgINSTR1(DSXPP_MACRO) 21057e102996SmayaINSTR1(DSY) 21067ec681f3SmrgINSTR1(DSYPP_MACRO) 21077e102996SmayaINSTR1F(3D, DSX) 21087e102996SmayaINSTR1F(3D, DSY) 21097e102996SmayaINSTR1(RGETPOS) 21107e102996Smaya 21117e102996Smayastatic inline struct ir3_instruction * 21127ec681f3Smrgir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask, 21137ec681f3Smrg unsigned flags, struct ir3_instruction *samp_tex, 21147ec681f3Smrg struct ir3_instruction *src0, struct ir3_instruction *src1) 21157ec681f3Smrg{ 21167ec681f3Smrg struct ir3_instruction *sam; 21177ec681f3Smrg unsigned nreg = 0; 21187ec681f3Smrg 21197ec681f3Smrg if (flags & IR3_INSTR_S2EN) { 21207ec681f3Smrg nreg++; 21217ec681f3Smrg } 21227ec681f3Smrg if (src0) { 21237ec681f3Smrg nreg++; 21247ec681f3Smrg } 21257ec681f3Smrg if (src1) { 21267ec681f3Smrg nreg++; 21277ec681f3Smrg } 21287ec681f3Smrg 21297ec681f3Smrg sam = ir3_instr_create(block, opc, 1, nreg); 21307ec681f3Smrg sam->flags |= flags; 21317ec681f3Smrg __ssa_dst(sam)->wrmask = wrmask; 21327ec681f3Smrg if (flags & IR3_INSTR_S2EN) { 21337ec681f3Smrg __ssa_src(sam, samp_tex, (flags & IR3_INSTR_B) ? 0 : IR3_REG_HALF); 21347ec681f3Smrg } 21357ec681f3Smrg if (src0) { 21367ec681f3Smrg __ssa_src(sam, src0, 0); 21377ec681f3Smrg } 21387ec681f3Smrg if (src1) { 21397ec681f3Smrg __ssa_src(sam, src1, 0); 21407ec681f3Smrg } 21417ec681f3Smrg sam->cat5.type = type; 21427ec681f3Smrg 21437ec681f3Smrg return sam; 21447e102996Smaya} 21457e102996Smaya 21467e102996Smaya/* cat6 instructions: */ 21477e102996SmayaINSTR2(LDLV) 21487ec681f3SmrgINSTR3(LDG) 21497ec681f3SmrgINSTR3(LDL) 21507ec681f3SmrgINSTR3(LDLW) 21517ec681f3SmrgINSTR3(LDP) 21527ec681f3SmrgINSTR4NODST(STG) 21537ec681f3SmrgINSTR3NODST(STL) 21547ec681f3SmrgINSTR3NODST(STLW) 21557ec681f3SmrgINSTR3NODST(STP) 21567e102996SmayaINSTR1(RESINFO) 21577e102996SmayaINSTR1(RESFMT) 21587e102996SmayaINSTR2(ATOMIC_ADD) 21597e102996SmayaINSTR2(ATOMIC_SUB) 21607e102996SmayaINSTR2(ATOMIC_XCHG) 21617e102996SmayaINSTR2(ATOMIC_INC) 21627e102996SmayaINSTR2(ATOMIC_DEC) 21637e102996SmayaINSTR2(ATOMIC_CMPXCHG) 21647e102996SmayaINSTR2(ATOMIC_MIN) 21657e102996SmayaINSTR2(ATOMIC_MAX) 21667e102996SmayaINSTR2(ATOMIC_AND) 21677e102996SmayaINSTR2(ATOMIC_OR) 21687e102996SmayaINSTR2(ATOMIC_XOR) 21697ec681f3SmrgINSTR2(LDC) 21707e102996Smaya#if GPU >= 600 21717ec681f3SmrgINSTR3NODST(STIB); 21727e102996SmayaINSTR2(LDIB); 21737ec681f3SmrgINSTR5(LDG_A); 21747ec681f3SmrgINSTR6NODST(STG_A); 21757e102996SmayaINSTR3F(G, ATOMIC_ADD) 21767e102996SmayaINSTR3F(G, ATOMIC_SUB) 21777e102996SmayaINSTR3F(G, ATOMIC_XCHG) 21787e102996SmayaINSTR3F(G, ATOMIC_INC) 21797e102996SmayaINSTR3F(G, ATOMIC_DEC) 21807e102996SmayaINSTR3F(G, ATOMIC_CMPXCHG) 21817e102996SmayaINSTR3F(G, ATOMIC_MIN) 21827e102996SmayaINSTR3F(G, ATOMIC_MAX) 21837e102996SmayaINSTR3F(G, ATOMIC_AND) 21847e102996SmayaINSTR3F(G, ATOMIC_OR) 21857e102996SmayaINSTR3F(G, ATOMIC_XOR) 21867e102996Smaya#elif GPU >= 400 21877e102996SmayaINSTR3(LDGB) 21887ec681f3Smrg#if GPU >= 500 21897ec681f3SmrgINSTR3(LDIB) 21907ec681f3Smrg#endif 21917ec681f3SmrgINSTR4NODST(STGB) 21927ec681f3SmrgINSTR4NODST(STIB) 21937e102996SmayaINSTR4F(G, ATOMIC_ADD) 21947e102996SmayaINSTR4F(G, ATOMIC_SUB) 21957e102996SmayaINSTR4F(G, ATOMIC_XCHG) 21967e102996SmayaINSTR4F(G, ATOMIC_INC) 21977e102996SmayaINSTR4F(G, ATOMIC_DEC) 21987e102996SmayaINSTR4F(G, ATOMIC_CMPXCHG) 21997e102996SmayaINSTR4F(G, ATOMIC_MIN) 22007e102996SmayaINSTR4F(G, ATOMIC_MAX) 22017e102996SmayaINSTR4F(G, ATOMIC_AND) 22027e102996SmayaINSTR4F(G, ATOMIC_OR) 22037e102996SmayaINSTR4F(G, ATOMIC_XOR) 22047e102996Smaya#endif 22057e102996Smaya 22067e102996Smaya/* cat7 instructions: */ 22077e102996SmayaINSTR0(BAR) 22087e102996SmayaINSTR0(FENCE) 22097e102996Smaya 22107e102996Smaya/* ************************************************************************* */ 22117ec681f3Smrg#include "bitset.h" 22127e102996Smaya 22137e102996Smaya#define MAX_REG 256 22147e102996Smaya 22157ec681f3Smrgtypedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG); 22167ec681f3Smrg 22177ec681f3Smrgtypedef struct { 22187ec681f3Smrg bool mergedregs; 22197ec681f3Smrg regmaskstate_t mask; 22207ec681f3Smrg} regmask_t; 22217e102996Smaya 22227ec681f3Smrgstatic inline bool 22237ec681f3Smrg__regmask_get(regmask_t *regmask, bool half, unsigned n) 22247e102996Smaya{ 22257ec681f3Smrg if (regmask->mergedregs) { 22267ec681f3Smrg /* a6xx+ case, with merged register file, we track things in terms 22277ec681f3Smrg * of half-precision registers, with a full precisions register 22287ec681f3Smrg * using two half-precision slots. 22297ec681f3Smrg * 22307ec681f3Smrg * Pretend that special regs (a0.x, a1.x, etc.) are full registers to 22317ec681f3Smrg * avoid having them alias normal full regs. 22327ec681f3Smrg */ 22337ec681f3Smrg if (half && !is_reg_num_special(n)) { 22347ec681f3Smrg return BITSET_TEST(regmask->mask, n); 22357ec681f3Smrg } else { 22367ec681f3Smrg n *= 2; 22377ec681f3Smrg return BITSET_TEST(regmask->mask, n) || 22387ec681f3Smrg BITSET_TEST(regmask->mask, n + 1); 22397ec681f3Smrg } 22407ec681f3Smrg } else { 22417ec681f3Smrg /* pre a6xx case, with separate register file for half and full 22427ec681f3Smrg * precision: 22437ec681f3Smrg */ 22447ec681f3Smrg if (half) 22457ec681f3Smrg n += MAX_REG; 22467ec681f3Smrg return BITSET_TEST(regmask->mask, n); 22477ec681f3Smrg } 22487e102996Smaya} 22497e102996Smaya 22507ec681f3Smrgstatic inline void 22517ec681f3Smrg__regmask_set(regmask_t *regmask, bool half, unsigned n) 22527e102996Smaya{ 22537ec681f3Smrg if (regmask->mergedregs) { 22547ec681f3Smrg /* a6xx+ case, with merged register file, we track things in terms 22557ec681f3Smrg * of half-precision registers, with a full precisions register 22567ec681f3Smrg * using two half-precision slots: 22577ec681f3Smrg */ 22587ec681f3Smrg if (half && !is_reg_num_special(n)) { 22597ec681f3Smrg BITSET_SET(regmask->mask, n); 22607ec681f3Smrg } else { 22617ec681f3Smrg n *= 2; 22627ec681f3Smrg BITSET_SET(regmask->mask, n); 22637ec681f3Smrg BITSET_SET(regmask->mask, n + 1); 22647ec681f3Smrg } 22657ec681f3Smrg } else { 22667ec681f3Smrg /* pre a6xx case, with separate register file for half and full 22677ec681f3Smrg * precision: 22687ec681f3Smrg */ 22697ec681f3Smrg if (half) 22707ec681f3Smrg n += MAX_REG; 22717ec681f3Smrg BITSET_SET(regmask->mask, n); 22727ec681f3Smrg } 22737e102996Smaya} 22747e102996Smaya 22757ec681f3Smrgstatic inline void 22767ec681f3Smrg__regmask_clear(regmask_t *regmask, bool half, unsigned n) 22777e102996Smaya{ 22787ec681f3Smrg if (regmask->mergedregs) { 22797ec681f3Smrg /* a6xx+ case, with merged register file, we track things in terms 22807ec681f3Smrg * of half-precision registers, with a full precisions register 22817ec681f3Smrg * using two half-precision slots: 22827ec681f3Smrg */ 22837ec681f3Smrg if (half && !is_reg_num_special(n)) { 22847ec681f3Smrg BITSET_CLEAR(regmask->mask, n); 22857ec681f3Smrg } else { 22867ec681f3Smrg n *= 2; 22877ec681f3Smrg BITSET_CLEAR(regmask->mask, n); 22887ec681f3Smrg BITSET_CLEAR(regmask->mask, n + 1); 22897ec681f3Smrg } 22907ec681f3Smrg } else { 22917ec681f3Smrg /* pre a6xx case, with separate register file for half and full 22927ec681f3Smrg * precision: 22937ec681f3Smrg */ 22947ec681f3Smrg if (half) 22957ec681f3Smrg n += MAX_REG; 22967ec681f3Smrg BITSET_CLEAR(regmask->mask, n); 22977ec681f3Smrg } 22987e102996Smaya} 22997e102996Smaya 23007ec681f3Smrgstatic inline void 23017ec681f3Smrgregmask_init(regmask_t *regmask, bool mergedregs) 23027e102996Smaya{ 23037ec681f3Smrg memset(®mask->mask, 0, sizeof(regmask->mask)); 23047ec681f3Smrg regmask->mergedregs = mergedregs; 23057e102996Smaya} 23067e102996Smaya 23077ec681f3Smrgstatic inline void 23087ec681f3Smrgregmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) 23097ec681f3Smrg{ 23107ec681f3Smrg assert(dst->mergedregs == a->mergedregs); 23117ec681f3Smrg assert(dst->mergedregs == b->mergedregs); 23127ec681f3Smrg 23137ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++) 23147ec681f3Smrg dst->mask[i] = a->mask[i] | b->mask[i]; 23157e102996Smaya} 23167e102996Smaya 23177ec681f3Smrg 23187ec681f3Smrgstatic inline void 23197ec681f3Smrgregmask_set(regmask_t *regmask, struct ir3_register *reg) 23207ec681f3Smrg{ 23217ec681f3Smrg bool half = reg->flags & IR3_REG_HALF; 23227ec681f3Smrg if (reg->flags & IR3_REG_RELATIV) { 23237ec681f3Smrg for (unsigned i = 0; i < reg->size; i++) 23247ec681f3Smrg __regmask_set(regmask, half, reg->array.base + i); 23257ec681f3Smrg } else { 23267ec681f3Smrg for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++) 23277ec681f3Smrg if (mask & 1) 23287ec681f3Smrg __regmask_set(regmask, half, n); 23297ec681f3Smrg } 23307ec681f3Smrg} 23317ec681f3Smrg 23327ec681f3Smrgstatic inline bool 23337ec681f3Smrgregmask_get(regmask_t *regmask, struct ir3_register *reg) 23347ec681f3Smrg{ 23357ec681f3Smrg bool half = reg->flags & IR3_REG_HALF; 23367ec681f3Smrg if (reg->flags & IR3_REG_RELATIV) { 23377ec681f3Smrg for (unsigned i = 0; i < reg->size; i++) 23387ec681f3Smrg if (__regmask_get(regmask, half, reg->array.base + i)) 23397ec681f3Smrg return true; 23407ec681f3Smrg } else { 23417ec681f3Smrg for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++) 23427ec681f3Smrg if (mask & 1) 23437ec681f3Smrg if (__regmask_get(regmask, half, n)) 23447ec681f3Smrg return true; 23457ec681f3Smrg } 23467ec681f3Smrg return false; 23477ec681f3Smrg} 23487e102996Smaya/* ************************************************************************* */ 23497e102996Smaya 23507e102996Smaya#endif /* IR3_H_ */ 2351