17e102996Smaya/*
27e102996Smaya * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
37e102996Smaya *
47e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a
57e102996Smaya * copy of this software and associated documentation files (the "Software"),
67e102996Smaya * to deal in the Software without restriction, including without limitation
77e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the
97e102996Smaya * Software is furnished to do so, subject to the following conditions:
107e102996Smaya *
117e102996Smaya * The above copyright notice and this permission notice (including the next
127e102996Smaya * paragraph) shall be included in all copies or substantial portions of the
137e102996Smaya * Software.
147e102996Smaya *
157e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207e102996Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217e102996Smaya * SOFTWARE.
227e102996Smaya */
237e102996Smaya
247e102996Smaya#ifndef IR3_H_
257e102996Smaya#define IR3_H_
267e102996Smaya
277e102996Smaya#include <stdbool.h>
287ec681f3Smrg#include <stdint.h>
297e102996Smaya
307e102996Smaya#include "compiler/shader_enums.h"
317e102996Smaya
327e102996Smaya#include "util/bitscan.h"
337e102996Smaya#include "util/list.h"
347ec681f3Smrg#include "util/set.h"
357e102996Smaya#include "util/u_debug.h"
367e102996Smaya
377e102996Smaya#include "instr-a3xx.h"
387e102996Smaya
397e102996Smaya/* low level intermediate representation of an adreno shader program */
407e102996Smaya
417e102996Smayastruct ir3_compiler;
427e102996Smayastruct ir3;
437e102996Smayastruct ir3_instruction;
447e102996Smayastruct ir3_block;
457e102996Smaya
/* Size information and statistics describing a shader binary. */
struct ir3_info {
   /* Opaque pointer, used internally by the ir3 assembler. */
   void *data;

   /* Shader binary size in bytes.  NIR constants and padding are counted
    * as part of it.
    */
   uint32_t size;

   /* Where the NIR constant data begins, as a byte offset from the start
    * of the shader.
    */
   uint32_t constant_data_offset;

   /* Instruction size, in dwords. */
   uint16_t sizedwords;

   /* Instruction count, expanded to account for rpt's. */
   uint16_t instrs_count;

   /* Count of nop instructions; nopN is included. */
   uint16_t nops_count;

   uint16_t mov_count;
   uint16_t cov_count;
   uint16_t stp_count;
   uint16_t ldp_count;

   /* NOTE: max_reg and friends ignore registers the shader itself never
    * touches (ie. a vertex fetched via VFD_DECODE that the shader does
    * not touch).
    */

   /* Highest GPR # used by the shader. */
   int8_t max_reg;
   int8_t max_half_reg;
   int16_t max_const;

   /* Maximum # of waves that can be executed at once in one core, under
    * the assumption that every one of them is running this shader.
    */
   int8_t max_waves;
   bool double_threadsize;
   bool multi_dword_ldp_stp;

   /* Number of sync bits: */
   uint16_t ss;
   uint16_t sy;

   /* Estimated number of cycles spent stalled on (ss). */
   uint16_t sstall;

   /* Instruction # of the last varying fetch. */
   uint16_t last_baryf;

   /* Instruction count for each instruction category. */
   uint16_t instrs_per_cat[8];
};
877ec681f3Smrg
/* A group of registers, referenced through 'regs', together with the
 * size/alignment and interval/spill bookkeeping kept for the group.
 * NOTE(review): presumably consumed by register allocation -- confirm
 * against the users of ir3_register::merge_set.
 */
struct ir3_merge_set {
   uint16_t preferred_reg;
   uint16_t size;
   uint16_t alignment;

   unsigned interval_start;
   unsigned spill_slot;

   /* 'regs' holds 'regs_count' register pointers. */
   unsigned regs_count;
   struct ir3_register **regs;
};
997e102996Smaya
/* One register operand (source or destination) of an ir3_instruction. */
struct ir3_register {
   enum {
      IR3_REG_CONST = 1 << 0,
      IR3_REG_IMMED = 1 << 1,
      IR3_REG_HALF = 1 << 2,
      /* A shared register reads back the same value in all threads.
       * Writing one is only possible while a single thread is active
       * (that is, inside a "getone" block).
       */
      IR3_REG_SHARED = 1 << 3,
      IR3_REG_RELATIV = 1 << 4,
      IR3_REG_R = 1 << 5,
      /* It seems that most instructions can do float abs/neg but not
       * the integer equivalent.  The CP pass has to know which one is
       * intended (int or float) to do the right thing, which is why the
       * abs/neg flags come in separate float and int variants.  On top
       * of that, for .b (bitwise) operations the negate is really a
       * bitwise not, so that gets its own flag to make it more clear.
       */
      IR3_REG_FNEG = 1 << 6,
      IR3_REG_FABS = 1 << 7,
      IR3_REG_SNEG = 1 << 8,
      IR3_REG_SABS = 1 << 9,
      IR3_REG_BNOT = 1 << 10,
      /* (bits 11 and 12 are not assigned) */

      /* (ei) flag, end-input?  Set on the last bary, presumably as the
       * signal that the shader needs no more input:
       */
      IR3_REG_EI = 1 << 13,
      /* meta-flags, for intermediate stages of IR, ie. before register
       * assignment is done:
       */
      IR3_REG_SSA = 1 << 14, /* 'def' is ptr to assigning destination */
      IR3_REG_ARRAY = 1 << 15,

      /* Set on a use whenever the SSA value becomes dead after the
       * current instruction.
       */
      IR3_REG_KILL = 1 << 16,

      /* Like IR3_REG_KILL, but when one instruction uses the same SSA
       * value several times, only the first of those uses gets it.
       */
      IR3_REG_FIRST_KILL = 1 << 17,

      /* Set when a destination has no uses and is therefore dead right
       * after the instruction.  This can happen even after optimizations
       * for corner cases such as destinations of atomic instructions.
       */
      IR3_REG_UNUSED = 1 << 18,
   } flags;

   unsigned name;

   /* Used for cat5 instructions, and also for internal/IR level tracking
    * of which registers an instruction reads/writes.  "wrmask" may be a
    * bad name, because it describes both srcs and dsts that touch
    * multiple adjacent registers.
    */
   unsigned wrmask : 16; /* up to vec16 */

   /* For relative addressing, 32bits for array size is too small, but
    * otoh there are no disjoint sets to deal with, so a simple size field
    * (number of scalar components) is used instead.
    *
    * Note that the size field isn't important for relative const (no
    * register allocation is needed for constants).
    */
   unsigned size : 16;

   /* normal registers:
    * the low two bits of the reg # hold the component, so
    * rN.x becomes: (N << 2) | x
    */
   uint16_t num;
   union {
      /* immediate: */
      int32_t iim_val;
      uint32_t uim_val;
      float fim_val;
      /* relative: */
      struct {
         uint16_t id;
         int16_t offset;
         uint16_t base;
      } array;
   };

   /* For destination registers, pointer back to the instruction that
    * contains this register.
    */
   struct ir3_instruction *instr;

   /* For IR3_REG_SSA, src registers contain ptr back to the assigning
    * instruction.
    *
    * For IR3_REG_ARRAY, the pointer goes back to the last dependent array
    * access (the net effect is the same: it points back to a previous
    * instruction that we depend on).
    */
   struct ir3_register *def;

   /* Another register of the same instruction that has to share this
    * register's physical register.  A destination can be tied with one
    * source, and the two must have "tied" pointing at each other.
    */
   struct ir3_register *tied;

   unsigned spill_slot;
   unsigned next_use;

   unsigned merge_set_offset;
   struct ir3_merge_set *merge_set;
   unsigned interval_start;
   unsigned interval_end;
};
2157e102996Smaya
/*
 * Stupid/simple growable array implementation.
 *
 * DECLARE_ARRAY(type, name) expands to three declarations: the element
 * count 'name_count', the allocated capacity 'name_sz', and the element
 * pointer 'name' itself.
 */
#define DECLARE_ARRAY(type, name)                                              \
   unsigned name##_count;                                                      \
   unsigned name##_sz;                                                         \
   type *name;
2227ec681f3Smrg
/*
 * Append a value to an array declared with DECLARE_ARRAY().
 *
 * 'arr' is token-pasted to reach its _count/_sz companions, so it cannot be
 * parenthesized in the expansion.  When the array is full its capacity is
 * doubled (with a minimum of 16 entries) via reralloc_size() on 'ctx' before
 * the value is stored.
 */
#define array_insert(ctx, arr, ...)                                            \
   do {                                                                        \
      if (arr##_sz == arr##_count) {                                           \
         arr##_sz = MAX2(2 * arr##_sz, 16);                                    \
         arr = reralloc_size(ctx, arr, sizeof(arr[0]) * arr##_sz);             \
      }                                                                        \
      arr[arr##_count++] = __VA_ARGS__;                                        \
   } while (0)
2317e102996Smaya
2327e102996Smayastruct ir3_instruction {
2337ec681f3Smrg   struct ir3_block *block;
2347ec681f3Smrg   opc_t opc;
2357ec681f3Smrg   enum {
2367ec681f3Smrg      /* (sy) flag is set on first instruction, and after sample
2377ec681f3Smrg       * instructions (probably just on RAW hazard).
2387ec681f3Smrg       */
2397ec681f3Smrg      IR3_INSTR_SY = 0x001,
2407ec681f3Smrg      /* (ss) flag is set on first instruction, and first instruction
2417ec681f3Smrg       * to depend on the result of "long" instructions (RAW hazard):
2427ec681f3Smrg       *
2437ec681f3Smrg       *   rcp, rsq, log2, exp2, sin, cos, sqrt
2447ec681f3Smrg       *
2457ec681f3Smrg       * It seems to synchronize until all in-flight instructions are
2467ec681f3Smrg       * completed, for example:
2477ec681f3Smrg       *
2487ec681f3Smrg       *   rsq hr1.w, hr1.w
2497ec681f3Smrg       *   add.f hr2.z, (neg)hr2.z, hc0.y
2507ec681f3Smrg       *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
2517ec681f3Smrg       *   rsq hr2.x, hr2.x
2527ec681f3Smrg       *   (rpt1)nop
2537ec681f3Smrg       *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
2547ec681f3Smrg       *   nop
2557ec681f3Smrg       *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
2567ec681f3Smrg       *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
2577ec681f3Smrg       *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
2587ec681f3Smrg       *
2597ec681f3Smrg       * The last mul.f does not have (ss) set, presumably because the
2607ec681f3Smrg       * (ss) on the previous instruction does the job.
2617ec681f3Smrg       *
2627ec681f3Smrg       * The blob driver also seems to set it on WAR hazards, although
2637ec681f3Smrg       * not really clear if this is needed or just blob compiler being
2647ec681f3Smrg       * sloppy.  So far I haven't found a case where removing the (ss)
2657ec681f3Smrg       * causes problems for WAR hazard, but I could just be getting
2667ec681f3Smrg       * lucky:
2677ec681f3Smrg       *
2687ec681f3Smrg       *   rcp r1.y, r3.y
2697ec681f3Smrg       *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
2707ec681f3Smrg       *
2717ec681f3Smrg       */
2727ec681f3Smrg      IR3_INSTR_SS = 0x002,
2737ec681f3Smrg      /* (jp) flag is set on jump targets:
2747ec681f3Smrg       */
2757ec681f3Smrg      IR3_INSTR_JP = 0x004,
2767ec681f3Smrg      IR3_INSTR_UL = 0x008,
2777ec681f3Smrg      IR3_INSTR_3D = 0x010,
2787ec681f3Smrg      IR3_INSTR_A = 0x020,
2797ec681f3Smrg      IR3_INSTR_O = 0x040,
2807ec681f3Smrg      IR3_INSTR_P = 0x080,
2817ec681f3Smrg      IR3_INSTR_S = 0x100,
2827ec681f3Smrg      IR3_INSTR_S2EN = 0x200,
2837ec681f3Smrg      IR3_INSTR_G = 0x400,
2847ec681f3Smrg      IR3_INSTR_SAT = 0x800,
2857ec681f3Smrg      /* (cat5/cat6) Bindless */
2867ec681f3Smrg      IR3_INSTR_B = 0x1000,
2877ec681f3Smrg      /* (cat5/cat6) nonuniform */
2887ec681f3Smrg      IR3_INSTR_NONUNIF = 0x02000,
2897ec681f3Smrg      /* (cat5-only) Get some parts of the encoding from a1.x */
2907ec681f3Smrg      IR3_INSTR_A1EN = 0x04000,
2917ec681f3Smrg      /* meta-flags, for intermediate stages of IR, ie.
2927ec681f3Smrg       * before register assignment is done:
2937ec681f3Smrg       */
2947ec681f3Smrg      IR3_INSTR_MARK = 0x08000,
2957ec681f3Smrg      IR3_INSTR_UNUSED = 0x10000,
2967ec681f3Smrg   } flags;
2977ec681f3Smrg   uint8_t repeat;
2987ec681f3Smrg   uint8_t nop;
2997e102996Smaya#ifdef DEBUG
3007ec681f3Smrg   unsigned srcs_max, dsts_max;
3017e102996Smaya#endif
3027ec681f3Smrg   unsigned srcs_count, dsts_count;
3037ec681f3Smrg   struct ir3_register **dsts;
3047ec681f3Smrg   struct ir3_register **srcs;
3057ec681f3Smrg   union {
3067ec681f3Smrg      struct {
3077ec681f3Smrg         char inv1, inv2;
3087ec681f3Smrg         char comp1, comp2;
3097ec681f3Smrg         int immed;
3107ec681f3Smrg         struct ir3_block *target;
3117ec681f3Smrg         const char *target_label;
3127ec681f3Smrg         brtype_t brtype;
3137ec681f3Smrg         unsigned idx; /* for brac.N */
3147ec681f3Smrg      } cat0;
3157ec681f3Smrg      struct {
3167ec681f3Smrg         type_t src_type, dst_type;
3177ec681f3Smrg         round_t round;
3187ec681f3Smrg      } cat1;
3197ec681f3Smrg      struct {
3207ec681f3Smrg         enum {
3217ec681f3Smrg            IR3_COND_LT = 0,
3227ec681f3Smrg            IR3_COND_LE = 1,
3237ec681f3Smrg            IR3_COND_GT = 2,
3247ec681f3Smrg            IR3_COND_GE = 3,
3257ec681f3Smrg            IR3_COND_EQ = 4,
3267ec681f3Smrg            IR3_COND_NE = 5,
3277ec681f3Smrg         } condition;
3287ec681f3Smrg      } cat2;
3297ec681f3Smrg      struct {
3307ec681f3Smrg         unsigned samp, tex;
3317ec681f3Smrg         unsigned tex_base : 3;
3327ec681f3Smrg         type_t type;
3337ec681f3Smrg      } cat5;
3347ec681f3Smrg      struct {
3357ec681f3Smrg         type_t type;
3367ec681f3Smrg         /* TODO remove dst_offset and handle as a ir3_register
3377ec681f3Smrg          * which might be IMMED, similar to how src_offset is
3387ec681f3Smrg          * handled.
3397ec681f3Smrg          */
3407ec681f3Smrg         int dst_offset;
3417ec681f3Smrg         int iim_val   : 3; /* for ldgb/stgb, # of components */
3427ec681f3Smrg         unsigned d    : 3; /* for ldc, component offset */
3437ec681f3Smrg         bool typed    : 1;
3447ec681f3Smrg         unsigned base : 3;
3457ec681f3Smrg      } cat6;
3467ec681f3Smrg      struct {
3477ec681f3Smrg         unsigned w : 1; /* write */
3487ec681f3Smrg         unsigned r : 1; /* read */
3497ec681f3Smrg         unsigned l : 1; /* local */
3507ec681f3Smrg         unsigned g : 1; /* global */
3517ec681f3Smrg      } cat7;
3527ec681f3Smrg      /* for meta-instructions, just used to hold extra data
3537ec681f3Smrg       * before instruction scheduling, etc
3547ec681f3Smrg       */
3557ec681f3Smrg      struct {
3567ec681f3Smrg         int off; /* component/offset */
3577ec681f3Smrg      } split;
3587ec681f3Smrg      struct {
3597ec681f3Smrg         /* Per-source index back to the entry in the
3607ec681f3Smrg          * ir3_shader_variant::outputs table.
3617ec681f3Smrg          */
3627ec681f3Smrg         unsigned *outidxs;
3637ec681f3Smrg      } end;
3647ec681f3Smrg      struct {
3657ec681f3Smrg         /* used to temporarily hold reference to nir_phi_instr
3667ec681f3Smrg          * until we resolve the phi srcs
3677ec681f3Smrg          */
3687ec681f3Smrg         void *nphi;
3697ec681f3Smrg      } phi;
3707ec681f3Smrg      struct {
3717ec681f3Smrg         unsigned samp, tex;
3727ec681f3Smrg         unsigned input_offset;
3737ec681f3Smrg         unsigned samp_base : 3;
3747ec681f3Smrg         unsigned tex_base  : 3;
3757ec681f3Smrg      } prefetch;
3767ec681f3Smrg      struct {
3777ec681f3Smrg         /* maps back to entry in ir3_shader_variant::inputs table: */
3787ec681f3Smrg         int inidx;
3797ec681f3Smrg         /* for sysvals, identifies the sysval type.  Mostly so we can
3807ec681f3Smrg          * identify the special cases where a sysval should not be DCE'd
3817ec681f3Smrg          * (currently, just pre-fs texture fetch)
3827ec681f3Smrg          */
3837ec681f3Smrg         gl_system_value sysval;
3847ec681f3Smrg      } input;
3857ec681f3Smrg   };
3867ec681f3Smrg
3877ec681f3Smrg   /* For assigning jump offsets, we need instruction's position: */
3887ec681f3Smrg   uint32_t ip;
3897ec681f3Smrg
3907ec681f3Smrg   /* used for per-pass extra instruction data.
3917ec681f3Smrg    *
3927ec681f3Smrg    * TODO we should remove the per-pass data like this and 'use_count'
3937ec681f3Smrg    * and do something similar to what RA does w/ ir3_ra_instr_data..
3947ec681f3Smrg    * ie. use the ir3_count_instructions pass, and then use instr->ip
3957ec681f3Smrg    * to index into a table of pass-private data.
3967ec681f3Smrg    */
3977ec681f3Smrg   void *data;
3987ec681f3Smrg
3997ec681f3Smrg   /**
4007ec681f3Smrg    * Valid if pass calls ir3_find_ssa_uses().. see foreach_ssa_use()
4017ec681f3Smrg    */
4027ec681f3Smrg   struct set *uses;
4037ec681f3Smrg
4047ec681f3Smrg   int use_count; /* currently just updated/used by cp */
4057ec681f3Smrg
4067ec681f3Smrg   /* an instruction can reference at most one address register amongst
4077ec681f3Smrg    * it's src/dst registers.  Beyond that, you need to insert mov's.
4087ec681f3Smrg    *
4097ec681f3Smrg    * NOTE: do not write this directly, use ir3_instr_set_address()
4107ec681f3Smrg    */
4117ec681f3Smrg   struct ir3_register *address;
4127ec681f3Smrg
4137ec681f3Smrg   /* Tracking for additional dependent instructions.  Used to handle
4147ec681f3Smrg    * barriers, WAR hazards for arrays/SSBOs/etc.
4157ec681f3Smrg    */
4167ec681f3Smrg   DECLARE_ARRAY(struct ir3_instruction *, deps);
4177ec681f3Smrg
4187ec681f3Smrg   /*
4197ec681f3Smrg    * From PoV of instruction scheduling, not execution (ie. ignores global/
4207ec681f3Smrg    * local distinction):
4217ec681f3Smrg    *                            shared  image  atomic  SSBO  everything
4227ec681f3Smrg    *   barrier()/            -   R/W     R/W    R/W     R/W       X
4237ec681f3Smrg    *     groupMemoryBarrier()
4247ec681f3Smrg    *     memoryBarrier()
4257ec681f3Smrg    *     (but only images declared coherent?)
4267ec681f3Smrg    *   memoryBarrierAtomic() -                  R/W
4277ec681f3Smrg    *   memoryBarrierBuffer() -                          R/W
4287ec681f3Smrg    *   memoryBarrierImage()  -           R/W
4297ec681f3Smrg    *   memoryBarrierShared() -   R/W
4307ec681f3Smrg    *
4317ec681f3Smrg    * TODO I think for SSBO/image/shared, in cases where we can determine
4327ec681f3Smrg    * which variable is accessed, we don't need to care about accesses to
4337ec681f3Smrg    * different variables (unless declared coherent??)
4347ec681f3Smrg    */
4357ec681f3Smrg   enum {
4367ec681f3Smrg      IR3_BARRIER_EVERYTHING = 1 << 0,
4377ec681f3Smrg      IR3_BARRIER_SHARED_R = 1 << 1,
4387ec681f3Smrg      IR3_BARRIER_SHARED_W = 1 << 2,
4397ec681f3Smrg      IR3_BARRIER_IMAGE_R = 1 << 3,
4407ec681f3Smrg      IR3_BARRIER_IMAGE_W = 1 << 4,
4417ec681f3Smrg      IR3_BARRIER_BUFFER_R = 1 << 5,
4427ec681f3Smrg      IR3_BARRIER_BUFFER_W = 1 << 6,
4437ec681f3Smrg      IR3_BARRIER_ARRAY_R = 1 << 7,
4447ec681f3Smrg      IR3_BARRIER_ARRAY_W = 1 << 8,
4457ec681f3Smrg      IR3_BARRIER_PRIVATE_R = 1 << 9,
4467ec681f3Smrg      IR3_BARRIER_PRIVATE_W = 1 << 10,
4477ec681f3Smrg   } barrier_class,
4487ec681f3Smrg      barrier_conflict;
4497ec681f3Smrg
4507ec681f3Smrg   /* Entry in ir3_block's instruction list: */
4517ec681f3Smrg   struct list_head node;
4527ec681f3Smrg
4537ec681f3Smrg   uint32_t serialno;
4547ec681f3Smrg
4557ec681f3Smrg   // TODO only computerator/assembler:
4567ec681f3Smrg   int line;
4577ec681f3Smrg};
4587ec681f3Smrg
4597ec681f3Smrgstruct ir3 {
4607ec681f3Smrg   struct ir3_compiler *compiler;
4617ec681f3Smrg   gl_shader_stage type;
4627ec681f3Smrg
4637ec681f3Smrg   DECLARE_ARRAY(struct ir3_instruction *, inputs);
4647ec681f3Smrg
4657ec681f3Smrg   /* Track bary.f (and ldlv) instructions.. this is needed in
4667ec681f3Smrg    * scheduling to ensure that all varying fetches happen before
4677ec681f3Smrg    * any potential kill instructions.  The hw gets grumpy if all
4687ec681f3Smrg    * threads in a group are killed before the last bary.f gets
4697ec681f3Smrg    * a chance to signal end of input (ei).
4707ec681f3Smrg    */
4717ec681f3Smrg   DECLARE_ARRAY(struct ir3_instruction *, baryfs);
4727ec681f3Smrg
4737ec681f3Smrg   /* Track all indirect instructions (read and write).  To avoid
4747ec681f3Smrg    * deadlock scenario where an address register gets scheduled,
4757ec681f3Smrg    * but other dependent src instructions cannot be scheduled due
4767ec681f3Smrg    * to dependency on a *different* address register value, the
4777ec681f3Smrg    * scheduler needs to ensure that all dependencies other than
4787ec681f3Smrg    * the instruction other than the address register are scheduled
4797ec681f3Smrg    * before the one that writes the address register.  Having a
4807ec681f3Smrg    * convenient list of instructions that reference some address
4817ec681f3Smrg    * register simplifies this.
4827ec681f3Smrg    */
4837ec681f3Smrg   DECLARE_ARRAY(struct ir3_instruction *, a0_users);
4847ec681f3Smrg
4857ec681f3Smrg   /* same for a1.x: */
4867ec681f3Smrg   DECLARE_ARRAY(struct ir3_instruction *, a1_users);
4877ec681f3Smrg
4887ec681f3Smrg   /* and same for instructions that consume predicate register: */
4897ec681f3Smrg   DECLARE_ARRAY(struct ir3_instruction *, predicates);
4907ec681f3Smrg
4917ec681f3Smrg   /* Track texture sample instructions which need texture state
4927ec681f3Smrg    * patched in (for astc-srgb workaround):
4937ec681f3Smrg    */
4947ec681f3Smrg   DECLARE_ARRAY(struct ir3_instruction *, astc_srgb);
4957ec681f3Smrg
4967ec681f3Smrg   /* List of blocks: */
4977ec681f3Smrg   struct list_head block_list;
4987ec681f3Smrg
4997ec681f3Smrg   /* List of ir3_array's: */
5007ec681f3Smrg   struct list_head array_list;
5017e102996Smaya
5027e102996Smaya#ifdef DEBUG
5037ec681f3Smrg   unsigned block_count;
5047e102996Smaya#endif
5057ec681f3Smrg   unsigned instr_count;
5067e102996Smaya};
5077e102996Smaya
5087ec681f3Smrgstruct ir3_array {
5097ec681f3Smrg   struct list_head node;
5107ec681f3Smrg   unsigned length;
5117ec681f3Smrg   unsigned id;
5127e102996Smaya
5137ec681f3Smrg   struct nir_register *r;
5147e102996Smaya
5157ec681f3Smrg   /* To avoid array write's from getting DCE'd, keep track of the
5167ec681f3Smrg    * most recent write.  Any array access depends on the most
5177ec681f3Smrg    * recent write.  This way, nothing depends on writes after the
5187ec681f3Smrg    * last read.  But all the writes that happen before that have
5197ec681f3Smrg    * something depending on them
5207ec681f3Smrg    */
5217ec681f3Smrg   struct ir3_register *last_write;
5227e102996Smaya
5237ec681f3Smrg   /* extra stuff used in RA pass: */
5247ec681f3Smrg   unsigned base; /* base vreg name */
5257ec681f3Smrg   unsigned reg;  /* base physical reg */
5267ec681f3Smrg   uint16_t start_ip, end_ip;
5277e102996Smaya
5287ec681f3Smrg   /* Indicates if half-precision */
5297ec681f3Smrg   bool half;
5307e102996Smaya
5317ec681f3Smrg   bool unused;
5327e102996Smaya};
5337e102996Smaya
/* Look up an ir3_array of 'ir' by 'id' (defined outside this header). */
struct ir3_array *
ir3_lookup_array(struct ir3 *ir, unsigned id);
5357e102996Smaya
/* What a block's branch tests when the block has two successors. */
enum ir3_branch_type {
   /* condition */
   IR3_BRANCH_COND = 0,
   /* subgroupAny(condition) */
   IR3_BRANCH_ANY = 1,
   /* subgroupAll(condition) */
   IR3_BRANCH_ALL = 2,
   /* subgroupElect() */
   IR3_BRANCH_GETONE = 3,
};
5427e102996Smaya
5437e102996Smayastruct ir3_block {
5447ec681f3Smrg   struct list_head node;
5457ec681f3Smrg   struct ir3 *shader;
5467ec681f3Smrg
5477ec681f3Smrg   const struct nir_block *nblock;
5487ec681f3Smrg
5497ec681f3Smrg   struct list_head instr_list; /* list of ir3_instruction */
5507ec681f3Smrg
5517ec681f3Smrg   /* The actual branch condition, if there are two successors */
5527ec681f3Smrg   enum ir3_branch_type brtype;
5537e102996Smaya
5547ec681f3Smrg   /* each block has either one or two successors.. in case of two
5557ec681f3Smrg    * successors, 'condition' decides which one to follow.  A block preceding
5567ec681f3Smrg    * an if/else has two successors.
5577ec681f3Smrg    *
5587ec681f3Smrg    * In some cases the path that the machine actually takes through the
5597ec681f3Smrg    * program may not match the per-thread view of the CFG. In particular
5607ec681f3Smrg    * this is the case for if/else, where the machine jumps from the end of
5617ec681f3Smrg    * the if to the beginning of the else and switches active lanes. While
5627ec681f3Smrg    * most things only care about the per-thread view, we need to use the
5637ec681f3Smrg    * "physical" view when allocating shared registers. "successors" contains
5647ec681f3Smrg    * the per-thread successors, and "physical_successors" contains the
5657ec681f3Smrg    * physical successors which includes the fallthrough edge from the if to
5667ec681f3Smrg    * the else.
5677ec681f3Smrg    */
5687ec681f3Smrg   struct ir3_instruction *condition;
5697ec681f3Smrg   struct ir3_block *successors[2];
5707ec681f3Smrg   struct ir3_block *physical_successors[2];
5717e102996Smaya
5727ec681f3Smrg   DECLARE_ARRAY(struct ir3_block *, predecessors);
5737ec681f3Smrg   DECLARE_ARRAY(struct ir3_block *, physical_predecessors);
5747e102996Smaya
5757ec681f3Smrg   uint16_t start_ip, end_ip;
5767e102996Smaya
5777ec681f3Smrg   /* Track instructions which do not write a register but other-
5787ec681f3Smrg    * wise must not be discarded (such as kill, stg, etc)
5797ec681f3Smrg    */
5807ec681f3Smrg   DECLARE_ARRAY(struct ir3_instruction *, keeps);
5817e102996Smaya
5827ec681f3Smrg   /* used for per-pass extra block data.  Mainly used right
5837ec681f3Smrg    * now in RA step to track livein/liveout.
5847ec681f3Smrg    */
5857ec681f3Smrg   void *data;
5867e102996Smaya
5877ec681f3Smrg   uint32_t index;
5887e102996Smaya
5897ec681f3Smrg   struct ir3_block *imm_dom;
5907ec681f3Smrg   DECLARE_ARRAY(struct ir3_block *, dom_children);
5917ec681f3Smrg
5927ec681f3Smrg   uint32_t dom_pre_index;
5937ec681f3Smrg   uint32_t dom_post_index;
5947ec681f3Smrg
5957ec681f3Smrg   uint32_t loop_id;
5967ec681f3Smrg   uint32_t loop_depth;
5977e102996Smaya
5987e102996Smaya#ifdef DEBUG
5997ec681f3Smrg   uint32_t serialno;
6007e102996Smaya#endif
6017e102996Smaya};
6027e102996Smaya
/**
 * Return a 32-bit identifier for 'block'.
 *
 * In DEBUG builds this is the block's 'serialno'.  In other builds the
 * block's address is used instead, truncated to 32 bits, so two distinct
 * blocks are not strictly guaranteed to get distinct ids on 64-bit targets.
 */
static inline uint32_t
block_id(struct ir3_block *block)
{
#ifdef DEBUG
   return block->serialno;
#else
   /* Convert via uintptr_t, not unsigned long: unsigned long is narrower
    * than a pointer on LLP64 targets, which makes that cast a
    * size-mismatched pointer-to-integer conversion.
    */
   return (uint32_t)(uintptr_t)block;
#endif
}
6127e102996Smaya
6137ec681f3Smrgstatic inline struct ir3_block *
6147ec681f3Smrgir3_start_block(struct ir3 *ir)
6157ec681f3Smrg{
6167ec681f3Smrg   return list_first_entry(&ir->block_list, struct ir3_block, node);
6177ec681f3Smrg}
6187ec681f3Smrg
/* Predecessor bookkeeping for blocks (defined outside this header).
 * Presumably these maintain ir3_block::predecessors and
 * ir3_block::physical_predecessors -- confirm against the implementation.
 */
void
ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred);
void
ir3_block_add_physical_predecessor(struct ir3_block *block,
                                   struct ir3_block *pred);
void
ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred);
void
ir3_block_remove_physical_predecessor(struct ir3_block *block,
                                      struct ir3_block *pred);
unsigned
ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred);

/* Dominance queries (defined outside this header). */
void
ir3_calc_dominance(struct ir3 *ir);
bool
ir3_block_dominates(struct ir3_block *a, struct ir3_block *b);
6317ec681f3Smrg
6327ec681f3Smrgstruct ir3_shader_variant;
6337ec681f3Smrg
6347ec681f3Smrgstruct ir3 *ir3_create(struct ir3_compiler *compiler,
6357ec681f3Smrg                       struct ir3_shader_variant *v);
6367e102996Smayavoid ir3_destroy(struct ir3 *shader);
6377e102996Smaya
6387ec681f3Smrgvoid ir3_collect_info(struct ir3_shader_variant *v);
6397ec681f3Smrgvoid *ir3_alloc(struct ir3 *shader, int sz);
6407ec681f3Smrg
6417ec681f3Smrgunsigned ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler,
6427ec681f3Smrg                                         unsigned reg_count,
6437ec681f3Smrg                                         bool double_threadsize);
6447e102996Smaya
6457ec681f3Smrgunsigned ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
6467ec681f3Smrg                                           bool double_threadsize);
6477ec681f3Smrg
6487ec681f3Smrgbool ir3_should_double_threadsize(struct ir3_shader_variant *v,
6497ec681f3Smrg                                  unsigned regs_count);
6507ec681f3Smrg
6517ec681f3Smrgstruct ir3_block *ir3_block_create(struct ir3 *shader);
6527ec681f3Smrg
6537ec681f3Smrgstruct ir3_instruction *ir3_instr_create(struct ir3_block *block, opc_t opc,
6547ec681f3Smrg                                         int ndst, int nsrc);
6557ec681f3Smrgstruct ir3_instruction *ir3_instr_clone(struct ir3_instruction *instr);
6567ec681f3Smrgvoid ir3_instr_add_dep(struct ir3_instruction *instr,
6577ec681f3Smrg                       struct ir3_instruction *dep);
6587e102996Smayaconst char *ir3_instr_name(struct ir3_instruction *instr);
6597e102996Smaya
6607ec681f3Smrgstruct ir3_register *ir3_src_create(struct ir3_instruction *instr, int num,
6617ec681f3Smrg                                    int flags);
6627ec681f3Smrgstruct ir3_register *ir3_dst_create(struct ir3_instruction *instr, int num,
6637ec681f3Smrg                                    int flags);
6647ec681f3Smrgstruct ir3_register *ir3_reg_clone(struct ir3 *shader,
6657ec681f3Smrg                                   struct ir3_register *reg);
6667ec681f3Smrg
6677ec681f3Smrgstatic inline void
6687ec681f3Smrgir3_reg_tie(struct ir3_register *dst, struct ir3_register *src)
6697ec681f3Smrg{
6707ec681f3Smrg   assert(!dst->tied && !src->tied);
6717ec681f3Smrg   dst->tied = src;
6727ec681f3Smrg   src->tied = dst;
6737ec681f3Smrg}
6747ec681f3Smrg
/* NOTE(review): implementations are not visible in this header.  Judging by
 * the parameter names, the first records 'last_write' as the previous array
 * write for 'reg' of 'instr', and the second attaches the address-producing
 * instruction 'addr' to 'instr' -- confirm against the implementation.
 */
6757ec681f3Smrgvoid ir3_reg_set_last_array(struct ir3_instruction *instr,
6767ec681f3Smrg                            struct ir3_register *reg,
6777ec681f3Smrg                            struct ir3_register *last_write);
6787e102996Smaya
6797e102996Smayavoid ir3_instr_set_address(struct ir3_instruction *instr,
6807ec681f3Smrg                           struct ir3_instruction *addr);
6817e102996Smaya
6827ec681f3Smrgstatic inline bool
6837ec681f3Smrgir3_instr_check_mark(struct ir3_instruction *instr)
6847e102996Smaya{
6857ec681f3Smrg   if (instr->flags & IR3_INSTR_MARK)
6867ec681f3Smrg      return true; /* already visited */
6877ec681f3Smrg   instr->flags |= IR3_INSTR_MARK;
6887ec681f3Smrg   return false;
6897e102996Smaya}
6907e102996Smaya
/* NOTE(review): presumably these reset the IR3_INSTR_MARK flag used by
 * ir3_instr_check_mark() for one block / for the whole shader -- the
 * implementations are not visible in this header.
 */
6917e102996Smayavoid ir3_block_clear_mark(struct ir3_block *block);
6927e102996Smayavoid ir3_clear_mark(struct ir3 *shader);
6937e102996Smaya
/* Instruction counting; how the "_ra" variant differs is not visible here
 * -- TODO confirm.
 */
6947e102996Smayaunsigned ir3_count_instructions(struct ir3 *ir);
6957ec681f3Smrgunsigned ir3_count_instructions_ra(struct ir3 *ir);
6967e102996Smaya
6977ec681f3Smrg/**
6987ec681f3Smrg * Move 'instr' to just before 'after'
6997ec681f3Smrg */
7007ec681f3Smrgstatic inline void
7017ec681f3Smrgir3_instr_move_before(struct ir3_instruction *instr,
7027ec681f3Smrg                      struct ir3_instruction *after)
7037e102996Smaya{
7047ec681f3Smrg   list_delinit(&instr->node);
7057ec681f3Smrg   list_addtail(&instr->node, &after->node);
7067e102996Smaya}
7077e102996Smaya
7087ec681f3Smrg/**
7097ec681f3Smrg * Move 'instr' to just after 'before':
7107ec681f3Smrg */
7117ec681f3Smrgstatic inline void
7127ec681f3Smrgir3_instr_move_after(struct ir3_instruction *instr,
7137ec681f3Smrg                     struct ir3_instruction *before)
7147ec681f3Smrg{
7157ec681f3Smrg   list_delinit(&instr->node);
7167ec681f3Smrg   list_add(&instr->node, &before->node);
7177ec681f3Smrg}
7187e102996Smaya
7197ec681f3Smrg/**
7207ec681f3Smrg * Move 'instr' to the beginning of the block:
7217e102996Smaya */
7227ec681f3Smrgstatic inline void
7237ec681f3Smrgir3_instr_move_before_block(struct ir3_instruction *instr,
7247ec681f3Smrg                            struct ir3_block *block)
7257e102996Smaya{
7267ec681f3Smrg   list_delinit(&instr->node);
7277ec681f3Smrg   list_add(&instr->node, &block->instr_list);
7287e102996Smaya}
7297e102996Smaya
/* NOTE(review): presumably populates the per-instruction 'uses' set that
 * foreach_ssa_use() walks, allocating from 'mem_ctx' -- the implementation
 * is not visible in this header.
 */
7307ec681f3Smrgvoid ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);
7317ec681f3Smrg
/* Type fix-up helpers; 'half' presumably selects a 16-bit destination type
 * -- TODO confirm.
 */
7327ec681f3Smrgvoid ir3_set_dst_type(struct ir3_instruction *instr, bool half);
7337ec681f3Smrgvoid ir3_fixup_src_type(struct ir3_instruction *instr);
7347ec681f3Smrg
7357ec681f3Smrgint ir3_flut(struct ir3_register *src_reg);
7367ec681f3Smrg
/* Validity queries: whether 'flags' may be used on source 'n' of 'instr',
 * and whether 'immed' can be encoded by 'instr' (inferred from the
 * signatures only).
 */
7377ec681f3Smrgbool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
7387ec681f3Smrg
7397ec681f3Smrgbool ir3_valid_immediate(struct ir3_instruction *instr, int32_t immed);
7407ec681f3Smrg
7417ec681f3Smrg#include "util/set.h"
/* Iterate over the entries of the (__instr)->uses set, binding each entry's
 * key to __use (declared by the macro as 'struct ir3_instruction *').  The
 * statement that follows the macro is the loop body.
 *
 * The outer for-loop exists only to declare __use and to skip everything
 * when (__instr)->uses is NULL; it runs at most once.  Entries whose key is
 * NULL are skipped.  __instr is expanded more than once, so it should be
 * free of side effects.  The iteration variable __entry is presumably
 * declared by set_foreach() -- see util/set.h.
 */
7427ec681f3Smrg#define foreach_ssa_use(__use, __instr)                                        \
7437ec681f3Smrg   for (struct ir3_instruction *__use = (void *)~0; __use && (__instr)->uses;  \
7447ec681f3Smrg        __use = NULL)                                                          \
7457ec681f3Smrg      set_foreach ((__instr)->uses, __entry)                                   \
7467ec681f3Smrg         if ((__use = (void *)__entry->key))
7477ec681f3Smrg
7487ec681f3Smrgstatic inline uint32_t
7497ec681f3Smrgreg_num(const struct ir3_register *reg)
7507e102996Smaya{
7517ec681f3Smrg   return reg->num >> 2;
7527e102996Smaya}
7537e102996Smaya
7547ec681f3Smrgstatic inline uint32_t
7557ec681f3Smrgreg_comp(const struct ir3_register *reg)
7567e102996Smaya{
7577ec681f3Smrg   return reg->num & 0x3;
7587e102996Smaya}
7597e102996Smaya
7607ec681f3Smrgstatic inline bool
7617ec681f3Smrgis_flow(struct ir3_instruction *instr)
7627e102996Smaya{
7637ec681f3Smrg   return (opc_cat(instr->opc) == 0);
7647e102996Smaya}
7657e102996Smaya
7667ec681f3Smrgstatic inline bool
7677ec681f3Smrgis_kill_or_demote(struct ir3_instruction *instr)
7687e102996Smaya{
7697ec681f3Smrg   return instr->opc == OPC_KILL || instr->opc == OPC_DEMOTE;
7707e102996Smaya}
7717e102996Smaya
7727ec681f3Smrgstatic inline bool
7737ec681f3Smrgis_nop(struct ir3_instruction *instr)
7747ec681f3Smrg{
7757ec681f3Smrg   return instr->opc == OPC_NOP;
7767ec681f3Smrg}
7777ec681f3Smrg
7787ec681f3Smrgstatic inline bool
7797ec681f3Smrgis_same_type_reg(struct ir3_register *dst, struct ir3_register *src)
7807e102996Smaya{
7817ec681f3Smrg   unsigned dst_type = (dst->flags & IR3_REG_HALF);
7827ec681f3Smrg   unsigned src_type = (src->flags & IR3_REG_HALF);
7837ec681f3Smrg
7847ec681f3Smrg   /* Treat shared->normal copies as same-type, because they can generally be
7857ec681f3Smrg    * folded, but not normal->shared copies.
7867ec681f3Smrg    */
7877ec681f3Smrg   if (dst_type != src_type ||
7887ec681f3Smrg       ((dst->flags & IR3_REG_SHARED) && !(src->flags & IR3_REG_SHARED)))
7897ec681f3Smrg      return false;
7907ec681f3Smrg   else
7917ec681f3Smrg      return true;
7927e102996Smaya}
7937e102996Smaya
7947e102996Smaya/* Is it a non-transformative (ie. not type changing) mov?  This can
7957e102996Smaya * also include absneg.s/absneg.f, which for the most part can be
7967e102996Smaya * treated as a mov (single src argument).
7977e102996Smaya */
7987ec681f3Smrgstatic inline bool
7997ec681f3Smrgis_same_type_mov(struct ir3_instruction *instr)
8007e102996Smaya{
8017ec681f3Smrg   struct ir3_register *dst;
8027ec681f3Smrg
8037ec681f3Smrg   switch (instr->opc) {
8047ec681f3Smrg   case OPC_MOV:
8057ec681f3Smrg      if (instr->cat1.src_type != instr->cat1.dst_type)
8067ec681f3Smrg         return false;
8077ec681f3Smrg      /* If the type of dest reg and src reg are different,
8087ec681f3Smrg       * it shouldn't be considered as same type mov
8097ec681f3Smrg       */
8107ec681f3Smrg      if (!is_same_type_reg(instr->dsts[0], instr->srcs[0]))
8117ec681f3Smrg         return false;
8127ec681f3Smrg      break;
8137ec681f3Smrg   case OPC_ABSNEG_F:
8147ec681f3Smrg   case OPC_ABSNEG_S:
8157ec681f3Smrg      if (instr->flags & IR3_INSTR_SAT)
8167ec681f3Smrg         return false;
8177ec681f3Smrg      /* If the type of dest reg and src reg are different,
8187ec681f3Smrg       * it shouldn't be considered as same type mov
8197ec681f3Smrg       */
8207ec681f3Smrg      if (!is_same_type_reg(instr->dsts[0], instr->srcs[0]))
8217ec681f3Smrg         return false;
8227ec681f3Smrg      break;
8237ec681f3Smrg   case OPC_META_PHI:
8247ec681f3Smrg      return instr->srcs_count == 1;
8257ec681f3Smrg   default:
8267ec681f3Smrg      return false;
8277ec681f3Smrg   }
8287ec681f3Smrg
8297ec681f3Smrg   dst = instr->dsts[0];
8307ec681f3Smrg
8317ec681f3Smrg   /* mov's that write to a0 or p0.x are special: */
8327ec681f3Smrg   if (dst->num == regid(REG_P0, 0))
8337ec681f3Smrg      return false;
8347ec681f3Smrg   if (reg_num(dst) == REG_A0)
8357ec681f3Smrg      return false;
8367ec681f3Smrg
8377ec681f3Smrg   if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
8387ec681f3Smrg      return false;
8397ec681f3Smrg
8407ec681f3Smrg   return true;
8417ec681f3Smrg}
8427ec681f3Smrg
8437ec681f3Smrg/* A move from const, which changes size but not type, can also be
8447ec681f3Smrg * folded into dest instruction in some cases.
8457ec681f3Smrg */
8467ec681f3Smrgstatic inline bool
8477ec681f3Smrgis_const_mov(struct ir3_instruction *instr)
8487ec681f3Smrg{
8497ec681f3Smrg   if (instr->opc != OPC_MOV)
8507ec681f3Smrg      return false;
8517ec681f3Smrg
8527ec681f3Smrg   if (!(instr->srcs[0]->flags & IR3_REG_CONST))
8537ec681f3Smrg      return false;
8547e102996Smaya
8557ec681f3Smrg   type_t src_type = instr->cat1.src_type;
8567ec681f3Smrg   type_t dst_type = instr->cat1.dst_type;
8577e102996Smaya
8587ec681f3Smrg   return (type_float(src_type) && type_float(dst_type)) ||
8597ec681f3Smrg          (type_uint(src_type) && type_uint(dst_type)) ||
8607ec681f3Smrg          (type_sint(src_type) && type_sint(dst_type));
8617ec681f3Smrg}
8627e102996Smaya
8637ec681f3Smrgstatic inline bool
8647ec681f3Smrgis_alu(struct ir3_instruction *instr)
8657ec681f3Smrg{
8667ec681f3Smrg   return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3);
8677ec681f3Smrg}
8687e102996Smaya
8697ec681f3Smrgstatic inline bool
8707ec681f3Smrgis_sfu(struct ir3_instruction *instr)
8717ec681f3Smrg{
8727ec681f3Smrg   return (opc_cat(instr->opc) == 4);
8737ec681f3Smrg}
8747e102996Smaya
8757ec681f3Smrgstatic inline bool
8767ec681f3Smrgis_tex(struct ir3_instruction *instr)
8777ec681f3Smrg{
8787ec681f3Smrg   return (opc_cat(instr->opc) == 5);
8797e102996Smaya}
8807e102996Smaya
8817ec681f3Smrgstatic inline bool
8827ec681f3Smrgis_tex_or_prefetch(struct ir3_instruction *instr)
8837e102996Smaya{
8847ec681f3Smrg   return is_tex(instr) || (instr->opc == OPC_META_TEX_PREFETCH);
8857e102996Smaya}
8867e102996Smaya
8877ec681f3Smrgstatic inline bool
8887ec681f3Smrgis_mem(struct ir3_instruction *instr)
8897e102996Smaya{
8907ec681f3Smrg   return (opc_cat(instr->opc) == 6);
8917e102996Smaya}
8927e102996Smaya
8937ec681f3Smrgstatic inline bool
8947ec681f3Smrgis_barrier(struct ir3_instruction *instr)
8957e102996Smaya{
8967ec681f3Smrg   return (opc_cat(instr->opc) == 7);
8977e102996Smaya}
8987e102996Smaya
8997ec681f3Smrgstatic inline bool
9007ec681f3Smrgis_half(struct ir3_instruction *instr)
9017e102996Smaya{
9027ec681f3Smrg   return !!(instr->dsts[0]->flags & IR3_REG_HALF);
9037e102996Smaya}
9047e102996Smaya
9057ec681f3Smrgstatic inline bool
9067ec681f3Smrgis_shared(struct ir3_instruction *instr)
9077e102996Smaya{
9087ec681f3Smrg   return !!(instr->dsts[0]->flags & IR3_REG_SHARED);
9097e102996Smaya}
9107e102996Smaya
9117e102996Smayastatic inline bool
9127e102996Smayais_store(struct ir3_instruction *instr)
9137e102996Smaya{
9147ec681f3Smrg   /* these instructions, the "destination" register is
9157ec681f3Smrg    * actually a source, the address to store to.
9167ec681f3Smrg    */
9177ec681f3Smrg   switch (instr->opc) {
9187ec681f3Smrg   case OPC_STG:
9197ec681f3Smrg   case OPC_STG_A:
9207ec681f3Smrg   case OPC_STGB:
9217ec681f3Smrg   case OPC_STIB:
9227ec681f3Smrg   case OPC_STP:
9237ec681f3Smrg   case OPC_STL:
9247ec681f3Smrg   case OPC_STLW:
9257ec681f3Smrg   case OPC_L2G:
9267ec681f3Smrg   case OPC_G2L:
9277ec681f3Smrg      return true;
9287ec681f3Smrg   default:
9297ec681f3Smrg      return false;
9307ec681f3Smrg   }
9317e102996Smaya}
9327e102996Smaya
9337ec681f3Smrgstatic inline bool
9347ec681f3Smrgis_load(struct ir3_instruction *instr)
9357ec681f3Smrg{
9367ec681f3Smrg   switch (instr->opc) {
9377ec681f3Smrg   case OPC_LDG:
9387ec681f3Smrg   case OPC_LDG_A:
9397ec681f3Smrg   case OPC_LDGB:
9407ec681f3Smrg   case OPC_LDIB:
9417ec681f3Smrg   case OPC_LDL:
9427ec681f3Smrg   case OPC_LDP:
9437ec681f3Smrg   case OPC_L2G:
9447ec681f3Smrg   case OPC_LDLW:
9457ec681f3Smrg   case OPC_LDC:
9467ec681f3Smrg   case OPC_LDLV:
9477ec681f3Smrg      /* probably some others too.. */
9487ec681f3Smrg      return true;
9497ec681f3Smrg   default:
9507ec681f3Smrg      return false;
9517ec681f3Smrg   }
9527ec681f3Smrg}
9537ec681f3Smrg
9547ec681f3Smrgstatic inline bool
9557ec681f3Smrgis_input(struct ir3_instruction *instr)
9567ec681f3Smrg{
9577ec681f3Smrg   /* in some cases, ldlv is used to fetch varying without
9587ec681f3Smrg    * interpolation.. fortunately inloc is the first src
9597ec681f3Smrg    * register in either case
9607ec681f3Smrg    */
9617ec681f3Smrg   switch (instr->opc) {
9627ec681f3Smrg   case OPC_LDLV:
9637ec681f3Smrg   case OPC_BARY_F:
9647ec681f3Smrg      return true;
9657ec681f3Smrg   default:
9667ec681f3Smrg      return false;
9677ec681f3Smrg   }
9687ec681f3Smrg}
9697ec681f3Smrg
9707ec681f3Smrgstatic inline bool
9717ec681f3Smrgis_bool(struct ir3_instruction *instr)
9727e102996Smaya{
9737ec681f3Smrg   switch (instr->opc) {
9747ec681f3Smrg   case OPC_CMPS_F:
9757ec681f3Smrg   case OPC_CMPS_S:
9767ec681f3Smrg   case OPC_CMPS_U:
9777ec681f3Smrg      return true;
9787ec681f3Smrg   default:
9797ec681f3Smrg      return false;
9807ec681f3Smrg   }
9817e102996Smaya}
9827e102996Smaya
9837ec681f3Smrgstatic inline opc_t
9847ec681f3Smrgcat3_half_opc(opc_t opc)
9857e102996Smaya{
9867ec681f3Smrg   switch (opc) {
9877ec681f3Smrg   case OPC_MAD_F32:
9887ec681f3Smrg      return OPC_MAD_F16;
9897ec681f3Smrg   case OPC_SEL_B32:
9907ec681f3Smrg      return OPC_SEL_B16;
9917ec681f3Smrg   case OPC_SEL_S32:
9927ec681f3Smrg      return OPC_SEL_S16;
9937ec681f3Smrg   case OPC_SEL_F32:
9947ec681f3Smrg      return OPC_SEL_F16;
9957ec681f3Smrg   case OPC_SAD_S32:
9967ec681f3Smrg      return OPC_SAD_S16;
9977ec681f3Smrg   default:
9987ec681f3Smrg      return opc;
9997ec681f3Smrg   }
10007e102996Smaya}
10017e102996Smaya
10027ec681f3Smrgstatic inline opc_t
10037ec681f3Smrgcat3_full_opc(opc_t opc)
10047e102996Smaya{
10057ec681f3Smrg   switch (opc) {
10067ec681f3Smrg   case OPC_MAD_F16:
10077ec681f3Smrg      return OPC_MAD_F32;
10087ec681f3Smrg   case OPC_SEL_B16:
10097ec681f3Smrg      return OPC_SEL_B32;
10107ec681f3Smrg   case OPC_SEL_S16:
10117ec681f3Smrg      return OPC_SEL_S32;
10127ec681f3Smrg   case OPC_SEL_F16:
10137ec681f3Smrg      return OPC_SEL_F32;
10147ec681f3Smrg   case OPC_SAD_S16:
10157ec681f3Smrg      return OPC_SAD_S32;
10167ec681f3Smrg   default:
10177ec681f3Smrg      return opc;
10187ec681f3Smrg   }
10197e102996Smaya}
10207e102996Smaya
10217ec681f3Smrgstatic inline opc_t
10227ec681f3Smrgcat4_half_opc(opc_t opc)
10237e102996Smaya{
10247ec681f3Smrg   switch (opc) {
10257ec681f3Smrg   case OPC_RSQ:
10267ec681f3Smrg      return OPC_HRSQ;
10277ec681f3Smrg   case OPC_LOG2:
10287ec681f3Smrg      return OPC_HLOG2;
10297ec681f3Smrg   case OPC_EXP2:
10307ec681f3Smrg      return OPC_HEXP2;
10317ec681f3Smrg   default:
10327ec681f3Smrg      return opc;
10337ec681f3Smrg   }
10347e102996Smaya}
10357e102996Smaya
10367ec681f3Smrgstatic inline opc_t
10377ec681f3Smrgcat4_full_opc(opc_t opc)
10387e102996Smaya{
10397ec681f3Smrg   switch (opc) {
10407ec681f3Smrg   case OPC_HRSQ:
10417ec681f3Smrg      return OPC_RSQ;
10427ec681f3Smrg   case OPC_HLOG2:
10437ec681f3Smrg      return OPC_LOG2;
10447ec681f3Smrg   case OPC_HEXP2:
10457ec681f3Smrg      return OPC_EXP2;
10467ec681f3Smrg   default:
10477ec681f3Smrg      return opc;
10487ec681f3Smrg   }
10497ec681f3Smrg}
10507e102996Smaya
10517ec681f3Smrgstatic inline bool
10527ec681f3Smrgis_meta(struct ir3_instruction *instr)
10537ec681f3Smrg{
10547ec681f3Smrg   return (opc_cat(instr->opc) == -1);
10557e102996Smaya}
10567e102996Smaya
10577ec681f3Smrgstatic inline unsigned
10587ec681f3Smrgreg_elems(const struct ir3_register *reg)
10597e102996Smaya{
10607ec681f3Smrg   if (reg->flags & IR3_REG_ARRAY)
10617ec681f3Smrg      return reg->size;
10627ec681f3Smrg   else
10637ec681f3Smrg      return util_last_bit(reg->wrmask);
10647e102996Smaya}
10657e102996Smaya
10667ec681f3Smrgstatic inline unsigned
10677ec681f3Smrgreg_elem_size(const struct ir3_register *reg)
10687ec681f3Smrg{
10697ec681f3Smrg   return (reg->flags & IR3_REG_HALF) ? 1 : 2;
10707ec681f3Smrg}
10717ec681f3Smrg
/* Total size of 'reg': element count times per-element size, in the same
 * units as reg_elem_size().
 */
static inline unsigned
reg_size(const struct ir3_register *reg)
{
   const unsigned count = reg_elems(reg);
   const unsigned each = reg_elem_size(reg);

   return count * each;
}
10777ec681f3Smrg
10787ec681f3Smrgstatic inline unsigned
10797ec681f3Smrgdest_regs(struct ir3_instruction *instr)
10807ec681f3Smrg{
10817ec681f3Smrg   if (instr->dsts_count == 0)
10827ec681f3Smrg      return 0;
10837ec681f3Smrg
10847ec681f3Smrg   debug_assert(instr->dsts_count == 1);
10857ec681f3Smrg   return util_last_bit(instr->dsts[0]->wrmask);
10867ec681f3Smrg}
10877ec681f3Smrg
10887ec681f3Smrg/* is dst a normal temp register: */
10897ec681f3Smrgstatic inline bool
10907ec681f3Smrgis_dest_gpr(struct ir3_register *dst)
10917ec681f3Smrg{
10927ec681f3Smrg   if (dst->wrmask == 0)
10937ec681f3Smrg      return false;
10947ec681f3Smrg   if ((reg_num(dst) == REG_A0) || (dst->num == regid(REG_P0, 0)))
10957ec681f3Smrg      return false;
10967ec681f3Smrg   return true;
10977ec681f3Smrg}
10987ec681f3Smrg
10997ec681f3Smrgstatic inline bool
11007ec681f3Smrgwrites_gpr(struct ir3_instruction *instr)
11017e102996Smaya{
11027ec681f3Smrg   if (dest_regs(instr) == 0)
11037ec681f3Smrg      return false;
11047ec681f3Smrg   return is_dest_gpr(instr->dsts[0]);
11057ec681f3Smrg}
11067ec681f3Smrg
11077ec681f3Smrgstatic inline bool
11087ec681f3Smrgwrites_addr0(struct ir3_instruction *instr)
11097ec681f3Smrg{
11107ec681f3Smrg   /* Note: only the first dest can write to a0.x */
11117ec681f3Smrg   if (instr->dsts_count > 0) {
11127ec681f3Smrg      struct ir3_register *dst = instr->dsts[0];
11137ec681f3Smrg      return dst->num == regid(REG_A0, 0);
11147ec681f3Smrg   }
11157ec681f3Smrg   return false;
11167ec681f3Smrg}
11177ec681f3Smrg
11187ec681f3Smrgstatic inline bool
11197ec681f3Smrgwrites_addr1(struct ir3_instruction *instr)
11207ec681f3Smrg{
11217ec681f3Smrg   /* Note: only the first dest can write to a1.x */
11227ec681f3Smrg   if (instr->dsts_count > 0) {
11237ec681f3Smrg      struct ir3_register *dst = instr->dsts[0];
11247ec681f3Smrg      return dst->num == regid(REG_A0, 1);
11257ec681f3Smrg   }
11267ec681f3Smrg   return false;
11277ec681f3Smrg}
11287ec681f3Smrg
11297ec681f3Smrgstatic inline bool
11307ec681f3Smrgwrites_pred(struct ir3_instruction *instr)
11317ec681f3Smrg{
11327ec681f3Smrg   /* Note: only the first dest can write to p0.x */
11337ec681f3Smrg   if (instr->dsts_count > 0) {
11347ec681f3Smrg      struct ir3_register *dst = instr->dsts[0];
11357ec681f3Smrg      return reg_num(dst) == REG_P0;
11367ec681f3Smrg   }
11377ec681f3Smrg   return false;
11387ec681f3Smrg}
11397ec681f3Smrg
11407ec681f3Smrg/* Is it something other than a normal register. Shared regs, p0, and a0/a1
11417ec681f3Smrg * are considered special here. Special registers are always accessed with one
11427ec681f3Smrg * size and never alias normal registers, even though a naive calculation
11437ec681f3Smrg * would sometimes make it seem like e.g. r30.z aliases a0.x.
11447ec681f3Smrg */
11457ec681f3Smrgstatic inline bool
11467ec681f3Smrgis_reg_special(const struct ir3_register *reg)
11477ec681f3Smrg{
11487ec681f3Smrg   return (reg->flags & IR3_REG_SHARED) || (reg_num(reg) == REG_A0) ||
11497ec681f3Smrg          (reg_num(reg) == REG_P0);
11507ec681f3Smrg}
11517ec681f3Smrg
/* Variant of the special-register test for callers that only have a raw
 * register number rather than a register: r48.x and above are
 * shared/special.
 */
static inline bool
is_reg_num_special(unsigned num)
{
   /* Register 48, component x, with four components per register. */
   const unsigned first_special = 48 * 4;

   return first_special <= num;
}
11607e102996Smaya
11617e102996Smaya/* returns defining instruction for reg */
11627e102996Smaya/* TODO better name */
11637ec681f3Smrgstatic inline struct ir3_instruction *
11647ec681f3Smrgssa(struct ir3_register *reg)
11657e102996Smaya{
11667ec681f3Smrg   if ((reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) && reg->def)
11677ec681f3Smrg      return reg->def->instr;
11687ec681f3Smrg   return NULL;
11697e102996Smaya}
11707e102996Smaya
11717ec681f3Smrgstatic inline bool
11727ec681f3Smrgconflicts(struct ir3_register *a, struct ir3_register *b)
11737ec681f3Smrg{
11747ec681f3Smrg   return (a && b) && (a->def != b->def);
11757ec681f3Smrg}
11767ec681f3Smrg
11777ec681f3Smrgstatic inline bool
11787ec681f3Smrgreg_gpr(struct ir3_register *r)
11797e102996Smaya{
11807ec681f3Smrg   if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
11817ec681f3Smrg      return false;
11827ec681f3Smrg   if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
11837ec681f3Smrg      return false;
11847ec681f3Smrg   return true;
11857e102996Smaya}
11867e102996Smaya
11877ec681f3Smrgstatic inline type_t
11887ec681f3Smrghalf_type(type_t type)
11897e102996Smaya{
11907ec681f3Smrg   switch (type) {
11917ec681f3Smrg   case TYPE_F32:
11927ec681f3Smrg      return TYPE_F16;
11937ec681f3Smrg   case TYPE_U32:
11947ec681f3Smrg      return TYPE_U16;
11957ec681f3Smrg   case TYPE_S32:
11967ec681f3Smrg      return TYPE_S16;
11977ec681f3Smrg   case TYPE_F16:
11987ec681f3Smrg   case TYPE_U16:
11997ec681f3Smrg   case TYPE_S16:
12007ec681f3Smrg      return type;
12017ec681f3Smrg   default:
12027ec681f3Smrg      assert(0);
12037ec681f3Smrg      return ~0;
12047ec681f3Smrg   }
12057e102996Smaya}
12067e102996Smaya
12077ec681f3Smrgstatic inline type_t
12087ec681f3Smrgfull_type(type_t type)
12097e102996Smaya{
12107ec681f3Smrg   switch (type) {
12117ec681f3Smrg   case TYPE_F16:
12127ec681f3Smrg      return TYPE_F32;
12137ec681f3Smrg   case TYPE_U16:
12147ec681f3Smrg      return TYPE_U32;
12157ec681f3Smrg   case TYPE_S16:
12167ec681f3Smrg      return TYPE_S32;
12177ec681f3Smrg   case TYPE_F32:
12187ec681f3Smrg   case TYPE_U32:
12197ec681f3Smrg   case TYPE_S32:
12207ec681f3Smrg      return type;
12217ec681f3Smrg   default:
12227ec681f3Smrg      assert(0);
12237ec681f3Smrg      return ~0;
12247ec681f3Smrg   }
12257e102996Smaya}
12267e102996Smaya
12277e102996Smaya/* some cat2 instructions (ie. those which are not float) can embed an
12287e102996Smaya * immediate:
12297e102996Smaya */
12307ec681f3Smrgstatic inline bool
12317ec681f3Smrgir3_cat2_int(opc_t opc)
12327ec681f3Smrg{
12337ec681f3Smrg   switch (opc) {
12347ec681f3Smrg   case OPC_ADD_U:
12357ec681f3Smrg   case OPC_ADD_S:
12367ec681f3Smrg   case OPC_SUB_U:
12377ec681f3Smrg   case OPC_SUB_S:
12387ec681f3Smrg   case OPC_CMPS_U:
12397ec681f3Smrg   case OPC_CMPS_S:
12407ec681f3Smrg   case OPC_MIN_U:
12417ec681f3Smrg   case OPC_MIN_S:
12427ec681f3Smrg   case OPC_MAX_U:
12437ec681f3Smrg   case OPC_MAX_S:
12447ec681f3Smrg   case OPC_CMPV_U:
12457ec681f3Smrg   case OPC_CMPV_S:
12467ec681f3Smrg   case OPC_MUL_U24:
12477ec681f3Smrg   case OPC_MUL_S24:
12487ec681f3Smrg   case OPC_MULL_U:
12497ec681f3Smrg   case OPC_CLZ_S:
12507ec681f3Smrg   case OPC_ABSNEG_S:
12517ec681f3Smrg   case OPC_AND_B:
12527ec681f3Smrg   case OPC_OR_B:
12537ec681f3Smrg   case OPC_NOT_B:
12547ec681f3Smrg   case OPC_XOR_B:
12557ec681f3Smrg   case OPC_BFREV_B:
12567ec681f3Smrg   case OPC_CLZ_B:
12577ec681f3Smrg   case OPC_SHL_B:
12587ec681f3Smrg   case OPC_SHR_B:
12597ec681f3Smrg   case OPC_ASHR_B:
12607ec681f3Smrg   case OPC_MGEN_B:
12617ec681f3Smrg   case OPC_GETBIT_B:
12627ec681f3Smrg   case OPC_CBITS_B:
12637ec681f3Smrg   case OPC_BARY_F:
12647ec681f3Smrg      return true;
12657ec681f3Smrg
12667ec681f3Smrg   default:
12677ec681f3Smrg      return false;
12687ec681f3Smrg   }
12697e102996Smaya}
12707e102996Smaya
12717e102996Smaya/* map cat2 instruction to valid abs/neg flags: */
12727ec681f3Smrgstatic inline unsigned
12737ec681f3Smrgir3_cat2_absneg(opc_t opc)
12747ec681f3Smrg{
12757ec681f3Smrg   switch (opc) {
12767ec681f3Smrg   case OPC_ADD_F:
12777ec681f3Smrg   case OPC_MIN_F:
12787ec681f3Smrg   case OPC_MAX_F:
12797ec681f3Smrg   case OPC_MUL_F:
12807ec681f3Smrg   case OPC_SIGN_F:
12817ec681f3Smrg   case OPC_CMPS_F:
12827ec681f3Smrg   case OPC_ABSNEG_F:
12837ec681f3Smrg   case OPC_CMPV_F:
12847ec681f3Smrg   case OPC_FLOOR_F:
12857ec681f3Smrg   case OPC_CEIL_F:
12867ec681f3Smrg   case OPC_RNDNE_F:
12877ec681f3Smrg   case OPC_RNDAZ_F:
12887ec681f3Smrg   case OPC_TRUNC_F:
12897ec681f3Smrg   case OPC_BARY_F:
12907ec681f3Smrg      return IR3_REG_FABS | IR3_REG_FNEG;
12917ec681f3Smrg
12927ec681f3Smrg   case OPC_ADD_U:
12937ec681f3Smrg   case OPC_ADD_S:
12947ec681f3Smrg   case OPC_SUB_U:
12957ec681f3Smrg   case OPC_SUB_S:
12967ec681f3Smrg   case OPC_CMPS_U:
12977ec681f3Smrg   case OPC_CMPS_S:
12987ec681f3Smrg   case OPC_MIN_U:
12997ec681f3Smrg   case OPC_MIN_S:
13007ec681f3Smrg   case OPC_MAX_U:
13017ec681f3Smrg   case OPC_MAX_S:
13027ec681f3Smrg   case OPC_CMPV_U:
13037ec681f3Smrg   case OPC_CMPV_S:
13047ec681f3Smrg   case OPC_MUL_U24:
13057ec681f3Smrg   case OPC_MUL_S24:
13067ec681f3Smrg   case OPC_MULL_U:
13077ec681f3Smrg   case OPC_CLZ_S:
13087ec681f3Smrg      return 0;
13097ec681f3Smrg
13107ec681f3Smrg   case OPC_ABSNEG_S:
13117ec681f3Smrg      return IR3_REG_SABS | IR3_REG_SNEG;
13127ec681f3Smrg
13137ec681f3Smrg   case OPC_AND_B:
13147ec681f3Smrg   case OPC_OR_B:
13157ec681f3Smrg   case OPC_NOT_B:
13167ec681f3Smrg   case OPC_XOR_B:
13177ec681f3Smrg   case OPC_BFREV_B:
13187ec681f3Smrg   case OPC_CLZ_B:
13197ec681f3Smrg   case OPC_SHL_B:
13207ec681f3Smrg   case OPC_SHR_B:
13217ec681f3Smrg   case OPC_ASHR_B:
13227ec681f3Smrg   case OPC_MGEN_B:
13237ec681f3Smrg   case OPC_GETBIT_B:
13247ec681f3Smrg   case OPC_CBITS_B:
13257ec681f3Smrg      return IR3_REG_BNOT;
13267ec681f3Smrg
13277ec681f3Smrg   default:
13287ec681f3Smrg      return 0;
13297ec681f3Smrg   }
13307e102996Smaya}
13317e102996Smaya
13327e102996Smaya/* map cat3 instructions to valid abs/neg flags: */
13337ec681f3Smrgstatic inline unsigned
13347ec681f3Smrgir3_cat3_absneg(opc_t opc)
13357ec681f3Smrg{
13367ec681f3Smrg   switch (opc) {
13377ec681f3Smrg   case OPC_MAD_F16:
13387ec681f3Smrg   case OPC_MAD_F32:
13397ec681f3Smrg   case OPC_SEL_F16:
13407ec681f3Smrg   case OPC_SEL_F32:
13417ec681f3Smrg      return IR3_REG_FNEG;
13427ec681f3Smrg
13437ec681f3Smrg   case OPC_MAD_U16:
13447ec681f3Smrg   case OPC_MADSH_U16:
13457ec681f3Smrg   case OPC_MAD_S16:
13467ec681f3Smrg   case OPC_MADSH_M16:
13477ec681f3Smrg   case OPC_MAD_U24:
13487ec681f3Smrg   case OPC_MAD_S24:
13497ec681f3Smrg   case OPC_SEL_S16:
13507ec681f3Smrg   case OPC_SEL_S32:
13517ec681f3Smrg   case OPC_SAD_S16:
13527ec681f3Smrg   case OPC_SAD_S32:
13537ec681f3Smrg      /* neg *may* work on 3rd src.. */
13547ec681f3Smrg
13557ec681f3Smrg   case OPC_SEL_B16:
13567ec681f3Smrg   case OPC_SEL_B32:
13577ec681f3Smrg
13587ec681f3Smrg   case OPC_SHLG_B16:
13597ec681f3Smrg
13607ec681f3Smrg   default:
13617ec681f3Smrg      return 0;
13627ec681f3Smrg   }
13637ec681f3Smrg}
13647ec681f3Smrg
13657ec681f3Smrg/* Return the type (float, int, or uint) the op uses when converting from the
13667ec681f3Smrg * internal result of the op (which is assumed to be the same size as the
13677ec681f3Smrg * sources) to the destination when they are not the same size. If F32 it does
13687ec681f3Smrg * a floating-point conversion, if U32 it does a truncation/zero-extension, if
13697ec681f3Smrg * S32 it does a truncation/sign-extension. "can_fold" will be false if it
13707ec681f3Smrg * doesn't do anything sensible or is unknown.
13717ec681f3Smrg */
13727ec681f3Smrgstatic inline type_t
13737ec681f3Smrgir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold)
13747ec681f3Smrg{
13757ec681f3Smrg   *can_fold = true;
13767ec681f3Smrg   switch (instr->opc) {
13777ec681f3Smrg   case OPC_ADD_F:
13787ec681f3Smrg   case OPC_MUL_F:
13797ec681f3Smrg   case OPC_BARY_F:
13807ec681f3Smrg   case OPC_MAD_F32:
13817ec681f3Smrg   case OPC_MAD_F16:
13827ec681f3Smrg      return TYPE_F32;
13837ec681f3Smrg
13847ec681f3Smrg   case OPC_ADD_U:
13857ec681f3Smrg   case OPC_SUB_U:
13867ec681f3Smrg   case OPC_MIN_U:
13877ec681f3Smrg   case OPC_MAX_U:
13887ec681f3Smrg   case OPC_AND_B:
13897ec681f3Smrg   case OPC_OR_B:
13907ec681f3Smrg   case OPC_NOT_B:
13917ec681f3Smrg   case OPC_XOR_B:
13927ec681f3Smrg   case OPC_MUL_U24:
13937ec681f3Smrg   case OPC_MULL_U:
13947ec681f3Smrg   case OPC_SHL_B:
13957ec681f3Smrg   case OPC_SHR_B:
13967ec681f3Smrg   case OPC_ASHR_B:
13977ec681f3Smrg   case OPC_MAD_U24:
13987ec681f3Smrg   /* Comparison ops zero-extend/truncate their results, so consider them as
13997ec681f3Smrg    * unsigned here.
14007ec681f3Smrg    */
14017ec681f3Smrg   case OPC_CMPS_F:
14027ec681f3Smrg   case OPC_CMPV_F:
14037ec681f3Smrg   case OPC_CMPS_U:
14047ec681f3Smrg   case OPC_CMPS_S:
14057ec681f3Smrg      return TYPE_U32;
14067ec681f3Smrg
14077ec681f3Smrg   case OPC_ADD_S:
14087ec681f3Smrg   case OPC_SUB_S:
14097ec681f3Smrg   case OPC_MIN_S:
14107ec681f3Smrg   case OPC_MAX_S:
14117ec681f3Smrg   case OPC_ABSNEG_S:
14127ec681f3Smrg   case OPC_MUL_S24:
14137ec681f3Smrg   case OPC_MAD_S24:
14147ec681f3Smrg      return TYPE_S32;
14157ec681f3Smrg
14167ec681f3Smrg   /* We assume that any move->move folding that could be done was done by
14177ec681f3Smrg    * NIR.
14187ec681f3Smrg    */
14197ec681f3Smrg   case OPC_MOV:
14207ec681f3Smrg   default:
14217ec681f3Smrg      *can_fold = false;
14227ec681f3Smrg      return TYPE_U32;
14237ec681f3Smrg   }
14247ec681f3Smrg}
14257ec681f3Smrg
14267ec681f3Smrg/* Return the src and dst types for the conversion which is already folded
14277ec681f3Smrg * into the op. We can assume that instr has folded in a conversion from
14287ec681f3Smrg * ir3_output_conv_src_type() to ir3_output_conv_dst_type(). Only makes sense
14297ec681f3Smrg * to call if ir3_output_conv_type() returns can_fold = true.
14307ec681f3Smrg */
14317ec681f3Smrgstatic inline type_t
14327ec681f3Smrgir3_output_conv_src_type(struct ir3_instruction *instr, type_t base_type)
14337ec681f3Smrg{
14347ec681f3Smrg   switch (instr->opc) {
14357ec681f3Smrg   case OPC_CMPS_F:
14367ec681f3Smrg   case OPC_CMPV_F:
14377ec681f3Smrg   case OPC_CMPS_U:
14387ec681f3Smrg   case OPC_CMPS_S:
14397ec681f3Smrg      /* Comparisons only return 0/1 and the size of the comparison sources
14407ec681f3Smrg       * is irrelevant, never consider them as having an output conversion
14417ec681f3Smrg       * by returning a type with the dest size here:
14427ec681f3Smrg       */
14437ec681f3Smrg      return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type)
14447ec681f3Smrg                                                    : full_type(base_type);
14457ec681f3Smrg
14467ec681f3Smrg   case OPC_BARY_F:
14477ec681f3Smrg      /* bary.f doesn't have an explicit source, but we can assume here that
14487ec681f3Smrg       * the varying data it reads is in fp32.
14497ec681f3Smrg       *
14507ec681f3Smrg       * This may be fp16 on older gen's depending on some register
14517ec681f3Smrg       * settings, but it's probably not worth plumbing that through for a
14527ec681f3Smrg       * small improvement that NIR would hopefully handle for us anyway.
14537ec681f3Smrg       */
14547ec681f3Smrg      return TYPE_F32;
14557ec681f3Smrg
14567ec681f3Smrg   default:
14577ec681f3Smrg      return (instr->srcs[0]->flags & IR3_REG_HALF) ? half_type(base_type)
14587ec681f3Smrg                                                    : full_type(base_type);
14597ec681f3Smrg   }
14607ec681f3Smrg}
14617ec681f3Smrg
14627ec681f3Smrgstatic inline type_t
14637ec681f3Smrgir3_output_conv_dst_type(struct ir3_instruction *instr, type_t base_type)
14647ec681f3Smrg{
14657ec681f3Smrg   return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type)
14667ec681f3Smrg                                                 : full_type(base_type);
14677ec681f3Smrg}
14687ec681f3Smrg
14697ec681f3Smrg/* Some instructions have signed/unsigned variants which are identical except
14707ec681f3Smrg * for whether the folded conversion sign-extends or zero-extends, and we can
14717ec681f3Smrg * fold in a mismatching move by rewriting the opcode. Return the opcode to
14727ec681f3Smrg * switch signedness, and whether one exists.
14737ec681f3Smrg */
14747ec681f3Smrgstatic inline opc_t
14757ec681f3Smrgir3_try_swap_signedness(opc_t opc, bool *can_swap)
14767ec681f3Smrg{
14777ec681f3Smrg   switch (opc) {
14787ec681f3Smrg#define PAIR(u, s)                                                             \
14797ec681f3Smrg   case OPC_##u:                                                               \
14807ec681f3Smrg      return OPC_##s;                                                          \
14817ec681f3Smrg   case OPC_##s:                                                               \
14827ec681f3Smrg      return OPC_##u;
14837ec681f3Smrg      PAIR(ADD_U, ADD_S)
14847ec681f3Smrg      PAIR(SUB_U, SUB_S)
14857ec681f3Smrg      /* Note: these are only identical when the sources are half, but that's
14867ec681f3Smrg       * the only case we call this function for anyway.
14877ec681f3Smrg       */
14887ec681f3Smrg      PAIR(MUL_U24, MUL_S24)
14897ec681f3Smrg
14907ec681f3Smrg   default:
14917ec681f3Smrg      *can_swap = false;
14927ec681f3Smrg      return opc;
14937ec681f3Smrg   }
14947e102996Smaya}
14957e102996Smaya
/* Value with the low n bits set.  The shift is performed on a plain (signed)
 * int constant, so n has to be smaller than the number of value bits of int;
 * a larger n overflows the shift.  The argument is expanded exactly once.
 */
14967e102996Smaya#define MASK(n) ((1 << (n)) - 1)
14977e102996Smaya
14987e102996Smaya/* iterator for an instruction's sources (reg), also returns src #:
 *
 * Declares both __srcreg and __n.  Nothing runs when srcs_count is zero, and
 * NULL entries of srcs[] are skipped.  The count is sampled once into the
 * hidden __cnt before the first iteration.  The outer single-pass `for` only
 * exists to declare __srcreg (seeded with a non-NULL sentinel so the pass is
 * entered) and resets it to NULL afterwards, so a `break` in the body leaves
 * the whole construct.  __instr is expanded several times, so it must not
 * have side effects.
 */
14997ec681f3Smrg#define foreach_src_n(__srcreg, __n, __instr)                                  \
15007ec681f3Smrg   if ((__instr)->srcs_count)                                                  \
15017ec681f3Smrg      for (struct ir3_register *__srcreg = (void *)~0; __srcreg;               \
15027ec681f3Smrg           __srcreg = NULL)                                                    \
15037ec681f3Smrg         for (unsigned __cnt = (__instr)->srcs_count, __n = 0; __n < __cnt;    \
15047ec681f3Smrg              __n++)                                                           \
15057ec681f3Smrg            if ((__srcreg = (__instr)->srcs[__n]))
15067e102996Smaya
15077e102996Smaya/* iterator for an instruction's sources (reg); same as foreach_src_n with the
 * index kept in a hidden variable named __i:
 */
15087ec681f3Smrg#define foreach_src(__srcreg, __instr) foreach_src_n (__srcreg, __i, __instr)
15097ec681f3Smrg
15107ec681f3Smrg/* iterator for an instruction's destinations (reg), also returns dst #:
 *
 * Declares both __dstreg and __n.  Nothing runs when dsts_count is zero, and
 * NULL entries of dsts[] are skipped.  The count is sampled once into the
 * hidden __cnt before the first iteration.  The outer single-pass `for` only
 * exists to declare __dstreg (seeded with a non-NULL sentinel) and resets it
 * to NULL afterwards, so a `break` in the body leaves the whole construct.
 * __instr is expanded several times, so it must not have side effects.
 */
15117ec681f3Smrg#define foreach_dst_n(__dstreg, __n, __instr)                                  \
15127ec681f3Smrg   if ((__instr)->dsts_count)                                                  \
15137ec681f3Smrg      for (struct ir3_register *__dstreg = (void *)~0; __dstreg;               \
15147ec681f3Smrg           __dstreg = NULL)                                                    \
15157ec681f3Smrg         for (unsigned __cnt = (__instr)->dsts_count, __n = 0; __n < __cnt;    \
15167ec681f3Smrg              __n++)                                                           \
15177ec681f3Smrg            if ((__dstreg = (__instr)->dsts[__n]))
15187ec681f3Smrg
15197ec681f3Smrg/* iterator for an instruction's destinations (reg); same as foreach_dst_n
 * with the index kept in a hidden variable named __i:
 */
15207ec681f3Smrg#define foreach_dst(__dstreg, __instr) foreach_dst_n (__dstreg, __i, __instr)
15217ec681f3Smrg
15227ec681f3Smrgstatic inline unsigned
15237ec681f3Smrg__ssa_src_cnt(struct ir3_instruction *instr)
15247e102996Smaya{
15257ec681f3Smrg   return instr->srcs_count + instr->deps_count;
15267e102996Smaya}
15277e102996Smaya
15287ec681f3Smrgstatic inline bool
15297ec681f3Smrg__is_false_dep(struct ir3_instruction *instr, unsigned n)
15307e102996Smaya{
15317ec681f3Smrg   if (n >= instr->srcs_count)
15327ec681f3Smrg      return true;
15337ec681f3Smrg   return false;
15347e102996Smaya}
15357e102996Smaya
15367ec681f3Smrgstatic inline struct ir3_instruction **
15377ec681f3Smrg__ssa_srcp_n(struct ir3_instruction *instr, unsigned n)
15387e102996Smaya{
15397ec681f3Smrg   if (__is_false_dep(instr, n))
15407ec681f3Smrg      return &instr->deps[n - instr->srcs_count];
15417ec681f3Smrg   if (ssa(instr->srcs[n]))
15427ec681f3Smrg      return &instr->srcs[n]->def->instr;
15437ec681f3Smrg   return NULL;
15447e102996Smaya}
15457e102996Smaya
/* Iterate over pointers to the producer instructions of __instr.  For every
 * index in [0, __ssa_src_cnt(__instr)) the body runs with __srcp set to the
 * result of __ssa_srcp_n(); indices for which that returns NULL are skipped.
 * __srcp and __n are declared by the macro, the count is sampled once into
 * the hidden __cnt, and __instr is expanded more than once.
 */
15467ec681f3Smrg#define foreach_ssa_srcp_n(__srcp, __n, __instr)                               \
15477ec681f3Smrg   for (struct ir3_instruction **__srcp = (void *)~0; __srcp; __srcp = NULL)   \
15487ec681f3Smrg      for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt;      \
15497ec681f3Smrg           __n++)                                                              \
15507ec681f3Smrg         if ((__srcp = __ssa_srcp_n(__instr, __n)))
15517ec681f3Smrg
/* Same as foreach_ssa_srcp_n, with the index kept in a hidden variable named
 * __i.
 */
15527ec681f3Smrg#define foreach_ssa_srcp(__srcp, __instr)                                      \
15537ec681f3Smrg   foreach_ssa_srcp_n (__srcp, __i, __instr)
15547e102996Smaya
15557e102996Smaya/* iterator for an instruction's SSA sources (instr), also returns src #:
 *
 * Built on foreach_ssa_srcp_n using a hidden pointer variable literally named
 * __srcp; the body runs only when *__srcp is non-NULL, with __srcinst set to
 * that instruction.  __srcinst and __n are declared by the macro.
 */
15567ec681f3Smrg#define foreach_ssa_src_n(__srcinst, __n, __instr)                             \
15577ec681f3Smrg   for (struct ir3_instruction *__srcinst = (void *)~0; __srcinst;             \
15587ec681f3Smrg        __srcinst = NULL)                                                      \
15597ec681f3Smrg      foreach_ssa_srcp_n (__srcp, __n, __instr)                                \
15607ec681f3Smrg         if ((__srcinst = *__srcp))
15617e102996Smaya
15627e102996Smaya/* iterator for an instruction's SSA sources (instr); the index is kept in a
 * hidden variable named __i:
 */
15637ec681f3Smrg#define foreach_ssa_src(__srcinst, __instr)                                    \
15647ec681f3Smrg   foreach_ssa_src_n (__srcinst, __i, __instr)
15657ec681f3Smrg
15667ec681f3Smrg/* iterators for shader inputs:
 *
 * Walk (__ir)->inputs[0 .. inputs_count), skipping NULL entries.  Unlike the
 * source/destination iterators, inputs_count is re-read on every iteration
 * instead of being sampled once.  __ininstr and __cnt (the index) are
 * declared by the macro; foreach_input keeps the index in a hidden __i.
 */
15677ec681f3Smrg#define foreach_input_n(__ininstr, __cnt, __ir)                                \
15687ec681f3Smrg   for (struct ir3_instruction *__ininstr = (void *)~0; __ininstr;             \
15697ec681f3Smrg        __ininstr = NULL)                                                      \
15707ec681f3Smrg      for (unsigned __cnt = 0; __cnt < (__ir)->inputs_count; __cnt++)          \
15717ec681f3Smrg         if ((__ininstr = (__ir)->inputs[__cnt]))
15727ec681f3Smrg#define foreach_input(__ininstr, __ir) foreach_input_n (__ininstr, __i, __ir)
15737ec681f3Smrg
15747ec681f3Smrg/* iterators for instructions:
 *
 * Thin wrappers that forward to the list_for_each_entry* macros with the
 * element type fixed to struct ir3_instruction and the link member fixed to
 * `node`.  Each wrapper maps one-to-one onto the list macro of the same
 * suffix (_rev, _safe, _from_safe).
 */
15757ec681f3Smrg#define foreach_instr(__instr, __list)                                         \
15767ec681f3Smrg   list_for_each_entry (struct ir3_instruction, __instr, __list, node)
15777ec681f3Smrg#define foreach_instr_rev(__instr, __list)                                     \
15787ec681f3Smrg   list_for_each_entry_rev (struct ir3_instruction, __instr, __list, node)
15797ec681f3Smrg#define foreach_instr_safe(__instr, __list)                                    \
15807ec681f3Smrg   list_for_each_entry_safe (struct ir3_instruction, __instr, __list, node)
15817ec681f3Smrg#define foreach_instr_from_safe(__instr, __start, __list)                      \
15827ec681f3Smrg   list_for_each_entry_from_safe(struct ir3_instruction, __instr, __start,     \
15837ec681f3Smrg                                 __list, node)
15847ec681f3Smrg
15857ec681f3Smrg/* iterators for blocks (element type struct ir3_block, link member `node`): */
15867ec681f3Smrg#define foreach_block(__block, __list)                                         \
15877ec681f3Smrg   list_for_each_entry (struct ir3_block, __block, __list, node)
15887ec681f3Smrg#define foreach_block_safe(__block, __list)                                    \
15897ec681f3Smrg   list_for_each_entry_safe (struct ir3_block, __block, __list, node)
15907ec681f3Smrg#define foreach_block_rev(__block, __list)                                     \
15917ec681f3Smrg   list_for_each_entry_rev (struct ir3_block, __block, __list, node)
15927ec681f3Smrg
15937ec681f3Smrg/* iterators for arrays (element type struct ir3_array, link member `node`): */
15947ec681f3Smrg#define foreach_array(__array, __list)                                         \
15957ec681f3Smrg   list_for_each_entry (struct ir3_array, __array, __list, node)
15967ec681f3Smrg#define foreach_array_safe(__array, __list)                                    \
15977ec681f3Smrg   list_for_each_entry_safe (struct ir3_array, __array, __list, node)
15987ec681f3Smrg
/* Run `pass(ir, ...)` and evaluate to its bool result.  When the pass reports
 * progress, the IR is handed to ir3_debug_print() with the label
 * "AFTER: <pass name>" and then to ir3_validate().
 *
 * Implemented as a statement expression with `##__VA_ARGS__` (both compiler
 * extensions).  `ir` is expanded three times, and the result is held in a
 * local literally named `progress`, so an argument expression that mentions a
 * caller variable of that name would refer to the macro's local instead.
 */
15997ec681f3Smrg#define IR3_PASS(ir, pass, ...)                                                \
16007ec681f3Smrg   ({                                                                          \
16017ec681f3Smrg      bool progress = pass(ir, ##__VA_ARGS__);                                 \
16027ec681f3Smrg      if (progress) {                                                          \
16037ec681f3Smrg         ir3_debug_print(ir, "AFTER: " #pass);                                 \
16047ec681f3Smrg         ir3_validate(ir);                                                     \
16057ec681f3Smrg      }                                                                        \
16067ec681f3Smrg      progress;                                                                \
16077ec681f3Smrg   })
16087ec681f3Smrg
/* Prototypes for passes and utilities implemented outside this header.  The
 * bool-returning entry points fit the IR3_PASS() wrapper, which treats the
 * return value as a "made progress" flag.
 */
16097ec681f3Smrg/* validate: */
16107ec681f3Smrgvoid ir3_validate(struct ir3 *ir);
16117e102996Smaya
16127e102996Smaya/* dump: */
16137e102996Smayavoid ir3_print(struct ir3 *ir);
16147e102996Smayavoid ir3_print_instr(struct ir3_instruction *instr);
16157e102996Smaya
/* Forward declaration only; the prototype below needs just a pointer. */
16167ec681f3Smrgstruct log_stream;
16177ec681f3Smrgvoid ir3_print_instr_stream(struct log_stream *stream, struct ir3_instruction *instr);
16187ec681f3Smrg
16197ec681f3Smrg/* delay calculation: */
16207e102996Smayaint ir3_delayslots(struct ir3_instruction *assigner,
16217ec681f3Smrg                   struct ir3_instruction *consumer, unsigned n, bool soft);
16227ec681f3Smrgunsigned ir3_delay_calc_prera(struct ir3_block *block,
16237ec681f3Smrg                              struct ir3_instruction *instr);
16247ec681f3Smrgunsigned ir3_delay_calc_postra(struct ir3_block *block,
16257ec681f3Smrg                               struct ir3_instruction *instr, bool soft,
16267ec681f3Smrg                               bool mergedregs);
16277ec681f3Smrgunsigned ir3_delay_calc_exact(struct ir3_block *block,
16287ec681f3Smrg                              struct ir3_instruction *instr, bool mergedregs);
16297ec681f3Smrgvoid ir3_remove_nops(struct ir3 *ir);
16307ec681f3Smrg
16317ec681f3Smrg/* unreachable block elimination: */
16327ec681f3Smrgbool ir3_remove_unreachable(struct ir3 *ir);
16337ec681f3Smrg
16347ec681f3Smrg/* dead code elimination: */
/* Forward declaration only; used by pointer in several prototypes below. */
16357ec681f3Smrgstruct ir3_shader_variant;
16367ec681f3Smrgbool ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so);
16377ec681f3Smrg
16387ec681f3Smrg/* fp16 conversion folding */
16397ec681f3Smrgbool ir3_cf(struct ir3 *ir);
16407e102996Smaya
16417e102996Smaya/* copy-propagate: */
16427ec681f3Smrgbool ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
16437ec681f3Smrgbool ir3_cp_postsched(struct ir3 *ir);
16447e102996Smaya
16457ec681f3Smrg/* common subexpression elimination: */
16467ec681f3Smrgbool ir3_cse(struct ir3 *ir);
16477e102996Smaya
16487ec681f3Smrg/* Make arrays SSA */
16497ec681f3Smrgbool ir3_array_to_ssa(struct ir3 *ir);
16507e102996Smaya
16517e102996Smaya/* scheduling: */
16527ec681f3Smrgbool ir3_sched_add_deps(struct ir3 *ir);
16537e102996Smayaint ir3_sched(struct ir3 *ir);
16547e102996Smaya
/* NOTE(review): ir3_context is forward-declared here but not referenced by
 * any prototype visible in this section - confirm it is still needed.
 */
16557ec681f3Smrgstruct ir3_context;
16567ec681f3Smrgbool ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v);
16577e102996Smaya
16587e102996Smaya/* register assignment: */
16597ec681f3Smrgint ir3_ra(struct ir3_shader_variant *v);
16607ec681f3Smrg
16617ec681f3Smrg/* lower subgroup ops: */
16627ec681f3Smrgbool ir3_lower_subgroups(struct ir3 *ir);
16637e102996Smaya
16647e102996Smaya/* legalize: */
16657ec681f3Smrgbool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary);
16667ec681f3Smrg
16677ec681f3Smrgstatic inline bool
16687ec681f3Smrgir3_has_latency_to_hide(struct ir3 *ir)
16697ec681f3Smrg{
16707ec681f3Smrg   /* VS/GS/TCS/TESS  co-exist with frag shader invocations, but we don't
16717ec681f3Smrg    * know the nature of the fragment shader.  Just assume it will have
16727ec681f3Smrg    * latency to hide:
16737ec681f3Smrg    */
16747ec681f3Smrg   if (ir->type != MESA_SHADER_FRAGMENT)
16757ec681f3Smrg      return true;
16767ec681f3Smrg
16777ec681f3Smrg   foreach_block (block, &ir->block_list) {
16787ec681f3Smrg      foreach_instr (instr, &block->instr_list) {
16797ec681f3Smrg         if (is_tex_or_prefetch(instr))
16807ec681f3Smrg            return true;
16817ec681f3Smrg
16827ec681f3Smrg         if (is_load(instr)) {
16837ec681f3Smrg            switch (instr->opc) {
16847ec681f3Smrg            case OPC_LDLV:
16857ec681f3Smrg            case OPC_LDL:
16867ec681f3Smrg            case OPC_LDLW:
16877ec681f3Smrg               break;
16887ec681f3Smrg            default:
16897ec681f3Smrg               return true;
16907ec681f3Smrg            }
16917ec681f3Smrg         }
16927ec681f3Smrg      }
16937ec681f3Smrg   }
16947ec681f3Smrg
16957ec681f3Smrg   return false;
16967ec681f3Smrg}
16977e102996Smaya
16987e102996Smaya/* ************************************************************************* */
16997e102996Smaya/* instruction helpers */
17007e102996Smaya
17017ec681f3Smrg/* creates SSA src of correct type (ie. half vs full precision) */
17027ec681f3Smrgstatic inline struct ir3_register *
17037ec681f3Smrg__ssa_src(struct ir3_instruction *instr, struct ir3_instruction *src,
17047ec681f3Smrg          unsigned flags)
17057ec681f3Smrg{
17067ec681f3Smrg   struct ir3_register *reg;
17077ec681f3Smrg   if (src->dsts[0]->flags & IR3_REG_HALF)
17087ec681f3Smrg      flags |= IR3_REG_HALF;
17097ec681f3Smrg   reg = ir3_src_create(instr, INVALID_REG, IR3_REG_SSA | flags);
17107ec681f3Smrg   reg->def = src->dsts[0];
17117ec681f3Smrg   reg->wrmask = src->dsts[0]->wrmask;
17127ec681f3Smrg   return reg;
17137ec681f3Smrg}
17147ec681f3Smrg
17157ec681f3Smrgstatic inline struct ir3_register *
17167ec681f3Smrg__ssa_dst(struct ir3_instruction *instr)
17177ec681f3Smrg{
17187ec681f3Smrg   struct ir3_register *reg = ir3_dst_create(instr, INVALID_REG, IR3_REG_SSA);
17197ec681f3Smrg   reg->instr = instr;
17207ec681f3Smrg   return reg;
17217ec681f3Smrg}
17227ec681f3Smrg
17237e102996Smayastatic inline struct ir3_instruction *
17247e102996Smayacreate_immed_typed(struct ir3_block *block, uint32_t val, type_t type)
17257e102996Smaya{
17267ec681f3Smrg   struct ir3_instruction *mov;
17277ec681f3Smrg   unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
17287e102996Smaya
17297ec681f3Smrg   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
17307ec681f3Smrg   mov->cat1.src_type = type;
17317ec681f3Smrg   mov->cat1.dst_type = type;
17327ec681f3Smrg   __ssa_dst(mov)->flags |= flags;
17337ec681f3Smrg   ir3_src_create(mov, 0, IR3_REG_IMMED | flags)->uim_val = val;
17347e102996Smaya
17357ec681f3Smrg   return mov;
17367e102996Smaya}
17377e102996Smaya
17387e102996Smayastatic inline struct ir3_instruction *
17397e102996Smayacreate_immed(struct ir3_block *block, uint32_t val)
17407e102996Smaya{
17417ec681f3Smrg   return create_immed_typed(block, val, TYPE_U32);
17427e102996Smaya}
17437e102996Smaya
17447e102996Smayastatic inline struct ir3_instruction *
17457ec681f3Smrgcreate_uniform_typed(struct ir3_block *block, unsigned n, type_t type)
17467e102996Smaya{
17477ec681f3Smrg   struct ir3_instruction *mov;
17487ec681f3Smrg   unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
17497e102996Smaya
17507ec681f3Smrg   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
17517ec681f3Smrg   mov->cat1.src_type = type;
17527ec681f3Smrg   mov->cat1.dst_type = type;
17537ec681f3Smrg   __ssa_dst(mov)->flags |= flags;
17547ec681f3Smrg   ir3_src_create(mov, n, IR3_REG_CONST | flags);
17557e102996Smaya
17567ec681f3Smrg   return mov;
17577e102996Smaya}
17587e102996Smaya
17597e102996Smayastatic inline struct ir3_instruction *
17607ec681f3Smrgcreate_uniform(struct ir3_block *block, unsigned n)
17617e102996Smaya{
17627ec681f3Smrg   return create_uniform_typed(block, n, TYPE_F32);
17637ec681f3Smrg}
17647e102996Smaya
17657ec681f3Smrgstatic inline struct ir3_instruction *
17667ec681f3Smrgcreate_uniform_indirect(struct ir3_block *block, int n, type_t type,
17677ec681f3Smrg                        struct ir3_instruction *address)
17687ec681f3Smrg{
17697ec681f3Smrg   struct ir3_instruction *mov;
17707e102996Smaya
17717ec681f3Smrg   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
17727ec681f3Smrg   mov->cat1.src_type = type;
17737ec681f3Smrg   mov->cat1.dst_type = type;
17747ec681f3Smrg   __ssa_dst(mov);
17757ec681f3Smrg   ir3_src_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
17767e102996Smaya
17777ec681f3Smrg   ir3_instr_set_address(mov, address);
17787ec681f3Smrg
17797ec681f3Smrg   return mov;
17807e102996Smaya}
17817e102996Smaya
17827ec681f3Smrgstatic inline struct ir3_instruction *
17837ec681f3Smrgir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
17847e102996Smaya{
17857ec681f3Smrg   struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1);
17867ec681f3Smrg   unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
17877ec681f3Smrg
17887ec681f3Smrg   __ssa_dst(instr)->flags |= flags;
17897ec681f3Smrg   if (src->dsts[0]->flags & IR3_REG_ARRAY) {
17907ec681f3Smrg      struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY);
17917ec681f3Smrg      src_reg->array = src->dsts[0]->array;
17927ec681f3Smrg   } else {
17937ec681f3Smrg      __ssa_src(instr, src, src->dsts[0]->flags & IR3_REG_SHARED);
17947ec681f3Smrg   }
17957ec681f3Smrg   debug_assert(!(src->dsts[0]->flags & IR3_REG_RELATIV));
17967ec681f3Smrg   instr->cat1.src_type = type;
17977ec681f3Smrg   instr->cat1.dst_type = type;
17987ec681f3Smrg   return instr;
17997e102996Smaya}
18007e102996Smaya
18017e102996Smayastatic inline struct ir3_instruction *
18027ec681f3Smrgir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type,
18037ec681f3Smrg        type_t dst_type)
18047e102996Smaya{
18057ec681f3Smrg   struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1);
18067ec681f3Smrg   unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0;
18077ec681f3Smrg   unsigned src_flags = (type_size(src_type) < 32) ? IR3_REG_HALF : 0;
18087ec681f3Smrg
18097ec681f3Smrg   debug_assert((src->dsts[0]->flags & IR3_REG_HALF) == src_flags);
18107ec681f3Smrg
18117ec681f3Smrg   __ssa_dst(instr)->flags |= dst_flags;
18127ec681f3Smrg   __ssa_src(instr, src, 0);
18137ec681f3Smrg   instr->cat1.src_type = src_type;
18147ec681f3Smrg   instr->cat1.dst_type = dst_type;
18157ec681f3Smrg   debug_assert(!(src->dsts[0]->flags & IR3_REG_ARRAY));
18167ec681f3Smrg   return instr;
18177ec681f3Smrg}
18187ec681f3Smrg
18197ec681f3Smrgstatic inline struct ir3_instruction *
18207ec681f3Smrgir3_MOVMSK(struct ir3_block *block, unsigned components)
18217ec681f3Smrg{
18227ec681f3Smrg   struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOVMSK, 1, 0);
18237ec681f3Smrg
18247ec681f3Smrg   struct ir3_register *dst = __ssa_dst(instr);
18257ec681f3Smrg   dst->flags |= IR3_REG_SHARED;
18267ec681f3Smrg   dst->wrmask = (1 << components) - 1;
18277ec681f3Smrg   instr->repeat = components - 1;
18287ec681f3Smrg   return instr;
18297e102996Smaya}
18307e102996Smaya
18317e102996Smayastatic inline struct ir3_instruction *
18327ec681f3Smrgir3_BALLOT_MACRO(struct ir3_block *block, struct ir3_instruction *src,
18337ec681f3Smrg                 unsigned components)
18347e102996Smaya{
18357ec681f3Smrg   struct ir3_instruction *instr =
18367ec681f3Smrg      ir3_instr_create(block, OPC_BALLOT_MACRO, 1, 1);
18377e102996Smaya
18387ec681f3Smrg   struct ir3_register *dst = __ssa_dst(instr);
18397ec681f3Smrg   dst->flags |= IR3_REG_SHARED;
18407ec681f3Smrg   dst->wrmask = (1 << components) - 1;
18417e102996Smaya
18427ec681f3Smrg   __ssa_src(instr, src, 0);
18437ec681f3Smrg
18447ec681f3Smrg   return instr;
18457e102996Smaya}
18467e102996Smaya
18477e102996Smayastatic inline struct ir3_instruction *
18487e102996Smayair3_NOP(struct ir3_block *block)
18497e102996Smaya{
18507ec681f3Smrg   return ir3_instr_create(block, OPC_NOP, 0, 0);
18517e102996Smaya}
18527e102996Smaya
/* NOTE(review): presumably lets IR3_INSTR_##f paste to a valid, empty flag
 * value when f is 0 - confirm against the users of the INSTRnF macros.
 */
18537e102996Smaya#define IR3_INSTR_0 0
18547e102996Smaya
/* __INSTR0 defines `ir3_<name>(block)`: it creates an instruction with opcode
 * `opc` through ir3_instr_create(block, opc, 1, 0), ORs `flag` into
 * instr->flags and returns it.  No destination or source register is
 * attached.
 *
 *   INSTR0(name)     -> ir3_<name>(),     opcode OPC_<name>, no flag
 *   INSTR0F(f, name) -> ir3_<name>_<f>(), opcode OPC_<name>, flag IR3_INSTR_<f>
 */
18557ec681f3Smrg/* clang-format off */
18567ec681f3Smrg#define __INSTR0(flag, name, opc)                                              \
18577ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name(struct ir3_block *block)      \
18587ec681f3Smrg{                                                                              \
18597ec681f3Smrg   struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 0);         \
18607ec681f3Smrg   instr->flags |= flag;                                                       \
18617ec681f3Smrg   return instr;                                                               \
18627ec681f3Smrg}
18637ec681f3Smrg/* clang-format on */
18647ec681f3Smrg#define INSTR0F(f, name) __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name)
18657ec681f3Smrg#define INSTR0(name)     __INSTR0(0, name, OPC_##name)
18667ec681f3Smrg
/* __INSTR1 defines `ir3_<name>(block, a, aflags)`: it creates an instruction
 * with `dst_count` SSA destinations (via __ssa_dst) and one SSA source that
 * refers to `a` with extra register flags `aflags` (via __ssa_src), ORs
 * `flag` into instr->flags and returns it.
 *
 *   INSTR1(name)      -> ir3_<name>(),     one destination
 *   INSTR1F(f, name)  -> ir3_<name>_<f>(), one destination, flag IR3_INSTR_<f>
 *   INSTR1NODST(name) -> ir3_<name>(),     no destination
 */
18677ec681f3Smrg/* clang-format off */
18687ec681f3Smrg#define __INSTR1(flag, dst_count, name, opc)                                   \
18697ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name(                              \
18707ec681f3Smrg   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags)        \
18717ec681f3Smrg{                                                                              \
18727ec681f3Smrg   struct ir3_instruction *instr =                                             \
18737ec681f3Smrg      ir3_instr_create(block, opc, dst_count, 1);                              \
18747ec681f3Smrg   for (unsigned i = 0; i < dst_count; i++)                                    \
18757ec681f3Smrg      __ssa_dst(instr);                                                        \
18767ec681f3Smrg   __ssa_src(instr, a, aflags);                                                \
18777ec681f3Smrg   instr->flags |= flag;                                                       \
18787ec681f3Smrg   return instr;                                                               \
18797ec681f3Smrg}
18807ec681f3Smrg/* clang-format on */
18817ec681f3Smrg#define INSTR1F(f, name)  __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
18827ec681f3Smrg#define INSTR1(name)      __INSTR1(0, 1, name, OPC_##name)
18837ec681f3Smrg#define INSTR1NODST(name) __INSTR1(0, 0, name, OPC_##name)
18847ec681f3Smrg
/* __INSTR2 defines `ir3_<name>(block, a, aflags, b, bflags)`: it creates an
 * instruction with one SSA destination and two SSA sources (a, then b, each
 * with its own extra register flags), ORs `flag` into instr->flags and
 * returns it.
 *
 *   INSTR2(name)     -> ir3_<name>()
 *   INSTR2F(f, name) -> ir3_<name>_<f>(), flag IR3_INSTR_<f>
 */
18857ec681f3Smrg/* clang-format off */
18867ec681f3Smrg#define __INSTR2(flag, name, opc)                                              \
18877ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name(                              \
18887ec681f3Smrg   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
18897ec681f3Smrg   struct ir3_instruction *b, unsigned bflags)                                 \
18907ec681f3Smrg{                                                                              \
18917ec681f3Smrg   struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 2);         \
18927ec681f3Smrg   __ssa_dst(instr);                                                           \
18937ec681f3Smrg   __ssa_src(instr, a, aflags);                                                \
18947ec681f3Smrg   __ssa_src(instr, b, bflags);                                                \
18957ec681f3Smrg   instr->flags |= flag;                                                       \
18967ec681f3Smrg   return instr;                                                               \
18977ec681f3Smrg}
18987ec681f3Smrg/* clang-format on */
18997ec681f3Smrg#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, name##_##f, OPC_##name)
19007ec681f3Smrg#define INSTR2(name)     __INSTR2(0, name, OPC_##name)
19017ec681f3Smrg
/* __INSTR3 defines `ir3_<name>(block, a, aflags, b, bflags, c, cflags)`: it
 * creates an instruction with `dst_count` SSA destinations and three SSA
 * sources (a, b, c in that order, each with its own extra register flags),
 * ORs `flag` into instr->flags and returns it.
 *
 *   INSTR3(name)      -> ir3_<name>(),     one destination
 *   INSTR3F(f, name)  -> ir3_<name>_<f>(), one destination, flag IR3_INSTR_<f>
 *   INSTR3NODST(name) -> ir3_<name>(),     no destination
 */
19027ec681f3Smrg/* clang-format off */
19037ec681f3Smrg#define __INSTR3(flag, dst_count, name, opc)                                   \
19047ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name(                              \
19057ec681f3Smrg   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
19067ec681f3Smrg   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
19077ec681f3Smrg   unsigned cflags)                                                            \
19087ec681f3Smrg{                                                                              \
19097ec681f3Smrg   struct ir3_instruction *instr =                                             \
19107ec681f3Smrg      ir3_instr_create(block, opc, dst_count, 3);                              \
19117ec681f3Smrg   for (unsigned i = 0; i < dst_count; i++)                                    \
19127ec681f3Smrg      __ssa_dst(instr);                                                        \
19137ec681f3Smrg   __ssa_src(instr, a, aflags);                                                \
19147ec681f3Smrg   __ssa_src(instr, b, bflags);                                                \
19157ec681f3Smrg   __ssa_src(instr, c, cflags);                                                \
19167ec681f3Smrg   instr->flags |= flag;                                                       \
19177ec681f3Smrg   return instr;                                                               \
19187ec681f3Smrg}
19197ec681f3Smrg/* clang-format on */
19207ec681f3Smrg#define INSTR3F(f, name)  __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
19217ec681f3Smrg#define INSTR3(name)      __INSTR3(0, 1, name, OPC_##name)
19227ec681f3Smrg#define INSTR3NODST(name) __INSTR3(0, 0, name, OPC_##name)
19237ec681f3Smrg
/* __INSTR4 defines `ir3_<name>(block, a, aflags, ..., d, dflags)`: it creates
 * an instruction with `dst_count` SSA destinations and four SSA sources
 * (a..d in that order, each with its own extra register flags), ORs `flag`
 * into instr->flags and returns it.
 *
 *   INSTR4(name)      -> ir3_<name>(),     one destination
 *   INSTR4F(f, name)  -> ir3_<name>_<f>(), one destination, flag IR3_INSTR_<f>
 *   INSTR4NODST(name) -> ir3_<name>(),     no destination
 */
19247ec681f3Smrg/* clang-format off */
19257ec681f3Smrg#define __INSTR4(flag, dst_count, name, opc)                                   \
19267ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name(                              \
19277ec681f3Smrg   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
19287ec681f3Smrg   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
19297ec681f3Smrg   unsigned cflags, struct ir3_instruction *d, unsigned dflags)                \
19307ec681f3Smrg{                                                                              \
19317ec681f3Smrg   struct ir3_instruction *instr =                                             \
19327ec681f3Smrg      ir3_instr_create(block, opc, dst_count, 4);                              \
19337ec681f3Smrg   for (unsigned i = 0; i < dst_count; i++)                                    \
19347ec681f3Smrg      __ssa_dst(instr);                                                        \
19357ec681f3Smrg   __ssa_src(instr, a, aflags);                                                \
19367ec681f3Smrg   __ssa_src(instr, b, bflags);                                                \
19377ec681f3Smrg   __ssa_src(instr, c, cflags);                                                \
19387ec681f3Smrg   __ssa_src(instr, d, dflags);                                                \
19397ec681f3Smrg   instr->flags |= flag;                                                       \
19407ec681f3Smrg   return instr;                                                               \
19417ec681f3Smrg}
19427ec681f3Smrg/* clang-format on */
19437ec681f3Smrg#define INSTR4F(f, name)  __INSTR4(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
19447ec681f3Smrg#define INSTR4(name)      __INSTR4(0, 1, name, OPC_##name)
19457ec681f3Smrg#define INSTR4NODST(name) __INSTR4(0, 0, name, OPC_##name)
19467ec681f3Smrg
/* __INSTR5 defines `ir3_<name>(block, a, aflags, ..., e, eflags)`: it creates
 * an instruction with one SSA destination and five SSA sources (a..e in that
 * order, each with its own extra register flags), ORs `flag` into
 * instr->flags and returns it.
 *
 *   INSTR5(name)     -> ir3_<name>()
 *   INSTR5F(f, name) -> ir3_<name>_<f>(), flag IR3_INSTR_<f>
 */
19477ec681f3Smrg/* clang-format off */
19487ec681f3Smrg#define __INSTR5(flag, name, opc)                                              \
19497ec681f3Smrgstatic inline struct ir3_instruction *ir3_##name(                              \
19507ec681f3Smrg   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
19517ec681f3Smrg   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
19527ec681f3Smrg   unsigned cflags, struct ir3_instruction *d, unsigned dflags,                \
19537ec681f3Smrg   struct ir3_instruction *e, unsigned eflags)                                 \
19547ec681f3Smrg{                                                                              \
19557ec681f3Smrg   struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 5);         \
19567ec681f3Smrg   __ssa_dst(instr);                                                           \
19577ec681f3Smrg   __ssa_src(instr, a, aflags);                                                \
19587ec681f3Smrg   __ssa_src(instr, b, bflags);                                                \
19597ec681f3Smrg   __ssa_src(instr, c, cflags);                                                \
19607ec681f3Smrg   __ssa_src(instr, d, dflags);                                                \
19617ec681f3Smrg   __ssa_src(instr, e, eflags);                                                \
19627ec681f3Smrg   instr->flags |= flag;                                                       \
19637ec681f3Smrg   return instr;                                                               \
19647ec681f3Smrg}
19657ec681f3Smrg/* clang-format on */
19667ec681f3Smrg#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name)
19677ec681f3Smrg#define INSTR5(name)     __INSTR5(0, name, OPC_##name)
19687ec681f3Smrg
/* __INSTR6 defines `ir3_<name>(block, a, aflags, ..., f, fflags)`: it creates
 * an instruction with `dst_count` SSA destinations and six SSA sources (a..f
 * in that order, each with its own extra register flags), ORs `flag` into
 * instr->flags and returns it.
 *
 * The destination capacity requested from ir3_instr_create() is `dst_count`,
 * matching the number of destinations actually created (and matching
 * __INSTR1/__INSTR3/__INSTR4), so INSTR6NODST no longer reserves a
 * destination slot it never fills.
 *
 *   INSTR6(name)      -> ir3_<name>(),     one destination
 *   INSTR6F(f, name)  -> ir3_<name>_<f>(), one destination, flag IR3_INSTR_<f>
 *   INSTR6NODST(name) -> ir3_<name>(),     no destination
 */
/* clang-format off */
#define __INSTR6(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags, struct ir3_instruction *d, unsigned dflags,                \
   struct ir3_instruction *e, unsigned eflags, struct ir3_instruction *f,      \
   unsigned fflags)                                                            \
{                                                                              \
   struct ir3_instruction *instr =                                             \
      ir3_instr_create(block, opc, dst_count, 6);                              \
   for (unsigned i = 0; i < dst_count; i++)                                    \
      __ssa_dst(instr);                                                        \
   __ssa_src(instr, a, aflags);                                                \
   __ssa_src(instr, b, bflags);                                                \
   __ssa_src(instr, c, cflags);                                                \
   __ssa_src(instr, d, dflags);                                                \
   __ssa_src(instr, e, eflags);                                                \
   __ssa_src(instr, f, fflags);                                                \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
#define INSTR6F(f, name)  __INSTR6(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR6(name)      __INSTR6(0, 1, name, OPC_##name)
#define INSTR6NODST(name) __INSTR6(0, 0, name, OPC_##name)
19947e102996Smaya
19957e102996Smaya/* cat0 instructions: */
/* Each line below instantiates a static inline builder named ir3_<NAME>():
 * INSTR0(X) gives ir3_X(block) with no registers attached, and
 * INSTR1NODST(X) gives ir3_X(block, a, aflags) with one SSA source and no
 * destination.
 */
19967ec681f3SmrgINSTR1NODST(B)
19977e102996SmayaINSTR0(JUMP)
19987ec681f3SmrgINSTR1NODST(KILL)
19997ec681f3SmrgINSTR1NODST(DEMOTE)
20007e102996SmayaINSTR0(END)
20017ec681f3SmrgINSTR0(CHSH)
20027ec681f3SmrgINSTR0(CHMASK)
20037ec681f3SmrgINSTR1NODST(PREDT)
20047ec681f3SmrgINSTR0(PREDF)
20057ec681f3SmrgINSTR0(PREDE)
20067ec681f3SmrgINSTR0(GETONE)
20077ec681f3Smrg
20087ec681f3Smrg/* cat1 macros */
/* INSTR1/INSTR2 builders take one/two SSA sources and create one SSA
 * destination.
 */
20097ec681f3SmrgINSTR1(ANY_MACRO)
20107ec681f3SmrgINSTR1(ALL_MACRO)
20117ec681f3SmrgINSTR1(READ_FIRST_MACRO)
20127ec681f3SmrgINSTR2(READ_COND_MACRO)
20137ec681f3Smrg
20147ec681f3Smrgstatic inline struct ir3_instruction *
20157ec681f3Smrgir3_ELECT_MACRO(struct ir3_block *block)
20167ec681f3Smrg{
20177ec681f3Smrg   struct ir3_instruction *instr =
20187ec681f3Smrg      ir3_instr_create(block, OPC_ELECT_MACRO, 1, 0);
20197ec681f3Smrg   __ssa_dst(instr);
20207ec681f3Smrg   return instr;
20217ec681f3Smrg}
20227e102996Smaya
20237e102996Smaya/* cat2 instructions, most 2 src but some 1 src: */
/* INSTR1(X) instantiates ir3_X(block, a, aflags) and INSTR2(X) instantiates
 * ir3_X(block, a, aflags, b, bflags); both create one SSA destination.
 */
20247e102996SmayaINSTR2(ADD_F)
20257e102996SmayaINSTR2(MIN_F)
20267e102996SmayaINSTR2(MAX_F)
20277e102996SmayaINSTR2(MUL_F)
20287e102996SmayaINSTR1(SIGN_F)
20297e102996SmayaINSTR2(CMPS_F)
20307e102996SmayaINSTR1(ABSNEG_F)
20317e102996SmayaINSTR2(CMPV_F)
20327e102996SmayaINSTR1(FLOOR_F)
20337e102996SmayaINSTR1(CEIL_F)
20347e102996SmayaINSTR1(RNDNE_F)
20357e102996SmayaINSTR1(RNDAZ_F)
20367e102996SmayaINSTR1(TRUNC_F)
20377e102996SmayaINSTR2(ADD_U)
20387e102996SmayaINSTR2(ADD_S)
20397e102996SmayaINSTR2(SUB_U)
20407e102996SmayaINSTR2(SUB_S)
20417e102996SmayaINSTR2(CMPS_U)
20427e102996SmayaINSTR2(CMPS_S)
20437e102996SmayaINSTR2(MIN_U)
20447e102996SmayaINSTR2(MIN_S)
20457e102996SmayaINSTR2(MAX_U)
20467e102996SmayaINSTR2(MAX_S)
20477e102996SmayaINSTR1(ABSNEG_S)
20487e102996SmayaINSTR2(AND_B)
20497e102996SmayaINSTR2(OR_B)
20507e102996SmayaINSTR1(NOT_B)
20517e102996SmayaINSTR2(XOR_B)
20527e102996SmayaINSTR2(CMPV_U)
20537e102996SmayaINSTR2(CMPV_S)
20547ec681f3SmrgINSTR2(MUL_U24)
20557ec681f3SmrgINSTR2(MUL_S24)
20567e102996SmayaINSTR2(MULL_U)
20577e102996SmayaINSTR1(BFREV_B)
20587e102996SmayaINSTR1(CLZ_S)
20597e102996SmayaINSTR1(CLZ_B)
20607e102996SmayaINSTR2(SHL_B)
20617e102996SmayaINSTR2(SHR_B)
20627e102996SmayaINSTR2(ASHR_B)
20637e102996SmayaINSTR2(BARY_F)
20647e102996SmayaINSTR2(MGEN_B)
20657e102996SmayaINSTR2(GETBIT_B)
20667e102996SmayaINSTR1(SETRM)
20677e102996SmayaINSTR1(CBITS_B)
20687e102996SmayaINSTR2(SHB)
20697e102996SmayaINSTR2(MSAD)
20707e102996Smaya
/* cat3 instructions: every entry is instantiated with INSTR3 (defined earlier
 * in this file).
 */
INSTR3(MAD_U16)
INSTR3(MADSH_U16)
INSTR3(MAD_S16)
INSTR3(MADSH_M16)
INSTR3(MAD_U24)
INSTR3(MAD_S24)
INSTR3(MAD_F16)
INSTR3(MAD_F32)
/* NOTE: SEL_B32 checks for zero vs nonzero */
INSTR3(SEL_B16)
INSTR3(SEL_B32)
INSTR3(SEL_S16)
INSTR3(SEL_S32)
INSTR3(SEL_F16)
INSTR3(SEL_F32)
INSTR3(SAD_S16)
INSTR3(SAD_S32)
20897e102996Smaya
/* cat4 instructions: every entry is instantiated with INSTR1 (defined earlier
 * in this file).
 */
INSTR1(RCP)
INSTR1(RSQ)
INSTR1(HRSQ)
INSTR1(LOG2)
INSTR1(HLOG2)
INSTR1(EXP2)
INSTR1(HEXP2)
INSTR1(SIN)
INSTR1(COS)
INSTR1(SQRT)
21017e102996Smaya
/* cat5 instructions that can be built through the generic INSTR1 macros;
 * sample-style instructions go through ir3_SAM() instead.
 *
 * NOTE(review): INSTR1F(3D, X) presumably mirrors INSTR6F, i.e. it produces a
 * builder named X_3D with IR3_INSTR_3D set -- confirm against the INSTR1F
 * definition earlier in this file.
 */
INSTR1(DSX)
INSTR1(DSXPP_MACRO)
INSTR1(DSY)
INSTR1(DSYPP_MACRO)
INSTR1F(3D, DSX)
INSTR1F(3D, DSY)
INSTR1(RGETPOS)
21107e102996Smaya
21117e102996Smayastatic inline struct ir3_instruction *
21127ec681f3Smrgir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
21137ec681f3Smrg        unsigned flags, struct ir3_instruction *samp_tex,
21147ec681f3Smrg        struct ir3_instruction *src0, struct ir3_instruction *src1)
21157ec681f3Smrg{
21167ec681f3Smrg   struct ir3_instruction *sam;
21177ec681f3Smrg   unsigned nreg = 0;
21187ec681f3Smrg
21197ec681f3Smrg   if (flags & IR3_INSTR_S2EN) {
21207ec681f3Smrg      nreg++;
21217ec681f3Smrg   }
21227ec681f3Smrg   if (src0) {
21237ec681f3Smrg      nreg++;
21247ec681f3Smrg   }
21257ec681f3Smrg   if (src1) {
21267ec681f3Smrg      nreg++;
21277ec681f3Smrg   }
21287ec681f3Smrg
21297ec681f3Smrg   sam = ir3_instr_create(block, opc, 1, nreg);
21307ec681f3Smrg   sam->flags |= flags;
21317ec681f3Smrg   __ssa_dst(sam)->wrmask = wrmask;
21327ec681f3Smrg   if (flags & IR3_INSTR_S2EN) {
21337ec681f3Smrg      __ssa_src(sam, samp_tex, (flags & IR3_INSTR_B) ? 0 : IR3_REG_HALF);
21347ec681f3Smrg   }
21357ec681f3Smrg   if (src0) {
21367ec681f3Smrg      __ssa_src(sam, src0, 0);
21377ec681f3Smrg   }
21387ec681f3Smrg   if (src1) {
21397ec681f3Smrg      __ssa_src(sam, src1, 0);
21407ec681f3Smrg   }
21417ec681f3Smrg   sam->cat5.type = type;
21427ec681f3Smrg
21437ec681f3Smrg   return sam;
21447e102996Smaya}
21457e102996Smaya
/* cat6 instructions that are instantiated regardless of the GPU macro.
 *
 * Going by the macro names, the INSTRn entries carry a destination and the
 * INSTRnNODST entries (the ST* opcodes here) do not; n is the source count
 * (TODO confirm against the macro definitions earlier in this file).
 */
INSTR2(LDLV)
INSTR3(LDG)
INSTR3(LDL)
INSTR3(LDLW)
INSTR3(LDP)
INSTR4NODST(STG)
INSTR3NODST(STL)
INSTR3NODST(STLW)
INSTR3NODST(STP)
INSTR1(RESINFO)
INSTR1(RESFMT)
INSTR2(ATOMIC_ADD)
INSTR2(ATOMIC_SUB)
INSTR2(ATOMIC_XCHG)
INSTR2(ATOMIC_INC)
INSTR2(ATOMIC_DEC)
INSTR2(ATOMIC_CMPXCHG)
INSTR2(ATOMIC_MIN)
INSTR2(ATOMIC_MAX)
INSTR2(ATOMIC_AND)
INSTR2(ATOMIC_OR)
INSTR2(ATOMIC_XOR)
INSTR2(LDC)
/* Generation-specific cat6 builders, selected by the GPU macro.
 *
 * GPU is not defined in this part of the header; it is expected to come from
 * whoever includes it.  An undefined GPU evaluates as 0 in #if, in which case
 * neither branch is compiled.
 *
 * The same opcode can take a different number of operands per branch (e.g.
 * STIB is 3-src for GPU >= 600 and 4-src otherwise, the _G atomics are 3-src
 * vs. 4-src), so the two branches are not interchangeable.
 *
 * No trailing ';' after the entries: each macro expands to a complete
 * function definition, and a stray ';' at file scope is rejected by ISO C.
 */
#if GPU >= 600
INSTR3NODST(STIB)
INSTR2(LDIB)
INSTR5(LDG_A)
INSTR6NODST(STG_A)
INSTR3F(G, ATOMIC_ADD)
INSTR3F(G, ATOMIC_SUB)
INSTR3F(G, ATOMIC_XCHG)
INSTR3F(G, ATOMIC_INC)
INSTR3F(G, ATOMIC_DEC)
INSTR3F(G, ATOMIC_CMPXCHG)
INSTR3F(G, ATOMIC_MIN)
INSTR3F(G, ATOMIC_MAX)
INSTR3F(G, ATOMIC_AND)
INSTR3F(G, ATOMIC_OR)
INSTR3F(G, ATOMIC_XOR)
#elif GPU >= 400
INSTR3(LDGB)
#if GPU >= 500
INSTR3(LDIB)
#endif
INSTR4NODST(STGB)
INSTR4NODST(STIB)
INSTR4F(G, ATOMIC_ADD)
INSTR4F(G, ATOMIC_SUB)
INSTR4F(G, ATOMIC_XCHG)
INSTR4F(G, ATOMIC_INC)
INSTR4F(G, ATOMIC_DEC)
INSTR4F(G, ATOMIC_CMPXCHG)
INSTR4F(G, ATOMIC_MIN)
INSTR4F(G, ATOMIC_MAX)
INSTR4F(G, ATOMIC_AND)
INSTR4F(G, ATOMIC_OR)
INSTR4F(G, ATOMIC_XOR)
#endif
22057e102996Smaya
/* cat7 instructions: both are instantiated with INSTR0 (defined earlier in
 * this file).
 */
INSTR0(BAR)
INSTR0(FENCE)
22097e102996Smaya
22107e102996Smaya/* ************************************************************************* */
22117ec681f3Smrg#include "bitset.h"
22127e102996Smaya
/* Number of register slots tracked per precision class. */
#define MAX_REG 256

/* Raw bit storage for a register mask: 2 * MAX_REG bits.  How a register
 * number maps onto a bit depends on regmask_t::mergedregs, see below.
 */
typedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG);

/* A set of registers.
 *
 * mergedregs selects the bit layout used by the __regmask_* accessors:
 *  - false: full register n lives at bit n, half register n at bit
 *    n + MAX_REG (two independent register files);
 *  - true:  bits are indexed in half-register units; half register n lives
 *    at bit n, while a full register n (and any special register, regardless
 *    of precision) occupies bits 2n and 2n + 1.
 */
typedef struct {
   bool mergedregs;
   regmaskstate_t mask;
} regmask_t;
22217e102996Smaya
22227ec681f3Smrgstatic inline bool
22237ec681f3Smrg__regmask_get(regmask_t *regmask, bool half, unsigned n)
22247e102996Smaya{
22257ec681f3Smrg   if (regmask->mergedregs) {
22267ec681f3Smrg      /* a6xx+ case, with merged register file, we track things in terms
22277ec681f3Smrg       * of half-precision registers, with a full precisions register
22287ec681f3Smrg       * using two half-precision slots.
22297ec681f3Smrg       *
22307ec681f3Smrg       * Pretend that special regs (a0.x, a1.x, etc.) are full registers to
22317ec681f3Smrg       * avoid having them alias normal full regs.
22327ec681f3Smrg       */
22337ec681f3Smrg      if (half && !is_reg_num_special(n)) {
22347ec681f3Smrg         return BITSET_TEST(regmask->mask, n);
22357ec681f3Smrg      } else {
22367ec681f3Smrg         n *= 2;
22377ec681f3Smrg         return BITSET_TEST(regmask->mask, n) ||
22387ec681f3Smrg                BITSET_TEST(regmask->mask, n + 1);
22397ec681f3Smrg      }
22407ec681f3Smrg   } else {
22417ec681f3Smrg      /* pre a6xx case, with separate register file for half and full
22427ec681f3Smrg       * precision:
22437ec681f3Smrg       */
22447ec681f3Smrg      if (half)
22457ec681f3Smrg         n += MAX_REG;
22467ec681f3Smrg      return BITSET_TEST(regmask->mask, n);
22477ec681f3Smrg   }
22487e102996Smaya}
22497e102996Smaya
22507ec681f3Smrgstatic inline void
22517ec681f3Smrg__regmask_set(regmask_t *regmask, bool half, unsigned n)
22527e102996Smaya{
22537ec681f3Smrg   if (regmask->mergedregs) {
22547ec681f3Smrg      /* a6xx+ case, with merged register file, we track things in terms
22557ec681f3Smrg       * of half-precision registers, with a full precisions register
22567ec681f3Smrg       * using two half-precision slots:
22577ec681f3Smrg       */
22587ec681f3Smrg      if (half && !is_reg_num_special(n)) {
22597ec681f3Smrg         BITSET_SET(regmask->mask, n);
22607ec681f3Smrg      } else {
22617ec681f3Smrg         n *= 2;
22627ec681f3Smrg         BITSET_SET(regmask->mask, n);
22637ec681f3Smrg         BITSET_SET(regmask->mask, n + 1);
22647ec681f3Smrg      }
22657ec681f3Smrg   } else {
22667ec681f3Smrg      /* pre a6xx case, with separate register file for half and full
22677ec681f3Smrg       * precision:
22687ec681f3Smrg       */
22697ec681f3Smrg      if (half)
22707ec681f3Smrg         n += MAX_REG;
22717ec681f3Smrg      BITSET_SET(regmask->mask, n);
22727ec681f3Smrg   }
22737e102996Smaya}
22747e102996Smaya
22757ec681f3Smrgstatic inline void
22767ec681f3Smrg__regmask_clear(regmask_t *regmask, bool half, unsigned n)
22777e102996Smaya{
22787ec681f3Smrg   if (regmask->mergedregs) {
22797ec681f3Smrg      /* a6xx+ case, with merged register file, we track things in terms
22807ec681f3Smrg       * of half-precision registers, with a full precisions register
22817ec681f3Smrg       * using two half-precision slots:
22827ec681f3Smrg       */
22837ec681f3Smrg      if (half && !is_reg_num_special(n)) {
22847ec681f3Smrg         BITSET_CLEAR(regmask->mask, n);
22857ec681f3Smrg      } else {
22867ec681f3Smrg         n *= 2;
22877ec681f3Smrg         BITSET_CLEAR(regmask->mask, n);
22887ec681f3Smrg         BITSET_CLEAR(regmask->mask, n + 1);
22897ec681f3Smrg      }
22907ec681f3Smrg   } else {
22917ec681f3Smrg      /* pre a6xx case, with separate register file for half and full
22927ec681f3Smrg       * precision:
22937ec681f3Smrg       */
22947ec681f3Smrg      if (half)
22957ec681f3Smrg         n += MAX_REG;
22967ec681f3Smrg      BITSET_CLEAR(regmask->mask, n);
22977ec681f3Smrg   }
22987e102996Smaya}
22997e102996Smaya
23007ec681f3Smrgstatic inline void
23017ec681f3Smrgregmask_init(regmask_t *regmask, bool mergedregs)
23027e102996Smaya{
23037ec681f3Smrg   memset(&regmask->mask, 0, sizeof(regmask->mask));
23047ec681f3Smrg   regmask->mergedregs = mergedregs;
23057e102996Smaya}
23067e102996Smaya
23077ec681f3Smrgstatic inline void
23087ec681f3Smrgregmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
23097ec681f3Smrg{
23107ec681f3Smrg   assert(dst->mergedregs == a->mergedregs);
23117ec681f3Smrg   assert(dst->mergedregs == b->mergedregs);
23127ec681f3Smrg
23137ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++)
23147ec681f3Smrg      dst->mask[i] = a->mask[i] | b->mask[i];
23157e102996Smaya}
23167e102996Smaya
23177ec681f3Smrg
23187ec681f3Smrgstatic inline void
23197ec681f3Smrgregmask_set(regmask_t *regmask, struct ir3_register *reg)
23207ec681f3Smrg{
23217ec681f3Smrg   bool half = reg->flags & IR3_REG_HALF;
23227ec681f3Smrg   if (reg->flags & IR3_REG_RELATIV) {
23237ec681f3Smrg      for (unsigned i = 0; i < reg->size; i++)
23247ec681f3Smrg         __regmask_set(regmask, half, reg->array.base + i);
23257ec681f3Smrg   } else {
23267ec681f3Smrg      for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
23277ec681f3Smrg         if (mask & 1)
23287ec681f3Smrg            __regmask_set(regmask, half, n);
23297ec681f3Smrg   }
23307ec681f3Smrg}
23317ec681f3Smrg
23327ec681f3Smrgstatic inline bool
23337ec681f3Smrgregmask_get(regmask_t *regmask, struct ir3_register *reg)
23347ec681f3Smrg{
23357ec681f3Smrg   bool half = reg->flags & IR3_REG_HALF;
23367ec681f3Smrg   if (reg->flags & IR3_REG_RELATIV) {
23377ec681f3Smrg      for (unsigned i = 0; i < reg->size; i++)
23387ec681f3Smrg         if (__regmask_get(regmask, half, reg->array.base + i))
23397ec681f3Smrg            return true;
23407ec681f3Smrg   } else {
23417ec681f3Smrg      for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
23427ec681f3Smrg         if (mask & 1)
23437ec681f3Smrg            if (__regmask_get(regmask, half, n))
23447ec681f3Smrg               return true;
23457ec681f3Smrg   }
23467ec681f3Smrg   return false;
23477ec681f3Smrg}
23487e102996Smaya/* ************************************************************************* */
23497e102996Smaya
23507e102996Smaya#endif /* IR3_H_ */
2351