17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2018 Valve Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg *
237ec681f3Smrg */
247ec681f3Smrg
257ec681f3Smrg#ifndef ACO_IR_H
267ec681f3Smrg#define ACO_IR_H
277ec681f3Smrg
287ec681f3Smrg#include "aco_opcodes.h"
297ec681f3Smrg#include "aco_util.h"
307ec681f3Smrg
317ec681f3Smrg#include "vulkan/radv_shader.h"
327ec681f3Smrg
337ec681f3Smrg#include "nir.h"
347ec681f3Smrg
357ec681f3Smrg#include <bitset>
367ec681f3Smrg#include <memory>
377ec681f3Smrg#include <vector>
387ec681f3Smrg
397ec681f3Smrgstruct radv_shader_args;
407ec681f3Smrgstruct radv_shader_info;
417ec681f3Smrgstruct radv_vs_prolog_key;
427ec681f3Smrg
437ec681f3Smrgnamespace aco {
447ec681f3Smrg
457ec681f3Smrgextern uint64_t debug_flags;
467ec681f3Smrg
477ec681f3Smrgenum {
487ec681f3Smrg   DEBUG_VALIDATE_IR = 0x1,
497ec681f3Smrg   DEBUG_VALIDATE_RA = 0x2,
507ec681f3Smrg   DEBUG_PERFWARN = 0x4,
517ec681f3Smrg   DEBUG_FORCE_WAITCNT = 0x8,
527ec681f3Smrg   DEBUG_NO_VN = 0x10,
537ec681f3Smrg   DEBUG_NO_OPT = 0x20,
547ec681f3Smrg   DEBUG_NO_SCHED = 0x40,
557ec681f3Smrg   DEBUG_PERF_INFO = 0x80,
567ec681f3Smrg   DEBUG_LIVE_INFO = 0x100,
577ec681f3Smrg};
587ec681f3Smrg
597ec681f3Smrg/**
607ec681f3Smrg * Representation of the instruction's microcode encoding format
617ec681f3Smrg * Note: Some Vector ALU Formats can be combined, such that:
627ec681f3Smrg * - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding
637ec681f3Smrg * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
647ec681f3Smrg * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
657ec681f3Smrg *
667ec681f3Smrg * (*) The same is applicable for VOP1 and VOPC instructions.
677ec681f3Smrg */
687ec681f3Smrgenum class Format : std::uint16_t {
697ec681f3Smrg   /* Pseudo Instruction Format */
707ec681f3Smrg   PSEUDO = 0,
717ec681f3Smrg   /* Scalar ALU & Control Formats */
727ec681f3Smrg   SOP1 = 1,
737ec681f3Smrg   SOP2 = 2,
747ec681f3Smrg   SOPK = 3,
757ec681f3Smrg   SOPP = 4,
767ec681f3Smrg   SOPC = 5,
777ec681f3Smrg   /* Scalar Memory Format */
787ec681f3Smrg   SMEM = 6,
797ec681f3Smrg   /* LDS/GDS Format */
807ec681f3Smrg   DS = 8,
817ec681f3Smrg   /* Vector Memory Buffer Formats */
827ec681f3Smrg   MTBUF = 9,
837ec681f3Smrg   MUBUF = 10,
847ec681f3Smrg   /* Vector Memory Image Format */
857ec681f3Smrg   MIMG = 11,
867ec681f3Smrg   /* Export Format */
877ec681f3Smrg   EXP = 12,
887ec681f3Smrg   /* Flat Formats */
897ec681f3Smrg   FLAT = 13,
907ec681f3Smrg   GLOBAL = 14,
917ec681f3Smrg   SCRATCH = 15,
927ec681f3Smrg
937ec681f3Smrg   PSEUDO_BRANCH = 16,
947ec681f3Smrg   PSEUDO_BARRIER = 17,
957ec681f3Smrg   PSEUDO_REDUCTION = 18,
967ec681f3Smrg
977ec681f3Smrg   /* Vector ALU Formats */
987ec681f3Smrg   VOP3P = 19,
997ec681f3Smrg   VOP1 = 1 << 8,
1007ec681f3Smrg   VOP2 = 1 << 9,
1017ec681f3Smrg   VOPC = 1 << 10,
1027ec681f3Smrg   VOP3 = 1 << 11,
1037ec681f3Smrg   /* Vector Parameter Interpolation Format */
1047ec681f3Smrg   VINTRP = 1 << 12,
1057ec681f3Smrg   DPP = 1 << 13,
1067ec681f3Smrg   SDWA = 1 << 14,
1077ec681f3Smrg};
1087ec681f3Smrg
1097ec681f3Smrgenum class instr_class : uint8_t {
1107ec681f3Smrg   valu32 = 0,
1117ec681f3Smrg   valu_convert32 = 1,
1127ec681f3Smrg   valu64 = 2,
1137ec681f3Smrg   valu_quarter_rate32 = 3,
1147ec681f3Smrg   valu_fma = 4,
1157ec681f3Smrg   valu_transcendental32 = 5,
1167ec681f3Smrg   valu_double = 6,
1177ec681f3Smrg   valu_double_add = 7,
1187ec681f3Smrg   valu_double_convert = 8,
1197ec681f3Smrg   valu_double_transcendental = 9,
1207ec681f3Smrg   salu = 10,
1217ec681f3Smrg   smem = 11,
1227ec681f3Smrg   barrier = 12,
1237ec681f3Smrg   branch = 13,
1247ec681f3Smrg   sendmsg = 14,
1257ec681f3Smrg   ds = 15,
1267ec681f3Smrg   exp = 16,
1277ec681f3Smrg   vmem = 17,
1287ec681f3Smrg   waitcnt = 18,
1297ec681f3Smrg   other = 19,
1307ec681f3Smrg   count,
1317ec681f3Smrg};
1327ec681f3Smrg
1337ec681f3Smrgenum storage_class : uint8_t {
1347ec681f3Smrg   storage_none = 0x0,   /* no synchronization and can be reordered around aliasing stores */
1357ec681f3Smrg   storage_buffer = 0x1, /* SSBOs and global memory */
1367ec681f3Smrg   storage_atomic_counter = 0x2, /* not used for Vulkan */
1377ec681f3Smrg   storage_image = 0x4,
1387ec681f3Smrg   storage_shared = 0x8,       /* or TCS output */
1397ec681f3Smrg   storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
1407ec681f3Smrg   storage_scratch = 0x20,
1417ec681f3Smrg   storage_vgpr_spill = 0x40,
1427ec681f3Smrg   storage_count = 8,
1437ec681f3Smrg};
1447ec681f3Smrg
1457ec681f3Smrgenum memory_semantics : uint8_t {
1467ec681f3Smrg   semantic_none = 0x0,
1477ec681f3Smrg   /* for loads: don't move any access after this load to before this load (even other loads)
1487ec681f3Smrg    * for barriers: don't move any access after the barrier to before any
1497ec681f3Smrg    * atomics/control_barriers/sendmsg_gs_done before the barrier */
1507ec681f3Smrg   semantic_acquire = 0x1,
1517ec681f3Smrg   /* for stores: don't move any access before this store to after this store
1527ec681f3Smrg    * for barriers: don't move any access before the barrier to after any
1537ec681f3Smrg    * atomics/control_barriers/sendmsg_gs_done after the barrier */
1547ec681f3Smrg   semantic_release = 0x2,
1557ec681f3Smrg
1567ec681f3Smrg   /* the rest are for load/stores/atomics only */
1577ec681f3Smrg   /* cannot be DCE'd or CSE'd */
1587ec681f3Smrg   semantic_volatile = 0x4,
1597ec681f3Smrg   /* does not interact with barriers and assumes this lane is the only lane
1607ec681f3Smrg    * accessing this memory */
1617ec681f3Smrg   semantic_private = 0x8,
1627ec681f3Smrg   /* this operation can be reordered around operations of the same storage.
1637ec681f3Smrg    * says nothing about barriers */
1647ec681f3Smrg   semantic_can_reorder = 0x10,
1657ec681f3Smrg   /* this is a atomic instruction (may only read or write memory) */
1667ec681f3Smrg   semantic_atomic = 0x20,
1677ec681f3Smrg   /* this is instruction both reads and writes memory */
1687ec681f3Smrg   semantic_rmw = 0x40,
1697ec681f3Smrg
1707ec681f3Smrg   semantic_acqrel = semantic_acquire | semantic_release,
1717ec681f3Smrg   semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw,
1727ec681f3Smrg};
1737ec681f3Smrg
1747ec681f3Smrgenum sync_scope : uint8_t {
1757ec681f3Smrg   scope_invocation = 0,
1767ec681f3Smrg   scope_subgroup = 1,
1777ec681f3Smrg   scope_workgroup = 2,
1787ec681f3Smrg   scope_queuefamily = 3,
1797ec681f3Smrg   scope_device = 4,
1807ec681f3Smrg};
1817ec681f3Smrg
1827ec681f3Smrgstruct memory_sync_info {
1837ec681f3Smrg   memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {}
1847ec681f3Smrg   memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation)
1857ec681f3Smrg       : storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_)
1867ec681f3Smrg   {}
1877ec681f3Smrg
1887ec681f3Smrg   storage_class storage : 8;
1897ec681f3Smrg   memory_semantics semantics : 8;
1907ec681f3Smrg   sync_scope scope : 8;
1917ec681f3Smrg
1927ec681f3Smrg   bool operator==(const memory_sync_info& rhs) const
1937ec681f3Smrg   {
1947ec681f3Smrg      return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope;
1957ec681f3Smrg   }
1967ec681f3Smrg
1977ec681f3Smrg   bool can_reorder() const
1987ec681f3Smrg   {
1997ec681f3Smrg      if (semantics & semantic_acqrel)
2007ec681f3Smrg         return false;
2017ec681f3Smrg      /* Also check storage so that zero-initialized memory_sync_info can be
2027ec681f3Smrg       * reordered. */
2037ec681f3Smrg      return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
2047ec681f3Smrg   }
2057ec681f3Smrg};
2067ec681f3Smrgstatic_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
2077ec681f3Smrg
2087ec681f3Smrgenum fp_round {
2097ec681f3Smrg   fp_round_ne = 0,
2107ec681f3Smrg   fp_round_pi = 1,
2117ec681f3Smrg   fp_round_ni = 2,
2127ec681f3Smrg   fp_round_tz = 3,
2137ec681f3Smrg};
2147ec681f3Smrg
2157ec681f3Smrgenum fp_denorm {
2167ec681f3Smrg   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
2177ec681f3Smrg    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
2187ec681f3Smrg   fp_denorm_flush = 0x0,
2197ec681f3Smrg   fp_denorm_keep_in = 0x1,
2207ec681f3Smrg   fp_denorm_keep_out = 0x2,
2217ec681f3Smrg   fp_denorm_keep = 0x3,
2227ec681f3Smrg};
2237ec681f3Smrg
2247ec681f3Smrgstruct float_mode {
2257ec681f3Smrg   /* matches encoding of the MODE register */
2267ec681f3Smrg   union {
2277ec681f3Smrg      struct {
2287ec681f3Smrg         fp_round round32 : 2;
2297ec681f3Smrg         fp_round round16_64 : 2;
2307ec681f3Smrg         unsigned denorm32 : 2;
2317ec681f3Smrg         unsigned denorm16_64 : 2;
2327ec681f3Smrg      };
2337ec681f3Smrg      struct {
2347ec681f3Smrg         uint8_t round : 4;
2357ec681f3Smrg         uint8_t denorm : 4;
2367ec681f3Smrg      };
2377ec681f3Smrg      uint8_t val = 0;
2387ec681f3Smrg   };
2397ec681f3Smrg   /* if false, optimizations which may remove infs/nan/-0.0 can be done */
2407ec681f3Smrg   bool preserve_signed_zero_inf_nan32 : 1;
2417ec681f3Smrg   bool preserve_signed_zero_inf_nan16_64 : 1;
2427ec681f3Smrg   /* if false, optimizations which may remove denormal flushing can be done */
2437ec681f3Smrg   bool must_flush_denorms32 : 1;
2447ec681f3Smrg   bool must_flush_denorms16_64 : 1;
2457ec681f3Smrg   bool care_about_round32 : 1;
2467ec681f3Smrg   bool care_about_round16_64 : 1;
2477ec681f3Smrg
2487ec681f3Smrg   /* Returns true if instructions using the mode "other" can safely use the
2497ec681f3Smrg    * current one instead. */
2507ec681f3Smrg   bool canReplace(float_mode other) const noexcept
2517ec681f3Smrg   {
2527ec681f3Smrg      return val == other.val &&
2537ec681f3Smrg             (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
2547ec681f3Smrg             (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
2557ec681f3Smrg             (must_flush_denorms32 || !other.must_flush_denorms32) &&
2567ec681f3Smrg             (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
2577ec681f3Smrg             (care_about_round32 || !other.care_about_round32) &&
2587ec681f3Smrg             (care_about_round16_64 || !other.care_about_round16_64);
2597ec681f3Smrg   }
2607ec681f3Smrg};
2617ec681f3Smrg
2627ec681f3Smrgstruct wait_imm {
2637ec681f3Smrg   static const uint8_t unset_counter = 0xff;
2647ec681f3Smrg
2657ec681f3Smrg   uint8_t vm;
2667ec681f3Smrg   uint8_t exp;
2677ec681f3Smrg   uint8_t lgkm;
2687ec681f3Smrg   uint8_t vs;
2697ec681f3Smrg
2707ec681f3Smrg   wait_imm();
2717ec681f3Smrg   wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);
2727ec681f3Smrg   wait_imm(enum chip_class chip, uint16_t packed);
2737ec681f3Smrg
2747ec681f3Smrg   uint16_t pack(enum chip_class chip) const;
2757ec681f3Smrg
2767ec681f3Smrg   bool combine(const wait_imm& other);
2777ec681f3Smrg
2787ec681f3Smrg   bool empty() const;
2797ec681f3Smrg};
2807ec681f3Smrg
2817ec681f3Smrgconstexpr Format
2827ec681f3SmrgasVOP3(Format format)
2837ec681f3Smrg{
2847ec681f3Smrg   return (Format)((uint32_t)Format::VOP3 | (uint32_t)format);
2857ec681f3Smrg};
2867ec681f3Smrg
2877ec681f3Smrgconstexpr Format
2887ec681f3SmrgasSDWA(Format format)
2897ec681f3Smrg{
2907ec681f3Smrg   assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
2917ec681f3Smrg   return (Format)((uint32_t)Format::SDWA | (uint32_t)format);
2927ec681f3Smrg}
2937ec681f3Smrg
2947ec681f3Smrgconstexpr Format
2957ec681f3SmrgwithoutDPP(Format format)
2967ec681f3Smrg{
2977ec681f3Smrg   return (Format)((uint32_t)format & ~(uint32_t)Format::DPP);
2987ec681f3Smrg}
2997ec681f3Smrg
3007ec681f3Smrgenum class RegType {
3017ec681f3Smrg   none = 0,
3027ec681f3Smrg   sgpr,
3037ec681f3Smrg   vgpr,
3047ec681f3Smrg   linear_vgpr,
3057ec681f3Smrg};
3067ec681f3Smrg
3077ec681f3Smrgstruct RegClass {
3087ec681f3Smrg
3097ec681f3Smrg   enum RC : uint8_t {
3107ec681f3Smrg      s1 = 1,
3117ec681f3Smrg      s2 = 2,
3127ec681f3Smrg      s3 = 3,
3137ec681f3Smrg      s4 = 4,
3147ec681f3Smrg      s6 = 6,
3157ec681f3Smrg      s8 = 8,
3167ec681f3Smrg      s16 = 16,
3177ec681f3Smrg      v1 = s1 | (1 << 5),
3187ec681f3Smrg      v2 = s2 | (1 << 5),
3197ec681f3Smrg      v3 = s3 | (1 << 5),
3207ec681f3Smrg      v4 = s4 | (1 << 5),
3217ec681f3Smrg      v5 = 5 | (1 << 5),
3227ec681f3Smrg      v6 = 6 | (1 << 5),
3237ec681f3Smrg      v7 = 7 | (1 << 5),
3247ec681f3Smrg      v8 = 8 | (1 << 5),
3257ec681f3Smrg      /* byte-sized register class */
3267ec681f3Smrg      v1b = v1 | (1 << 7),
3277ec681f3Smrg      v2b = v2 | (1 << 7),
3287ec681f3Smrg      v3b = v3 | (1 << 7),
3297ec681f3Smrg      v4b = v4 | (1 << 7),
3307ec681f3Smrg      v6b = v6 | (1 << 7),
3317ec681f3Smrg      v8b = v8 | (1 << 7),
3327ec681f3Smrg      /* these are used for WWM and spills to vgpr */
3337ec681f3Smrg      v1_linear = v1 | (1 << 6),
3347ec681f3Smrg      v2_linear = v2 | (1 << 6),
3357ec681f3Smrg   };
3367ec681f3Smrg
3377ec681f3Smrg   RegClass() = default;
3387ec681f3Smrg   constexpr RegClass(RC rc_) : rc(rc_) {}
3397ec681f3Smrg   constexpr RegClass(RegType type, unsigned size)
3407ec681f3Smrg       : rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size))
3417ec681f3Smrg   {}
3427ec681f3Smrg
3437ec681f3Smrg   constexpr operator RC() const { return rc; }
3447ec681f3Smrg   explicit operator bool() = delete;
3457ec681f3Smrg
3467ec681f3Smrg   constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
3477ec681f3Smrg   constexpr bool is_linear_vgpr() const { return rc & (1 << 6); };
3487ec681f3Smrg   constexpr bool is_subdword() const { return rc & (1 << 7); }
3497ec681f3Smrg   constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); }
3507ec681f3Smrg   // TODO: use size() less in favor of bytes()
3517ec681f3Smrg   constexpr unsigned size() const { return (bytes() + 3) >> 2; }
3527ec681f3Smrg   constexpr bool is_linear() const { return rc <= RC::s16 || is_linear_vgpr(); }
3537ec681f3Smrg   constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); }
3547ec681f3Smrg   constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); }
3557ec681f3Smrg
3567ec681f3Smrg   static constexpr RegClass get(RegType type, unsigned bytes)
3577ec681f3Smrg   {
3587ec681f3Smrg      if (type == RegType::sgpr) {
3597ec681f3Smrg         return RegClass(type, DIV_ROUND_UP(bytes, 4u));
3607ec681f3Smrg      } else {
3617ec681f3Smrg         return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u);
3627ec681f3Smrg      }
3637ec681f3Smrg   }
3647ec681f3Smrg
3657ec681f3Smrg   constexpr RegClass resize(unsigned bytes) const
3667ec681f3Smrg   {
3677ec681f3Smrg      if (is_linear_vgpr()) {
3687ec681f3Smrg         assert(bytes % 4u == 0);
3697ec681f3Smrg         return get(RegType::vgpr, bytes).as_linear();
3707ec681f3Smrg      }
3717ec681f3Smrg      return get(type(), bytes);
3727ec681f3Smrg   }
3737ec681f3Smrg
3747ec681f3Smrgprivate:
3757ec681f3Smrg   RC rc;
3767ec681f3Smrg};
3777ec681f3Smrg
3787ec681f3Smrg/* transitional helper expressions */
3797ec681f3Smrgstatic constexpr RegClass s1{RegClass::s1};
3807ec681f3Smrgstatic constexpr RegClass s2{RegClass::s2};
3817ec681f3Smrgstatic constexpr RegClass s3{RegClass::s3};
3827ec681f3Smrgstatic constexpr RegClass s4{RegClass::s4};
3837ec681f3Smrgstatic constexpr RegClass s8{RegClass::s8};
3847ec681f3Smrgstatic constexpr RegClass s16{RegClass::s16};
3857ec681f3Smrgstatic constexpr RegClass v1{RegClass::v1};
3867ec681f3Smrgstatic constexpr RegClass v2{RegClass::v2};
3877ec681f3Smrgstatic constexpr RegClass v3{RegClass::v3};
3887ec681f3Smrgstatic constexpr RegClass v4{RegClass::v4};
3897ec681f3Smrgstatic constexpr RegClass v5{RegClass::v5};
3907ec681f3Smrgstatic constexpr RegClass v6{RegClass::v6};
3917ec681f3Smrgstatic constexpr RegClass v7{RegClass::v7};
3927ec681f3Smrgstatic constexpr RegClass v8{RegClass::v8};
3937ec681f3Smrgstatic constexpr RegClass v1b{RegClass::v1b};
3947ec681f3Smrgstatic constexpr RegClass v2b{RegClass::v2b};
3957ec681f3Smrgstatic constexpr RegClass v3b{RegClass::v3b};
3967ec681f3Smrgstatic constexpr RegClass v4b{RegClass::v4b};
3977ec681f3Smrgstatic constexpr RegClass v6b{RegClass::v6b};
3987ec681f3Smrgstatic constexpr RegClass v8b{RegClass::v8b};
3997ec681f3Smrg
4007ec681f3Smrg/**
4017ec681f3Smrg * Temp Class
4027ec681f3Smrg * Each temporary virtual register has a
4037ec681f3Smrg * register class (i.e. size and type)
4047ec681f3Smrg * and SSA id.
4057ec681f3Smrg */
4067ec681f3Smrgstruct Temp {
4077ec681f3Smrg   Temp() noexcept : id_(0), reg_class(0) {}
4087ec681f3Smrg   constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {}
4097ec681f3Smrg
4107ec681f3Smrg   constexpr uint32_t id() const noexcept { return id_; }
4117ec681f3Smrg   constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; }
4127ec681f3Smrg
4137ec681f3Smrg   constexpr unsigned bytes() const noexcept { return regClass().bytes(); }
4147ec681f3Smrg   constexpr unsigned size() const noexcept { return regClass().size(); }
4157ec681f3Smrg   constexpr RegType type() const noexcept { return regClass().type(); }
4167ec681f3Smrg   constexpr bool is_linear() const noexcept { return regClass().is_linear(); }
4177ec681f3Smrg
4187ec681f3Smrg   constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); }
4197ec681f3Smrg   constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
4207ec681f3Smrg   constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
4217ec681f3Smrg
4227ec681f3Smrgprivate:
4237ec681f3Smrg   uint32_t id_ : 24;
4247ec681f3Smrg   uint32_t reg_class : 8;
4257ec681f3Smrg};
4267ec681f3Smrg
4277ec681f3Smrg/**
4287ec681f3Smrg * PhysReg
4297ec681f3Smrg * Represents the physical register for each
4307ec681f3Smrg * Operand and Definition.
4317ec681f3Smrg */
4327ec681f3Smrgstruct PhysReg {
4337ec681f3Smrg   constexpr PhysReg() = default;
4347ec681f3Smrg   explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
4357ec681f3Smrg   constexpr unsigned reg() const { return reg_b >> 2; }
4367ec681f3Smrg   constexpr unsigned byte() const { return reg_b & 0x3; }
4377ec681f3Smrg   constexpr operator unsigned() const { return reg(); }
4387ec681f3Smrg   constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
4397ec681f3Smrg   constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
4407ec681f3Smrg   constexpr bool operator<(PhysReg other) const { return reg_b < other.reg_b; }
4417ec681f3Smrg   constexpr PhysReg advance(int bytes) const
4427ec681f3Smrg   {
4437ec681f3Smrg      PhysReg res = *this;
4447ec681f3Smrg      res.reg_b += bytes;
4457ec681f3Smrg      return res;
4467ec681f3Smrg   }
4477ec681f3Smrg
4487ec681f3Smrg   uint16_t reg_b = 0;
4497ec681f3Smrg};
4507ec681f3Smrg
4517ec681f3Smrg/* helper expressions for special registers */
4527ec681f3Smrgstatic constexpr PhysReg m0{124};
4537ec681f3Smrgstatic constexpr PhysReg vcc{106};
4547ec681f3Smrgstatic constexpr PhysReg vcc_hi{107};
4557ec681f3Smrgstatic constexpr PhysReg tba{108}; /* GFX6-GFX8 */
4567ec681f3Smrgstatic constexpr PhysReg tma{110}; /* GFX6-GFX8 */
4577ec681f3Smrgstatic constexpr PhysReg ttmp0{112};
4587ec681f3Smrgstatic constexpr PhysReg ttmp1{113};
4597ec681f3Smrgstatic constexpr PhysReg ttmp2{114};
4607ec681f3Smrgstatic constexpr PhysReg ttmp3{115};
4617ec681f3Smrgstatic constexpr PhysReg ttmp4{116};
4627ec681f3Smrgstatic constexpr PhysReg ttmp5{117};
4637ec681f3Smrgstatic constexpr PhysReg ttmp6{118};
4647ec681f3Smrgstatic constexpr PhysReg ttmp7{119};
4657ec681f3Smrgstatic constexpr PhysReg ttmp8{120};
4667ec681f3Smrgstatic constexpr PhysReg ttmp9{121};
4677ec681f3Smrgstatic constexpr PhysReg ttmp10{122};
4687ec681f3Smrgstatic constexpr PhysReg ttmp11{123};
4697ec681f3Smrgstatic constexpr PhysReg sgpr_null{125}; /* GFX10+ */
4707ec681f3Smrgstatic constexpr PhysReg exec{126};
4717ec681f3Smrgstatic constexpr PhysReg exec_lo{126};
4727ec681f3Smrgstatic constexpr PhysReg exec_hi{127};
4737ec681f3Smrgstatic constexpr PhysReg vccz{251};
4747ec681f3Smrgstatic constexpr PhysReg execz{252};
4757ec681f3Smrgstatic constexpr PhysReg scc{253};
4767ec681f3Smrg
4777ec681f3Smrg/**
4787ec681f3Smrg * Operand Class
4797ec681f3Smrg * Initially, each Operand refers to either
4807ec681f3Smrg * a temporary virtual register
4817ec681f3Smrg * or to a constant value
4827ec681f3Smrg * Temporary registers get mapped to physical register during RA
4837ec681f3Smrg * Constant values are inlined into the instruction sequence.
4847ec681f3Smrg */
4857ec681f3Smrgclass Operand final {
4867ec681f3Smrgpublic:
4877ec681f3Smrg   constexpr Operand()
4887ec681f3Smrg       : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false),
4897ec681f3Smrg         isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), is16bit_(false),
4907ec681f3Smrg         is24bit_(false), signext(false)
4917ec681f3Smrg   {}
4927ec681f3Smrg
4937ec681f3Smrg   explicit Operand(Temp r) noexcept
4947ec681f3Smrg   {
4957ec681f3Smrg      data_.temp = r;
4967ec681f3Smrg      if (r.id()) {
4977ec681f3Smrg         isTemp_ = true;
4987ec681f3Smrg      } else {
4997ec681f3Smrg         isUndef_ = true;
5007ec681f3Smrg         setFixed(PhysReg{128});
5017ec681f3Smrg      }
5027ec681f3Smrg   };
5037ec681f3Smrg   explicit Operand(Temp r, PhysReg reg) noexcept
5047ec681f3Smrg   {
5057ec681f3Smrg      assert(r.id()); /* Don't allow fixing an undef to a register */
5067ec681f3Smrg      data_.temp = r;
5077ec681f3Smrg      isTemp_ = true;
5087ec681f3Smrg      setFixed(reg);
5097ec681f3Smrg   };
5107ec681f3Smrg
5117ec681f3Smrg   /* 8-bit constant */
5127ec681f3Smrg   static Operand c8(uint8_t v) noexcept
5137ec681f3Smrg   {
5147ec681f3Smrg      /* 8-bit constants are only used for copies and copies from any 8-bit
5157ec681f3Smrg       * constant can be implemented with a SDWA v_mul_u32_u24. So consider all
5167ec681f3Smrg       * to be inline constants. */
5177ec681f3Smrg      Operand op;
5187ec681f3Smrg      op.control_ = 0;
5197ec681f3Smrg      op.data_.i = v;
5207ec681f3Smrg      op.isConstant_ = true;
5217ec681f3Smrg      op.constSize = 0;
5227ec681f3Smrg      op.setFixed(PhysReg{0u});
5237ec681f3Smrg      return op;
5247ec681f3Smrg   };
5257ec681f3Smrg
5267ec681f3Smrg   /* 16-bit constant */
5277ec681f3Smrg   static Operand c16(uint16_t v) noexcept
5287ec681f3Smrg   {
5297ec681f3Smrg      Operand op;
5307ec681f3Smrg      op.control_ = 0;
5317ec681f3Smrg      op.data_.i = v;
5327ec681f3Smrg      op.isConstant_ = true;
5337ec681f3Smrg      op.constSize = 1;
5347ec681f3Smrg      if (v <= 64)
5357ec681f3Smrg         op.setFixed(PhysReg{128u + v});
5367ec681f3Smrg      else if (v >= 0xFFF0) /* [-16 .. -1] */
5377ec681f3Smrg         op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)});
5387ec681f3Smrg      else if (v == 0x3800) /* 0.5 */
5397ec681f3Smrg         op.setFixed(PhysReg{240});
5407ec681f3Smrg      else if (v == 0xB800) /* -0.5 */
5417ec681f3Smrg         op.setFixed(PhysReg{241});
5427ec681f3Smrg      else if (v == 0x3C00) /* 1.0 */
5437ec681f3Smrg         op.setFixed(PhysReg{242});
5447ec681f3Smrg      else if (v == 0xBC00) /* -1.0 */
5457ec681f3Smrg         op.setFixed(PhysReg{243});
5467ec681f3Smrg      else if (v == 0x4000) /* 2.0 */
5477ec681f3Smrg         op.setFixed(PhysReg{244});
5487ec681f3Smrg      else if (v == 0xC000) /* -2.0 */
5497ec681f3Smrg         op.setFixed(PhysReg{245});
5507ec681f3Smrg      else if (v == 0x4400) /* 4.0 */
5517ec681f3Smrg         op.setFixed(PhysReg{246});
5527ec681f3Smrg      else if (v == 0xC400) /* -4.0 */
5537ec681f3Smrg         op.setFixed(PhysReg{247});
5547ec681f3Smrg      else if (v == 0x3118) /* 1/2 PI */
5557ec681f3Smrg         op.setFixed(PhysReg{248});
5567ec681f3Smrg      else /* Literal Constant */
5577ec681f3Smrg         op.setFixed(PhysReg{255});
5587ec681f3Smrg      return op;
5597ec681f3Smrg   }
5607ec681f3Smrg
5617ec681f3Smrg   /* 32-bit constant */
5627ec681f3Smrg   static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); }
5637ec681f3Smrg
5647ec681f3Smrg   /* 64-bit constant */
5657ec681f3Smrg   static Operand c64(uint64_t v) noexcept
5667ec681f3Smrg   {
5677ec681f3Smrg      Operand op;
5687ec681f3Smrg      op.control_ = 0;
5697ec681f3Smrg      op.isConstant_ = true;
5707ec681f3Smrg      op.constSize = 3;
5717ec681f3Smrg      if (v <= 64) {
5727ec681f3Smrg         op.data_.i = (uint32_t)v;
5737ec681f3Smrg         op.setFixed(PhysReg{128 + (uint32_t)v});
5747ec681f3Smrg      } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
5757ec681f3Smrg         op.data_.i = (uint32_t)v;
5767ec681f3Smrg         op.setFixed(PhysReg{192 - (uint32_t)v});
5777ec681f3Smrg      } else if (v == 0x3FE0000000000000) { /* 0.5 */
5787ec681f3Smrg         op.data_.i = 0x3f000000;
5797ec681f3Smrg         op.setFixed(PhysReg{240});
5807ec681f3Smrg      } else if (v == 0xBFE0000000000000) { /* -0.5 */
5817ec681f3Smrg         op.data_.i = 0xbf000000;
5827ec681f3Smrg         op.setFixed(PhysReg{241});
5837ec681f3Smrg      } else if (v == 0x3FF0000000000000) { /* 1.0 */
5847ec681f3Smrg         op.data_.i = 0x3f800000;
5857ec681f3Smrg         op.setFixed(PhysReg{242});
5867ec681f3Smrg      } else if (v == 0xBFF0000000000000) { /* -1.0 */
5877ec681f3Smrg         op.data_.i = 0xbf800000;
5887ec681f3Smrg         op.setFixed(PhysReg{243});
5897ec681f3Smrg      } else if (v == 0x4000000000000000) { /* 2.0 */
5907ec681f3Smrg         op.data_.i = 0x40000000;
5917ec681f3Smrg         op.setFixed(PhysReg{244});
5927ec681f3Smrg      } else if (v == 0xC000000000000000) { /* -2.0 */
5937ec681f3Smrg         op.data_.i = 0xc0000000;
5947ec681f3Smrg         op.setFixed(PhysReg{245});
5957ec681f3Smrg      } else if (v == 0x4010000000000000) { /* 4.0 */
5967ec681f3Smrg         op.data_.i = 0x40800000;
5977ec681f3Smrg         op.setFixed(PhysReg{246});
5987ec681f3Smrg      } else if (v == 0xC010000000000000) { /* -4.0 */
5997ec681f3Smrg         op.data_.i = 0xc0800000;
6007ec681f3Smrg         op.setFixed(PhysReg{247});
6017ec681f3Smrg      } else { /* Literal Constant: we don't know if it is a long or double.*/
6027ec681f3Smrg         op.signext = v >> 63;
6037ec681f3Smrg         op.data_.i = v & 0xffffffffu;
6047ec681f3Smrg         op.setFixed(PhysReg{255});
6057ec681f3Smrg         assert(op.constantValue64() == v &&
6067ec681f3Smrg                "attempt to create a unrepresentable 64-bit literal constant");
6077ec681f3Smrg      }
6087ec681f3Smrg      return op;
6097ec681f3Smrg   }
6107ec681f3Smrg
6117ec681f3Smrg   /* 32-bit constant stored as a 32-bit or 64-bit operand */
6127ec681f3Smrg   static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept
6137ec681f3Smrg   {
6147ec681f3Smrg      Operand op;
6157ec681f3Smrg      op.control_ = 0;
6167ec681f3Smrg      op.data_.i = v;
6177ec681f3Smrg      op.isConstant_ = true;
6187ec681f3Smrg      op.constSize = is64bit ? 3 : 2;
6197ec681f3Smrg      if (v <= 64)
6207ec681f3Smrg         op.setFixed(PhysReg{128 + v});
6217ec681f3Smrg      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
6227ec681f3Smrg         op.setFixed(PhysReg{192 - v});
6237ec681f3Smrg      else if (v == 0x3f000000) /* 0.5 */
6247ec681f3Smrg         op.setFixed(PhysReg{240});
6257ec681f3Smrg      else if (v == 0xbf000000) /* -0.5 */
6267ec681f3Smrg         op.setFixed(PhysReg{241});
6277ec681f3Smrg      else if (v == 0x3f800000) /* 1.0 */
6287ec681f3Smrg         op.setFixed(PhysReg{242});
6297ec681f3Smrg      else if (v == 0xbf800000) /* -1.0 */
6307ec681f3Smrg         op.setFixed(PhysReg{243});
6317ec681f3Smrg      else if (v == 0x40000000) /* 2.0 */
6327ec681f3Smrg         op.setFixed(PhysReg{244});
6337ec681f3Smrg      else if (v == 0xc0000000) /* -2.0 */
6347ec681f3Smrg         op.setFixed(PhysReg{245});
6357ec681f3Smrg      else if (v == 0x40800000) /* 4.0 */
6367ec681f3Smrg         op.setFixed(PhysReg{246});
6377ec681f3Smrg      else if (v == 0xc0800000) /* -4.0 */
6387ec681f3Smrg         op.setFixed(PhysReg{247});
6397ec681f3Smrg      else { /* Literal Constant */
6407ec681f3Smrg         assert(!is64bit && "attempt to create a 64-bit literal constant");
6417ec681f3Smrg         op.setFixed(PhysReg{255});
6427ec681f3Smrg      }
6437ec681f3Smrg      return op;
6447ec681f3Smrg   }
6457ec681f3Smrg
6467ec681f3Smrg   explicit Operand(RegClass type) noexcept
6477ec681f3Smrg   {
6487ec681f3Smrg      isUndef_ = true;
6497ec681f3Smrg      data_.temp = Temp(0, type);
6507ec681f3Smrg      setFixed(PhysReg{128});
6517ec681f3Smrg   };
6527ec681f3Smrg   explicit Operand(PhysReg reg, RegClass type) noexcept
6537ec681f3Smrg   {
6547ec681f3Smrg      data_.temp = Temp(0, type);
6557ec681f3Smrg      setFixed(reg);
6567ec681f3Smrg   }
6577ec681f3Smrg
6587ec681f3Smrg   static Operand zero(unsigned bytes = 4) noexcept
6597ec681f3Smrg   {
6607ec681f3Smrg      if (bytes == 8)
6617ec681f3Smrg         return Operand::c64(0);
6627ec681f3Smrg      else if (bytes == 4)
6637ec681f3Smrg         return Operand::c32(0);
6647ec681f3Smrg      else if (bytes == 2)
6657ec681f3Smrg         return Operand::c16(0);
6667ec681f3Smrg      assert(bytes == 1);
6677ec681f3Smrg      return Operand::c8(0);
6687ec681f3Smrg   }
6697ec681f3Smrg
6707ec681f3Smrg   /* This is useful over the constructors when you want to take a chip class
6717ec681f3Smrg    * for 1/2 PI or an unknown operand size.
6727ec681f3Smrg    */
6737ec681f3Smrg   static Operand get_const(enum chip_class chip, uint64_t val, unsigned bytes)
6747ec681f3Smrg   {
6757ec681f3Smrg      if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
6767ec681f3Smrg         /* 1/2 PI can be an inline constant on GFX8+ */
6777ec681f3Smrg         Operand op = Operand::c32(val);
6787ec681f3Smrg         op.setFixed(PhysReg{248});
6797ec681f3Smrg         return op;
6807ec681f3Smrg      }
6817ec681f3Smrg
6827ec681f3Smrg      if (bytes == 8)
6837ec681f3Smrg         return Operand::c64(val);
6847ec681f3Smrg      else if (bytes == 4)
6857ec681f3Smrg         return Operand::c32(val);
6867ec681f3Smrg      else if (bytes == 2)
6877ec681f3Smrg         return Operand::c16(val);
6887ec681f3Smrg      assert(bytes == 1);
6897ec681f3Smrg      return Operand::c8(val);
6907ec681f3Smrg   }
6917ec681f3Smrg
6927ec681f3Smrg   static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false,
6937ec681f3Smrg                                         bool sext = false)
6947ec681f3Smrg   {
6957ec681f3Smrg      if (bytes <= 4)
6967ec681f3Smrg         return true;
6977ec681f3Smrg
6987ec681f3Smrg      if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000)
6997ec681f3Smrg         return true;
7007ec681f3Smrg      uint64_t upper33 = val & 0xFFFFFFFF80000000;
7017ec681f3Smrg      if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0))
7027ec681f3Smrg         return true;
7037ec681f3Smrg
7047ec681f3Smrg      return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 || /* [-16 .. 64] */
7057ec681f3Smrg             val == 0x3FE0000000000000 ||              /* 0.5 */
7067ec681f3Smrg             val == 0xBFE0000000000000 ||              /* -0.5 */
7077ec681f3Smrg             val == 0x3FF0000000000000 ||              /* 1.0 */
7087ec681f3Smrg             val == 0xBFF0000000000000 ||              /* -1.0 */
7097ec681f3Smrg             val == 0x4000000000000000 ||              /* 2.0 */
7107ec681f3Smrg             val == 0xC000000000000000 ||              /* -2.0 */
7117ec681f3Smrg             val == 0x4010000000000000 ||              /* 4.0 */
7127ec681f3Smrg             val == 0xC010000000000000;                /* -4.0 */
7137ec681f3Smrg   }
7147ec681f3Smrg
7157ec681f3Smrg   constexpr bool isTemp() const noexcept { return isTemp_; }
7167ec681f3Smrg
7177ec681f3Smrg   constexpr void setTemp(Temp t) noexcept
7187ec681f3Smrg   {
7197ec681f3Smrg      assert(!isConstant_);
7207ec681f3Smrg      isTemp_ = true;
7217ec681f3Smrg      data_.temp = t;
7227ec681f3Smrg   }
7237ec681f3Smrg
7247ec681f3Smrg   constexpr Temp getTemp() const noexcept { return data_.temp; }
7257ec681f3Smrg
7267ec681f3Smrg   constexpr uint32_t tempId() const noexcept { return data_.temp.id(); }
7277ec681f3Smrg
7287ec681f3Smrg   constexpr bool hasRegClass() const noexcept { return isTemp() || isUndefined(); }
7297ec681f3Smrg
7307ec681f3Smrg   constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); }
7317ec681f3Smrg
7327ec681f3Smrg   constexpr unsigned bytes() const noexcept
7337ec681f3Smrg   {
7347ec681f3Smrg      if (isConstant())
7357ec681f3Smrg         return 1 << constSize;
7367ec681f3Smrg      else
7377ec681f3Smrg         return data_.temp.bytes();
7387ec681f3Smrg   }
7397ec681f3Smrg
7407ec681f3Smrg   constexpr unsigned size() const noexcept
7417ec681f3Smrg   {
7427ec681f3Smrg      if (isConstant())
7437ec681f3Smrg         return constSize > 2 ? 2 : 1;
7447ec681f3Smrg      else
7457ec681f3Smrg         return data_.temp.size();
7467ec681f3Smrg   }
7477ec681f3Smrg
7487ec681f3Smrg   constexpr bool isFixed() const noexcept { return isFixed_; }
7497ec681f3Smrg
7507ec681f3Smrg   constexpr PhysReg physReg() const noexcept { return reg_; }
7517ec681f3Smrg
7527ec681f3Smrg   constexpr void setFixed(PhysReg reg) noexcept
7537ec681f3Smrg   {
7547ec681f3Smrg      isFixed_ = reg != unsigned(-1);
7557ec681f3Smrg      reg_ = reg;
7567ec681f3Smrg   }
7577ec681f3Smrg
7587ec681f3Smrg   constexpr bool isConstant() const noexcept { return isConstant_; }
7597ec681f3Smrg
7607ec681f3Smrg   constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; }
7617ec681f3Smrg
7627ec681f3Smrg   constexpr bool isUndefined() const noexcept { return isUndef_; }
7637ec681f3Smrg
7647ec681f3Smrg   constexpr uint32_t constantValue() const noexcept { return data_.i; }
7657ec681f3Smrg
7667ec681f3Smrg   constexpr bool constantEquals(uint32_t cmp) const noexcept
7677ec681f3Smrg   {
7687ec681f3Smrg      return isConstant() && constantValue() == cmp;
7697ec681f3Smrg   }
7707ec681f3Smrg
7717ec681f3Smrg   constexpr uint64_t constantValue64() const noexcept
7727ec681f3Smrg   {
7737ec681f3Smrg      if (constSize == 3) {
7747ec681f3Smrg         if (reg_ <= 192)
7757ec681f3Smrg            return reg_ - 128;
7767ec681f3Smrg         else if (reg_ <= 208)
7777ec681f3Smrg            return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
7787ec681f3Smrg
7797ec681f3Smrg         switch (reg_) {
7807ec681f3Smrg         case 240: return 0x3FE0000000000000;
7817ec681f3Smrg         case 241: return 0xBFE0000000000000;
7827ec681f3Smrg         case 242: return 0x3FF0000000000000;
7837ec681f3Smrg         case 243: return 0xBFF0000000000000;
7847ec681f3Smrg         case 244: return 0x4000000000000000;
7857ec681f3Smrg         case 245: return 0xC000000000000000;
7867ec681f3Smrg         case 246: return 0x4010000000000000;
7877ec681f3Smrg         case 247: return 0xC010000000000000;
7887ec681f3Smrg         case 255:
7897ec681f3Smrg            return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
7907ec681f3Smrg         }
7917ec681f3Smrg         unreachable("invalid register for 64-bit constant");
7927ec681f3Smrg      } else {
7937ec681f3Smrg         return data_.i;
7947ec681f3Smrg      }
7957ec681f3Smrg   }
7967ec681f3Smrg
7977ec681f3Smrg   constexpr bool isOfType(RegType type) const noexcept
7987ec681f3Smrg   {
7997ec681f3Smrg      return hasRegClass() && regClass().type() == type;
8007ec681f3Smrg   }
8017ec681f3Smrg
8027ec681f3Smrg   /* Indicates that the killed operand's live range intersects with the
8037ec681f3Smrg    * instruction's definitions. Unlike isKill() and isFirstKill(), this is
8047ec681f3Smrg    * not set by liveness analysis. */
8057ec681f3Smrg   constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; }
8067ec681f3Smrg
8077ec681f3Smrg   constexpr bool isLateKill() const noexcept { return isLateKill_; }
8087ec681f3Smrg
8097ec681f3Smrg   constexpr void setKill(bool flag) noexcept
8107ec681f3Smrg   {
8117ec681f3Smrg      isKill_ = flag;
8127ec681f3Smrg      if (!flag)
8137ec681f3Smrg         setFirstKill(false);
8147ec681f3Smrg   }
8157ec681f3Smrg
8167ec681f3Smrg   constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); }
8177ec681f3Smrg
8187ec681f3Smrg   constexpr void setFirstKill(bool flag) noexcept
8197ec681f3Smrg   {
8207ec681f3Smrg      isFirstKill_ = flag;
8217ec681f3Smrg      if (flag)
8227ec681f3Smrg         setKill(flag);
8237ec681f3Smrg   }
8247ec681f3Smrg
8257ec681f3Smrg   /* When there are multiple operands killing the same temporary,
8267ec681f3Smrg    * isFirstKill() is only returns true for the first one. */
8277ec681f3Smrg   constexpr bool isFirstKill() const noexcept { return isFirstKill_; }
8287ec681f3Smrg
8297ec681f3Smrg   constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); }
8307ec681f3Smrg
8317ec681f3Smrg   constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); }
8327ec681f3Smrg
8337ec681f3Smrg   constexpr bool operator==(Operand other) const noexcept
8347ec681f3Smrg   {
8357ec681f3Smrg      if (other.size() != size())
8367ec681f3Smrg         return false;
8377ec681f3Smrg      if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
8387ec681f3Smrg         return false;
8397ec681f3Smrg      if (isFixed() && other.isFixed() && physReg() != other.physReg())
8407ec681f3Smrg         return false;
8417ec681f3Smrg      if (isLiteral())
8427ec681f3Smrg         return other.isLiteral() && other.constantValue() == constantValue();
8437ec681f3Smrg      else if (isConstant())
8447ec681f3Smrg         return other.isConstant() && other.physReg() == physReg();
8457ec681f3Smrg      else if (isUndefined())
8467ec681f3Smrg         return other.isUndefined() && other.regClass() == regClass();
8477ec681f3Smrg      else
8487ec681f3Smrg         return other.isTemp() && other.getTemp() == getTemp();
8497ec681f3Smrg   }
8507ec681f3Smrg
8517ec681f3Smrg   constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); }
8527ec681f3Smrg
8537ec681f3Smrg   constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; }
8547ec681f3Smrg
8557ec681f3Smrg   constexpr bool is16bit() const noexcept { return is16bit_; }
8567ec681f3Smrg
8577ec681f3Smrg   constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; }
8587ec681f3Smrg
8597ec681f3Smrg   constexpr bool is24bit() const noexcept { return is24bit_; }
8607ec681f3Smrg
8617ec681f3Smrgprivate:
8627ec681f3Smrg   union {
8637ec681f3Smrg      Temp temp;
8647ec681f3Smrg      uint32_t i;
8657ec681f3Smrg      float f;
8667ec681f3Smrg   } data_ = {Temp(0, s1)};
8677ec681f3Smrg   PhysReg reg_;
8687ec681f3Smrg   union {
8697ec681f3Smrg      struct {
8707ec681f3Smrg         uint8_t isTemp_ : 1;
8717ec681f3Smrg         uint8_t isFixed_ : 1;
8727ec681f3Smrg         uint8_t isConstant_ : 1;
8737ec681f3Smrg         uint8_t isKill_ : 1;
8747ec681f3Smrg         uint8_t isUndef_ : 1;
8757ec681f3Smrg         uint8_t isFirstKill_ : 1;
8767ec681f3Smrg         uint8_t constSize : 2;
8777ec681f3Smrg         uint8_t isLateKill_ : 1;
8787ec681f3Smrg         uint8_t is16bit_ : 1;
8797ec681f3Smrg         uint8_t is24bit_ : 1;
8807ec681f3Smrg         uint8_t signext : 1;
8817ec681f3Smrg      };
8827ec681f3Smrg      /* can't initialize bit-fields in c++11, so work around using a union */
8837ec681f3Smrg      uint16_t control_ = 0;
8847ec681f3Smrg   };
8857ec681f3Smrg};
8867ec681f3Smrg
8877ec681f3Smrg/**
8887ec681f3Smrg * Definition Class
8897ec681f3Smrg * Definitions are the results of Instructions
8907ec681f3Smrg * and refer to temporary virtual registers
8917ec681f3Smrg * which are later mapped to physical registers
8927ec681f3Smrg */
8937ec681f3Smrgclass Definition final {
8947ec681f3Smrgpublic:
8957ec681f3Smrg   constexpr Definition()
8967ec681f3Smrg       : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0), isPrecise_(0), isNUW_(0),
8977ec681f3Smrg         isNoCSE_(0)
8987ec681f3Smrg   {}
8997ec681f3Smrg   Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {}
9007ec681f3Smrg   explicit Definition(Temp tmp) noexcept : temp(tmp) {}
9017ec681f3Smrg   Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); }
9027ec681f3Smrg   Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type))
9037ec681f3Smrg   {
9047ec681f3Smrg      setFixed(reg);
9057ec681f3Smrg   }
9067ec681f3Smrg
9077ec681f3Smrg   constexpr bool isTemp() const noexcept { return tempId() > 0; }
9087ec681f3Smrg
9097ec681f3Smrg   constexpr Temp getTemp() const noexcept { return temp; }
9107ec681f3Smrg
9117ec681f3Smrg   constexpr uint32_t tempId() const noexcept { return temp.id(); }
9127ec681f3Smrg
9137ec681f3Smrg   constexpr void setTemp(Temp t) noexcept { temp = t; }
9147ec681f3Smrg
9157ec681f3Smrg   void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); }
9167ec681f3Smrg
9177ec681f3Smrg   constexpr RegClass regClass() const noexcept { return temp.regClass(); }
9187ec681f3Smrg
9197ec681f3Smrg   constexpr unsigned bytes() const noexcept { return temp.bytes(); }
9207ec681f3Smrg
9217ec681f3Smrg   constexpr unsigned size() const noexcept { return temp.size(); }
9227ec681f3Smrg
9237ec681f3Smrg   constexpr bool isFixed() const noexcept { return isFixed_; }
9247ec681f3Smrg
9257ec681f3Smrg   constexpr PhysReg physReg() const noexcept { return reg_; }
9267ec681f3Smrg
9277ec681f3Smrg   constexpr void setFixed(PhysReg reg) noexcept
9287ec681f3Smrg   {
9297ec681f3Smrg      isFixed_ = 1;
9307ec681f3Smrg      reg_ = reg;
9317ec681f3Smrg   }
9327ec681f3Smrg
9337ec681f3Smrg   constexpr void setHint(PhysReg reg) noexcept
9347ec681f3Smrg   {
9357ec681f3Smrg      hasHint_ = 1;
9367ec681f3Smrg      reg_ = reg;
9377ec681f3Smrg   }
9387ec681f3Smrg
9397ec681f3Smrg   constexpr bool hasHint() const noexcept { return hasHint_; }
9407ec681f3Smrg
9417ec681f3Smrg   constexpr void setKill(bool flag) noexcept { isKill_ = flag; }
9427ec681f3Smrg
9437ec681f3Smrg   constexpr bool isKill() const noexcept { return isKill_; }
9447ec681f3Smrg
9457ec681f3Smrg   constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; }
9467ec681f3Smrg
9477ec681f3Smrg   constexpr bool isPrecise() const noexcept { return isPrecise_; }
9487ec681f3Smrg
9497ec681f3Smrg   /* No Unsigned Wrap */
9507ec681f3Smrg   constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; }
9517ec681f3Smrg
9527ec681f3Smrg   constexpr bool isNUW() const noexcept { return isNUW_; }
9537ec681f3Smrg
9547ec681f3Smrg   constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; }
9557ec681f3Smrg
9567ec681f3Smrg   constexpr bool isNoCSE() const noexcept { return isNoCSE_; }
9577ec681f3Smrg
9587ec681f3Smrgprivate:
9597ec681f3Smrg   Temp temp = Temp(0, s1);
9607ec681f3Smrg   PhysReg reg_;
9617ec681f3Smrg   union {
9627ec681f3Smrg      struct {
9637ec681f3Smrg         uint8_t isFixed_ : 1;
9647ec681f3Smrg         uint8_t hasHint_ : 1;
9657ec681f3Smrg         uint8_t isKill_ : 1;
9667ec681f3Smrg         uint8_t isPrecise_ : 1;
9677ec681f3Smrg         uint8_t isNUW_ : 1;
9687ec681f3Smrg         uint8_t isNoCSE_ : 1;
9697ec681f3Smrg      };
9707ec681f3Smrg      /* can't initialize bit-fields in c++11, so work around using a union */
9717ec681f3Smrg      uint8_t control_ = 0;
9727ec681f3Smrg   };
9737ec681f3Smrg};
9747ec681f3Smrg
9757ec681f3Smrgstruct Block;
9767ec681f3Smrgstruct Instruction;
9777ec681f3Smrgstruct Pseudo_instruction;
9787ec681f3Smrgstruct SOP1_instruction;
9797ec681f3Smrgstruct SOP2_instruction;
9807ec681f3Smrgstruct SOPK_instruction;
9817ec681f3Smrgstruct SOPP_instruction;
9827ec681f3Smrgstruct SOPC_instruction;
9837ec681f3Smrgstruct SMEM_instruction;
9847ec681f3Smrgstruct DS_instruction;
9857ec681f3Smrgstruct MTBUF_instruction;
9867ec681f3Smrgstruct MUBUF_instruction;
9877ec681f3Smrgstruct MIMG_instruction;
9887ec681f3Smrgstruct Export_instruction;
9897ec681f3Smrgstruct FLAT_instruction;
9907ec681f3Smrgstruct Pseudo_branch_instruction;
9917ec681f3Smrgstruct Pseudo_barrier_instruction;
9927ec681f3Smrgstruct Pseudo_reduction_instruction;
9937ec681f3Smrgstruct VOP3P_instruction;
9947ec681f3Smrgstruct VOP1_instruction;
9957ec681f3Smrgstruct VOP2_instruction;
9967ec681f3Smrgstruct VOPC_instruction;
9977ec681f3Smrgstruct VOP3_instruction;
9987ec681f3Smrgstruct Interp_instruction;
9997ec681f3Smrgstruct DPP_instruction;
10007ec681f3Smrgstruct SDWA_instruction;
10017ec681f3Smrg
10027ec681f3Smrgstruct Instruction {
10037ec681f3Smrg   aco_opcode opcode;
10047ec681f3Smrg   Format format;
10057ec681f3Smrg   uint32_t pass_flags;
10067ec681f3Smrg
10077ec681f3Smrg   aco::span<Operand> operands;
10087ec681f3Smrg   aco::span<Definition> definitions;
10097ec681f3Smrg
10107ec681f3Smrg   constexpr bool usesModifiers() const noexcept;
10117ec681f3Smrg
10127ec681f3Smrg   constexpr bool reads_exec() const noexcept
10137ec681f3Smrg   {
10147ec681f3Smrg      for (const Operand& op : operands) {
10157ec681f3Smrg         if (op.isFixed() && op.physReg() == exec)
10167ec681f3Smrg            return true;
10177ec681f3Smrg      }
10187ec681f3Smrg      return false;
10197ec681f3Smrg   }
10207ec681f3Smrg
10217ec681f3Smrg   Pseudo_instruction& pseudo() noexcept
10227ec681f3Smrg   {
10237ec681f3Smrg      assert(isPseudo());
10247ec681f3Smrg      return *(Pseudo_instruction*)this;
10257ec681f3Smrg   }
10267ec681f3Smrg   const Pseudo_instruction& pseudo() const noexcept
10277ec681f3Smrg   {
10287ec681f3Smrg      assert(isPseudo());
10297ec681f3Smrg      return *(Pseudo_instruction*)this;
10307ec681f3Smrg   }
10317ec681f3Smrg   constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; }
10327ec681f3Smrg   SOP1_instruction& sop1() noexcept
10337ec681f3Smrg   {
10347ec681f3Smrg      assert(isSOP1());
10357ec681f3Smrg      return *(SOP1_instruction*)this;
10367ec681f3Smrg   }
10377ec681f3Smrg   const SOP1_instruction& sop1() const noexcept
10387ec681f3Smrg   {
10397ec681f3Smrg      assert(isSOP1());
10407ec681f3Smrg      return *(SOP1_instruction*)this;
10417ec681f3Smrg   }
10427ec681f3Smrg   constexpr bool isSOP1() const noexcept { return format == Format::SOP1; }
10437ec681f3Smrg   SOP2_instruction& sop2() noexcept
10447ec681f3Smrg   {
10457ec681f3Smrg      assert(isSOP2());
10467ec681f3Smrg      return *(SOP2_instruction*)this;
10477ec681f3Smrg   }
10487ec681f3Smrg   const SOP2_instruction& sop2() const noexcept
10497ec681f3Smrg   {
10507ec681f3Smrg      assert(isSOP2());
10517ec681f3Smrg      return *(SOP2_instruction*)this;
10527ec681f3Smrg   }
10537ec681f3Smrg   constexpr bool isSOP2() const noexcept { return format == Format::SOP2; }
10547ec681f3Smrg   SOPK_instruction& sopk() noexcept
10557ec681f3Smrg   {
10567ec681f3Smrg      assert(isSOPK());
10577ec681f3Smrg      return *(SOPK_instruction*)this;
10587ec681f3Smrg   }
10597ec681f3Smrg   const SOPK_instruction& sopk() const noexcept
10607ec681f3Smrg   {
10617ec681f3Smrg      assert(isSOPK());
10627ec681f3Smrg      return *(SOPK_instruction*)this;
10637ec681f3Smrg   }
10647ec681f3Smrg   constexpr bool isSOPK() const noexcept { return format == Format::SOPK; }
10657ec681f3Smrg   SOPP_instruction& sopp() noexcept
10667ec681f3Smrg   {
10677ec681f3Smrg      assert(isSOPP());
10687ec681f3Smrg      return *(SOPP_instruction*)this;
10697ec681f3Smrg   }
10707ec681f3Smrg   const SOPP_instruction& sopp() const noexcept
10717ec681f3Smrg   {
10727ec681f3Smrg      assert(isSOPP());
10737ec681f3Smrg      return *(SOPP_instruction*)this;
10747ec681f3Smrg   }
10757ec681f3Smrg   constexpr bool isSOPP() const noexcept { return format == Format::SOPP; }
10767ec681f3Smrg   SOPC_instruction& sopc() noexcept
10777ec681f3Smrg   {
10787ec681f3Smrg      assert(isSOPC());
10797ec681f3Smrg      return *(SOPC_instruction*)this;
10807ec681f3Smrg   }
10817ec681f3Smrg   const SOPC_instruction& sopc() const noexcept
10827ec681f3Smrg   {
10837ec681f3Smrg      assert(isSOPC());
10847ec681f3Smrg      return *(SOPC_instruction*)this;
10857ec681f3Smrg   }
10867ec681f3Smrg   constexpr bool isSOPC() const noexcept { return format == Format::SOPC; }
10877ec681f3Smrg   SMEM_instruction& smem() noexcept
10887ec681f3Smrg   {
10897ec681f3Smrg      assert(isSMEM());
10907ec681f3Smrg      return *(SMEM_instruction*)this;
10917ec681f3Smrg   }
10927ec681f3Smrg   const SMEM_instruction& smem() const noexcept
10937ec681f3Smrg   {
10947ec681f3Smrg      assert(isSMEM());
10957ec681f3Smrg      return *(SMEM_instruction*)this;
10967ec681f3Smrg   }
10977ec681f3Smrg   constexpr bool isSMEM() const noexcept { return format == Format::SMEM; }
10987ec681f3Smrg   DS_instruction& ds() noexcept
10997ec681f3Smrg   {
11007ec681f3Smrg      assert(isDS());
11017ec681f3Smrg      return *(DS_instruction*)this;
11027ec681f3Smrg   }
11037ec681f3Smrg   const DS_instruction& ds() const noexcept
11047ec681f3Smrg   {
11057ec681f3Smrg      assert(isDS());
11067ec681f3Smrg      return *(DS_instruction*)this;
11077ec681f3Smrg   }
11087ec681f3Smrg   constexpr bool isDS() const noexcept { return format == Format::DS; }
11097ec681f3Smrg   MTBUF_instruction& mtbuf() noexcept
11107ec681f3Smrg   {
11117ec681f3Smrg      assert(isMTBUF());
11127ec681f3Smrg      return *(MTBUF_instruction*)this;
11137ec681f3Smrg   }
11147ec681f3Smrg   const MTBUF_instruction& mtbuf() const noexcept
11157ec681f3Smrg   {
11167ec681f3Smrg      assert(isMTBUF());
11177ec681f3Smrg      return *(MTBUF_instruction*)this;
11187ec681f3Smrg   }
11197ec681f3Smrg   constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; }
11207ec681f3Smrg   MUBUF_instruction& mubuf() noexcept
11217ec681f3Smrg   {
11227ec681f3Smrg      assert(isMUBUF());
11237ec681f3Smrg      return *(MUBUF_instruction*)this;
11247ec681f3Smrg   }
11257ec681f3Smrg   const MUBUF_instruction& mubuf() const noexcept
11267ec681f3Smrg   {
11277ec681f3Smrg      assert(isMUBUF());
11287ec681f3Smrg      return *(MUBUF_instruction*)this;
11297ec681f3Smrg   }
11307ec681f3Smrg   constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; }
11317ec681f3Smrg   MIMG_instruction& mimg() noexcept
11327ec681f3Smrg   {
11337ec681f3Smrg      assert(isMIMG());
11347ec681f3Smrg      return *(MIMG_instruction*)this;
11357ec681f3Smrg   }
11367ec681f3Smrg   const MIMG_instruction& mimg() const noexcept
11377ec681f3Smrg   {
11387ec681f3Smrg      assert(isMIMG());
11397ec681f3Smrg      return *(MIMG_instruction*)this;
11407ec681f3Smrg   }
11417ec681f3Smrg   constexpr bool isMIMG() const noexcept { return format == Format::MIMG; }
11427ec681f3Smrg   Export_instruction& exp() noexcept
11437ec681f3Smrg   {
11447ec681f3Smrg      assert(isEXP());
11457ec681f3Smrg      return *(Export_instruction*)this;
11467ec681f3Smrg   }
11477ec681f3Smrg   const Export_instruction& exp() const noexcept
11487ec681f3Smrg   {
11497ec681f3Smrg      assert(isEXP());
11507ec681f3Smrg      return *(Export_instruction*)this;
11517ec681f3Smrg   }
11527ec681f3Smrg   constexpr bool isEXP() const noexcept { return format == Format::EXP; }
11537ec681f3Smrg   FLAT_instruction& flat() noexcept
11547ec681f3Smrg   {
11557ec681f3Smrg      assert(isFlat());
11567ec681f3Smrg      return *(FLAT_instruction*)this;
11577ec681f3Smrg   }
11587ec681f3Smrg   const FLAT_instruction& flat() const noexcept
11597ec681f3Smrg   {
11607ec681f3Smrg      assert(isFlat());
11617ec681f3Smrg      return *(FLAT_instruction*)this;
11627ec681f3Smrg   }
11637ec681f3Smrg   constexpr bool isFlat() const noexcept { return format == Format::FLAT; }
11647ec681f3Smrg   FLAT_instruction& global() noexcept
11657ec681f3Smrg   {
11667ec681f3Smrg      assert(isGlobal());
11677ec681f3Smrg      return *(FLAT_instruction*)this;
11687ec681f3Smrg   }
11697ec681f3Smrg   const FLAT_instruction& global() const noexcept
11707ec681f3Smrg   {
11717ec681f3Smrg      assert(isGlobal());
11727ec681f3Smrg      return *(FLAT_instruction*)this;
11737ec681f3Smrg   }
11747ec681f3Smrg   constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; }
11757ec681f3Smrg   FLAT_instruction& scratch() noexcept
11767ec681f3Smrg   {
11777ec681f3Smrg      assert(isScratch());
11787ec681f3Smrg      return *(FLAT_instruction*)this;
11797ec681f3Smrg   }
11807ec681f3Smrg   const FLAT_instruction& scratch() const noexcept
11817ec681f3Smrg   {
11827ec681f3Smrg      assert(isScratch());
11837ec681f3Smrg      return *(FLAT_instruction*)this;
11847ec681f3Smrg   }
11857ec681f3Smrg   constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; }
11867ec681f3Smrg   Pseudo_branch_instruction& branch() noexcept
11877ec681f3Smrg   {
11887ec681f3Smrg      assert(isBranch());
11897ec681f3Smrg      return *(Pseudo_branch_instruction*)this;
11907ec681f3Smrg   }
11917ec681f3Smrg   const Pseudo_branch_instruction& branch() const noexcept
11927ec681f3Smrg   {
11937ec681f3Smrg      assert(isBranch());
11947ec681f3Smrg      return *(Pseudo_branch_instruction*)this;
11957ec681f3Smrg   }
11967ec681f3Smrg   constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; }
11977ec681f3Smrg   Pseudo_barrier_instruction& barrier() noexcept
11987ec681f3Smrg   {
11997ec681f3Smrg      assert(isBarrier());
12007ec681f3Smrg      return *(Pseudo_barrier_instruction*)this;
12017ec681f3Smrg   }
12027ec681f3Smrg   const Pseudo_barrier_instruction& barrier() const noexcept
12037ec681f3Smrg   {
12047ec681f3Smrg      assert(isBarrier());
12057ec681f3Smrg      return *(Pseudo_barrier_instruction*)this;
12067ec681f3Smrg   }
12077ec681f3Smrg   constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; }
12087ec681f3Smrg   Pseudo_reduction_instruction& reduction() noexcept
12097ec681f3Smrg   {
12107ec681f3Smrg      assert(isReduction());
12117ec681f3Smrg      return *(Pseudo_reduction_instruction*)this;
12127ec681f3Smrg   }
12137ec681f3Smrg   const Pseudo_reduction_instruction& reduction() const noexcept
12147ec681f3Smrg   {
12157ec681f3Smrg      assert(isReduction());
12167ec681f3Smrg      return *(Pseudo_reduction_instruction*)this;
12177ec681f3Smrg   }
12187ec681f3Smrg   constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
12197ec681f3Smrg   VOP3P_instruction& vop3p() noexcept
12207ec681f3Smrg   {
12217ec681f3Smrg      assert(isVOP3P());
12227ec681f3Smrg      return *(VOP3P_instruction*)this;
12237ec681f3Smrg   }
12247ec681f3Smrg   const VOP3P_instruction& vop3p() const noexcept
12257ec681f3Smrg   {
12267ec681f3Smrg      assert(isVOP3P());
12277ec681f3Smrg      return *(VOP3P_instruction*)this;
12287ec681f3Smrg   }
12297ec681f3Smrg   constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
12307ec681f3Smrg   VOP1_instruction& vop1() noexcept
12317ec681f3Smrg   {
12327ec681f3Smrg      assert(isVOP1());
12337ec681f3Smrg      return *(VOP1_instruction*)this;
12347ec681f3Smrg   }
12357ec681f3Smrg   const VOP1_instruction& vop1() const noexcept
12367ec681f3Smrg   {
12377ec681f3Smrg      assert(isVOP1());
12387ec681f3Smrg      return *(VOP1_instruction*)this;
12397ec681f3Smrg   }
12407ec681f3Smrg   constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
12417ec681f3Smrg   VOP2_instruction& vop2() noexcept
12427ec681f3Smrg   {
12437ec681f3Smrg      assert(isVOP2());
12447ec681f3Smrg      return *(VOP2_instruction*)this;
12457ec681f3Smrg   }
12467ec681f3Smrg   const VOP2_instruction& vop2() const noexcept
12477ec681f3Smrg   {
12487ec681f3Smrg      assert(isVOP2());
12497ec681f3Smrg      return *(VOP2_instruction*)this;
12507ec681f3Smrg   }
12517ec681f3Smrg   constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
12527ec681f3Smrg   VOPC_instruction& vopc() noexcept
12537ec681f3Smrg   {
12547ec681f3Smrg      assert(isVOPC());
12557ec681f3Smrg      return *(VOPC_instruction*)this;
12567ec681f3Smrg   }
12577ec681f3Smrg   const VOPC_instruction& vopc() const noexcept
12587ec681f3Smrg   {
12597ec681f3Smrg      assert(isVOPC());
12607ec681f3Smrg      return *(VOPC_instruction*)this;
12617ec681f3Smrg   }
12627ec681f3Smrg   constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
12637ec681f3Smrg   VOP3_instruction& vop3() noexcept
12647ec681f3Smrg   {
12657ec681f3Smrg      assert(isVOP3());
12667ec681f3Smrg      return *(VOP3_instruction*)this;
12677ec681f3Smrg   }
12687ec681f3Smrg   const VOP3_instruction& vop3() const noexcept
12697ec681f3Smrg   {
12707ec681f3Smrg      assert(isVOP3());
12717ec681f3Smrg      return *(VOP3_instruction*)this;
12727ec681f3Smrg   }
12737ec681f3Smrg   constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
12747ec681f3Smrg   Interp_instruction& vintrp() noexcept
12757ec681f3Smrg   {
12767ec681f3Smrg      assert(isVINTRP());
12777ec681f3Smrg      return *(Interp_instruction*)this;
12787ec681f3Smrg   }
12797ec681f3Smrg   const Interp_instruction& vintrp() const noexcept
12807ec681f3Smrg   {
12817ec681f3Smrg      assert(isVINTRP());
12827ec681f3Smrg      return *(Interp_instruction*)this;
12837ec681f3Smrg   }
12847ec681f3Smrg   constexpr bool isVINTRP() const noexcept { return (uint16_t)format & (uint16_t)Format::VINTRP; }
12857ec681f3Smrg   DPP_instruction& dpp() noexcept
12867ec681f3Smrg   {
12877ec681f3Smrg      assert(isDPP());
12887ec681f3Smrg      return *(DPP_instruction*)this;
12897ec681f3Smrg   }
12907ec681f3Smrg   const DPP_instruction& dpp() const noexcept
12917ec681f3Smrg   {
12927ec681f3Smrg      assert(isDPP());
12937ec681f3Smrg      return *(DPP_instruction*)this;
12947ec681f3Smrg   }
12957ec681f3Smrg   constexpr bool isDPP() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP; }
12967ec681f3Smrg   SDWA_instruction& sdwa() noexcept
12977ec681f3Smrg   {
12987ec681f3Smrg      assert(isSDWA());
12997ec681f3Smrg      return *(SDWA_instruction*)this;
13007ec681f3Smrg   }
13017ec681f3Smrg   const SDWA_instruction& sdwa() const noexcept
13027ec681f3Smrg   {
13037ec681f3Smrg      assert(isSDWA());
13047ec681f3Smrg      return *(SDWA_instruction*)this;
13057ec681f3Smrg   }
13067ec681f3Smrg   constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; }
13077ec681f3Smrg
13087ec681f3Smrg   FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; }
13097ec681f3Smrg
13107ec681f3Smrg   const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; }
13117ec681f3Smrg
13127ec681f3Smrg   constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); }
13137ec681f3Smrg
13147ec681f3Smrg   constexpr bool isVALU() const noexcept
13157ec681f3Smrg   {
13167ec681f3Smrg      return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P();
13177ec681f3Smrg   }
13187ec681f3Smrg
13197ec681f3Smrg   constexpr bool isSALU() const noexcept
13207ec681f3Smrg   {
13217ec681f3Smrg      return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP();
13227ec681f3Smrg   }
13237ec681f3Smrg
13247ec681f3Smrg   constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); }
13257ec681f3Smrg};
13267ec681f3Smrgstatic_assert(sizeof(Instruction) == 16, "Unexpected padding");
13277ec681f3Smrg
13287ec681f3Smrgstruct SOPK_instruction : public Instruction {
13297ec681f3Smrg   uint16_t imm;
13307ec681f3Smrg   uint16_t padding;
13317ec681f3Smrg};
13327ec681f3Smrgstatic_assert(sizeof(SOPK_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
13337ec681f3Smrg
13347ec681f3Smrgstruct SOPP_instruction : public Instruction {
13357ec681f3Smrg   uint32_t imm;
13367ec681f3Smrg   int block;
13377ec681f3Smrg};
13387ec681f3Smrgstatic_assert(sizeof(SOPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
13397ec681f3Smrg
13407ec681f3Smrgstruct SOPC_instruction : public Instruction {};
13417ec681f3Smrgstatic_assert(sizeof(SOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
13427ec681f3Smrg
13437ec681f3Smrgstruct SOP1_instruction : public Instruction {};
13447ec681f3Smrgstatic_assert(sizeof(SOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
13457ec681f3Smrg
13467ec681f3Smrgstruct SOP2_instruction : public Instruction {};
13477ec681f3Smrgstatic_assert(sizeof(SOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
13487ec681f3Smrg
13497ec681f3Smrg/**
13507ec681f3Smrg * Scalar Memory Format:
13517ec681f3Smrg * For s_(buffer_)load_dword*:
13527ec681f3Smrg * Operand(0): SBASE - SGPR-pair which provides base address
13537ec681f3Smrg * Operand(1): Offset - immediate (un)signed offset or SGPR
13547ec681f3Smrg * Operand(2) / Definition(0): SDATA - SGPR for read / write result
13557ec681f3Smrg * Operand(n-1): SOffset - SGPR offset (Vega only)
13567ec681f3Smrg *
13577ec681f3Smrg * Having no operands is also valid for instructions such as s_dcache_inv.
13587ec681f3Smrg *
13597ec681f3Smrg */
13607ec681f3Smrgstruct SMEM_instruction : public Instruction {
13617ec681f3Smrg   memory_sync_info sync;
13627ec681f3Smrg   bool glc : 1; /* VI+: globally coherent */
13637ec681f3Smrg   bool dlc : 1; /* NAVI: device level coherent */
13647ec681f3Smrg   bool nv : 1;  /* VEGA only: Non-volatile */
13657ec681f3Smrg   bool disable_wqm : 1;
13667ec681f3Smrg   bool prevent_overflow : 1; /* avoid overflow when combining additions */
13677ec681f3Smrg   uint8_t padding : 3;
13687ec681f3Smrg};
13697ec681f3Smrgstatic_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
13707ec681f3Smrg
13717ec681f3Smrgstruct VOP1_instruction : public Instruction {};
13727ec681f3Smrgstatic_assert(sizeof(VOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
13737ec681f3Smrg
13747ec681f3Smrgstruct VOP2_instruction : public Instruction {};
13757ec681f3Smrgstatic_assert(sizeof(VOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
13767ec681f3Smrg
13777ec681f3Smrgstruct VOPC_instruction : public Instruction {};
13787ec681f3Smrgstatic_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
13797ec681f3Smrg
13807ec681f3Smrgstruct VOP3_instruction : public Instruction {
13817ec681f3Smrg   bool abs[3];
13827ec681f3Smrg   bool neg[3];
13837ec681f3Smrg   uint8_t opsel : 4;
13847ec681f3Smrg   uint8_t omod : 2;
13857ec681f3Smrg   bool clamp : 1;
13867ec681f3Smrg   uint8_t padding0 : 1;
13877ec681f3Smrg   uint8_t padding1;
13887ec681f3Smrg};
13897ec681f3Smrgstatic_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
13907ec681f3Smrg
13917ec681f3Smrgstruct VOP3P_instruction : public Instruction {
13927ec681f3Smrg   bool neg_lo[3];
13937ec681f3Smrg   bool neg_hi[3];
13947ec681f3Smrg   uint8_t opsel_lo : 3;
13957ec681f3Smrg   uint8_t opsel_hi : 3;
13967ec681f3Smrg   bool clamp : 1;
13977ec681f3Smrg   uint8_t padding0 : 1;
13987ec681f3Smrg   uint8_t padding1;
13997ec681f3Smrg};
14007ec681f3Smrgstatic_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
14017ec681f3Smrg
14027ec681f3Smrg/**
14037ec681f3Smrg * Data Parallel Primitives Format:
14047ec681f3Smrg * This format can be used for VOP1, VOP2 or VOPC instructions.
14057ec681f3Smrg * The swizzle applies to the src0 operand.
14067ec681f3Smrg *
14077ec681f3Smrg */
14087ec681f3Smrgstruct DPP_instruction : public Instruction {
14097ec681f3Smrg   bool abs[2];
14107ec681f3Smrg   bool neg[2];
14117ec681f3Smrg   uint16_t dpp_ctrl;
14127ec681f3Smrg   uint8_t row_mask : 4;
14137ec681f3Smrg   uint8_t bank_mask : 4;
14147ec681f3Smrg   bool bound_ctrl : 1;
14157ec681f3Smrg   uint8_t padding : 7;
14167ec681f3Smrg};
14177ec681f3Smrgstatic_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
14187ec681f3Smrg
14197ec681f3Smrgstruct SubdwordSel {
14207ec681f3Smrg   enum sdwa_sel : uint8_t {
14217ec681f3Smrg      ubyte = 0x4,
14227ec681f3Smrg      uword = 0x8,
14237ec681f3Smrg      dword = 0x10,
14247ec681f3Smrg      sext = 0x20,
14257ec681f3Smrg      sbyte = ubyte | sext,
14267ec681f3Smrg      sword = uword | sext,
14277ec681f3Smrg
14287ec681f3Smrg      ubyte0 = ubyte,
14297ec681f3Smrg      ubyte1 = ubyte | 1,
14307ec681f3Smrg      ubyte2 = ubyte | 2,
14317ec681f3Smrg      ubyte3 = ubyte | 3,
14327ec681f3Smrg      sbyte0 = sbyte,
14337ec681f3Smrg      sbyte1 = sbyte | 1,
14347ec681f3Smrg      sbyte2 = sbyte | 2,
14357ec681f3Smrg      sbyte3 = sbyte | 3,
14367ec681f3Smrg      uword0 = uword,
14377ec681f3Smrg      uword1 = uword | 2,
14387ec681f3Smrg      sword0 = sword,
14397ec681f3Smrg      sword1 = sword | 2,
14407ec681f3Smrg   };
14417ec681f3Smrg
14427ec681f3Smrg   SubdwordSel() : sel((sdwa_sel)0) {}
14437ec681f3Smrg   constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {}
14447ec681f3Smrg   constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend)
14457ec681f3Smrg       : sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset))
14467ec681f3Smrg   {}
14477ec681f3Smrg   constexpr operator sdwa_sel() const { return sel; }
14487ec681f3Smrg   explicit operator bool() const { return sel != 0; }
14497ec681f3Smrg
14507ec681f3Smrg   constexpr unsigned size() const { return (sel >> 2) & 0x7; }
14517ec681f3Smrg   constexpr unsigned offset() const { return sel & 0x3; }
14527ec681f3Smrg   constexpr bool sign_extend() const { return sel & sext; }
14537ec681f3Smrg   constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const
14547ec681f3Smrg   {
14557ec681f3Smrg      reg_byte_offset += offset();
14567ec681f3Smrg      if (size() == 1)
14577ec681f3Smrg         return reg_byte_offset;
14587ec681f3Smrg      else if (size() == 2)
14597ec681f3Smrg         return 4 + (reg_byte_offset >> 1);
14607ec681f3Smrg      else
14617ec681f3Smrg         return 6;
14627ec681f3Smrg   }
14637ec681f3Smrg
14647ec681f3Smrgprivate:
14657ec681f3Smrg   sdwa_sel sel;
14667ec681f3Smrg};
14677ec681f3Smrg
14687ec681f3Smrg/**
14697ec681f3Smrg * Sub-Dword Addressing Format:
14707ec681f3Smrg * This format can be used for VOP1, VOP2 or VOPC instructions.
14717ec681f3Smrg *
14727ec681f3Smrg * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
14737ec681f3Smrg * the definition doesn't have to be VCC on GFX9+.
14747ec681f3Smrg *
14757ec681f3Smrg */
14767ec681f3Smrgstruct SDWA_instruction : public Instruction {
14777ec681f3Smrg   /* these destination modifiers aren't available with VOPC except for
14787ec681f3Smrg    * clamp on GFX8 */
14797ec681f3Smrg   SubdwordSel sel[2];
14807ec681f3Smrg   SubdwordSel dst_sel;
14817ec681f3Smrg   bool neg[2];
14827ec681f3Smrg   bool abs[2];
14837ec681f3Smrg   bool clamp : 1;
14847ec681f3Smrg   uint8_t omod : 2; /* GFX9+ */
14857ec681f3Smrg   uint8_t padding : 5;
14867ec681f3Smrg};
14877ec681f3Smrgstatic_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
14887ec681f3Smrg
14897ec681f3Smrgstruct Interp_instruction : public Instruction {
14907ec681f3Smrg   uint8_t attribute;
14917ec681f3Smrg   uint8_t component;
14927ec681f3Smrg   uint16_t padding;
14937ec681f3Smrg};
14947ec681f3Smrgstatic_assert(sizeof(Interp_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
14957ec681f3Smrg
14967ec681f3Smrg/**
14977ec681f3Smrg * Local and Global Data Sharing instructions
14987ec681f3Smrg * Operand(0): ADDR - VGPR which supplies the address.
14997ec681f3Smrg * Operand(1): DATA0 - First data VGPR.
15007ec681f3Smrg * Operand(2): DATA1 - Second data VGPR.
15017ec681f3Smrg * Operand(n-1): M0 - LDS size.
15027ec681f3Smrg * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
15037ec681f3Smrg *
15047ec681f3Smrg */
15057ec681f3Smrgstruct DS_instruction : public Instruction {
15067ec681f3Smrg   memory_sync_info sync;
15077ec681f3Smrg   bool gds;
15087ec681f3Smrg   int16_t offset0;
15097ec681f3Smrg   int8_t offset1;
15107ec681f3Smrg   uint8_t padding;
15117ec681f3Smrg};
15127ec681f3Smrgstatic_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
15137ec681f3Smrg
15147ec681f3Smrg/**
15157ec681f3Smrg * Vector Memory Untyped-buffer Instructions
15167ec681f3Smrg * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
15177ec681f3Smrg * Operand(1): VADDR - Address source. Can carry an index and/or offset
15187ec681f3Smrg * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
15197ec681f3Smrg * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
15207ec681f3Smrg *
15217ec681f3Smrg */
15227ec681f3Smrgstruct MUBUF_instruction : public Instruction {
15237ec681f3Smrg   memory_sync_info sync;
15247ec681f3Smrg   bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
15257ec681f3Smrg   bool idxen : 1;           /* Supply an index from VGPR (VADDR) */
15267ec681f3Smrg   bool addr64 : 1;          /* SI, CIK: Address size is 64-bit */
15277ec681f3Smrg   bool glc : 1;             /* globally coherent */
15287ec681f3Smrg   bool dlc : 1;             /* NAVI: device level coherent */
15297ec681f3Smrg   bool slc : 1;             /* system level coherent */
15307ec681f3Smrg   bool tfe : 1;             /* texture fail enable */
15317ec681f3Smrg   bool lds : 1;             /* Return read-data to LDS instead of VGPRs */
15327ec681f3Smrg   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
15337ec681f3Smrg   uint16_t offset : 12;     /* Unsigned byte offset - 12 bit */
15347ec681f3Smrg   uint16_t swizzled : 1;
15357ec681f3Smrg   uint16_t padding0 : 2;
15367ec681f3Smrg   uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */
15377ec681f3Smrg   uint16_t padding1 : 10;
15387ec681f3Smrg};
15397ec681f3Smrgstatic_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
15407ec681f3Smrg
15417ec681f3Smrg/**
15427ec681f3Smrg * Vector Memory Typed-buffer Instructions
15437ec681f3Smrg * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
15447ec681f3Smrg * Operand(1): VADDR - Address source. Can carry an index and/or offset
15457ec681f3Smrg * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
15467ec681f3Smrg * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
15477ec681f3Smrg *
15487ec681f3Smrg */
15497ec681f3Smrgstruct MTBUF_instruction : public Instruction {
15507ec681f3Smrg   memory_sync_info sync;
15517ec681f3Smrg   uint8_t dfmt : 4;         /* Data Format of data in memory buffer */
15527ec681f3Smrg   uint8_t nfmt : 3;         /* Numeric format of data in memory */
15537ec681f3Smrg   bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
15547ec681f3Smrg   uint16_t idxen : 1;       /* Supply an index from VGPR (VADDR) */
15557ec681f3Smrg   uint16_t glc : 1;         /* globally coherent */
15567ec681f3Smrg   uint16_t dlc : 1;         /* NAVI: device level coherent */
15577ec681f3Smrg   uint16_t slc : 1;         /* system level coherent */
15587ec681f3Smrg   uint16_t tfe : 1;         /* texture fail enable */
15597ec681f3Smrg   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
15607ec681f3Smrg   uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */
15617ec681f3Smrg   uint16_t padding : 4;
15627ec681f3Smrg   uint16_t offset; /* Unsigned byte offset - 12 bit */
15637ec681f3Smrg};
15647ec681f3Smrgstatic_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
15657ec681f3Smrg
15667ec681f3Smrg/**
15677ec681f3Smrg * Vector Memory Image Instructions
15687ec681f3Smrg * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
15697ec681f3Smrg * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
15707ec681f3Smrg * Operand(2): VDATA - Vector GPR for write data or zero if TFE/LWE=1.
15717ec681f3Smrg * Operand(3): VADDR - Address source. Can carry an offset or an index.
15727ec681f3Smrg * Definition(0): VDATA - Vector GPR for read result.
15737ec681f3Smrg *
15747ec681f3Smrg */
15757ec681f3Smrgstruct MIMG_instruction : public Instruction {
15767ec681f3Smrg   memory_sync_info sync;
15777ec681f3Smrg   uint8_t dmask;        /* Data VGPR enable mask */
15787ec681f3Smrg   uint8_t dim : 3;      /* NAVI: dimensionality */
15797ec681f3Smrg   bool unrm : 1;        /* Force address to be un-normalized */
15807ec681f3Smrg   bool dlc : 1;         /* NAVI: device level coherent */
15817ec681f3Smrg   bool glc : 1;         /* globally coherent */
15827ec681f3Smrg   bool slc : 1;         /* system level coherent */
15837ec681f3Smrg   bool tfe : 1;         /* texture fail enable */
15847ec681f3Smrg   bool da : 1;          /* declare an array */
15857ec681f3Smrg   bool lwe : 1;         /* LOD warning enable */
15867ec681f3Smrg   bool r128 : 1;        /* NAVI: Texture resource size */
15877ec681f3Smrg   bool a16 : 1;         /* VEGA, NAVI: Address components are 16-bits */
15887ec681f3Smrg   bool d16 : 1;         /* Convert 32-bit data to 16-bit data */
15897ec681f3Smrg   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
15907ec681f3Smrg   uint8_t padding0 : 2;
15917ec681f3Smrg   uint8_t padding1;
15927ec681f3Smrg   uint8_t padding2;
15937ec681f3Smrg};
15947ec681f3Smrgstatic_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
15957ec681f3Smrg
15967ec681f3Smrg/**
15977ec681f3Smrg * Flat/Scratch/Global Instructions
15987ec681f3Smrg * Operand(0): ADDR
15997ec681f3Smrg * Operand(1): SADDR
16007ec681f3Smrg * Operand(2) / Definition(0): DATA/VDST
16017ec681f3Smrg *
16027ec681f3Smrg */
16037ec681f3Smrgstruct FLAT_instruction : public Instruction {
16047ec681f3Smrg   memory_sync_info sync;
16057ec681f3Smrg   bool slc : 1; /* system level coherent */
16067ec681f3Smrg   bool glc : 1; /* globally coherent */
16077ec681f3Smrg   bool dlc : 1; /* NAVI: device level coherent */
16087ec681f3Smrg   bool lds : 1;
16097ec681f3Smrg   bool nv : 1;
16107ec681f3Smrg   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
16117ec681f3Smrg   uint8_t padding0 : 2;
16127ec681f3Smrg   uint16_t offset; /* Vega/Navi only */
16137ec681f3Smrg   uint16_t padding1;
16147ec681f3Smrg};
16157ec681f3Smrgstatic_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
16167ec681f3Smrg
16177ec681f3Smrgstruct Export_instruction : public Instruction {
16187ec681f3Smrg   uint8_t enabled_mask;
16197ec681f3Smrg   uint8_t dest;
16207ec681f3Smrg   bool compressed : 1;
16217ec681f3Smrg   bool done : 1;
16227ec681f3Smrg   bool valid_mask : 1;
16237ec681f3Smrg   uint8_t padding0 : 5;
16247ec681f3Smrg   uint8_t padding1;
16257ec681f3Smrg};
16267ec681f3Smrgstatic_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
16277ec681f3Smrg
16287ec681f3Smrgstruct Pseudo_instruction : public Instruction {
16297ec681f3Smrg   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
16307ec681f3Smrg   bool tmp_in_scc;
16317ec681f3Smrg   uint8_t padding;
16327ec681f3Smrg};
16337ec681f3Smrgstatic_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
16347ec681f3Smrg
16357ec681f3Smrgstruct Pseudo_branch_instruction : public Instruction {
16367ec681f3Smrg   /* target[0] is the block index of the branch target.
16377ec681f3Smrg    * For conditional branches, target[1] contains the fall-through alternative.
16387ec681f3Smrg    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
16397ec681f3Smrg    */
16407ec681f3Smrg   uint32_t target[2];
16417ec681f3Smrg};
16427ec681f3Smrgstatic_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
16437ec681f3Smrg
16447ec681f3Smrgstruct Pseudo_barrier_instruction : public Instruction {
16457ec681f3Smrg   memory_sync_info sync;
16467ec681f3Smrg   sync_scope exec_scope;
16477ec681f3Smrg};
16487ec681f3Smrgstatic_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
16497ec681f3Smrg
16507ec681f3Smrgenum ReduceOp : uint16_t {
16517ec681f3Smrg   // clang-format off
16527ec681f3Smrg   iadd8, iadd16, iadd32, iadd64,
16537ec681f3Smrg   imul8, imul16, imul32, imul64,
16547ec681f3Smrg          fadd16, fadd32, fadd64,
16557ec681f3Smrg          fmul16, fmul32, fmul64,
16567ec681f3Smrg   imin8, imin16, imin32, imin64,
16577ec681f3Smrg   imax8, imax16, imax32, imax64,
16587ec681f3Smrg   umin8, umin16, umin32, umin64,
16597ec681f3Smrg   umax8, umax16, umax32, umax64,
16607ec681f3Smrg          fmin16, fmin32, fmin64,
16617ec681f3Smrg          fmax16, fmax32, fmax64,
16627ec681f3Smrg   iand8, iand16, iand32, iand64,
16637ec681f3Smrg   ior8, ior16, ior32, ior64,
16647ec681f3Smrg   ixor8, ixor16, ixor32, ixor64,
16657ec681f3Smrg   num_reduce_ops,
16667ec681f3Smrg   // clang-format on
16677ec681f3Smrg};
16687ec681f3Smrg
16697ec681f3Smrg/**
16707ec681f3Smrg * Subgroup Reduction Instructions, everything except for the data to be
16717ec681f3Smrg * reduced and the result as inserted by setup_reduce_temp().
16727ec681f3Smrg * Operand(0): data to be reduced
16737ec681f3Smrg * Operand(1): reduce temporary
16747ec681f3Smrg * Operand(2): vector temporary
16757ec681f3Smrg * Definition(0): result
16767ec681f3Smrg * Definition(1): scalar temporary
16777ec681f3Smrg * Definition(2): scalar identity temporary (not used to store identity on GFX10)
16787ec681f3Smrg * Definition(3): scc clobber
16797ec681f3Smrg * Definition(4): vcc clobber
16807ec681f3Smrg *
16817ec681f3Smrg */
16827ec681f3Smrgstruct Pseudo_reduction_instruction : public Instruction {
16837ec681f3Smrg   ReduceOp reduce_op;
16847ec681f3Smrg   uint16_t cluster_size; // must be 0 for scans
16857ec681f3Smrg};
16867ec681f3Smrgstatic_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4,
16877ec681f3Smrg              "Unexpected padding");
16887ec681f3Smrg
16897ec681f3Smrgstruct instr_deleter_functor {
16907ec681f3Smrg   void operator()(void* p) { free(p); }
16917ec681f3Smrg};
16927ec681f3Smrg
16937ec681f3Smrgtemplate <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
16947ec681f3Smrg
16957ec681f3Smrgtemplate <typename T>
16967ec681f3SmrgT*
16977ec681f3Smrgcreate_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
16987ec681f3Smrg                   uint32_t num_definitions)
16997ec681f3Smrg{
17007ec681f3Smrg   std::size_t size =
17017ec681f3Smrg      sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
17027ec681f3Smrg   char* data = (char*)calloc(1, size);
17037ec681f3Smrg   T* inst = (T*)data;
17047ec681f3Smrg
17057ec681f3Smrg   inst->opcode = opcode;
17067ec681f3Smrg   inst->format = format;
17077ec681f3Smrg
17087ec681f3Smrg   uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
17097ec681f3Smrg   inst->operands = aco::span<Operand>(operands_offset, num_operands);
17107ec681f3Smrg   uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
17117ec681f3Smrg   inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
17127ec681f3Smrg
17137ec681f3Smrg   return inst;
17147ec681f3Smrg}
17157ec681f3Smrg
17167ec681f3Smrgconstexpr bool
17177ec681f3SmrgInstruction::usesModifiers() const noexcept
17187ec681f3Smrg{
17197ec681f3Smrg   if (isDPP() || isSDWA())
17207ec681f3Smrg      return true;
17217ec681f3Smrg
17227ec681f3Smrg   if (isVOP3P()) {
17237ec681f3Smrg      const VOP3P_instruction& vop3p = this->vop3p();
17247ec681f3Smrg      for (unsigned i = 0; i < operands.size(); i++) {
17257ec681f3Smrg         if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
17267ec681f3Smrg            return true;
17277ec681f3Smrg
17287ec681f3Smrg         /* opsel_hi must be 1 to not be considered a modifier - even for constants */
17297ec681f3Smrg         if (!(vop3p.opsel_hi & (1 << i)))
17307ec681f3Smrg            return true;
17317ec681f3Smrg      }
17327ec681f3Smrg      return vop3p.opsel_lo || vop3p.clamp;
17337ec681f3Smrg   } else if (isVOP3()) {
17347ec681f3Smrg      const VOP3_instruction& vop3 = this->vop3();
17357ec681f3Smrg      for (unsigned i = 0; i < operands.size(); i++) {
17367ec681f3Smrg         if (vop3.abs[i] || vop3.neg[i])
17377ec681f3Smrg            return true;
17387ec681f3Smrg      }
17397ec681f3Smrg      return vop3.opsel || vop3.clamp || vop3.omod;
17407ec681f3Smrg   }
17417ec681f3Smrg   return false;
17427ec681f3Smrg}
17437ec681f3Smrg
17447ec681f3Smrgconstexpr bool
17457ec681f3Smrgis_phi(Instruction* instr)
17467ec681f3Smrg{
17477ec681f3Smrg   return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
17487ec681f3Smrg}
17497ec681f3Smrg
17507ec681f3Smrgstatic inline bool
17517ec681f3Smrgis_phi(aco_ptr<Instruction>& instr)
17527ec681f3Smrg{
17537ec681f3Smrg   return is_phi(instr.get());
17547ec681f3Smrg}
17557ec681f3Smrg
17567ec681f3Smrgmemory_sync_info get_sync_info(const Instruction* instr);
17577ec681f3Smrg
17587ec681f3Smrgbool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
17597ec681f3Smrg
17607ec681f3Smrgbool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
17617ec681f3Smrgbool instr_is_16bit(chip_class chip, aco_opcode op);
17627ec681f3Smrgbool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
17637ec681f3Smrgbool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra);
17647ec681f3Smrg/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
17657ec681f3Smrgaco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr);
17667ec681f3Smrgaco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr);
17677ec681f3Smrgbool needs_exec_mask(const Instruction* instr);
17687ec681f3Smrg
17697ec681f3Smrgaco_opcode get_ordered(aco_opcode op);
17707ec681f3Smrgaco_opcode get_unordered(aco_opcode op);
17717ec681f3Smrgaco_opcode get_inverse(aco_opcode op);
17727ec681f3Smrgaco_opcode get_f32_cmp(aco_opcode op);
17737ec681f3Smrgunsigned get_cmp_bitsize(aco_opcode op);
17747ec681f3Smrgbool is_cmp(aco_opcode op);
17757ec681f3Smrg
17767ec681f3Smrgbool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op);
17777ec681f3Smrg
17787ec681f3Smrguint32_t get_reduction_identity(ReduceOp op, unsigned idx);
17797ec681f3Smrg
17807ec681f3Smrgunsigned get_mimg_nsa_dwords(const Instruction* instr);
17817ec681f3Smrg
17827ec681f3Smrgbool should_form_clause(const Instruction* a, const Instruction* b);
17837ec681f3Smrg
17847ec681f3Smrgenum block_kind {
17857ec681f3Smrg   /* uniform indicates that leaving this block,
17867ec681f3Smrg    * all actives lanes stay active */
17877ec681f3Smrg   block_kind_uniform = 1 << 0,
17887ec681f3Smrg   block_kind_top_level = 1 << 1,
17897ec681f3Smrg   block_kind_loop_preheader = 1 << 2,
17907ec681f3Smrg   block_kind_loop_header = 1 << 3,
17917ec681f3Smrg   block_kind_loop_exit = 1 << 4,
17927ec681f3Smrg   block_kind_continue = 1 << 5,
17937ec681f3Smrg   block_kind_break = 1 << 6,
17947ec681f3Smrg   block_kind_continue_or_break = 1 << 7,
17957ec681f3Smrg   block_kind_discard = 1 << 8,
17967ec681f3Smrg   block_kind_branch = 1 << 9,
17977ec681f3Smrg   block_kind_merge = 1 << 10,
17987ec681f3Smrg   block_kind_invert = 1 << 11,
17997ec681f3Smrg   block_kind_uses_discard_if = 1 << 12,
18007ec681f3Smrg   block_kind_needs_lowering = 1 << 13,
18017ec681f3Smrg   block_kind_uses_demote = 1 << 14,
18027ec681f3Smrg   block_kind_export_end = 1 << 15,
18037ec681f3Smrg};
18047ec681f3Smrg
18057ec681f3Smrgstruct RegisterDemand {
18067ec681f3Smrg   constexpr RegisterDemand() = default;
18077ec681f3Smrg   constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {}
18087ec681f3Smrg   int16_t vgpr = 0;
18097ec681f3Smrg   int16_t sgpr = 0;
18107ec681f3Smrg
18117ec681f3Smrg   constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept
18127ec681f3Smrg   {
18137ec681f3Smrg      return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
18147ec681f3Smrg   }
18157ec681f3Smrg
18167ec681f3Smrg   constexpr bool exceeds(const RegisterDemand other) const noexcept
18177ec681f3Smrg   {
18187ec681f3Smrg      return vgpr > other.vgpr || sgpr > other.sgpr;
18197ec681f3Smrg   }
18207ec681f3Smrg
18217ec681f3Smrg   constexpr RegisterDemand operator+(const Temp t) const noexcept
18227ec681f3Smrg   {
18237ec681f3Smrg      if (t.type() == RegType::sgpr)
18247ec681f3Smrg         return RegisterDemand(vgpr, sgpr + t.size());
18257ec681f3Smrg      else
18267ec681f3Smrg         return RegisterDemand(vgpr + t.size(), sgpr);
18277ec681f3Smrg   }
18287ec681f3Smrg
18297ec681f3Smrg   constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept
18307ec681f3Smrg   {
18317ec681f3Smrg      return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
18327ec681f3Smrg   }
18337ec681f3Smrg
18347ec681f3Smrg   constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept
18357ec681f3Smrg   {
18367ec681f3Smrg      return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
18377ec681f3Smrg   }
18387ec681f3Smrg
18397ec681f3Smrg   constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept
18407ec681f3Smrg   {
18417ec681f3Smrg      vgpr += other.vgpr;
18427ec681f3Smrg      sgpr += other.sgpr;
18437ec681f3Smrg      return *this;
18447ec681f3Smrg   }
18457ec681f3Smrg
18467ec681f3Smrg   constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept
18477ec681f3Smrg   {
18487ec681f3Smrg      vgpr -= other.vgpr;
18497ec681f3Smrg      sgpr -= other.sgpr;
18507ec681f3Smrg      return *this;
18517ec681f3Smrg   }
18527ec681f3Smrg
18537ec681f3Smrg   constexpr RegisterDemand& operator+=(const Temp t) noexcept
18547ec681f3Smrg   {
18557ec681f3Smrg      if (t.type() == RegType::sgpr)
18567ec681f3Smrg         sgpr += t.size();
18577ec681f3Smrg      else
18587ec681f3Smrg         vgpr += t.size();
18597ec681f3Smrg      return *this;
18607ec681f3Smrg   }
18617ec681f3Smrg
18627ec681f3Smrg   constexpr RegisterDemand& operator-=(const Temp t) noexcept
18637ec681f3Smrg   {
18647ec681f3Smrg      if (t.type() == RegType::sgpr)
18657ec681f3Smrg         sgpr -= t.size();
18667ec681f3Smrg      else
18677ec681f3Smrg         vgpr -= t.size();
18687ec681f3Smrg      return *this;
18697ec681f3Smrg   }
18707ec681f3Smrg
18717ec681f3Smrg   constexpr void update(const RegisterDemand other) noexcept
18727ec681f3Smrg   {
18737ec681f3Smrg      vgpr = std::max(vgpr, other.vgpr);
18747ec681f3Smrg      sgpr = std::max(sgpr, other.sgpr);
18757ec681f3Smrg   }
18767ec681f3Smrg};
18777ec681f3Smrg
18787ec681f3Smrg/* CFG */
18797ec681f3Smrgstruct Block {
18807ec681f3Smrg   float_mode fp_mode;
18817ec681f3Smrg   unsigned index;
18827ec681f3Smrg   unsigned offset = 0;
18837ec681f3Smrg   std::vector<aco_ptr<Instruction>> instructions;
18847ec681f3Smrg   std::vector<unsigned> logical_preds;
18857ec681f3Smrg   std::vector<unsigned> linear_preds;
18867ec681f3Smrg   std::vector<unsigned> logical_succs;
18877ec681f3Smrg   std::vector<unsigned> linear_succs;
18887ec681f3Smrg   RegisterDemand register_demand = RegisterDemand();
18897ec681f3Smrg   uint16_t loop_nest_depth = 0;
18907ec681f3Smrg   uint16_t divergent_if_logical_depth = 0;
18917ec681f3Smrg   uint16_t uniform_if_depth = 0;
18927ec681f3Smrg   uint16_t kind = 0;
18937ec681f3Smrg   int logical_idom = -1;
18947ec681f3Smrg   int linear_idom = -1;
18957ec681f3Smrg
18967ec681f3Smrg   /* this information is needed for predecessors to blocks with phis when
18977ec681f3Smrg    * moving out of ssa */
18987ec681f3Smrg   bool scc_live_out = false;
18997ec681f3Smrg   PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */
19007ec681f3Smrg
19017ec681f3Smrg   Block() : index(0) {}
19027ec681f3Smrg};
19037ec681f3Smrg
19047ec681f3Smrg/*
19057ec681f3Smrg * Shader stages as provided in Vulkan by the application. Contrast this to HWStage.
19067ec681f3Smrg */
19077ec681f3Smrgenum class SWStage : uint8_t {
19087ec681f3Smrg   None = 0,
19097ec681f3Smrg   VS = 1 << 0,     /* Vertex Shader */
19107ec681f3Smrg   GS = 1 << 1,     /* Geometry Shader */
19117ec681f3Smrg   TCS = 1 << 2,    /* Tessellation Control aka Hull Shader */
19127ec681f3Smrg   TES = 1 << 3,    /* Tessellation Evaluation aka Domain Shader */
19137ec681f3Smrg   FS = 1 << 4,     /* Fragment aka Pixel Shader */
19147ec681f3Smrg   CS = 1 << 5,     /* Compute Shader */
19157ec681f3Smrg   GSCopy = 1 << 6, /* GS Copy Shader (internal) */
19167ec681f3Smrg
19177ec681f3Smrg   /* Stage combinations merged to run on a single HWStage */
19187ec681f3Smrg   VS_GS = VS | GS,
19197ec681f3Smrg   VS_TCS = VS | TCS,
19207ec681f3Smrg   TES_GS = TES | GS,
19217ec681f3Smrg};
19227ec681f3Smrg
19237ec681f3Smrgconstexpr SWStage
19247ec681f3Smrgoperator|(SWStage a, SWStage b)
19257ec681f3Smrg{
19267ec681f3Smrg   return static_cast<SWStage>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b));
19277ec681f3Smrg}
19287ec681f3Smrg
19297ec681f3Smrg/*
19307ec681f3Smrg * Shader stages as running on the AMD GPU.
19317ec681f3Smrg *
19327ec681f3Smrg * The relation between HWStages and SWStages is not a one-to-one mapping:
19337ec681f3Smrg * Some SWStages are merged by ACO to run on a single HWStage.
19347ec681f3Smrg * See README.md for details.
19357ec681f3Smrg */
19367ec681f3Smrgenum class HWStage : uint8_t {
19377ec681f3Smrg   VS,
19387ec681f3Smrg   ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
19397ec681f3Smrg   GS,  /* Geometry shader on GFX10/legacy and GFX6-9. */
19407ec681f3Smrg   NGG, /* Primitive shader, used to implement VS, TES, GS. */
19417ec681f3Smrg   LS,  /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
19427ec681f3Smrg   HS,  /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
19437ec681f3Smrg   FS,
19447ec681f3Smrg   CS,
19457ec681f3Smrg};
19467ec681f3Smrg
19477ec681f3Smrg/*
19487ec681f3Smrg * Set of SWStages to be merged into a single shader paired with the
19497ec681f3Smrg * HWStage it will run on.
19507ec681f3Smrg */
19517ec681f3Smrgstruct Stage {
19527ec681f3Smrg   constexpr Stage() = default;
19537ec681f3Smrg
19547ec681f3Smrg   explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
19557ec681f3Smrg
19567ec681f3Smrg   /* Check if the given SWStage is included */
19577ec681f3Smrg   constexpr bool has(SWStage stage) const
19587ec681f3Smrg   {
19597ec681f3Smrg      return (static_cast<uint8_t>(sw) & static_cast<uint8_t>(stage));
19607ec681f3Smrg   }
19617ec681f3Smrg
19627ec681f3Smrg   unsigned num_sw_stages() const { return util_bitcount(static_cast<uint8_t>(sw)); }
19637ec681f3Smrg
19647ec681f3Smrg   constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; }
19657ec681f3Smrg
19667ec681f3Smrg   constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; }
19677ec681f3Smrg
19687ec681f3Smrg   /* Mask of merged software stages */
19697ec681f3Smrg   SWStage sw = SWStage::None;
19707ec681f3Smrg
19717ec681f3Smrg   /* Active hardware stage */
19727ec681f3Smrg   HWStage hw{};
19737ec681f3Smrg};
19747ec681f3Smrg
19757ec681f3Smrg/* possible settings of Program::stage */
19767ec681f3Smrgstatic constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
19777ec681f3Smrgstatic constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
19787ec681f3Smrgstatic constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
19797ec681f3Smrgstatic constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
19807ec681f3Smrgstatic constexpr Stage gs_copy_vs(HWStage::VS, SWStage::GSCopy);
19817ec681f3Smrg/* GFX10/NGG */
19827ec681f3Smrgstatic constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS);
19837ec681f3Smrgstatic constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS);
19847ec681f3Smrgstatic constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES);
19857ec681f3Smrgstatic constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS);
19867ec681f3Smrg/* GFX9 (and GFX10 if NGG isn't used) */
19877ec681f3Smrgstatic constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
19887ec681f3Smrgstatic constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
19897ec681f3Smrgstatic constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
19907ec681f3Smrg/* pre-GFX9 */
19917ec681f3Smrgstatic constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tesselation control */
19927ec681f3Smrgstatic constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
19937ec681f3Smrgstatic constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
19947ec681f3Smrgstatic constexpr Stage tess_eval_es(HWStage::ES,
19957ec681f3Smrg                                    SWStage::TES); /* tesselation evaluation before geometry */
19967ec681f3Smrgstatic constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
19977ec681f3Smrg
19987ec681f3Smrgenum statistic {
19997ec681f3Smrg   statistic_hash,
20007ec681f3Smrg   statistic_instructions,
20017ec681f3Smrg   statistic_copies,
20027ec681f3Smrg   statistic_branches,
20037ec681f3Smrg   statistic_latency,
20047ec681f3Smrg   statistic_inv_throughput,
20057ec681f3Smrg   statistic_vmem_clauses,
20067ec681f3Smrg   statistic_smem_clauses,
20077ec681f3Smrg   statistic_sgpr_presched,
20087ec681f3Smrg   statistic_vgpr_presched,
20097ec681f3Smrg   num_statistics
20107ec681f3Smrg};
20117ec681f3Smrg
20127ec681f3Smrgstruct DeviceInfo {
20137ec681f3Smrg   uint16_t lds_encoding_granule;
20147ec681f3Smrg   uint16_t lds_alloc_granule;
20157ec681f3Smrg   uint32_t lds_limit; /* in bytes */
20167ec681f3Smrg   bool has_16bank_lds;
20177ec681f3Smrg   uint16_t physical_sgprs;
20187ec681f3Smrg   uint16_t physical_vgprs;
20197ec681f3Smrg   uint16_t vgpr_limit;
20207ec681f3Smrg   uint16_t sgpr_limit;
20217ec681f3Smrg   uint16_t sgpr_alloc_granule;
20227ec681f3Smrg   uint16_t vgpr_alloc_granule; /* must be power of two */
20237ec681f3Smrg   unsigned max_wave64_per_simd;
20247ec681f3Smrg   unsigned simd_per_cu;
20257ec681f3Smrg   bool has_fast_fma32 = false;
20267ec681f3Smrg   bool xnack_enabled = false;
20277ec681f3Smrg   bool sram_ecc_enabled = false;
20287ec681f3Smrg};
20297ec681f3Smrg
20307ec681f3Smrgenum class CompilationProgress {
20317ec681f3Smrg   after_isel,
20327ec681f3Smrg   after_spilling,
20337ec681f3Smrg   after_ra,
20347ec681f3Smrg};
20357ec681f3Smrg
20367ec681f3Smrgclass Program final {
20377ec681f3Smrgpublic:
20387ec681f3Smrg   std::vector<Block> blocks;
20397ec681f3Smrg   std::vector<RegClass> temp_rc = {s1};
20407ec681f3Smrg   RegisterDemand max_reg_demand = RegisterDemand();
20417ec681f3Smrg   uint16_t num_waves = 0;
20427ec681f3Smrg   uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
20437ec681f3Smrg   ac_shader_config* config;
20447ec681f3Smrg   const struct radv_shader_info* info;
20457ec681f3Smrg   enum chip_class chip_class;
20467ec681f3Smrg   enum radeon_family family;
20477ec681f3Smrg   DeviceInfo dev;
20487ec681f3Smrg   unsigned wave_size;
20497ec681f3Smrg   RegClass lane_mask;
20507ec681f3Smrg   Stage stage;
20517ec681f3Smrg   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
20527ec681f3Smrg   bool needs_wqm = false;   /* there exists a p_wqm instruction */
20537ec681f3Smrg
20547ec681f3Smrg   std::vector<uint8_t> constant_data;
20557ec681f3Smrg   Temp private_segment_buffer;
20567ec681f3Smrg   Temp scratch_offset;
20577ec681f3Smrg
20587ec681f3Smrg   uint16_t min_waves = 0;
20597ec681f3Smrg   unsigned workgroup_size; /* if known; otherwise UINT_MAX */
20607ec681f3Smrg   bool wgp_mode;
20617ec681f3Smrg   bool early_rast = false; /* whether rasterization can start as soon as the 1st DONE pos export */
20627ec681f3Smrg
20637ec681f3Smrg   bool needs_vcc = false;
20647ec681f3Smrg   bool needs_flat_scr = false;
20657ec681f3Smrg
20667ec681f3Smrg   CompilationProgress progress;
20677ec681f3Smrg
20687ec681f3Smrg   bool collect_statistics = false;
20697ec681f3Smrg   uint32_t statistics[num_statistics];
20707ec681f3Smrg
20717ec681f3Smrg   float_mode next_fp_mode;
20727ec681f3Smrg   unsigned next_loop_depth = 0;
20737ec681f3Smrg   unsigned next_divergent_if_logical_depth = 0;
20747ec681f3Smrg   unsigned next_uniform_if_depth = 0;
20757ec681f3Smrg
20767ec681f3Smrg   std::vector<Definition> vs_inputs;
20777ec681f3Smrg
20787ec681f3Smrg   struct {
20797ec681f3Smrg      FILE* output = stderr;
20807ec681f3Smrg      bool shorten_messages = false;
20817ec681f3Smrg      void (*func)(void* private_data, enum radv_compiler_debug_level level, const char* message);
20827ec681f3Smrg      void* private_data;
20837ec681f3Smrg   } debug;
20847ec681f3Smrg
20857ec681f3Smrg   uint32_t allocateId(RegClass rc)
20867ec681f3Smrg   {
20877ec681f3Smrg      assert(allocationID <= 16777215);
20887ec681f3Smrg      temp_rc.push_back(rc);
20897ec681f3Smrg      return allocationID++;
20907ec681f3Smrg   }
20917ec681f3Smrg
20927ec681f3Smrg   void allocateRange(unsigned amount)
20937ec681f3Smrg   {
20947ec681f3Smrg      assert(allocationID + amount <= 16777216);
20957ec681f3Smrg      temp_rc.resize(temp_rc.size() + amount);
20967ec681f3Smrg      allocationID += amount;
20977ec681f3Smrg   }
20987ec681f3Smrg
20997ec681f3Smrg   Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); }
21007ec681f3Smrg
21017ec681f3Smrg   uint32_t peekAllocationId() { return allocationID; }
21027ec681f3Smrg
21037ec681f3Smrg   friend void reindex_ssa(Program* program);
21047ec681f3Smrg   friend void reindex_ssa(Program* program, std::vector<IDSet>& live_out);
21057ec681f3Smrg
21067ec681f3Smrg   Block* create_and_insert_block()
21077ec681f3Smrg   {
21087ec681f3Smrg      Block block;
21097ec681f3Smrg      return insert_block(std::move(block));
21107ec681f3Smrg   }
21117ec681f3Smrg
21127ec681f3Smrg   Block* insert_block(Block&& block)
21137ec681f3Smrg   {
21147ec681f3Smrg      block.index = blocks.size();
21157ec681f3Smrg      block.fp_mode = next_fp_mode;
21167ec681f3Smrg      block.loop_nest_depth = next_loop_depth;
21177ec681f3Smrg      block.divergent_if_logical_depth = next_divergent_if_logical_depth;
21187ec681f3Smrg      block.uniform_if_depth = next_uniform_if_depth;
21197ec681f3Smrg      blocks.emplace_back(std::move(block));
21207ec681f3Smrg      return &blocks.back();
21217ec681f3Smrg   }
21227ec681f3Smrg
21237ec681f3Smrgprivate:
21247ec681f3Smrg   uint32_t allocationID = 1;
21257ec681f3Smrg};
21267ec681f3Smrg
21277ec681f3Smrgstruct live {
21287ec681f3Smrg   /* live temps out per block */
21297ec681f3Smrg   std::vector<IDSet> live_out;
21307ec681f3Smrg   /* register demand (sgpr/vgpr) per instruction per block */
21317ec681f3Smrg   std::vector<std::vector<RegisterDemand>> register_demand;
21327ec681f3Smrg};
21337ec681f3Smrg
21347ec681f3Smrgstruct ra_test_policy {
21357ec681f3Smrg   /* Force RA to always use its pessimistic fallback algorithm */
21367ec681f3Smrg   bool skip_optimistic_path = false;
21377ec681f3Smrg};
21387ec681f3Smrg
21397ec681f3Smrgvoid init();
21407ec681f3Smrg
21417ec681f3Smrgvoid init_program(Program* program, Stage stage, const struct radv_shader_info* info,
21427ec681f3Smrg                  enum chip_class chip_class, enum radeon_family family, bool wgp_mode,
21437ec681f3Smrg                  ac_shader_config* config);
21447ec681f3Smrg
21457ec681f3Smrgvoid select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
21467ec681f3Smrg                    ac_shader_config* config, const struct radv_shader_args* args);
21477ec681f3Smrgvoid select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config,
21487ec681f3Smrg                           const struct radv_shader_args* args);
21497ec681f3Smrgvoid select_trap_handler_shader(Program* program, struct nir_shader* shader,
21507ec681f3Smrg                                ac_shader_config* config, const struct radv_shader_args* args);
21517ec681f3Smrgvoid select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key,
21527ec681f3Smrg                      ac_shader_config* config, const struct radv_shader_args* args,
21537ec681f3Smrg                      unsigned* num_preserved_sgprs);
21547ec681f3Smrg
21557ec681f3Smrgvoid lower_phis(Program* program);
21567ec681f3Smrgvoid calc_min_waves(Program* program);
21577ec681f3Smrgvoid update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
21587ec681f3Smrglive live_var_analysis(Program* program);
21597ec681f3Smrgstd::vector<uint16_t> dead_code_analysis(Program* program);
21607ec681f3Smrgvoid dominator_tree(Program* program);
21617ec681f3Smrgvoid insert_exec_mask(Program* program);
21627ec681f3Smrgvoid value_numbering(Program* program);
21637ec681f3Smrgvoid optimize(Program* program);
21647ec681f3Smrgvoid optimize_postRA(Program* program);
21657ec681f3Smrgvoid setup_reduce_temp(Program* program);
21667ec681f3Smrgvoid lower_to_cssa(Program* program, live& live_vars);
21677ec681f3Smrgvoid register_allocation(Program* program, std::vector<IDSet>& live_out_per_block,
21687ec681f3Smrg                         ra_test_policy = {});
21697ec681f3Smrgvoid ssa_elimination(Program* program);
21707ec681f3Smrgvoid lower_to_hw_instr(Program* program);
21717ec681f3Smrgvoid schedule_program(Program* program, live& live_vars);
21727ec681f3Smrgvoid spill(Program* program, live& live_vars);
21737ec681f3Smrgvoid insert_wait_states(Program* program);
21747ec681f3Smrgvoid insert_NOPs(Program* program);
21757ec681f3Smrgvoid form_hard_clauses(Program* program);
21767ec681f3Smrgunsigned emit_program(Program* program, std::vector<uint32_t>& code);
21777ec681f3Smrg/**
21787ec681f3Smrg * Returns true if print_asm can disassemble the given program for the current build/runtime
21797ec681f3Smrg * configuration
21807ec681f3Smrg */
21817ec681f3Smrgbool check_print_asm_support(Program* program);
21827ec681f3Smrgbool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output);
21837ec681f3Smrgbool validate_ir(Program* program);
21847ec681f3Smrgbool validate_ra(Program* program);
21857ec681f3Smrg#ifndef NDEBUG
21867ec681f3Smrgvoid perfwarn(Program* program, bool cond, const char* msg, Instruction* instr = NULL);
21877ec681f3Smrg#else
21887ec681f3Smrg#define perfwarn(program, cond, msg, ...)                                                          \
21897ec681f3Smrg   do {                                                                                            \
21907ec681f3Smrg   } while (0)
21917ec681f3Smrg#endif
21927ec681f3Smrg
21937ec681f3Smrgvoid collect_presched_stats(Program* program);
21947ec681f3Smrgvoid collect_preasm_stats(Program* program);
21957ec681f3Smrgvoid collect_postasm_stats(Program* program, const std::vector<uint32_t>& code);
21967ec681f3Smrg
21977ec681f3Smrgenum print_flags {
21987ec681f3Smrg   print_no_ssa = 0x1,
21997ec681f3Smrg   print_perf_info = 0x2,
22007ec681f3Smrg   print_kill = 0x4,
22017ec681f3Smrg   print_live_vars = 0x8,
22027ec681f3Smrg};
22037ec681f3Smrg
22047ec681f3Smrgvoid aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0);
22057ec681f3Smrgvoid aco_print_instr(const Instruction* instr, FILE* output, unsigned flags = 0);
22067ec681f3Smrgvoid aco_print_program(const Program* program, FILE* output, unsigned flags = 0);
22077ec681f3Smrgvoid aco_print_program(const Program* program, FILE* output, const live& live_vars,
22087ec681f3Smrg                       unsigned flags = 0);
22097ec681f3Smrg
22107ec681f3Smrgvoid _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...);
22117ec681f3Smrgvoid _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...);
22127ec681f3Smrg
22137ec681f3Smrg#define aco_perfwarn(program, ...) _aco_perfwarn(program, __FILE__, __LINE__, __VA_ARGS__)
22147ec681f3Smrg#define aco_err(program, ...)      _aco_err(program, __FILE__, __LINE__, __VA_ARGS__)
22157ec681f3Smrg
22167ec681f3Smrg/* utilities for dealing with register demand */
22177ec681f3SmrgRegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
22187ec681f3SmrgRegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
22197ec681f3SmrgRegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr,
22207ec681f3Smrg                                 aco_ptr<Instruction>& instr_before);
22217ec681f3Smrg
22227ec681f3Smrg/* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */
22237ec681f3Smrguint16_t get_extra_sgprs(Program* program);
22247ec681f3Smrg
22257ec681f3Smrg/* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
22267ec681f3Smrguint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs);
22277ec681f3Smrguint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs);
22287ec681f3Smrg
22297ec681f3Smrg/* return number of addressable sgprs/vgprs for max_waves */
22307ec681f3Smrguint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
22317ec681f3Smrguint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);
22327ec681f3Smrg
22337ec681f3Smrgtypedef struct {
22347ec681f3Smrg   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
22357ec681f3Smrg   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
22367ec681f3Smrg   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
22377ec681f3Smrg   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
22387ec681f3Smrg   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
22397ec681f3Smrg   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
22407ec681f3Smrg   const char* name[static_cast<int>(aco_opcode::num_opcodes)];
22417ec681f3Smrg   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
22427ec681f3Smrg   /* sizes used for input/output modifiers and constants */
22437ec681f3Smrg   const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
22447ec681f3Smrg   const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
22457ec681f3Smrg} Info;
22467ec681f3Smrg
22477ec681f3Smrgextern const Info instr_info;
22487ec681f3Smrg
22497ec681f3Smrg} // namespace aco
22507ec681f3Smrg
22517ec681f3Smrg#endif /* ACO_IR_H */
2252