17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2018 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#ifndef ACO_IR_H 267ec681f3Smrg#define ACO_IR_H 277ec681f3Smrg 287ec681f3Smrg#include "aco_opcodes.h" 297ec681f3Smrg#include "aco_util.h" 307ec681f3Smrg 317ec681f3Smrg#include "vulkan/radv_shader.h" 327ec681f3Smrg 337ec681f3Smrg#include "nir.h" 347ec681f3Smrg 357ec681f3Smrg#include <bitset> 367ec681f3Smrg#include <memory> 377ec681f3Smrg#include <vector> 387ec681f3Smrg 397ec681f3Smrgstruct radv_shader_args; 407ec681f3Smrgstruct radv_shader_info; 417ec681f3Smrgstruct radv_vs_prolog_key; 427ec681f3Smrg 437ec681f3Smrgnamespace aco { 447ec681f3Smrg 457ec681f3Smrgextern uint64_t debug_flags; 467ec681f3Smrg 477ec681f3Smrgenum { 487ec681f3Smrg DEBUG_VALIDATE_IR = 0x1, 497ec681f3Smrg DEBUG_VALIDATE_RA = 0x2, 507ec681f3Smrg DEBUG_PERFWARN = 0x4, 517ec681f3Smrg DEBUG_FORCE_WAITCNT = 0x8, 527ec681f3Smrg DEBUG_NO_VN = 0x10, 537ec681f3Smrg DEBUG_NO_OPT = 0x20, 547ec681f3Smrg DEBUG_NO_SCHED = 0x40, 557ec681f3Smrg DEBUG_PERF_INFO = 0x80, 567ec681f3Smrg DEBUG_LIVE_INFO = 0x100, 577ec681f3Smrg}; 587ec681f3Smrg 597ec681f3Smrg/** 607ec681f3Smrg * Representation of the instruction's microcode encoding format 617ec681f3Smrg * Note: Some Vector ALU Formats can be combined, such that: 627ec681f3Smrg * - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding 637ec681f3Smrg * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive. 647ec681f3Smrg * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing. 657ec681f3Smrg * 667ec681f3Smrg * (*) The same is applicable for VOP1 and VOPC instructions. 677ec681f3Smrg */ 687ec681f3Smrgenum class Format : std::uint16_t { 697ec681f3Smrg /* Pseudo Instruction Format */ 707ec681f3Smrg PSEUDO = 0, 717ec681f3Smrg /* Scalar ALU & Control Formats */ 727ec681f3Smrg SOP1 = 1, 737ec681f3Smrg SOP2 = 2, 747ec681f3Smrg SOPK = 3, 757ec681f3Smrg SOPP = 4, 767ec681f3Smrg SOPC = 5, 777ec681f3Smrg /* Scalar Memory Format */ 787ec681f3Smrg SMEM = 6, 797ec681f3Smrg /* LDS/GDS Format */ 807ec681f3Smrg DS = 8, 817ec681f3Smrg /* Vector Memory Buffer Formats */ 827ec681f3Smrg MTBUF = 9, 837ec681f3Smrg MUBUF = 10, 847ec681f3Smrg /* Vector Memory Image Format */ 857ec681f3Smrg MIMG = 11, 867ec681f3Smrg /* Export Format */ 877ec681f3Smrg EXP = 12, 887ec681f3Smrg /* Flat Formats */ 897ec681f3Smrg FLAT = 13, 907ec681f3Smrg GLOBAL = 14, 917ec681f3Smrg SCRATCH = 15, 927ec681f3Smrg 937ec681f3Smrg PSEUDO_BRANCH = 16, 947ec681f3Smrg PSEUDO_BARRIER = 17, 957ec681f3Smrg PSEUDO_REDUCTION = 18, 967ec681f3Smrg 977ec681f3Smrg /* Vector ALU Formats */ 987ec681f3Smrg VOP3P = 19, 997ec681f3Smrg VOP1 = 1 << 8, 1007ec681f3Smrg VOP2 = 1 << 9, 1017ec681f3Smrg VOPC = 1 << 10, 1027ec681f3Smrg VOP3 = 1 << 11, 1037ec681f3Smrg /* Vector Parameter Interpolation Format */ 1047ec681f3Smrg VINTRP = 1 << 12, 1057ec681f3Smrg DPP = 1 << 13, 1067ec681f3Smrg SDWA = 1 << 14, 1077ec681f3Smrg}; 1087ec681f3Smrg 1097ec681f3Smrgenum class instr_class : uint8_t { 1107ec681f3Smrg valu32 = 0, 1117ec681f3Smrg valu_convert32 = 1, 1127ec681f3Smrg valu64 = 2, 1137ec681f3Smrg valu_quarter_rate32 = 3, 1147ec681f3Smrg valu_fma = 4, 1157ec681f3Smrg valu_transcendental32 = 5, 1167ec681f3Smrg valu_double = 6, 1177ec681f3Smrg valu_double_add = 7, 1187ec681f3Smrg valu_double_convert = 8, 1197ec681f3Smrg valu_double_transcendental = 9, 1207ec681f3Smrg salu = 10, 1217ec681f3Smrg smem = 11, 1227ec681f3Smrg barrier = 12, 1237ec681f3Smrg branch = 13, 1247ec681f3Smrg sendmsg = 14, 1257ec681f3Smrg ds = 15, 1267ec681f3Smrg exp = 16, 1277ec681f3Smrg vmem = 17, 1287ec681f3Smrg waitcnt = 18, 1297ec681f3Smrg other = 19, 1307ec681f3Smrg count, 1317ec681f3Smrg}; 1327ec681f3Smrg 1337ec681f3Smrgenum storage_class : uint8_t { 1347ec681f3Smrg storage_none = 0x0, /* no synchronization and can be reordered around aliasing stores */ 1357ec681f3Smrg storage_buffer = 0x1, /* SSBOs and global memory */ 1367ec681f3Smrg storage_atomic_counter = 0x2, /* not used for Vulkan */ 1377ec681f3Smrg storage_image = 0x4, 1387ec681f3Smrg storage_shared = 0x8, /* or TCS output */ 1397ec681f3Smrg storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */ 1407ec681f3Smrg storage_scratch = 0x20, 1417ec681f3Smrg storage_vgpr_spill = 0x40, 1427ec681f3Smrg storage_count = 8, 1437ec681f3Smrg}; 1447ec681f3Smrg 1457ec681f3Smrgenum memory_semantics : uint8_t { 1467ec681f3Smrg semantic_none = 0x0, 1477ec681f3Smrg /* for loads: don't move any access after this load to before this load (even other loads) 1487ec681f3Smrg * for barriers: don't move any access after the barrier to before any 1497ec681f3Smrg * atomics/control_barriers/sendmsg_gs_done before the barrier */ 1507ec681f3Smrg semantic_acquire = 0x1, 1517ec681f3Smrg /* for stores: don't move any access before this store to after this store 1527ec681f3Smrg * for barriers: don't move any access before the barrier to after any 1537ec681f3Smrg * atomics/control_barriers/sendmsg_gs_done after the barrier */ 1547ec681f3Smrg semantic_release = 0x2, 1557ec681f3Smrg 1567ec681f3Smrg /* the rest are for load/stores/atomics only */ 1577ec681f3Smrg /* cannot be DCE'd or CSE'd */ 1587ec681f3Smrg semantic_volatile = 0x4, 1597ec681f3Smrg /* does not interact with barriers and assumes this lane is the only lane 1607ec681f3Smrg * accessing this memory */ 1617ec681f3Smrg semantic_private = 0x8, 1627ec681f3Smrg /* this operation can be reordered around operations of the same storage. 1637ec681f3Smrg * says nothing about barriers */ 1647ec681f3Smrg semantic_can_reorder = 0x10, 1657ec681f3Smrg /* this is a atomic instruction (may only read or write memory) */ 1667ec681f3Smrg semantic_atomic = 0x20, 1677ec681f3Smrg /* this is instruction both reads and writes memory */ 1687ec681f3Smrg semantic_rmw = 0x40, 1697ec681f3Smrg 1707ec681f3Smrg semantic_acqrel = semantic_acquire | semantic_release, 1717ec681f3Smrg semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw, 1727ec681f3Smrg}; 1737ec681f3Smrg 1747ec681f3Smrgenum sync_scope : uint8_t { 1757ec681f3Smrg scope_invocation = 0, 1767ec681f3Smrg scope_subgroup = 1, 1777ec681f3Smrg scope_workgroup = 2, 1787ec681f3Smrg scope_queuefamily = 3, 1797ec681f3Smrg scope_device = 4, 1807ec681f3Smrg}; 1817ec681f3Smrg 1827ec681f3Smrgstruct memory_sync_info { 1837ec681f3Smrg memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {} 1847ec681f3Smrg memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation) 1857ec681f3Smrg : storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_) 1867ec681f3Smrg {} 1877ec681f3Smrg 1887ec681f3Smrg storage_class storage : 8; 1897ec681f3Smrg memory_semantics semantics : 8; 1907ec681f3Smrg sync_scope scope : 8; 1917ec681f3Smrg 1927ec681f3Smrg bool operator==(const memory_sync_info& rhs) const 1937ec681f3Smrg { 1947ec681f3Smrg return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope; 1957ec681f3Smrg } 1967ec681f3Smrg 1977ec681f3Smrg bool can_reorder() const 1987ec681f3Smrg { 1997ec681f3Smrg if (semantics & semantic_acqrel) 2007ec681f3Smrg return false; 2017ec681f3Smrg /* Also check storage so that zero-initialized memory_sync_info can be 2027ec681f3Smrg * reordered. */ 2037ec681f3Smrg return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile); 2047ec681f3Smrg } 2057ec681f3Smrg}; 2067ec681f3Smrgstatic_assert(sizeof(memory_sync_info) == 3, "Unexpected padding"); 2077ec681f3Smrg 2087ec681f3Smrgenum fp_round { 2097ec681f3Smrg fp_round_ne = 0, 2107ec681f3Smrg fp_round_pi = 1, 2117ec681f3Smrg fp_round_ni = 2, 2127ec681f3Smrg fp_round_tz = 3, 2137ec681f3Smrg}; 2147ec681f3Smrg 2157ec681f3Smrgenum fp_denorm { 2167ec681f3Smrg /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and 2177ec681f3Smrg * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */ 2187ec681f3Smrg fp_denorm_flush = 0x0, 2197ec681f3Smrg fp_denorm_keep_in = 0x1, 2207ec681f3Smrg fp_denorm_keep_out = 0x2, 2217ec681f3Smrg fp_denorm_keep = 0x3, 2227ec681f3Smrg}; 2237ec681f3Smrg 2247ec681f3Smrgstruct float_mode { 2257ec681f3Smrg /* matches encoding of the MODE register */ 2267ec681f3Smrg union { 2277ec681f3Smrg struct { 2287ec681f3Smrg fp_round round32 : 2; 2297ec681f3Smrg fp_round round16_64 : 2; 2307ec681f3Smrg unsigned denorm32 : 2; 2317ec681f3Smrg unsigned denorm16_64 : 2; 2327ec681f3Smrg }; 2337ec681f3Smrg struct { 2347ec681f3Smrg uint8_t round : 4; 2357ec681f3Smrg uint8_t denorm : 4; 2367ec681f3Smrg }; 2377ec681f3Smrg uint8_t val = 0; 2387ec681f3Smrg }; 2397ec681f3Smrg /* if false, optimizations which may remove infs/nan/-0.0 can be done */ 2407ec681f3Smrg bool preserve_signed_zero_inf_nan32 : 1; 2417ec681f3Smrg bool preserve_signed_zero_inf_nan16_64 : 1; 2427ec681f3Smrg /* if false, optimizations which may remove denormal flushing can be done */ 2437ec681f3Smrg bool must_flush_denorms32 : 1; 2447ec681f3Smrg bool must_flush_denorms16_64 : 1; 2457ec681f3Smrg bool care_about_round32 : 1; 2467ec681f3Smrg bool care_about_round16_64 : 1; 2477ec681f3Smrg 2487ec681f3Smrg /* Returns true if instructions using the mode "other" can safely use the 2497ec681f3Smrg * current one instead. */ 2507ec681f3Smrg bool canReplace(float_mode other) const noexcept 2517ec681f3Smrg { 2527ec681f3Smrg return val == other.val && 2537ec681f3Smrg (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) && 2547ec681f3Smrg (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) && 2557ec681f3Smrg (must_flush_denorms32 || !other.must_flush_denorms32) && 2567ec681f3Smrg (must_flush_denorms16_64 || !other.must_flush_denorms16_64) && 2577ec681f3Smrg (care_about_round32 || !other.care_about_round32) && 2587ec681f3Smrg (care_about_round16_64 || !other.care_about_round16_64); 2597ec681f3Smrg } 2607ec681f3Smrg}; 2617ec681f3Smrg 2627ec681f3Smrgstruct wait_imm { 2637ec681f3Smrg static const uint8_t unset_counter = 0xff; 2647ec681f3Smrg 2657ec681f3Smrg uint8_t vm; 2667ec681f3Smrg uint8_t exp; 2677ec681f3Smrg uint8_t lgkm; 2687ec681f3Smrg uint8_t vs; 2697ec681f3Smrg 2707ec681f3Smrg wait_imm(); 2717ec681f3Smrg wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_); 2727ec681f3Smrg wait_imm(enum chip_class chip, uint16_t packed); 2737ec681f3Smrg 2747ec681f3Smrg uint16_t pack(enum chip_class chip) const; 2757ec681f3Smrg 2767ec681f3Smrg bool combine(const wait_imm& other); 2777ec681f3Smrg 2787ec681f3Smrg bool empty() const; 2797ec681f3Smrg}; 2807ec681f3Smrg 2817ec681f3Smrgconstexpr Format 2827ec681f3SmrgasVOP3(Format format) 2837ec681f3Smrg{ 2847ec681f3Smrg return (Format)((uint32_t)Format::VOP3 | (uint32_t)format); 2857ec681f3Smrg}; 2867ec681f3Smrg 2877ec681f3Smrgconstexpr Format 2887ec681f3SmrgasSDWA(Format format) 2897ec681f3Smrg{ 2907ec681f3Smrg assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC); 2917ec681f3Smrg return (Format)((uint32_t)Format::SDWA | (uint32_t)format); 2927ec681f3Smrg} 2937ec681f3Smrg 2947ec681f3Smrgconstexpr Format 2957ec681f3SmrgwithoutDPP(Format format) 2967ec681f3Smrg{ 2977ec681f3Smrg return (Format)((uint32_t)format & ~(uint32_t)Format::DPP); 2987ec681f3Smrg} 2997ec681f3Smrg 3007ec681f3Smrgenum class RegType { 3017ec681f3Smrg none = 0, 3027ec681f3Smrg sgpr, 3037ec681f3Smrg vgpr, 3047ec681f3Smrg linear_vgpr, 3057ec681f3Smrg}; 3067ec681f3Smrg 3077ec681f3Smrgstruct RegClass { 3087ec681f3Smrg 3097ec681f3Smrg enum RC : uint8_t { 3107ec681f3Smrg s1 = 1, 3117ec681f3Smrg s2 = 2, 3127ec681f3Smrg s3 = 3, 3137ec681f3Smrg s4 = 4, 3147ec681f3Smrg s6 = 6, 3157ec681f3Smrg s8 = 8, 3167ec681f3Smrg s16 = 16, 3177ec681f3Smrg v1 = s1 | (1 << 5), 3187ec681f3Smrg v2 = s2 | (1 << 5), 3197ec681f3Smrg v3 = s3 | (1 << 5), 3207ec681f3Smrg v4 = s4 | (1 << 5), 3217ec681f3Smrg v5 = 5 | (1 << 5), 3227ec681f3Smrg v6 = 6 | (1 << 5), 3237ec681f3Smrg v7 = 7 | (1 << 5), 3247ec681f3Smrg v8 = 8 | (1 << 5), 3257ec681f3Smrg /* byte-sized register class */ 3267ec681f3Smrg v1b = v1 | (1 << 7), 3277ec681f3Smrg v2b = v2 | (1 << 7), 3287ec681f3Smrg v3b = v3 | (1 << 7), 3297ec681f3Smrg v4b = v4 | (1 << 7), 3307ec681f3Smrg v6b = v6 | (1 << 7), 3317ec681f3Smrg v8b = v8 | (1 << 7), 3327ec681f3Smrg /* these are used for WWM and spills to vgpr */ 3337ec681f3Smrg v1_linear = v1 | (1 << 6), 3347ec681f3Smrg v2_linear = v2 | (1 << 6), 3357ec681f3Smrg }; 3367ec681f3Smrg 3377ec681f3Smrg RegClass() = default; 3387ec681f3Smrg constexpr RegClass(RC rc_) : rc(rc_) {} 3397ec681f3Smrg constexpr RegClass(RegType type, unsigned size) 3407ec681f3Smrg : rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size)) 3417ec681f3Smrg {} 3427ec681f3Smrg 3437ec681f3Smrg constexpr operator RC() const { return rc; } 3447ec681f3Smrg explicit operator bool() = delete; 3457ec681f3Smrg 3467ec681f3Smrg constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; } 3477ec681f3Smrg constexpr bool is_linear_vgpr() const { return rc & (1 << 6); }; 3487ec681f3Smrg constexpr bool is_subdword() const { return rc & (1 << 7); } 3497ec681f3Smrg constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); } 3507ec681f3Smrg // TODO: use size() less in favor of bytes() 3517ec681f3Smrg constexpr unsigned size() const { return (bytes() + 3) >> 2; } 3527ec681f3Smrg constexpr bool is_linear() const { return rc <= RC::s16 || is_linear_vgpr(); } 3537ec681f3Smrg constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); } 3547ec681f3Smrg constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); } 3557ec681f3Smrg 3567ec681f3Smrg static constexpr RegClass get(RegType type, unsigned bytes) 3577ec681f3Smrg { 3587ec681f3Smrg if (type == RegType::sgpr) { 3597ec681f3Smrg return RegClass(type, DIV_ROUND_UP(bytes, 4u)); 3607ec681f3Smrg } else { 3617ec681f3Smrg return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u); 3627ec681f3Smrg } 3637ec681f3Smrg } 3647ec681f3Smrg 3657ec681f3Smrg constexpr RegClass resize(unsigned bytes) const 3667ec681f3Smrg { 3677ec681f3Smrg if (is_linear_vgpr()) { 3687ec681f3Smrg assert(bytes % 4u == 0); 3697ec681f3Smrg return get(RegType::vgpr, bytes).as_linear(); 3707ec681f3Smrg } 3717ec681f3Smrg return get(type(), bytes); 3727ec681f3Smrg } 3737ec681f3Smrg 3747ec681f3Smrgprivate: 3757ec681f3Smrg RC rc; 3767ec681f3Smrg}; 3777ec681f3Smrg 3787ec681f3Smrg/* transitional helper expressions */ 3797ec681f3Smrgstatic constexpr RegClass s1{RegClass::s1}; 3807ec681f3Smrgstatic constexpr RegClass s2{RegClass::s2}; 3817ec681f3Smrgstatic constexpr RegClass s3{RegClass::s3}; 3827ec681f3Smrgstatic constexpr RegClass s4{RegClass::s4}; 3837ec681f3Smrgstatic constexpr RegClass s8{RegClass::s8}; 3847ec681f3Smrgstatic constexpr RegClass s16{RegClass::s16}; 3857ec681f3Smrgstatic constexpr RegClass v1{RegClass::v1}; 3867ec681f3Smrgstatic constexpr RegClass v2{RegClass::v2}; 3877ec681f3Smrgstatic constexpr RegClass v3{RegClass::v3}; 3887ec681f3Smrgstatic constexpr RegClass v4{RegClass::v4}; 3897ec681f3Smrgstatic constexpr RegClass v5{RegClass::v5}; 3907ec681f3Smrgstatic constexpr RegClass v6{RegClass::v6}; 3917ec681f3Smrgstatic constexpr RegClass v7{RegClass::v7}; 3927ec681f3Smrgstatic constexpr RegClass v8{RegClass::v8}; 3937ec681f3Smrgstatic constexpr RegClass v1b{RegClass::v1b}; 3947ec681f3Smrgstatic constexpr RegClass v2b{RegClass::v2b}; 3957ec681f3Smrgstatic constexpr RegClass v3b{RegClass::v3b}; 3967ec681f3Smrgstatic constexpr RegClass v4b{RegClass::v4b}; 3977ec681f3Smrgstatic constexpr RegClass v6b{RegClass::v6b}; 3987ec681f3Smrgstatic constexpr RegClass v8b{RegClass::v8b}; 3997ec681f3Smrg 4007ec681f3Smrg/** 4017ec681f3Smrg * Temp Class 4027ec681f3Smrg * Each temporary virtual register has a 4037ec681f3Smrg * register class (i.e. size and type) 4047ec681f3Smrg * and SSA id. 4057ec681f3Smrg */ 4067ec681f3Smrgstruct Temp { 4077ec681f3Smrg Temp() noexcept : id_(0), reg_class(0) {} 4087ec681f3Smrg constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {} 4097ec681f3Smrg 4107ec681f3Smrg constexpr uint32_t id() const noexcept { return id_; } 4117ec681f3Smrg constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; } 4127ec681f3Smrg 4137ec681f3Smrg constexpr unsigned bytes() const noexcept { return regClass().bytes(); } 4147ec681f3Smrg constexpr unsigned size() const noexcept { return regClass().size(); } 4157ec681f3Smrg constexpr RegType type() const noexcept { return regClass().type(); } 4167ec681f3Smrg constexpr bool is_linear() const noexcept { return regClass().is_linear(); } 4177ec681f3Smrg 4187ec681f3Smrg constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); } 4197ec681f3Smrg constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); } 4207ec681f3Smrg constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); } 4217ec681f3Smrg 4227ec681f3Smrgprivate: 4237ec681f3Smrg uint32_t id_ : 24; 4247ec681f3Smrg uint32_t reg_class : 8; 4257ec681f3Smrg}; 4267ec681f3Smrg 4277ec681f3Smrg/** 4287ec681f3Smrg * PhysReg 4297ec681f3Smrg * Represents the physical register for each 4307ec681f3Smrg * Operand and Definition. 4317ec681f3Smrg */ 4327ec681f3Smrgstruct PhysReg { 4337ec681f3Smrg constexpr PhysReg() = default; 4347ec681f3Smrg explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {} 4357ec681f3Smrg constexpr unsigned reg() const { return reg_b >> 2; } 4367ec681f3Smrg constexpr unsigned byte() const { return reg_b & 0x3; } 4377ec681f3Smrg constexpr operator unsigned() const { return reg(); } 4387ec681f3Smrg constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; } 4397ec681f3Smrg constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; } 4407ec681f3Smrg constexpr bool operator<(PhysReg other) const { return reg_b < other.reg_b; } 4417ec681f3Smrg constexpr PhysReg advance(int bytes) const 4427ec681f3Smrg { 4437ec681f3Smrg PhysReg res = *this; 4447ec681f3Smrg res.reg_b += bytes; 4457ec681f3Smrg return res; 4467ec681f3Smrg } 4477ec681f3Smrg 4487ec681f3Smrg uint16_t reg_b = 0; 4497ec681f3Smrg}; 4507ec681f3Smrg 4517ec681f3Smrg/* helper expressions for special registers */ 4527ec681f3Smrgstatic constexpr PhysReg m0{124}; 4537ec681f3Smrgstatic constexpr PhysReg vcc{106}; 4547ec681f3Smrgstatic constexpr PhysReg vcc_hi{107}; 4557ec681f3Smrgstatic constexpr PhysReg tba{108}; /* GFX6-GFX8 */ 4567ec681f3Smrgstatic constexpr PhysReg tma{110}; /* GFX6-GFX8 */ 4577ec681f3Smrgstatic constexpr PhysReg ttmp0{112}; 4587ec681f3Smrgstatic constexpr PhysReg ttmp1{113}; 4597ec681f3Smrgstatic constexpr PhysReg ttmp2{114}; 4607ec681f3Smrgstatic constexpr PhysReg ttmp3{115}; 4617ec681f3Smrgstatic constexpr PhysReg ttmp4{116}; 4627ec681f3Smrgstatic constexpr PhysReg ttmp5{117}; 4637ec681f3Smrgstatic constexpr PhysReg ttmp6{118}; 4647ec681f3Smrgstatic constexpr PhysReg ttmp7{119}; 4657ec681f3Smrgstatic constexpr PhysReg ttmp8{120}; 4667ec681f3Smrgstatic constexpr PhysReg ttmp9{121}; 4677ec681f3Smrgstatic constexpr PhysReg ttmp10{122}; 4687ec681f3Smrgstatic constexpr PhysReg ttmp11{123}; 4697ec681f3Smrgstatic constexpr PhysReg sgpr_null{125}; /* GFX10+ */ 4707ec681f3Smrgstatic constexpr PhysReg exec{126}; 4717ec681f3Smrgstatic constexpr PhysReg exec_lo{126}; 4727ec681f3Smrgstatic constexpr PhysReg exec_hi{127}; 4737ec681f3Smrgstatic constexpr PhysReg vccz{251}; 4747ec681f3Smrgstatic constexpr PhysReg execz{252}; 4757ec681f3Smrgstatic constexpr PhysReg scc{253}; 4767ec681f3Smrg 4777ec681f3Smrg/** 4787ec681f3Smrg * Operand Class 4797ec681f3Smrg * Initially, each Operand refers to either 4807ec681f3Smrg * a temporary virtual register 4817ec681f3Smrg * or to a constant value 4827ec681f3Smrg * Temporary registers get mapped to physical register during RA 4837ec681f3Smrg * Constant values are inlined into the instruction sequence. 4847ec681f3Smrg */ 4857ec681f3Smrgclass Operand final { 4867ec681f3Smrgpublic: 4877ec681f3Smrg constexpr Operand() 4887ec681f3Smrg : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false), 4897ec681f3Smrg isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), is16bit_(false), 4907ec681f3Smrg is24bit_(false), signext(false) 4917ec681f3Smrg {} 4927ec681f3Smrg 4937ec681f3Smrg explicit Operand(Temp r) noexcept 4947ec681f3Smrg { 4957ec681f3Smrg data_.temp = r; 4967ec681f3Smrg if (r.id()) { 4977ec681f3Smrg isTemp_ = true; 4987ec681f3Smrg } else { 4997ec681f3Smrg isUndef_ = true; 5007ec681f3Smrg setFixed(PhysReg{128}); 5017ec681f3Smrg } 5027ec681f3Smrg }; 5037ec681f3Smrg explicit Operand(Temp r, PhysReg reg) noexcept 5047ec681f3Smrg { 5057ec681f3Smrg assert(r.id()); /* Don't allow fixing an undef to a register */ 5067ec681f3Smrg data_.temp = r; 5077ec681f3Smrg isTemp_ = true; 5087ec681f3Smrg setFixed(reg); 5097ec681f3Smrg }; 5107ec681f3Smrg 5117ec681f3Smrg /* 8-bit constant */ 5127ec681f3Smrg static Operand c8(uint8_t v) noexcept 5137ec681f3Smrg { 5147ec681f3Smrg /* 8-bit constants are only used for copies and copies from any 8-bit 5157ec681f3Smrg * constant can be implemented with a SDWA v_mul_u32_u24. So consider all 5167ec681f3Smrg * to be inline constants. */ 5177ec681f3Smrg Operand op; 5187ec681f3Smrg op.control_ = 0; 5197ec681f3Smrg op.data_.i = v; 5207ec681f3Smrg op.isConstant_ = true; 5217ec681f3Smrg op.constSize = 0; 5227ec681f3Smrg op.setFixed(PhysReg{0u}); 5237ec681f3Smrg return op; 5247ec681f3Smrg }; 5257ec681f3Smrg 5267ec681f3Smrg /* 16-bit constant */ 5277ec681f3Smrg static Operand c16(uint16_t v) noexcept 5287ec681f3Smrg { 5297ec681f3Smrg Operand op; 5307ec681f3Smrg op.control_ = 0; 5317ec681f3Smrg op.data_.i = v; 5327ec681f3Smrg op.isConstant_ = true; 5337ec681f3Smrg op.constSize = 1; 5347ec681f3Smrg if (v <= 64) 5357ec681f3Smrg op.setFixed(PhysReg{128u + v}); 5367ec681f3Smrg else if (v >= 0xFFF0) /* [-16 .. -1] */ 5377ec681f3Smrg op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)}); 5387ec681f3Smrg else if (v == 0x3800) /* 0.5 */ 5397ec681f3Smrg op.setFixed(PhysReg{240}); 5407ec681f3Smrg else if (v == 0xB800) /* -0.5 */ 5417ec681f3Smrg op.setFixed(PhysReg{241}); 5427ec681f3Smrg else if (v == 0x3C00) /* 1.0 */ 5437ec681f3Smrg op.setFixed(PhysReg{242}); 5447ec681f3Smrg else if (v == 0xBC00) /* -1.0 */ 5457ec681f3Smrg op.setFixed(PhysReg{243}); 5467ec681f3Smrg else if (v == 0x4000) /* 2.0 */ 5477ec681f3Smrg op.setFixed(PhysReg{244}); 5487ec681f3Smrg else if (v == 0xC000) /* -2.0 */ 5497ec681f3Smrg op.setFixed(PhysReg{245}); 5507ec681f3Smrg else if (v == 0x4400) /* 4.0 */ 5517ec681f3Smrg op.setFixed(PhysReg{246}); 5527ec681f3Smrg else if (v == 0xC400) /* -4.0 */ 5537ec681f3Smrg op.setFixed(PhysReg{247}); 5547ec681f3Smrg else if (v == 0x3118) /* 1/2 PI */ 5557ec681f3Smrg op.setFixed(PhysReg{248}); 5567ec681f3Smrg else /* Literal Constant */ 5577ec681f3Smrg op.setFixed(PhysReg{255}); 5587ec681f3Smrg return op; 5597ec681f3Smrg } 5607ec681f3Smrg 5617ec681f3Smrg /* 32-bit constant */ 5627ec681f3Smrg static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); } 5637ec681f3Smrg 5647ec681f3Smrg /* 64-bit constant */ 5657ec681f3Smrg static Operand c64(uint64_t v) noexcept 5667ec681f3Smrg { 5677ec681f3Smrg Operand op; 5687ec681f3Smrg op.control_ = 0; 5697ec681f3Smrg op.isConstant_ = true; 5707ec681f3Smrg op.constSize = 3; 5717ec681f3Smrg if (v <= 64) { 5727ec681f3Smrg op.data_.i = (uint32_t)v; 5737ec681f3Smrg op.setFixed(PhysReg{128 + (uint32_t)v}); 5747ec681f3Smrg } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */ 5757ec681f3Smrg op.data_.i = (uint32_t)v; 5767ec681f3Smrg op.setFixed(PhysReg{192 - (uint32_t)v}); 5777ec681f3Smrg } else if (v == 0x3FE0000000000000) { /* 0.5 */ 5787ec681f3Smrg op.data_.i = 0x3f000000; 5797ec681f3Smrg op.setFixed(PhysReg{240}); 5807ec681f3Smrg } else if (v == 0xBFE0000000000000) { /* -0.5 */ 5817ec681f3Smrg op.data_.i = 0xbf000000; 5827ec681f3Smrg op.setFixed(PhysReg{241}); 5837ec681f3Smrg } else if (v == 0x3FF0000000000000) { /* 1.0 */ 5847ec681f3Smrg op.data_.i = 0x3f800000; 5857ec681f3Smrg op.setFixed(PhysReg{242}); 5867ec681f3Smrg } else if (v == 0xBFF0000000000000) { /* -1.0 */ 5877ec681f3Smrg op.data_.i = 0xbf800000; 5887ec681f3Smrg op.setFixed(PhysReg{243}); 5897ec681f3Smrg } else if (v == 0x4000000000000000) { /* 2.0 */ 5907ec681f3Smrg op.data_.i = 0x40000000; 5917ec681f3Smrg op.setFixed(PhysReg{244}); 5927ec681f3Smrg } else if (v == 0xC000000000000000) { /* -2.0 */ 5937ec681f3Smrg op.data_.i = 0xc0000000; 5947ec681f3Smrg op.setFixed(PhysReg{245}); 5957ec681f3Smrg } else if (v == 0x4010000000000000) { /* 4.0 */ 5967ec681f3Smrg op.data_.i = 0x40800000; 5977ec681f3Smrg op.setFixed(PhysReg{246}); 5987ec681f3Smrg } else if (v == 0xC010000000000000) { /* -4.0 */ 5997ec681f3Smrg op.data_.i = 0xc0800000; 6007ec681f3Smrg op.setFixed(PhysReg{247}); 6017ec681f3Smrg } else { /* Literal Constant: we don't know if it is a long or double.*/ 6027ec681f3Smrg op.signext = v >> 63; 6037ec681f3Smrg op.data_.i = v & 0xffffffffu; 6047ec681f3Smrg op.setFixed(PhysReg{255}); 6057ec681f3Smrg assert(op.constantValue64() == v && 6067ec681f3Smrg "attempt to create a unrepresentable 64-bit literal constant"); 6077ec681f3Smrg } 6087ec681f3Smrg return op; 6097ec681f3Smrg } 6107ec681f3Smrg 6117ec681f3Smrg /* 32-bit constant stored as a 32-bit or 64-bit operand */ 6127ec681f3Smrg static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept 6137ec681f3Smrg { 6147ec681f3Smrg Operand op; 6157ec681f3Smrg op.control_ = 0; 6167ec681f3Smrg op.data_.i = v; 6177ec681f3Smrg op.isConstant_ = true; 6187ec681f3Smrg op.constSize = is64bit ? 3 : 2; 6197ec681f3Smrg if (v <= 64) 6207ec681f3Smrg op.setFixed(PhysReg{128 + v}); 6217ec681f3Smrg else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */ 6227ec681f3Smrg op.setFixed(PhysReg{192 - v}); 6237ec681f3Smrg else if (v == 0x3f000000) /* 0.5 */ 6247ec681f3Smrg op.setFixed(PhysReg{240}); 6257ec681f3Smrg else if (v == 0xbf000000) /* -0.5 */ 6267ec681f3Smrg op.setFixed(PhysReg{241}); 6277ec681f3Smrg else if (v == 0x3f800000) /* 1.0 */ 6287ec681f3Smrg op.setFixed(PhysReg{242}); 6297ec681f3Smrg else if (v == 0xbf800000) /* -1.0 */ 6307ec681f3Smrg op.setFixed(PhysReg{243}); 6317ec681f3Smrg else if (v == 0x40000000) /* 2.0 */ 6327ec681f3Smrg op.setFixed(PhysReg{244}); 6337ec681f3Smrg else if (v == 0xc0000000) /* -2.0 */ 6347ec681f3Smrg op.setFixed(PhysReg{245}); 6357ec681f3Smrg else if (v == 0x40800000) /* 4.0 */ 6367ec681f3Smrg op.setFixed(PhysReg{246}); 6377ec681f3Smrg else if (v == 0xc0800000) /* -4.0 */ 6387ec681f3Smrg op.setFixed(PhysReg{247}); 6397ec681f3Smrg else { /* Literal Constant */ 6407ec681f3Smrg assert(!is64bit && "attempt to create a 64-bit literal constant"); 6417ec681f3Smrg op.setFixed(PhysReg{255}); 6427ec681f3Smrg } 6437ec681f3Smrg return op; 6447ec681f3Smrg } 6457ec681f3Smrg 6467ec681f3Smrg explicit Operand(RegClass type) noexcept 6477ec681f3Smrg { 6487ec681f3Smrg isUndef_ = true; 6497ec681f3Smrg data_.temp = Temp(0, type); 6507ec681f3Smrg setFixed(PhysReg{128}); 6517ec681f3Smrg }; 6527ec681f3Smrg explicit Operand(PhysReg reg, RegClass type) noexcept 6537ec681f3Smrg { 6547ec681f3Smrg data_.temp = Temp(0, type); 6557ec681f3Smrg setFixed(reg); 6567ec681f3Smrg } 6577ec681f3Smrg 6587ec681f3Smrg static Operand zero(unsigned bytes = 4) noexcept 6597ec681f3Smrg { 6607ec681f3Smrg if (bytes == 8) 6617ec681f3Smrg return Operand::c64(0); 6627ec681f3Smrg else if (bytes == 4) 6637ec681f3Smrg return Operand::c32(0); 6647ec681f3Smrg else if (bytes == 2) 6657ec681f3Smrg return Operand::c16(0); 6667ec681f3Smrg assert(bytes == 1); 6677ec681f3Smrg return Operand::c8(0); 6687ec681f3Smrg } 6697ec681f3Smrg 6707ec681f3Smrg /* This is useful over the constructors when you want to take a chip class 6717ec681f3Smrg * for 1/2 PI or an unknown operand size. 6727ec681f3Smrg */ 6737ec681f3Smrg static Operand get_const(enum chip_class chip, uint64_t val, unsigned bytes) 6747ec681f3Smrg { 6757ec681f3Smrg if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) { 6767ec681f3Smrg /* 1/2 PI can be an inline constant on GFX8+ */ 6777ec681f3Smrg Operand op = Operand::c32(val); 6787ec681f3Smrg op.setFixed(PhysReg{248}); 6797ec681f3Smrg return op; 6807ec681f3Smrg } 6817ec681f3Smrg 6827ec681f3Smrg if (bytes == 8) 6837ec681f3Smrg return Operand::c64(val); 6847ec681f3Smrg else if (bytes == 4) 6857ec681f3Smrg return Operand::c32(val); 6867ec681f3Smrg else if (bytes == 2) 6877ec681f3Smrg return Operand::c16(val); 6887ec681f3Smrg assert(bytes == 1); 6897ec681f3Smrg return Operand::c8(val); 6907ec681f3Smrg } 6917ec681f3Smrg 6927ec681f3Smrg static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false, 6937ec681f3Smrg bool sext = false) 6947ec681f3Smrg { 6957ec681f3Smrg if (bytes <= 4) 6967ec681f3Smrg return true; 6977ec681f3Smrg 6987ec681f3Smrg if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000) 6997ec681f3Smrg return true; 7007ec681f3Smrg uint64_t upper33 = val & 0xFFFFFFFF80000000; 7017ec681f3Smrg if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0)) 7027ec681f3Smrg return true; 7037ec681f3Smrg 7047ec681f3Smrg return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 || /* [-16 .. 64] */ 7057ec681f3Smrg val == 0x3FE0000000000000 || /* 0.5 */ 7067ec681f3Smrg val == 0xBFE0000000000000 || /* -0.5 */ 7077ec681f3Smrg val == 0x3FF0000000000000 || /* 1.0 */ 7087ec681f3Smrg val == 0xBFF0000000000000 || /* -1.0 */ 7097ec681f3Smrg val == 0x4000000000000000 || /* 2.0 */ 7107ec681f3Smrg val == 0xC000000000000000 || /* -2.0 */ 7117ec681f3Smrg val == 0x4010000000000000 || /* 4.0 */ 7127ec681f3Smrg val == 0xC010000000000000; /* -4.0 */ 7137ec681f3Smrg } 7147ec681f3Smrg 7157ec681f3Smrg constexpr bool isTemp() const noexcept { return isTemp_; } 7167ec681f3Smrg 7177ec681f3Smrg constexpr void setTemp(Temp t) noexcept 7187ec681f3Smrg { 7197ec681f3Smrg assert(!isConstant_); 7207ec681f3Smrg isTemp_ = true; 7217ec681f3Smrg data_.temp = t; 7227ec681f3Smrg } 7237ec681f3Smrg 7247ec681f3Smrg constexpr Temp getTemp() const noexcept { return data_.temp; } 7257ec681f3Smrg 7267ec681f3Smrg constexpr uint32_t tempId() const noexcept { return data_.temp.id(); } 7277ec681f3Smrg 7287ec681f3Smrg constexpr bool hasRegClass() const noexcept { return isTemp() || isUndefined(); } 7297ec681f3Smrg 7307ec681f3Smrg constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); } 7317ec681f3Smrg 7327ec681f3Smrg constexpr unsigned bytes() const noexcept 7337ec681f3Smrg { 7347ec681f3Smrg if (isConstant()) 7357ec681f3Smrg return 1 << constSize; 7367ec681f3Smrg else 7377ec681f3Smrg return data_.temp.bytes(); 7387ec681f3Smrg } 7397ec681f3Smrg 7407ec681f3Smrg constexpr unsigned size() const noexcept 7417ec681f3Smrg { 7427ec681f3Smrg if (isConstant()) 7437ec681f3Smrg return constSize > 2 ? 2 : 1; 7447ec681f3Smrg else 7457ec681f3Smrg return data_.temp.size(); 7467ec681f3Smrg } 7477ec681f3Smrg 7487ec681f3Smrg constexpr bool isFixed() const noexcept { return isFixed_; } 7497ec681f3Smrg 7507ec681f3Smrg constexpr PhysReg physReg() const noexcept { return reg_; } 7517ec681f3Smrg 7527ec681f3Smrg constexpr void setFixed(PhysReg reg) noexcept 7537ec681f3Smrg { 7547ec681f3Smrg isFixed_ = reg != unsigned(-1); 7557ec681f3Smrg reg_ = reg; 7567ec681f3Smrg } 7577ec681f3Smrg 7587ec681f3Smrg constexpr bool isConstant() const noexcept { return isConstant_; } 7597ec681f3Smrg 7607ec681f3Smrg constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; } 7617ec681f3Smrg 7627ec681f3Smrg constexpr bool isUndefined() const noexcept { return isUndef_; } 7637ec681f3Smrg 7647ec681f3Smrg constexpr uint32_t constantValue() const noexcept { return data_.i; } 7657ec681f3Smrg 7667ec681f3Smrg constexpr bool constantEquals(uint32_t cmp) const noexcept 7677ec681f3Smrg { 7687ec681f3Smrg return isConstant() && constantValue() == cmp; 7697ec681f3Smrg } 7707ec681f3Smrg 7717ec681f3Smrg constexpr uint64_t constantValue64() const noexcept 7727ec681f3Smrg { 7737ec681f3Smrg if (constSize == 3) { 7747ec681f3Smrg if (reg_ <= 192) 7757ec681f3Smrg return reg_ - 128; 7767ec681f3Smrg else if (reg_ <= 208) 7777ec681f3Smrg return 0xFFFFFFFFFFFFFFFF - (reg_ - 193); 7787ec681f3Smrg 7797ec681f3Smrg switch (reg_) { 7807ec681f3Smrg case 240: return 0x3FE0000000000000; 7817ec681f3Smrg case 241: return 0xBFE0000000000000; 7827ec681f3Smrg case 242: return 0x3FF0000000000000; 7837ec681f3Smrg case 243: return 0xBFF0000000000000; 7847ec681f3Smrg case 244: return 0x4000000000000000; 7857ec681f3Smrg case 245: return 0xC000000000000000; 7867ec681f3Smrg case 246: return 0x4010000000000000; 7877ec681f3Smrg case 247: return 0xC010000000000000; 7887ec681f3Smrg case 255: 7897ec681f3Smrg return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i; 7907ec681f3Smrg } 7917ec681f3Smrg unreachable("invalid register for 64-bit constant"); 7927ec681f3Smrg } else { 7937ec681f3Smrg return data_.i; 7947ec681f3Smrg } 7957ec681f3Smrg } 7967ec681f3Smrg 7977ec681f3Smrg constexpr bool isOfType(RegType type) const noexcept 7987ec681f3Smrg { 7997ec681f3Smrg return hasRegClass() && regClass().type() == type; 8007ec681f3Smrg } 8017ec681f3Smrg 8027ec681f3Smrg /* Indicates that the killed operand's live range intersects with the 8037ec681f3Smrg * instruction's definitions. Unlike isKill() and isFirstKill(), this is 8047ec681f3Smrg * not set by liveness analysis. */ 8057ec681f3Smrg constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; } 8067ec681f3Smrg 8077ec681f3Smrg constexpr bool isLateKill() const noexcept { return isLateKill_; } 8087ec681f3Smrg 8097ec681f3Smrg constexpr void setKill(bool flag) noexcept 8107ec681f3Smrg { 8117ec681f3Smrg isKill_ = flag; 8127ec681f3Smrg if (!flag) 8137ec681f3Smrg setFirstKill(false); 8147ec681f3Smrg } 8157ec681f3Smrg 8167ec681f3Smrg constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); } 8177ec681f3Smrg 8187ec681f3Smrg constexpr void setFirstKill(bool flag) noexcept 8197ec681f3Smrg { 8207ec681f3Smrg isFirstKill_ = flag; 8217ec681f3Smrg if (flag) 8227ec681f3Smrg setKill(flag); 8237ec681f3Smrg } 8247ec681f3Smrg 8257ec681f3Smrg /* When there are multiple operands killing the same temporary, 8267ec681f3Smrg * isFirstKill() is only returns true for the first one. */ 8277ec681f3Smrg constexpr bool isFirstKill() const noexcept { return isFirstKill_; } 8287ec681f3Smrg 8297ec681f3Smrg constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); } 8307ec681f3Smrg 8317ec681f3Smrg constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); } 8327ec681f3Smrg 8337ec681f3Smrg constexpr bool operator==(Operand other) const noexcept 8347ec681f3Smrg { 8357ec681f3Smrg if (other.size() != size()) 8367ec681f3Smrg return false; 8377ec681f3Smrg if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef()) 8387ec681f3Smrg return false; 8397ec681f3Smrg if (isFixed() && other.isFixed() && physReg() != other.physReg()) 8407ec681f3Smrg return false; 8417ec681f3Smrg if (isLiteral()) 8427ec681f3Smrg return other.isLiteral() && other.constantValue() == constantValue(); 8437ec681f3Smrg else if (isConstant()) 8447ec681f3Smrg return other.isConstant() && other.physReg() == physReg(); 8457ec681f3Smrg else if (isUndefined()) 8467ec681f3Smrg return other.isUndefined() && other.regClass() == regClass(); 8477ec681f3Smrg else 8487ec681f3Smrg return other.isTemp() && other.getTemp() == getTemp(); 8497ec681f3Smrg } 8507ec681f3Smrg 8517ec681f3Smrg constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); } 8527ec681f3Smrg 8537ec681f3Smrg constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; } 8547ec681f3Smrg 8557ec681f3Smrg constexpr bool is16bit() const noexcept { return is16bit_; } 8567ec681f3Smrg 8577ec681f3Smrg constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; } 8587ec681f3Smrg 8597ec681f3Smrg constexpr bool is24bit() const noexcept { return is24bit_; } 8607ec681f3Smrg 8617ec681f3Smrgprivate: 8627ec681f3Smrg union { 8637ec681f3Smrg Temp temp; 8647ec681f3Smrg uint32_t i; 8657ec681f3Smrg float f; 8667ec681f3Smrg } data_ = {Temp(0, s1)}; 8677ec681f3Smrg PhysReg reg_; 8687ec681f3Smrg union { 8697ec681f3Smrg struct { 8707ec681f3Smrg uint8_t isTemp_ : 1; 8717ec681f3Smrg uint8_t isFixed_ : 1; 8727ec681f3Smrg uint8_t isConstant_ : 1; 8737ec681f3Smrg uint8_t isKill_ : 1; 8747ec681f3Smrg uint8_t isUndef_ : 1; 8757ec681f3Smrg uint8_t isFirstKill_ : 1; 8767ec681f3Smrg uint8_t constSize : 2; 8777ec681f3Smrg uint8_t isLateKill_ : 1; 8787ec681f3Smrg uint8_t is16bit_ : 1; 8797ec681f3Smrg uint8_t is24bit_ : 1; 8807ec681f3Smrg uint8_t signext : 1; 8817ec681f3Smrg }; 8827ec681f3Smrg /* can't initialize bit-fields in c++11, so work around using a union */ 8837ec681f3Smrg uint16_t control_ = 0; 8847ec681f3Smrg }; 8857ec681f3Smrg}; 8867ec681f3Smrg 8877ec681f3Smrg/** 8887ec681f3Smrg * Definition Class 8897ec681f3Smrg * Definitions are the results of Instructions 8907ec681f3Smrg * and refer to temporary virtual registers 8917ec681f3Smrg * which are later mapped to physical registers 8927ec681f3Smrg */ 8937ec681f3Smrgclass Definition final { 8947ec681f3Smrgpublic: 8957ec681f3Smrg constexpr Definition() 8967ec681f3Smrg : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0), isPrecise_(0), isNUW_(0), 8977ec681f3Smrg isNoCSE_(0) 8987ec681f3Smrg {} 8997ec681f3Smrg Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {} 9007ec681f3Smrg explicit Definition(Temp tmp) noexcept : temp(tmp) {} 9017ec681f3Smrg Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); } 9027ec681f3Smrg Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type)) 9037ec681f3Smrg { 9047ec681f3Smrg setFixed(reg); 9057ec681f3Smrg } 9067ec681f3Smrg 9077ec681f3Smrg constexpr bool isTemp() const noexcept { return tempId() > 0; } 9087ec681f3Smrg 9097ec681f3Smrg constexpr Temp getTemp() const noexcept { return temp; } 9107ec681f3Smrg 9117ec681f3Smrg constexpr uint32_t tempId() const noexcept { return temp.id(); } 9127ec681f3Smrg 9137ec681f3Smrg constexpr void setTemp(Temp t) noexcept { temp = t; } 9147ec681f3Smrg 9157ec681f3Smrg void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); } 9167ec681f3Smrg 9177ec681f3Smrg constexpr RegClass regClass() const noexcept { return temp.regClass(); } 9187ec681f3Smrg 9197ec681f3Smrg constexpr unsigned bytes() const noexcept { return temp.bytes(); } 9207ec681f3Smrg 9217ec681f3Smrg constexpr unsigned size() const noexcept { return temp.size(); } 9227ec681f3Smrg 9237ec681f3Smrg constexpr bool isFixed() const noexcept { return isFixed_; } 9247ec681f3Smrg 9257ec681f3Smrg constexpr PhysReg physReg() const noexcept { return reg_; } 9267ec681f3Smrg 9277ec681f3Smrg constexpr void setFixed(PhysReg reg) noexcept 9287ec681f3Smrg { 9297ec681f3Smrg isFixed_ = 1; 9307ec681f3Smrg reg_ = reg; 9317ec681f3Smrg } 9327ec681f3Smrg 9337ec681f3Smrg constexpr void setHint(PhysReg reg) noexcept 9347ec681f3Smrg { 9357ec681f3Smrg hasHint_ = 1; 9367ec681f3Smrg reg_ = reg; 9377ec681f3Smrg } 9387ec681f3Smrg 9397ec681f3Smrg constexpr bool hasHint() const noexcept { return hasHint_; } 9407ec681f3Smrg 9417ec681f3Smrg constexpr void setKill(bool flag) noexcept { isKill_ = flag; } 9427ec681f3Smrg 9437ec681f3Smrg constexpr bool isKill() const noexcept { return isKill_; } 9447ec681f3Smrg 9457ec681f3Smrg constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; } 9467ec681f3Smrg 9477ec681f3Smrg constexpr bool isPrecise() const noexcept { return isPrecise_; } 9487ec681f3Smrg 9497ec681f3Smrg /* No Unsigned Wrap */ 9507ec681f3Smrg constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; } 9517ec681f3Smrg 9527ec681f3Smrg constexpr bool isNUW() const noexcept { return isNUW_; } 9537ec681f3Smrg 9547ec681f3Smrg constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; } 9557ec681f3Smrg 9567ec681f3Smrg constexpr bool isNoCSE() const noexcept { return isNoCSE_; } 9577ec681f3Smrg 9587ec681f3Smrgprivate: 9597ec681f3Smrg Temp temp = Temp(0, s1); 9607ec681f3Smrg PhysReg reg_; 9617ec681f3Smrg union { 9627ec681f3Smrg struct { 9637ec681f3Smrg uint8_t isFixed_ : 1; 9647ec681f3Smrg uint8_t hasHint_ : 1; 9657ec681f3Smrg uint8_t isKill_ : 1; 9667ec681f3Smrg uint8_t isPrecise_ : 1; 9677ec681f3Smrg uint8_t isNUW_ : 1; 9687ec681f3Smrg uint8_t isNoCSE_ : 1; 9697ec681f3Smrg }; 9707ec681f3Smrg /* can't initialize bit-fields in c++11, so work around using a union */ 9717ec681f3Smrg uint8_t control_ = 0; 9727ec681f3Smrg }; 9737ec681f3Smrg}; 9747ec681f3Smrg 9757ec681f3Smrgstruct Block; 9767ec681f3Smrgstruct Instruction; 9777ec681f3Smrgstruct Pseudo_instruction; 9787ec681f3Smrgstruct SOP1_instruction; 9797ec681f3Smrgstruct SOP2_instruction; 9807ec681f3Smrgstruct SOPK_instruction; 9817ec681f3Smrgstruct SOPP_instruction; 9827ec681f3Smrgstruct SOPC_instruction; 9837ec681f3Smrgstruct SMEM_instruction; 9847ec681f3Smrgstruct DS_instruction; 9857ec681f3Smrgstruct MTBUF_instruction; 9867ec681f3Smrgstruct MUBUF_instruction; 9877ec681f3Smrgstruct MIMG_instruction; 9887ec681f3Smrgstruct Export_instruction; 9897ec681f3Smrgstruct FLAT_instruction; 9907ec681f3Smrgstruct Pseudo_branch_instruction; 9917ec681f3Smrgstruct Pseudo_barrier_instruction; 9927ec681f3Smrgstruct Pseudo_reduction_instruction; 9937ec681f3Smrgstruct VOP3P_instruction; 9947ec681f3Smrgstruct VOP1_instruction; 9957ec681f3Smrgstruct VOP2_instruction; 9967ec681f3Smrgstruct VOPC_instruction; 9977ec681f3Smrgstruct VOP3_instruction; 9987ec681f3Smrgstruct Interp_instruction; 9997ec681f3Smrgstruct DPP_instruction; 10007ec681f3Smrgstruct SDWA_instruction; 10017ec681f3Smrg 10027ec681f3Smrgstruct Instruction { 10037ec681f3Smrg aco_opcode opcode; 10047ec681f3Smrg Format format; 10057ec681f3Smrg uint32_t pass_flags; 10067ec681f3Smrg 10077ec681f3Smrg aco::span<Operand> operands; 10087ec681f3Smrg aco::span<Definition> definitions; 10097ec681f3Smrg 10107ec681f3Smrg constexpr bool usesModifiers() const noexcept; 10117ec681f3Smrg 10127ec681f3Smrg constexpr bool reads_exec() const noexcept 10137ec681f3Smrg { 10147ec681f3Smrg for (const Operand& op : operands) { 10157ec681f3Smrg if (op.isFixed() && op.physReg() == exec) 10167ec681f3Smrg return true; 10177ec681f3Smrg } 10187ec681f3Smrg return false; 10197ec681f3Smrg } 10207ec681f3Smrg 10217ec681f3Smrg Pseudo_instruction& pseudo() noexcept 10227ec681f3Smrg { 10237ec681f3Smrg assert(isPseudo()); 10247ec681f3Smrg return *(Pseudo_instruction*)this; 10257ec681f3Smrg } 10267ec681f3Smrg const Pseudo_instruction& pseudo() const noexcept 10277ec681f3Smrg { 10287ec681f3Smrg assert(isPseudo()); 10297ec681f3Smrg return *(Pseudo_instruction*)this; 10307ec681f3Smrg } 10317ec681f3Smrg constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; } 10327ec681f3Smrg SOP1_instruction& sop1() noexcept 10337ec681f3Smrg { 10347ec681f3Smrg assert(isSOP1()); 10357ec681f3Smrg return *(SOP1_instruction*)this; 10367ec681f3Smrg } 10377ec681f3Smrg const SOP1_instruction& sop1() const noexcept 10387ec681f3Smrg { 10397ec681f3Smrg assert(isSOP1()); 10407ec681f3Smrg return *(SOP1_instruction*)this; 10417ec681f3Smrg } 10427ec681f3Smrg constexpr bool isSOP1() const noexcept { return format == Format::SOP1; } 10437ec681f3Smrg SOP2_instruction& sop2() noexcept 10447ec681f3Smrg { 10457ec681f3Smrg assert(isSOP2()); 10467ec681f3Smrg return *(SOP2_instruction*)this; 10477ec681f3Smrg } 10487ec681f3Smrg const SOP2_instruction& sop2() const noexcept 10497ec681f3Smrg { 10507ec681f3Smrg assert(isSOP2()); 10517ec681f3Smrg return *(SOP2_instruction*)this; 10527ec681f3Smrg } 10537ec681f3Smrg constexpr bool isSOP2() const noexcept { return format == Format::SOP2; } 10547ec681f3Smrg SOPK_instruction& sopk() noexcept 10557ec681f3Smrg { 10567ec681f3Smrg assert(isSOPK()); 10577ec681f3Smrg return *(SOPK_instruction*)this; 10587ec681f3Smrg } 10597ec681f3Smrg const SOPK_instruction& sopk() const noexcept 10607ec681f3Smrg { 10617ec681f3Smrg assert(isSOPK()); 10627ec681f3Smrg return *(SOPK_instruction*)this; 10637ec681f3Smrg } 10647ec681f3Smrg constexpr bool isSOPK() const noexcept { return format == Format::SOPK; } 10657ec681f3Smrg SOPP_instruction& sopp() noexcept 10667ec681f3Smrg { 10677ec681f3Smrg assert(isSOPP()); 10687ec681f3Smrg return *(SOPP_instruction*)this; 10697ec681f3Smrg } 10707ec681f3Smrg const SOPP_instruction& sopp() const noexcept 10717ec681f3Smrg { 10727ec681f3Smrg assert(isSOPP()); 10737ec681f3Smrg return *(SOPP_instruction*)this; 10747ec681f3Smrg } 10757ec681f3Smrg constexpr bool isSOPP() const noexcept { return format == Format::SOPP; } 10767ec681f3Smrg SOPC_instruction& sopc() noexcept 10777ec681f3Smrg { 10787ec681f3Smrg assert(isSOPC()); 10797ec681f3Smrg return *(SOPC_instruction*)this; 10807ec681f3Smrg } 10817ec681f3Smrg const SOPC_instruction& sopc() const noexcept 10827ec681f3Smrg { 10837ec681f3Smrg assert(isSOPC()); 10847ec681f3Smrg return *(SOPC_instruction*)this; 10857ec681f3Smrg } 10867ec681f3Smrg constexpr bool isSOPC() const noexcept { return format == Format::SOPC; } 10877ec681f3Smrg SMEM_instruction& smem() noexcept 10887ec681f3Smrg { 10897ec681f3Smrg assert(isSMEM()); 10907ec681f3Smrg return *(SMEM_instruction*)this; 10917ec681f3Smrg } 10927ec681f3Smrg const SMEM_instruction& smem() const noexcept 10937ec681f3Smrg { 10947ec681f3Smrg assert(isSMEM()); 10957ec681f3Smrg return *(SMEM_instruction*)this; 10967ec681f3Smrg } 10977ec681f3Smrg constexpr bool isSMEM() const noexcept { return format == Format::SMEM; } 10987ec681f3Smrg DS_instruction& ds() noexcept 10997ec681f3Smrg { 11007ec681f3Smrg assert(isDS()); 11017ec681f3Smrg return *(DS_instruction*)this; 11027ec681f3Smrg } 11037ec681f3Smrg const DS_instruction& ds() const noexcept 11047ec681f3Smrg { 11057ec681f3Smrg assert(isDS()); 11067ec681f3Smrg return *(DS_instruction*)this; 11077ec681f3Smrg } 11087ec681f3Smrg constexpr bool isDS() const noexcept { return format == Format::DS; } 11097ec681f3Smrg MTBUF_instruction& mtbuf() noexcept 11107ec681f3Smrg { 11117ec681f3Smrg assert(isMTBUF()); 11127ec681f3Smrg return *(MTBUF_instruction*)this; 11137ec681f3Smrg } 11147ec681f3Smrg const MTBUF_instruction& mtbuf() const noexcept 11157ec681f3Smrg { 11167ec681f3Smrg assert(isMTBUF()); 11177ec681f3Smrg return *(MTBUF_instruction*)this; 11187ec681f3Smrg } 11197ec681f3Smrg constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; } 11207ec681f3Smrg MUBUF_instruction& mubuf() noexcept 11217ec681f3Smrg { 11227ec681f3Smrg assert(isMUBUF()); 11237ec681f3Smrg return *(MUBUF_instruction*)this; 11247ec681f3Smrg } 11257ec681f3Smrg const MUBUF_instruction& mubuf() const noexcept 11267ec681f3Smrg { 11277ec681f3Smrg assert(isMUBUF()); 11287ec681f3Smrg return *(MUBUF_instruction*)this; 11297ec681f3Smrg } 11307ec681f3Smrg constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; } 11317ec681f3Smrg MIMG_instruction& mimg() noexcept 11327ec681f3Smrg { 11337ec681f3Smrg assert(isMIMG()); 11347ec681f3Smrg return *(MIMG_instruction*)this; 11357ec681f3Smrg } 11367ec681f3Smrg const MIMG_instruction& mimg() const noexcept 11377ec681f3Smrg { 11387ec681f3Smrg assert(isMIMG()); 11397ec681f3Smrg return *(MIMG_instruction*)this; 11407ec681f3Smrg } 11417ec681f3Smrg constexpr bool isMIMG() const noexcept { return format == Format::MIMG; } 11427ec681f3Smrg Export_instruction& exp() noexcept 11437ec681f3Smrg { 11447ec681f3Smrg assert(isEXP()); 11457ec681f3Smrg return *(Export_instruction*)this; 11467ec681f3Smrg } 11477ec681f3Smrg const Export_instruction& exp() const noexcept 11487ec681f3Smrg { 11497ec681f3Smrg assert(isEXP()); 11507ec681f3Smrg return *(Export_instruction*)this; 11517ec681f3Smrg } 11527ec681f3Smrg constexpr bool isEXP() const noexcept { return format == Format::EXP; } 11537ec681f3Smrg FLAT_instruction& flat() noexcept 11547ec681f3Smrg { 11557ec681f3Smrg assert(isFlat()); 11567ec681f3Smrg return *(FLAT_instruction*)this; 11577ec681f3Smrg } 11587ec681f3Smrg const FLAT_instruction& flat() const noexcept 11597ec681f3Smrg { 11607ec681f3Smrg assert(isFlat()); 11617ec681f3Smrg return *(FLAT_instruction*)this; 11627ec681f3Smrg } 11637ec681f3Smrg constexpr bool isFlat() const noexcept { return format == Format::FLAT; } 11647ec681f3Smrg FLAT_instruction& global() noexcept 11657ec681f3Smrg { 11667ec681f3Smrg assert(isGlobal()); 11677ec681f3Smrg return *(FLAT_instruction*)this; 11687ec681f3Smrg } 11697ec681f3Smrg const FLAT_instruction& global() const noexcept 11707ec681f3Smrg { 11717ec681f3Smrg assert(isGlobal()); 11727ec681f3Smrg return *(FLAT_instruction*)this; 11737ec681f3Smrg } 11747ec681f3Smrg constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; } 11757ec681f3Smrg FLAT_instruction& scratch() noexcept 11767ec681f3Smrg { 11777ec681f3Smrg assert(isScratch()); 11787ec681f3Smrg return *(FLAT_instruction*)this; 11797ec681f3Smrg } 11807ec681f3Smrg const FLAT_instruction& scratch() const noexcept 11817ec681f3Smrg { 11827ec681f3Smrg assert(isScratch()); 11837ec681f3Smrg return *(FLAT_instruction*)this; 11847ec681f3Smrg } 11857ec681f3Smrg constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; } 11867ec681f3Smrg Pseudo_branch_instruction& branch() noexcept 11877ec681f3Smrg { 11887ec681f3Smrg assert(isBranch()); 11897ec681f3Smrg return *(Pseudo_branch_instruction*)this; 11907ec681f3Smrg } 11917ec681f3Smrg const Pseudo_branch_instruction& branch() const noexcept 11927ec681f3Smrg { 11937ec681f3Smrg assert(isBranch()); 11947ec681f3Smrg return *(Pseudo_branch_instruction*)this; 11957ec681f3Smrg } 11967ec681f3Smrg constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; } 11977ec681f3Smrg Pseudo_barrier_instruction& barrier() noexcept 11987ec681f3Smrg { 11997ec681f3Smrg assert(isBarrier()); 12007ec681f3Smrg return *(Pseudo_barrier_instruction*)this; 12017ec681f3Smrg } 12027ec681f3Smrg const Pseudo_barrier_instruction& barrier() const noexcept 12037ec681f3Smrg { 12047ec681f3Smrg assert(isBarrier()); 12057ec681f3Smrg return *(Pseudo_barrier_instruction*)this; 12067ec681f3Smrg } 12077ec681f3Smrg constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; } 12087ec681f3Smrg Pseudo_reduction_instruction& reduction() noexcept 12097ec681f3Smrg { 12107ec681f3Smrg assert(isReduction()); 12117ec681f3Smrg return *(Pseudo_reduction_instruction*)this; 12127ec681f3Smrg } 12137ec681f3Smrg const Pseudo_reduction_instruction& reduction() const noexcept 12147ec681f3Smrg { 12157ec681f3Smrg assert(isReduction()); 12167ec681f3Smrg return *(Pseudo_reduction_instruction*)this; 12177ec681f3Smrg } 12187ec681f3Smrg constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; } 12197ec681f3Smrg VOP3P_instruction& vop3p() noexcept 12207ec681f3Smrg { 12217ec681f3Smrg assert(isVOP3P()); 12227ec681f3Smrg return *(VOP3P_instruction*)this; 12237ec681f3Smrg } 12247ec681f3Smrg const VOP3P_instruction& vop3p() const noexcept 12257ec681f3Smrg { 12267ec681f3Smrg assert(isVOP3P()); 12277ec681f3Smrg return *(VOP3P_instruction*)this; 12287ec681f3Smrg } 12297ec681f3Smrg constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; } 12307ec681f3Smrg VOP1_instruction& vop1() noexcept 12317ec681f3Smrg { 12327ec681f3Smrg assert(isVOP1()); 12337ec681f3Smrg return *(VOP1_instruction*)this; 12347ec681f3Smrg } 12357ec681f3Smrg const VOP1_instruction& vop1() const noexcept 12367ec681f3Smrg { 12377ec681f3Smrg assert(isVOP1()); 12387ec681f3Smrg return *(VOP1_instruction*)this; 12397ec681f3Smrg } 12407ec681f3Smrg constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; } 12417ec681f3Smrg VOP2_instruction& vop2() noexcept 12427ec681f3Smrg { 12437ec681f3Smrg assert(isVOP2()); 12447ec681f3Smrg return *(VOP2_instruction*)this; 12457ec681f3Smrg } 12467ec681f3Smrg const VOP2_instruction& vop2() const noexcept 12477ec681f3Smrg { 12487ec681f3Smrg assert(isVOP2()); 12497ec681f3Smrg return *(VOP2_instruction*)this; 12507ec681f3Smrg } 12517ec681f3Smrg constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; } 12527ec681f3Smrg VOPC_instruction& vopc() noexcept 12537ec681f3Smrg { 12547ec681f3Smrg assert(isVOPC()); 12557ec681f3Smrg return *(VOPC_instruction*)this; 12567ec681f3Smrg } 12577ec681f3Smrg const VOPC_instruction& vopc() const noexcept 12587ec681f3Smrg { 12597ec681f3Smrg assert(isVOPC()); 12607ec681f3Smrg return *(VOPC_instruction*)this; 12617ec681f3Smrg } 12627ec681f3Smrg constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; } 12637ec681f3Smrg VOP3_instruction& vop3() noexcept 12647ec681f3Smrg { 12657ec681f3Smrg assert(isVOP3()); 12667ec681f3Smrg return *(VOP3_instruction*)this; 12677ec681f3Smrg } 12687ec681f3Smrg const VOP3_instruction& vop3() const noexcept 12697ec681f3Smrg { 12707ec681f3Smrg assert(isVOP3()); 12717ec681f3Smrg return *(VOP3_instruction*)this; 12727ec681f3Smrg } 12737ec681f3Smrg constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; } 12747ec681f3Smrg Interp_instruction& vintrp() noexcept 12757ec681f3Smrg { 12767ec681f3Smrg assert(isVINTRP()); 12777ec681f3Smrg return *(Interp_instruction*)this; 12787ec681f3Smrg } 12797ec681f3Smrg const Interp_instruction& vintrp() const noexcept 12807ec681f3Smrg { 12817ec681f3Smrg assert(isVINTRP()); 12827ec681f3Smrg return *(Interp_instruction*)this; 12837ec681f3Smrg } 12847ec681f3Smrg constexpr bool isVINTRP() const noexcept { return (uint16_t)format & (uint16_t)Format::VINTRP; } 12857ec681f3Smrg DPP_instruction& dpp() noexcept 12867ec681f3Smrg { 12877ec681f3Smrg assert(isDPP()); 12887ec681f3Smrg return *(DPP_instruction*)this; 12897ec681f3Smrg } 12907ec681f3Smrg const DPP_instruction& dpp() const noexcept 12917ec681f3Smrg { 12927ec681f3Smrg assert(isDPP()); 12937ec681f3Smrg return *(DPP_instruction*)this; 12947ec681f3Smrg } 12957ec681f3Smrg constexpr bool isDPP() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP; } 12967ec681f3Smrg SDWA_instruction& sdwa() noexcept 12977ec681f3Smrg { 12987ec681f3Smrg assert(isSDWA()); 12997ec681f3Smrg return *(SDWA_instruction*)this; 13007ec681f3Smrg } 13017ec681f3Smrg const SDWA_instruction& sdwa() const noexcept 13027ec681f3Smrg { 13037ec681f3Smrg assert(isSDWA()); 13047ec681f3Smrg return *(SDWA_instruction*)this; 13057ec681f3Smrg } 13067ec681f3Smrg constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; } 13077ec681f3Smrg 13087ec681f3Smrg FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; } 13097ec681f3Smrg 13107ec681f3Smrg const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; } 13117ec681f3Smrg 13127ec681f3Smrg constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); } 13137ec681f3Smrg 13147ec681f3Smrg constexpr bool isVALU() const noexcept 13157ec681f3Smrg { 13167ec681f3Smrg return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P(); 13177ec681f3Smrg } 13187ec681f3Smrg 13197ec681f3Smrg constexpr bool isSALU() const noexcept 13207ec681f3Smrg { 13217ec681f3Smrg return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP(); 13227ec681f3Smrg } 13237ec681f3Smrg 13247ec681f3Smrg constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); } 13257ec681f3Smrg}; 13267ec681f3Smrgstatic_assert(sizeof(Instruction) == 16, "Unexpected padding"); 13277ec681f3Smrg 13287ec681f3Smrgstruct SOPK_instruction : public Instruction { 13297ec681f3Smrg uint16_t imm; 13307ec681f3Smrg uint16_t padding; 13317ec681f3Smrg}; 13327ec681f3Smrgstatic_assert(sizeof(SOPK_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 13337ec681f3Smrg 13347ec681f3Smrgstruct SOPP_instruction : public Instruction { 13357ec681f3Smrg uint32_t imm; 13367ec681f3Smrg int block; 13377ec681f3Smrg}; 13387ec681f3Smrgstatic_assert(sizeof(SOPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 13397ec681f3Smrg 13407ec681f3Smrgstruct SOPC_instruction : public Instruction {}; 13417ec681f3Smrgstatic_assert(sizeof(SOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 13427ec681f3Smrg 13437ec681f3Smrgstruct SOP1_instruction : public Instruction {}; 13447ec681f3Smrgstatic_assert(sizeof(SOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 13457ec681f3Smrg 13467ec681f3Smrgstruct SOP2_instruction : public Instruction {}; 13477ec681f3Smrgstatic_assert(sizeof(SOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 13487ec681f3Smrg 13497ec681f3Smrg/** 13507ec681f3Smrg * Scalar Memory Format: 13517ec681f3Smrg * For s_(buffer_)load_dword*: 13527ec681f3Smrg * Operand(0): SBASE - SGPR-pair which provides base address 13537ec681f3Smrg * Operand(1): Offset - immediate (un)signed offset or SGPR 13547ec681f3Smrg * Operand(2) / Definition(0): SDATA - SGPR for read / write result 13557ec681f3Smrg * Operand(n-1): SOffset - SGPR offset (Vega only) 13567ec681f3Smrg * 13577ec681f3Smrg * Having no operands is also valid for instructions such as s_dcache_inv. 13587ec681f3Smrg * 13597ec681f3Smrg */ 13607ec681f3Smrgstruct SMEM_instruction : public Instruction { 13617ec681f3Smrg memory_sync_info sync; 13627ec681f3Smrg bool glc : 1; /* VI+: globally coherent */ 13637ec681f3Smrg bool dlc : 1; /* NAVI: device level coherent */ 13647ec681f3Smrg bool nv : 1; /* VEGA only: Non-volatile */ 13657ec681f3Smrg bool disable_wqm : 1; 13667ec681f3Smrg bool prevent_overflow : 1; /* avoid overflow when combining additions */ 13677ec681f3Smrg uint8_t padding : 3; 13687ec681f3Smrg}; 13697ec681f3Smrgstatic_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 13707ec681f3Smrg 13717ec681f3Smrgstruct VOP1_instruction : public Instruction {}; 13727ec681f3Smrgstatic_assert(sizeof(VOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 13737ec681f3Smrg 13747ec681f3Smrgstruct VOP2_instruction : public Instruction {}; 13757ec681f3Smrgstatic_assert(sizeof(VOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 13767ec681f3Smrg 13777ec681f3Smrgstruct VOPC_instruction : public Instruction {}; 13787ec681f3Smrgstatic_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 13797ec681f3Smrg 13807ec681f3Smrgstruct VOP3_instruction : public Instruction { 13817ec681f3Smrg bool abs[3]; 13827ec681f3Smrg bool neg[3]; 13837ec681f3Smrg uint8_t opsel : 4; 13847ec681f3Smrg uint8_t omod : 2; 13857ec681f3Smrg bool clamp : 1; 13867ec681f3Smrg uint8_t padding0 : 1; 13877ec681f3Smrg uint8_t padding1; 13887ec681f3Smrg}; 13897ec681f3Smrgstatic_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 13907ec681f3Smrg 13917ec681f3Smrgstruct VOP3P_instruction : public Instruction { 13927ec681f3Smrg bool neg_lo[3]; 13937ec681f3Smrg bool neg_hi[3]; 13947ec681f3Smrg uint8_t opsel_lo : 3; 13957ec681f3Smrg uint8_t opsel_hi : 3; 13967ec681f3Smrg bool clamp : 1; 13977ec681f3Smrg uint8_t padding0 : 1; 13987ec681f3Smrg uint8_t padding1; 13997ec681f3Smrg}; 14007ec681f3Smrgstatic_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 14017ec681f3Smrg 14027ec681f3Smrg/** 14037ec681f3Smrg * Data Parallel Primitives Format: 14047ec681f3Smrg * This format can be used for VOP1, VOP2 or VOPC instructions. 14057ec681f3Smrg * The swizzle applies to the src0 operand. 14067ec681f3Smrg * 14077ec681f3Smrg */ 14087ec681f3Smrgstruct DPP_instruction : public Instruction { 14097ec681f3Smrg bool abs[2]; 14107ec681f3Smrg bool neg[2]; 14117ec681f3Smrg uint16_t dpp_ctrl; 14127ec681f3Smrg uint8_t row_mask : 4; 14137ec681f3Smrg uint8_t bank_mask : 4; 14147ec681f3Smrg bool bound_ctrl : 1; 14157ec681f3Smrg uint8_t padding : 7; 14167ec681f3Smrg}; 14177ec681f3Smrgstatic_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 14187ec681f3Smrg 14197ec681f3Smrgstruct SubdwordSel { 14207ec681f3Smrg enum sdwa_sel : uint8_t { 14217ec681f3Smrg ubyte = 0x4, 14227ec681f3Smrg uword = 0x8, 14237ec681f3Smrg dword = 0x10, 14247ec681f3Smrg sext = 0x20, 14257ec681f3Smrg sbyte = ubyte | sext, 14267ec681f3Smrg sword = uword | sext, 14277ec681f3Smrg 14287ec681f3Smrg ubyte0 = ubyte, 14297ec681f3Smrg ubyte1 = ubyte | 1, 14307ec681f3Smrg ubyte2 = ubyte | 2, 14317ec681f3Smrg ubyte3 = ubyte | 3, 14327ec681f3Smrg sbyte0 = sbyte, 14337ec681f3Smrg sbyte1 = sbyte | 1, 14347ec681f3Smrg sbyte2 = sbyte | 2, 14357ec681f3Smrg sbyte3 = sbyte | 3, 14367ec681f3Smrg uword0 = uword, 14377ec681f3Smrg uword1 = uword | 2, 14387ec681f3Smrg sword0 = sword, 14397ec681f3Smrg sword1 = sword | 2, 14407ec681f3Smrg }; 14417ec681f3Smrg 14427ec681f3Smrg SubdwordSel() : sel((sdwa_sel)0) {} 14437ec681f3Smrg constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {} 14447ec681f3Smrg constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend) 14457ec681f3Smrg : sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset)) 14467ec681f3Smrg {} 14477ec681f3Smrg constexpr operator sdwa_sel() const { return sel; } 14487ec681f3Smrg explicit operator bool() const { return sel != 0; } 14497ec681f3Smrg 14507ec681f3Smrg constexpr unsigned size() const { return (sel >> 2) & 0x7; } 14517ec681f3Smrg constexpr unsigned offset() const { return sel & 0x3; } 14527ec681f3Smrg constexpr bool sign_extend() const { return sel & sext; } 14537ec681f3Smrg constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const 14547ec681f3Smrg { 14557ec681f3Smrg reg_byte_offset += offset(); 14567ec681f3Smrg if (size() == 1) 14577ec681f3Smrg return reg_byte_offset; 14587ec681f3Smrg else if (size() == 2) 14597ec681f3Smrg return 4 + (reg_byte_offset >> 1); 14607ec681f3Smrg else 14617ec681f3Smrg return 6; 14627ec681f3Smrg } 14637ec681f3Smrg 14647ec681f3Smrgprivate: 14657ec681f3Smrg sdwa_sel sel; 14667ec681f3Smrg}; 14677ec681f3Smrg 14687ec681f3Smrg/** 14697ec681f3Smrg * Sub-Dword Addressing Format: 14707ec681f3Smrg * This format can be used for VOP1, VOP2 or VOPC instructions. 14717ec681f3Smrg * 14727ec681f3Smrg * omod and SGPR/constant operands are only available on GFX9+. For VOPC, 14737ec681f3Smrg * the definition doesn't have to be VCC on GFX9+. 14747ec681f3Smrg * 14757ec681f3Smrg */ 14767ec681f3Smrgstruct SDWA_instruction : public Instruction { 14777ec681f3Smrg /* these destination modifiers aren't available with VOPC except for 14787ec681f3Smrg * clamp on GFX8 */ 14797ec681f3Smrg SubdwordSel sel[2]; 14807ec681f3Smrg SubdwordSel dst_sel; 14817ec681f3Smrg bool neg[2]; 14827ec681f3Smrg bool abs[2]; 14837ec681f3Smrg bool clamp : 1; 14847ec681f3Smrg uint8_t omod : 2; /* GFX9+ */ 14857ec681f3Smrg uint8_t padding : 5; 14867ec681f3Smrg}; 14877ec681f3Smrgstatic_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 14887ec681f3Smrg 14897ec681f3Smrgstruct Interp_instruction : public Instruction { 14907ec681f3Smrg uint8_t attribute; 14917ec681f3Smrg uint8_t component; 14927ec681f3Smrg uint16_t padding; 14937ec681f3Smrg}; 14947ec681f3Smrgstatic_assert(sizeof(Interp_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 14957ec681f3Smrg 14967ec681f3Smrg/** 14977ec681f3Smrg * Local and Global Data Sharing instructions 14987ec681f3Smrg * Operand(0): ADDR - VGPR which supplies the address. 14997ec681f3Smrg * Operand(1): DATA0 - First data VGPR. 15007ec681f3Smrg * Operand(2): DATA1 - Second data VGPR. 15017ec681f3Smrg * Operand(n-1): M0 - LDS size. 15027ec681f3Smrg * Definition(0): VDST - Destination VGPR when results returned to VGPRs. 15037ec681f3Smrg * 15047ec681f3Smrg */ 15057ec681f3Smrgstruct DS_instruction : public Instruction { 15067ec681f3Smrg memory_sync_info sync; 15077ec681f3Smrg bool gds; 15087ec681f3Smrg int16_t offset0; 15097ec681f3Smrg int8_t offset1; 15107ec681f3Smrg uint8_t padding; 15117ec681f3Smrg}; 15127ec681f3Smrgstatic_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 15137ec681f3Smrg 15147ec681f3Smrg/** 15157ec681f3Smrg * Vector Memory Untyped-buffer Instructions 15167ec681f3Smrg * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant) 15177ec681f3Smrg * Operand(1): VADDR - Address source. Can carry an index and/or offset 15187ec681f3Smrg * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant) 15197ec681f3Smrg * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data 15207ec681f3Smrg * 15217ec681f3Smrg */ 15227ec681f3Smrgstruct MUBUF_instruction : public Instruction { 15237ec681f3Smrg memory_sync_info sync; 15247ec681f3Smrg bool offen : 1; /* Supply an offset from VGPR (VADDR) */ 15257ec681f3Smrg bool idxen : 1; /* Supply an index from VGPR (VADDR) */ 15267ec681f3Smrg bool addr64 : 1; /* SI, CIK: Address size is 64-bit */ 15277ec681f3Smrg bool glc : 1; /* globally coherent */ 15287ec681f3Smrg bool dlc : 1; /* NAVI: device level coherent */ 15297ec681f3Smrg bool slc : 1; /* system level coherent */ 15307ec681f3Smrg bool tfe : 1; /* texture fail enable */ 15317ec681f3Smrg bool lds : 1; /* Return read-data to LDS instead of VGPRs */ 15327ec681f3Smrg uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */ 15337ec681f3Smrg uint16_t offset : 12; /* Unsigned byte offset - 12 bit */ 15347ec681f3Smrg uint16_t swizzled : 1; 15357ec681f3Smrg uint16_t padding0 : 2; 15367ec681f3Smrg uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */ 15377ec681f3Smrg uint16_t padding1 : 10; 15387ec681f3Smrg}; 15397ec681f3Smrgstatic_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 15407ec681f3Smrg 15417ec681f3Smrg/** 15427ec681f3Smrg * Vector Memory Typed-buffer Instructions 15437ec681f3Smrg * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant) 15447ec681f3Smrg * Operand(1): VADDR - Address source. Can carry an index and/or offset 15457ec681f3Smrg * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant) 15467ec681f3Smrg * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data 15477ec681f3Smrg * 15487ec681f3Smrg */ 15497ec681f3Smrgstruct MTBUF_instruction : public Instruction { 15507ec681f3Smrg memory_sync_info sync; 15517ec681f3Smrg uint8_t dfmt : 4; /* Data Format of data in memory buffer */ 15527ec681f3Smrg uint8_t nfmt : 3; /* Numeric format of data in memory */ 15537ec681f3Smrg bool offen : 1; /* Supply an offset from VGPR (VADDR) */ 15547ec681f3Smrg uint16_t idxen : 1; /* Supply an index from VGPR (VADDR) */ 15557ec681f3Smrg uint16_t glc : 1; /* globally coherent */ 15567ec681f3Smrg uint16_t dlc : 1; /* NAVI: device level coherent */ 15577ec681f3Smrg uint16_t slc : 1; /* system level coherent */ 15587ec681f3Smrg uint16_t tfe : 1; /* texture fail enable */ 15597ec681f3Smrg uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */ 15607ec681f3Smrg uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */ 15617ec681f3Smrg uint16_t padding : 4; 15627ec681f3Smrg uint16_t offset; /* Unsigned byte offset - 12 bit */ 15637ec681f3Smrg}; 15647ec681f3Smrgstatic_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 15657ec681f3Smrg 15667ec681f3Smrg/** 15677ec681f3Smrg * Vector Memory Image Instructions 15687ec681f3Smrg * Operand(0) SRSRC - Scalar GPR that specifies the resource constant. 15697ec681f3Smrg * Operand(1): SSAMP - Scalar GPR that specifies sampler constant. 15707ec681f3Smrg * Operand(2): VDATA - Vector GPR for write data or zero if TFE/LWE=1. 15717ec681f3Smrg * Operand(3): VADDR - Address source. Can carry an offset or an index. 15727ec681f3Smrg * Definition(0): VDATA - Vector GPR for read result. 15737ec681f3Smrg * 15747ec681f3Smrg */ 15757ec681f3Smrgstruct MIMG_instruction : public Instruction { 15767ec681f3Smrg memory_sync_info sync; 15777ec681f3Smrg uint8_t dmask; /* Data VGPR enable mask */ 15787ec681f3Smrg uint8_t dim : 3; /* NAVI: dimensionality */ 15797ec681f3Smrg bool unrm : 1; /* Force address to be un-normalized */ 15807ec681f3Smrg bool dlc : 1; /* NAVI: device level coherent */ 15817ec681f3Smrg bool glc : 1; /* globally coherent */ 15827ec681f3Smrg bool slc : 1; /* system level coherent */ 15837ec681f3Smrg bool tfe : 1; /* texture fail enable */ 15847ec681f3Smrg bool da : 1; /* declare an array */ 15857ec681f3Smrg bool lwe : 1; /* LOD warning enable */ 15867ec681f3Smrg bool r128 : 1; /* NAVI: Texture resource size */ 15877ec681f3Smrg bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */ 15887ec681f3Smrg bool d16 : 1; /* Convert 32-bit data to 16-bit data */ 15897ec681f3Smrg bool disable_wqm : 1; /* Require an exec mask without helper invocations */ 15907ec681f3Smrg uint8_t padding0 : 2; 15917ec681f3Smrg uint8_t padding1; 15927ec681f3Smrg uint8_t padding2; 15937ec681f3Smrg}; 15947ec681f3Smrgstatic_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 15957ec681f3Smrg 15967ec681f3Smrg/** 15977ec681f3Smrg * Flat/Scratch/Global Instructions 15987ec681f3Smrg * Operand(0): ADDR 15997ec681f3Smrg * Operand(1): SADDR 16007ec681f3Smrg * Operand(2) / Definition(0): DATA/VDST 16017ec681f3Smrg * 16027ec681f3Smrg */ 16037ec681f3Smrgstruct FLAT_instruction : public Instruction { 16047ec681f3Smrg memory_sync_info sync; 16057ec681f3Smrg bool slc : 1; /* system level coherent */ 16067ec681f3Smrg bool glc : 1; /* globally coherent */ 16077ec681f3Smrg bool dlc : 1; /* NAVI: device level coherent */ 16087ec681f3Smrg bool lds : 1; 16097ec681f3Smrg bool nv : 1; 16107ec681f3Smrg bool disable_wqm : 1; /* Require an exec mask without helper invocations */ 16117ec681f3Smrg uint8_t padding0 : 2; 16127ec681f3Smrg uint16_t offset; /* Vega/Navi only */ 16137ec681f3Smrg uint16_t padding1; 16147ec681f3Smrg}; 16157ec681f3Smrgstatic_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 16167ec681f3Smrg 16177ec681f3Smrgstruct Export_instruction : public Instruction { 16187ec681f3Smrg uint8_t enabled_mask; 16197ec681f3Smrg uint8_t dest; 16207ec681f3Smrg bool compressed : 1; 16217ec681f3Smrg bool done : 1; 16227ec681f3Smrg bool valid_mask : 1; 16237ec681f3Smrg uint8_t padding0 : 5; 16247ec681f3Smrg uint8_t padding1; 16257ec681f3Smrg}; 16267ec681f3Smrgstatic_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 16277ec681f3Smrg 16287ec681f3Smrgstruct Pseudo_instruction : public Instruction { 16297ec681f3Smrg PhysReg scratch_sgpr; /* might not be valid if it's not needed */ 16307ec681f3Smrg bool tmp_in_scc; 16317ec681f3Smrg uint8_t padding; 16327ec681f3Smrg}; 16337ec681f3Smrgstatic_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 16347ec681f3Smrg 16357ec681f3Smrgstruct Pseudo_branch_instruction : public Instruction { 16367ec681f3Smrg /* target[0] is the block index of the branch target. 16377ec681f3Smrg * For conditional branches, target[1] contains the fall-through alternative. 16387ec681f3Smrg * A value of 0 means the target has not been initialized (BB0 cannot be a branch target). 16397ec681f3Smrg */ 16407ec681f3Smrg uint32_t target[2]; 16417ec681f3Smrg}; 16427ec681f3Smrgstatic_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 16437ec681f3Smrg 16447ec681f3Smrgstruct Pseudo_barrier_instruction : public Instruction { 16457ec681f3Smrg memory_sync_info sync; 16467ec681f3Smrg sync_scope exec_scope; 16477ec681f3Smrg}; 16487ec681f3Smrgstatic_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 16497ec681f3Smrg 16507ec681f3Smrgenum ReduceOp : uint16_t { 16517ec681f3Smrg // clang-format off 16527ec681f3Smrg iadd8, iadd16, iadd32, iadd64, 16537ec681f3Smrg imul8, imul16, imul32, imul64, 16547ec681f3Smrg fadd16, fadd32, fadd64, 16557ec681f3Smrg fmul16, fmul32, fmul64, 16567ec681f3Smrg imin8, imin16, imin32, imin64, 16577ec681f3Smrg imax8, imax16, imax32, imax64, 16587ec681f3Smrg umin8, umin16, umin32, umin64, 16597ec681f3Smrg umax8, umax16, umax32, umax64, 16607ec681f3Smrg fmin16, fmin32, fmin64, 16617ec681f3Smrg fmax16, fmax32, fmax64, 16627ec681f3Smrg iand8, iand16, iand32, iand64, 16637ec681f3Smrg ior8, ior16, ior32, ior64, 16647ec681f3Smrg ixor8, ixor16, ixor32, ixor64, 16657ec681f3Smrg num_reduce_ops, 16667ec681f3Smrg // clang-format on 16677ec681f3Smrg}; 16687ec681f3Smrg 16697ec681f3Smrg/** 16707ec681f3Smrg * Subgroup Reduction Instructions, everything except for the data to be 16717ec681f3Smrg * reduced and the result as inserted by setup_reduce_temp(). 16727ec681f3Smrg * Operand(0): data to be reduced 16737ec681f3Smrg * Operand(1): reduce temporary 16747ec681f3Smrg * Operand(2): vector temporary 16757ec681f3Smrg * Definition(0): result 16767ec681f3Smrg * Definition(1): scalar temporary 16777ec681f3Smrg * Definition(2): scalar identity temporary (not used to store identity on GFX10) 16787ec681f3Smrg * Definition(3): scc clobber 16797ec681f3Smrg * Definition(4): vcc clobber 16807ec681f3Smrg * 16817ec681f3Smrg */ 16827ec681f3Smrgstruct Pseudo_reduction_instruction : public Instruction { 16837ec681f3Smrg ReduceOp reduce_op; 16847ec681f3Smrg uint16_t cluster_size; // must be 0 for scans 16857ec681f3Smrg}; 16867ec681f3Smrgstatic_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4, 16877ec681f3Smrg "Unexpected padding"); 16887ec681f3Smrg 16897ec681f3Smrgstruct instr_deleter_functor { 16907ec681f3Smrg void operator()(void* p) { free(p); } 16917ec681f3Smrg}; 16927ec681f3Smrg 16937ec681f3Smrgtemplate <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>; 16947ec681f3Smrg 16957ec681f3Smrgtemplate <typename T> 16967ec681f3SmrgT* 16977ec681f3Smrgcreate_instruction(aco_opcode opcode, Format format, uint32_t num_operands, 16987ec681f3Smrg uint32_t num_definitions) 16997ec681f3Smrg{ 17007ec681f3Smrg std::size_t size = 17017ec681f3Smrg sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition); 17027ec681f3Smrg char* data = (char*)calloc(1, size); 17037ec681f3Smrg T* inst = (T*)data; 17047ec681f3Smrg 17057ec681f3Smrg inst->opcode = opcode; 17067ec681f3Smrg inst->format = format; 17077ec681f3Smrg 17087ec681f3Smrg uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands; 17097ec681f3Smrg inst->operands = aco::span<Operand>(operands_offset, num_operands); 17107ec681f3Smrg uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions; 17117ec681f3Smrg inst->definitions = aco::span<Definition>(definitions_offset, num_definitions); 17127ec681f3Smrg 17137ec681f3Smrg return inst; 17147ec681f3Smrg} 17157ec681f3Smrg 17167ec681f3Smrgconstexpr bool 17177ec681f3SmrgInstruction::usesModifiers() const noexcept 17187ec681f3Smrg{ 17197ec681f3Smrg if (isDPP() || isSDWA()) 17207ec681f3Smrg return true; 17217ec681f3Smrg 17227ec681f3Smrg if (isVOP3P()) { 17237ec681f3Smrg const VOP3P_instruction& vop3p = this->vop3p(); 17247ec681f3Smrg for (unsigned i = 0; i < operands.size(); i++) { 17257ec681f3Smrg if (vop3p.neg_lo[i] || vop3p.neg_hi[i]) 17267ec681f3Smrg return true; 17277ec681f3Smrg 17287ec681f3Smrg /* opsel_hi must be 1 to not be considered a modifier - even for constants */ 17297ec681f3Smrg if (!(vop3p.opsel_hi & (1 << i))) 17307ec681f3Smrg return true; 17317ec681f3Smrg } 17327ec681f3Smrg return vop3p.opsel_lo || vop3p.clamp; 17337ec681f3Smrg } else if (isVOP3()) { 17347ec681f3Smrg const VOP3_instruction& vop3 = this->vop3(); 17357ec681f3Smrg for (unsigned i = 0; i < operands.size(); i++) { 17367ec681f3Smrg if (vop3.abs[i] || vop3.neg[i]) 17377ec681f3Smrg return true; 17387ec681f3Smrg } 17397ec681f3Smrg return vop3.opsel || vop3.clamp || vop3.omod; 17407ec681f3Smrg } 17417ec681f3Smrg return false; 17427ec681f3Smrg} 17437ec681f3Smrg 17447ec681f3Smrgconstexpr bool 17457ec681f3Smrgis_phi(Instruction* instr) 17467ec681f3Smrg{ 17477ec681f3Smrg return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi; 17487ec681f3Smrg} 17497ec681f3Smrg 17507ec681f3Smrgstatic inline bool 17517ec681f3Smrgis_phi(aco_ptr<Instruction>& instr) 17527ec681f3Smrg{ 17537ec681f3Smrg return is_phi(instr.get()); 17547ec681f3Smrg} 17557ec681f3Smrg 17567ec681f3Smrgmemory_sync_info get_sync_info(const Instruction* instr); 17577ec681f3Smrg 17587ec681f3Smrgbool is_dead(const std::vector<uint16_t>& uses, Instruction* instr); 17597ec681f3Smrg 17607ec681f3Smrgbool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high); 17617ec681f3Smrgbool instr_is_16bit(chip_class chip, aco_opcode op); 17627ec681f3Smrgbool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra); 17637ec681f3Smrgbool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra); 17647ec681f3Smrg/* updates "instr" and returns the old instruction (or NULL if no update was needed) */ 17657ec681f3Smrgaco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr); 17667ec681f3Smrgaco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr); 17677ec681f3Smrgbool needs_exec_mask(const Instruction* instr); 17687ec681f3Smrg 17697ec681f3Smrgaco_opcode get_ordered(aco_opcode op); 17707ec681f3Smrgaco_opcode get_unordered(aco_opcode op); 17717ec681f3Smrgaco_opcode get_inverse(aco_opcode op); 17727ec681f3Smrgaco_opcode get_f32_cmp(aco_opcode op); 17737ec681f3Smrgunsigned get_cmp_bitsize(aco_opcode op); 17747ec681f3Smrgbool is_cmp(aco_opcode op); 17757ec681f3Smrg 17767ec681f3Smrgbool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op); 17777ec681f3Smrg 17787ec681f3Smrguint32_t get_reduction_identity(ReduceOp op, unsigned idx); 17797ec681f3Smrg 17807ec681f3Smrgunsigned get_mimg_nsa_dwords(const Instruction* instr); 17817ec681f3Smrg 17827ec681f3Smrgbool should_form_clause(const Instruction* a, const Instruction* b); 17837ec681f3Smrg 17847ec681f3Smrgenum block_kind { 17857ec681f3Smrg /* uniform indicates that leaving this block, 17867ec681f3Smrg * all actives lanes stay active */ 17877ec681f3Smrg block_kind_uniform = 1 << 0, 17887ec681f3Smrg block_kind_top_level = 1 << 1, 17897ec681f3Smrg block_kind_loop_preheader = 1 << 2, 17907ec681f3Smrg block_kind_loop_header = 1 << 3, 17917ec681f3Smrg block_kind_loop_exit = 1 << 4, 17927ec681f3Smrg block_kind_continue = 1 << 5, 17937ec681f3Smrg block_kind_break = 1 << 6, 17947ec681f3Smrg block_kind_continue_or_break = 1 << 7, 17957ec681f3Smrg block_kind_discard = 1 << 8, 17967ec681f3Smrg block_kind_branch = 1 << 9, 17977ec681f3Smrg block_kind_merge = 1 << 10, 17987ec681f3Smrg block_kind_invert = 1 << 11, 17997ec681f3Smrg block_kind_uses_discard_if = 1 << 12, 18007ec681f3Smrg block_kind_needs_lowering = 1 << 13, 18017ec681f3Smrg block_kind_uses_demote = 1 << 14, 18027ec681f3Smrg block_kind_export_end = 1 << 15, 18037ec681f3Smrg}; 18047ec681f3Smrg 18057ec681f3Smrgstruct RegisterDemand { 18067ec681f3Smrg constexpr RegisterDemand() = default; 18077ec681f3Smrg constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {} 18087ec681f3Smrg int16_t vgpr = 0; 18097ec681f3Smrg int16_t sgpr = 0; 18107ec681f3Smrg 18117ec681f3Smrg constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept 18127ec681f3Smrg { 18137ec681f3Smrg return a.vgpr == b.vgpr && a.sgpr == b.sgpr; 18147ec681f3Smrg } 18157ec681f3Smrg 18167ec681f3Smrg constexpr bool exceeds(const RegisterDemand other) const noexcept 18177ec681f3Smrg { 18187ec681f3Smrg return vgpr > other.vgpr || sgpr > other.sgpr; 18197ec681f3Smrg } 18207ec681f3Smrg 18217ec681f3Smrg constexpr RegisterDemand operator+(const Temp t) const noexcept 18227ec681f3Smrg { 18237ec681f3Smrg if (t.type() == RegType::sgpr) 18247ec681f3Smrg return RegisterDemand(vgpr, sgpr + t.size()); 18257ec681f3Smrg else 18267ec681f3Smrg return RegisterDemand(vgpr + t.size(), sgpr); 18277ec681f3Smrg } 18287ec681f3Smrg 18297ec681f3Smrg constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept 18307ec681f3Smrg { 18317ec681f3Smrg return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr); 18327ec681f3Smrg } 18337ec681f3Smrg 18347ec681f3Smrg constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept 18357ec681f3Smrg { 18367ec681f3Smrg return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr); 18377ec681f3Smrg } 18387ec681f3Smrg 18397ec681f3Smrg constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept 18407ec681f3Smrg { 18417ec681f3Smrg vgpr += other.vgpr; 18427ec681f3Smrg sgpr += other.sgpr; 18437ec681f3Smrg return *this; 18447ec681f3Smrg } 18457ec681f3Smrg 18467ec681f3Smrg constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept 18477ec681f3Smrg { 18487ec681f3Smrg vgpr -= other.vgpr; 18497ec681f3Smrg sgpr -= other.sgpr; 18507ec681f3Smrg return *this; 18517ec681f3Smrg } 18527ec681f3Smrg 18537ec681f3Smrg constexpr RegisterDemand& operator+=(const Temp t) noexcept 18547ec681f3Smrg { 18557ec681f3Smrg if (t.type() == RegType::sgpr) 18567ec681f3Smrg sgpr += t.size(); 18577ec681f3Smrg else 18587ec681f3Smrg vgpr += t.size(); 18597ec681f3Smrg return *this; 18607ec681f3Smrg } 18617ec681f3Smrg 18627ec681f3Smrg constexpr RegisterDemand& operator-=(const Temp t) noexcept 18637ec681f3Smrg { 18647ec681f3Smrg if (t.type() == RegType::sgpr) 18657ec681f3Smrg sgpr -= t.size(); 18667ec681f3Smrg else 18677ec681f3Smrg vgpr -= t.size(); 18687ec681f3Smrg return *this; 18697ec681f3Smrg } 18707ec681f3Smrg 18717ec681f3Smrg constexpr void update(const RegisterDemand other) noexcept 18727ec681f3Smrg { 18737ec681f3Smrg vgpr = std::max(vgpr, other.vgpr); 18747ec681f3Smrg sgpr = std::max(sgpr, other.sgpr); 18757ec681f3Smrg } 18767ec681f3Smrg}; 18777ec681f3Smrg 18787ec681f3Smrg/* CFG */ 18797ec681f3Smrgstruct Block { 18807ec681f3Smrg float_mode fp_mode; 18817ec681f3Smrg unsigned index; 18827ec681f3Smrg unsigned offset = 0; 18837ec681f3Smrg std::vector<aco_ptr<Instruction>> instructions; 18847ec681f3Smrg std::vector<unsigned> logical_preds; 18857ec681f3Smrg std::vector<unsigned> linear_preds; 18867ec681f3Smrg std::vector<unsigned> logical_succs; 18877ec681f3Smrg std::vector<unsigned> linear_succs; 18887ec681f3Smrg RegisterDemand register_demand = RegisterDemand(); 18897ec681f3Smrg uint16_t loop_nest_depth = 0; 18907ec681f3Smrg uint16_t divergent_if_logical_depth = 0; 18917ec681f3Smrg uint16_t uniform_if_depth = 0; 18927ec681f3Smrg uint16_t kind = 0; 18937ec681f3Smrg int logical_idom = -1; 18947ec681f3Smrg int linear_idom = -1; 18957ec681f3Smrg 18967ec681f3Smrg /* this information is needed for predecessors to blocks with phis when 18977ec681f3Smrg * moving out of ssa */ 18987ec681f3Smrg bool scc_live_out = false; 18997ec681f3Smrg PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */ 19007ec681f3Smrg 19017ec681f3Smrg Block() : index(0) {} 19027ec681f3Smrg}; 19037ec681f3Smrg 19047ec681f3Smrg/* 19057ec681f3Smrg * Shader stages as provided in Vulkan by the application. Contrast this to HWStage. 19067ec681f3Smrg */ 19077ec681f3Smrgenum class SWStage : uint8_t { 19087ec681f3Smrg None = 0, 19097ec681f3Smrg VS = 1 << 0, /* Vertex Shader */ 19107ec681f3Smrg GS = 1 << 1, /* Geometry Shader */ 19117ec681f3Smrg TCS = 1 << 2, /* Tessellation Control aka Hull Shader */ 19127ec681f3Smrg TES = 1 << 3, /* Tessellation Evaluation aka Domain Shader */ 19137ec681f3Smrg FS = 1 << 4, /* Fragment aka Pixel Shader */ 19147ec681f3Smrg CS = 1 << 5, /* Compute Shader */ 19157ec681f3Smrg GSCopy = 1 << 6, /* GS Copy Shader (internal) */ 19167ec681f3Smrg 19177ec681f3Smrg /* Stage combinations merged to run on a single HWStage */ 19187ec681f3Smrg VS_GS = VS | GS, 19197ec681f3Smrg VS_TCS = VS | TCS, 19207ec681f3Smrg TES_GS = TES | GS, 19217ec681f3Smrg}; 19227ec681f3Smrg 19237ec681f3Smrgconstexpr SWStage 19247ec681f3Smrgoperator|(SWStage a, SWStage b) 19257ec681f3Smrg{ 19267ec681f3Smrg return static_cast<SWStage>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b)); 19277ec681f3Smrg} 19287ec681f3Smrg 19297ec681f3Smrg/* 19307ec681f3Smrg * Shader stages as running on the AMD GPU. 19317ec681f3Smrg * 19327ec681f3Smrg * The relation between HWStages and SWStages is not a one-to-one mapping: 19337ec681f3Smrg * Some SWStages are merged by ACO to run on a single HWStage. 19347ec681f3Smrg * See README.md for details. 19357ec681f3Smrg */ 19367ec681f3Smrgenum class HWStage : uint8_t { 19377ec681f3Smrg VS, 19387ec681f3Smrg ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */ 19397ec681f3Smrg GS, /* Geometry shader on GFX10/legacy and GFX6-9. */ 19407ec681f3Smrg NGG, /* Primitive shader, used to implement VS, TES, GS. */ 19417ec681f3Smrg LS, /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */ 19427ec681f3Smrg HS, /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */ 19437ec681f3Smrg FS, 19447ec681f3Smrg CS, 19457ec681f3Smrg}; 19467ec681f3Smrg 19477ec681f3Smrg/* 19487ec681f3Smrg * Set of SWStages to be merged into a single shader paired with the 19497ec681f3Smrg * HWStage it will run on. 19507ec681f3Smrg */ 19517ec681f3Smrgstruct Stage { 19527ec681f3Smrg constexpr Stage() = default; 19537ec681f3Smrg 19547ec681f3Smrg explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {} 19557ec681f3Smrg 19567ec681f3Smrg /* Check if the given SWStage is included */ 19577ec681f3Smrg constexpr bool has(SWStage stage) const 19587ec681f3Smrg { 19597ec681f3Smrg return (static_cast<uint8_t>(sw) & static_cast<uint8_t>(stage)); 19607ec681f3Smrg } 19617ec681f3Smrg 19627ec681f3Smrg unsigned num_sw_stages() const { return util_bitcount(static_cast<uint8_t>(sw)); } 19637ec681f3Smrg 19647ec681f3Smrg constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; } 19657ec681f3Smrg 19667ec681f3Smrg constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; } 19677ec681f3Smrg 19687ec681f3Smrg /* Mask of merged software stages */ 19697ec681f3Smrg SWStage sw = SWStage::None; 19707ec681f3Smrg 19717ec681f3Smrg /* Active hardware stage */ 19727ec681f3Smrg HWStage hw{}; 19737ec681f3Smrg}; 19747ec681f3Smrg 19757ec681f3Smrg/* possible settings of Program::stage */ 19767ec681f3Smrgstatic constexpr Stage vertex_vs(HWStage::VS, SWStage::VS); 19777ec681f3Smrgstatic constexpr Stage fragment_fs(HWStage::FS, SWStage::FS); 19787ec681f3Smrgstatic constexpr Stage compute_cs(HWStage::CS, SWStage::CS); 19797ec681f3Smrgstatic constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES); 19807ec681f3Smrgstatic constexpr Stage gs_copy_vs(HWStage::VS, SWStage::GSCopy); 19817ec681f3Smrg/* GFX10/NGG */ 19827ec681f3Smrgstatic constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS); 19837ec681f3Smrgstatic constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS); 19847ec681f3Smrgstatic constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES); 19857ec681f3Smrgstatic constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS); 19867ec681f3Smrg/* GFX9 (and GFX10 if NGG isn't used) */ 19877ec681f3Smrgstatic constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS); 19887ec681f3Smrgstatic constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS); 19897ec681f3Smrgstatic constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS); 19907ec681f3Smrg/* pre-GFX9 */ 19917ec681f3Smrgstatic constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tesselation control */ 19927ec681f3Smrgstatic constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */ 19937ec681f3Smrgstatic constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS); 19947ec681f3Smrgstatic constexpr Stage tess_eval_es(HWStage::ES, 19957ec681f3Smrg SWStage::TES); /* tesselation evaluation before geometry */ 19967ec681f3Smrgstatic constexpr Stage geometry_gs(HWStage::GS, SWStage::GS); 19977ec681f3Smrg 19987ec681f3Smrgenum statistic { 19997ec681f3Smrg statistic_hash, 20007ec681f3Smrg statistic_instructions, 20017ec681f3Smrg statistic_copies, 20027ec681f3Smrg statistic_branches, 20037ec681f3Smrg statistic_latency, 20047ec681f3Smrg statistic_inv_throughput, 20057ec681f3Smrg statistic_vmem_clauses, 20067ec681f3Smrg statistic_smem_clauses, 20077ec681f3Smrg statistic_sgpr_presched, 20087ec681f3Smrg statistic_vgpr_presched, 20097ec681f3Smrg num_statistics 20107ec681f3Smrg}; 20117ec681f3Smrg 20127ec681f3Smrgstruct DeviceInfo { 20137ec681f3Smrg uint16_t lds_encoding_granule; 20147ec681f3Smrg uint16_t lds_alloc_granule; 20157ec681f3Smrg uint32_t lds_limit; /* in bytes */ 20167ec681f3Smrg bool has_16bank_lds; 20177ec681f3Smrg uint16_t physical_sgprs; 20187ec681f3Smrg uint16_t physical_vgprs; 20197ec681f3Smrg uint16_t vgpr_limit; 20207ec681f3Smrg uint16_t sgpr_limit; 20217ec681f3Smrg uint16_t sgpr_alloc_granule; 20227ec681f3Smrg uint16_t vgpr_alloc_granule; /* must be power of two */ 20237ec681f3Smrg unsigned max_wave64_per_simd; 20247ec681f3Smrg unsigned simd_per_cu; 20257ec681f3Smrg bool has_fast_fma32 = false; 20267ec681f3Smrg bool xnack_enabled = false; 20277ec681f3Smrg bool sram_ecc_enabled = false; 20287ec681f3Smrg}; 20297ec681f3Smrg 20307ec681f3Smrgenum class CompilationProgress { 20317ec681f3Smrg after_isel, 20327ec681f3Smrg after_spilling, 20337ec681f3Smrg after_ra, 20347ec681f3Smrg}; 20357ec681f3Smrg 20367ec681f3Smrgclass Program final { 20377ec681f3Smrgpublic: 20387ec681f3Smrg std::vector<Block> blocks; 20397ec681f3Smrg std::vector<RegClass> temp_rc = {s1}; 20407ec681f3Smrg RegisterDemand max_reg_demand = RegisterDemand(); 20417ec681f3Smrg uint16_t num_waves = 0; 20427ec681f3Smrg uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */ 20437ec681f3Smrg ac_shader_config* config; 20447ec681f3Smrg const struct radv_shader_info* info; 20457ec681f3Smrg enum chip_class chip_class; 20467ec681f3Smrg enum radeon_family family; 20477ec681f3Smrg DeviceInfo dev; 20487ec681f3Smrg unsigned wave_size; 20497ec681f3Smrg RegClass lane_mask; 20507ec681f3Smrg Stage stage; 20517ec681f3Smrg bool needs_exact = false; /* there exists an instruction with disable_wqm = true */ 20527ec681f3Smrg bool needs_wqm = false; /* there exists a p_wqm instruction */ 20537ec681f3Smrg 20547ec681f3Smrg std::vector<uint8_t> constant_data; 20557ec681f3Smrg Temp private_segment_buffer; 20567ec681f3Smrg Temp scratch_offset; 20577ec681f3Smrg 20587ec681f3Smrg uint16_t min_waves = 0; 20597ec681f3Smrg unsigned workgroup_size; /* if known; otherwise UINT_MAX */ 20607ec681f3Smrg bool wgp_mode; 20617ec681f3Smrg bool early_rast = false; /* whether rasterization can start as soon as the 1st DONE pos export */ 20627ec681f3Smrg 20637ec681f3Smrg bool needs_vcc = false; 20647ec681f3Smrg bool needs_flat_scr = false; 20657ec681f3Smrg 20667ec681f3Smrg CompilationProgress progress; 20677ec681f3Smrg 20687ec681f3Smrg bool collect_statistics = false; 20697ec681f3Smrg uint32_t statistics[num_statistics]; 20707ec681f3Smrg 20717ec681f3Smrg float_mode next_fp_mode; 20727ec681f3Smrg unsigned next_loop_depth = 0; 20737ec681f3Smrg unsigned next_divergent_if_logical_depth = 0; 20747ec681f3Smrg unsigned next_uniform_if_depth = 0; 20757ec681f3Smrg 20767ec681f3Smrg std::vector<Definition> vs_inputs; 20777ec681f3Smrg 20787ec681f3Smrg struct { 20797ec681f3Smrg FILE* output = stderr; 20807ec681f3Smrg bool shorten_messages = false; 20817ec681f3Smrg void (*func)(void* private_data, enum radv_compiler_debug_level level, const char* message); 20827ec681f3Smrg void* private_data; 20837ec681f3Smrg } debug; 20847ec681f3Smrg 20857ec681f3Smrg uint32_t allocateId(RegClass rc) 20867ec681f3Smrg { 20877ec681f3Smrg assert(allocationID <= 16777215); 20887ec681f3Smrg temp_rc.push_back(rc); 20897ec681f3Smrg return allocationID++; 20907ec681f3Smrg } 20917ec681f3Smrg 20927ec681f3Smrg void allocateRange(unsigned amount) 20937ec681f3Smrg { 20947ec681f3Smrg assert(allocationID + amount <= 16777216); 20957ec681f3Smrg temp_rc.resize(temp_rc.size() + amount); 20967ec681f3Smrg allocationID += amount; 20977ec681f3Smrg } 20987ec681f3Smrg 20997ec681f3Smrg Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); } 21007ec681f3Smrg 21017ec681f3Smrg uint32_t peekAllocationId() { return allocationID; } 21027ec681f3Smrg 21037ec681f3Smrg friend void reindex_ssa(Program* program); 21047ec681f3Smrg friend void reindex_ssa(Program* program, std::vector<IDSet>& live_out); 21057ec681f3Smrg 21067ec681f3Smrg Block* create_and_insert_block() 21077ec681f3Smrg { 21087ec681f3Smrg Block block; 21097ec681f3Smrg return insert_block(std::move(block)); 21107ec681f3Smrg } 21117ec681f3Smrg 21127ec681f3Smrg Block* insert_block(Block&& block) 21137ec681f3Smrg { 21147ec681f3Smrg block.index = blocks.size(); 21157ec681f3Smrg block.fp_mode = next_fp_mode; 21167ec681f3Smrg block.loop_nest_depth = next_loop_depth; 21177ec681f3Smrg block.divergent_if_logical_depth = next_divergent_if_logical_depth; 21187ec681f3Smrg block.uniform_if_depth = next_uniform_if_depth; 21197ec681f3Smrg blocks.emplace_back(std::move(block)); 21207ec681f3Smrg return &blocks.back(); 21217ec681f3Smrg } 21227ec681f3Smrg 21237ec681f3Smrgprivate: 21247ec681f3Smrg uint32_t allocationID = 1; 21257ec681f3Smrg}; 21267ec681f3Smrg 21277ec681f3Smrgstruct live { 21287ec681f3Smrg /* live temps out per block */ 21297ec681f3Smrg std::vector<IDSet> live_out; 21307ec681f3Smrg /* register demand (sgpr/vgpr) per instruction per block */ 21317ec681f3Smrg std::vector<std::vector<RegisterDemand>> register_demand; 21327ec681f3Smrg}; 21337ec681f3Smrg 21347ec681f3Smrgstruct ra_test_policy { 21357ec681f3Smrg /* Force RA to always use its pessimistic fallback algorithm */ 21367ec681f3Smrg bool skip_optimistic_path = false; 21377ec681f3Smrg}; 21387ec681f3Smrg 21397ec681f3Smrgvoid init(); 21407ec681f3Smrg 21417ec681f3Smrgvoid init_program(Program* program, Stage stage, const struct radv_shader_info* info, 21427ec681f3Smrg enum chip_class chip_class, enum radeon_family family, bool wgp_mode, 21437ec681f3Smrg ac_shader_config* config); 21447ec681f3Smrg 21457ec681f3Smrgvoid select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders, 21467ec681f3Smrg ac_shader_config* config, const struct radv_shader_args* args); 21477ec681f3Smrgvoid select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config, 21487ec681f3Smrg const struct radv_shader_args* args); 21497ec681f3Smrgvoid select_trap_handler_shader(Program* program, struct nir_shader* shader, 21507ec681f3Smrg ac_shader_config* config, const struct radv_shader_args* args); 21517ec681f3Smrgvoid select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, 21527ec681f3Smrg ac_shader_config* config, const struct radv_shader_args* args, 21537ec681f3Smrg unsigned* num_preserved_sgprs); 21547ec681f3Smrg 21557ec681f3Smrgvoid lower_phis(Program* program); 21567ec681f3Smrgvoid calc_min_waves(Program* program); 21577ec681f3Smrgvoid update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand); 21587ec681f3Smrglive live_var_analysis(Program* program); 21597ec681f3Smrgstd::vector<uint16_t> dead_code_analysis(Program* program); 21607ec681f3Smrgvoid dominator_tree(Program* program); 21617ec681f3Smrgvoid insert_exec_mask(Program* program); 21627ec681f3Smrgvoid value_numbering(Program* program); 21637ec681f3Smrgvoid optimize(Program* program); 21647ec681f3Smrgvoid optimize_postRA(Program* program); 21657ec681f3Smrgvoid setup_reduce_temp(Program* program); 21667ec681f3Smrgvoid lower_to_cssa(Program* program, live& live_vars); 21677ec681f3Smrgvoid register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, 21687ec681f3Smrg ra_test_policy = {}); 21697ec681f3Smrgvoid ssa_elimination(Program* program); 21707ec681f3Smrgvoid lower_to_hw_instr(Program* program); 21717ec681f3Smrgvoid schedule_program(Program* program, live& live_vars); 21727ec681f3Smrgvoid spill(Program* program, live& live_vars); 21737ec681f3Smrgvoid insert_wait_states(Program* program); 21747ec681f3Smrgvoid insert_NOPs(Program* program); 21757ec681f3Smrgvoid form_hard_clauses(Program* program); 21767ec681f3Smrgunsigned emit_program(Program* program, std::vector<uint32_t>& code); 21777ec681f3Smrg/** 21787ec681f3Smrg * Returns true if print_asm can disassemble the given program for the current build/runtime 21797ec681f3Smrg * configuration 21807ec681f3Smrg */ 21817ec681f3Smrgbool check_print_asm_support(Program* program); 21827ec681f3Smrgbool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output); 21837ec681f3Smrgbool validate_ir(Program* program); 21847ec681f3Smrgbool validate_ra(Program* program); 21857ec681f3Smrg#ifndef NDEBUG 21867ec681f3Smrgvoid perfwarn(Program* program, bool cond, const char* msg, Instruction* instr = NULL); 21877ec681f3Smrg#else 21887ec681f3Smrg#define perfwarn(program, cond, msg, ...) \ 21897ec681f3Smrg do { \ 21907ec681f3Smrg } while (0) 21917ec681f3Smrg#endif 21927ec681f3Smrg 21937ec681f3Smrgvoid collect_presched_stats(Program* program); 21947ec681f3Smrgvoid collect_preasm_stats(Program* program); 21957ec681f3Smrgvoid collect_postasm_stats(Program* program, const std::vector<uint32_t>& code); 21967ec681f3Smrg 21977ec681f3Smrgenum print_flags { 21987ec681f3Smrg print_no_ssa = 0x1, 21997ec681f3Smrg print_perf_info = 0x2, 22007ec681f3Smrg print_kill = 0x4, 22017ec681f3Smrg print_live_vars = 0x8, 22027ec681f3Smrg}; 22037ec681f3Smrg 22047ec681f3Smrgvoid aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0); 22057ec681f3Smrgvoid aco_print_instr(const Instruction* instr, FILE* output, unsigned flags = 0); 22067ec681f3Smrgvoid aco_print_program(const Program* program, FILE* output, unsigned flags = 0); 22077ec681f3Smrgvoid aco_print_program(const Program* program, FILE* output, const live& live_vars, 22087ec681f3Smrg unsigned flags = 0); 22097ec681f3Smrg 22107ec681f3Smrgvoid _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...); 22117ec681f3Smrgvoid _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...); 22127ec681f3Smrg 22137ec681f3Smrg#define aco_perfwarn(program, ...) _aco_perfwarn(program, __FILE__, __LINE__, __VA_ARGS__) 22147ec681f3Smrg#define aco_err(program, ...) _aco_err(program, __FILE__, __LINE__, __VA_ARGS__) 22157ec681f3Smrg 22167ec681f3Smrg/* utilities for dealing with register demand */ 22177ec681f3SmrgRegisterDemand get_live_changes(aco_ptr<Instruction>& instr); 22187ec681f3SmrgRegisterDemand get_temp_registers(aco_ptr<Instruction>& instr); 22197ec681f3SmrgRegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, 22207ec681f3Smrg aco_ptr<Instruction>& instr_before); 22217ec681f3Smrg 22227ec681f3Smrg/* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */ 22237ec681f3Smrguint16_t get_extra_sgprs(Program* program); 22247ec681f3Smrg 22257ec681f3Smrg/* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */ 22267ec681f3Smrguint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs); 22277ec681f3Smrguint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs); 22287ec681f3Smrg 22297ec681f3Smrg/* return number of addressable sgprs/vgprs for max_waves */ 22307ec681f3Smrguint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves); 22317ec681f3Smrguint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves); 22327ec681f3Smrg 22337ec681f3Smrgtypedef struct { 22347ec681f3Smrg const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)]; 22357ec681f3Smrg const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)]; 22367ec681f3Smrg const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)]; 22377ec681f3Smrg const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers; 22387ec681f3Smrg const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers; 22397ec681f3Smrg const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic; 22407ec681f3Smrg const char* name[static_cast<int>(aco_opcode::num_opcodes)]; 22417ec681f3Smrg const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)]; 22427ec681f3Smrg /* sizes used for input/output modifiers and constants */ 22437ec681f3Smrg const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)]; 22447ec681f3Smrg const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)]; 22457ec681f3Smrg} Info; 22467ec681f3Smrg 22477ec681f3Smrgextern const Info instr_info; 22487ec681f3Smrg 22497ec681f3Smrg} // namespace aco 22507ec681f3Smrg 22517ec681f3Smrg#endif /* ACO_IR_H */ 2252