/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
247ec681f3Smrg#include "helpers.h"
257ec681f3Smrg
267ec681f3Smrgusing namespace aco;
277ec681f3Smrg
287ec681f3SmrgBEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
297ec681f3Smrg   /* Registers of operands should be "recycled" for the output. But if the
307ec681f3Smrg    * input is smaller than the output, that's not generally possible. The
317ec681f3Smrg    * first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0
327ec681f3Smrg    * while the lower 16 bits are still live, so the output must be stored in
337ec681f3Smrg    * a register other than v0. For the second v_cvt_f32_f16, the original
347ec681f3Smrg    * value stored in v0 is no longer used and hence it's safe to store the
357ec681f3Smrg    * result in v0.
367ec681f3Smrg    */
377ec681f3Smrg
387ec681f3Smrg   for (chip_class cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (chip_class)((unsigned)cc + 1)) {
397ec681f3Smrg      for (bool pessimistic : { false, true }) {
407ec681f3Smrg         const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
417ec681f3Smrg
427ec681f3Smrg         //>> v1: %_:v[#a] = p_startpgm
437ec681f3Smrg         if (!setup_cs("v1", (chip_class)cc, CHIP_UNKNOWN, subvariant))
447ec681f3Smrg            return;
457ec681f3Smrg
467ec681f3Smrg         //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
477ec681f3Smrg         Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
487ec681f3Smrg
497ec681f3Smrg         //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
507ec681f3Smrg         //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
517ec681f3Smrg         //; success = (b != a)
527ec681f3Smrg         auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp());
537ec681f3Smrg         auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp());
547ec681f3Smrg         writeout(0, result1);
557ec681f3Smrg         writeout(1, result2);
567ec681f3Smrg
577ec681f3Smrg         finish_ra_test(ra_test_policy { pessimistic });
587ec681f3Smrg      }
597ec681f3Smrg   }
607ec681f3SmrgEND_TEST
617ec681f3Smrg
627ec681f3SmrgBEGIN_TEST(regalloc.32bit_partial_write)
637ec681f3Smrg   //>> v1: %_:v[0] = p_startpgm
647ec681f3Smrg   if (!setup_cs("v1", GFX10))
657ec681f3Smrg      return;
667ec681f3Smrg
677ec681f3Smrg   /* ensure high 16 bits are occupied */
687ec681f3Smrg   //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
697ec681f3Smrg   Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
707ec681f3Smrg
717ec681f3Smrg   /* This test checks if this instruction uses SDWA. */
727ec681f3Smrg   //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
737ec681f3Smrg   Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero());
747ec681f3Smrg
757ec681f3Smrg   //! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]
767ec681f3Smrg   bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);
777ec681f3Smrg
787ec681f3Smrg   finish_ra_test(ra_test_policy());
797ec681f3SmrgEND_TEST
807ec681f3Smrg
817ec681f3SmrgBEGIN_TEST(regalloc.precolor.swap)
827ec681f3Smrg   //>> s2: %op0:s[0-1] = p_startpgm
837ec681f3Smrg   if (!setup_cs("s2", GFX10))
847ec681f3Smrg      return;
857ec681f3Smrg
867ec681f3Smrg   program->dev.sgpr_limit = 4;
877ec681f3Smrg
887ec681f3Smrg   //! s2: %op1:s[2-3] = p_unit_test
897ec681f3Smrg   Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
907ec681f3Smrg
917ec681f3Smrg   //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
927ec681f3Smrg   //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
937ec681f3Smrg   Operand op(inputs[0]);
947ec681f3Smrg   op.setFixed(PhysReg(2));
957ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, op, op1);
967ec681f3Smrg
977ec681f3Smrg   finish_ra_test(ra_test_policy());
987ec681f3SmrgEND_TEST
997ec681f3Smrg
1007ec681f3SmrgBEGIN_TEST(regalloc.precolor.blocking_vector)
1017ec681f3Smrg   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm
1027ec681f3Smrg   if (!setup_cs("s2 s1", GFX10))
1037ec681f3Smrg      return;
1047ec681f3Smrg
1057ec681f3Smrg   //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2]
1067ec681f3Smrg   //! p_unit_test %tmp1_2:s[1]
1077ec681f3Smrg   Operand op(inputs[1]);
1087ec681f3Smrg   op.setFixed(PhysReg(1));
1097ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, op);
1107ec681f3Smrg
1117ec681f3Smrg   //! p_unit_test %tmp0_2:s[2-3]
1127ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, inputs[0]);
1137ec681f3Smrg
1147ec681f3Smrg   finish_ra_test(ra_test_policy());
1157ec681f3SmrgEND_TEST
1167ec681f3Smrg
1177ec681f3SmrgBEGIN_TEST(regalloc.precolor.vector.test)
1187ec681f3Smrg   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
1197ec681f3Smrg   if (!setup_cs("s2 s1 s1", GFX10))
1207ec681f3Smrg      return;
1217ec681f3Smrg
1227ec681f3Smrg   //! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1]
1237ec681f3Smrg   //! p_unit_test %tmp0_2:s[2-3]
1247ec681f3Smrg   Operand op(inputs[0]);
1257ec681f3Smrg   op.setFixed(PhysReg(2));
1267ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, op);
1277ec681f3Smrg
1287ec681f3Smrg   //! p_unit_test %tmp2_2:s[0]
1297ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, inputs[2]);
1307ec681f3Smrg
1317ec681f3Smrg   finish_ra_test(ra_test_policy());
1327ec681f3SmrgEND_TEST
1337ec681f3Smrg
1347ec681f3SmrgBEGIN_TEST(regalloc.precolor.vector.collect)
1357ec681f3Smrg   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
1367ec681f3Smrg   if (!setup_cs("s2 s1 s1", GFX10))
1377ec681f3Smrg      return;
1387ec681f3Smrg
1397ec681f3Smrg   //! s1: %tmp2_2:s[0], s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp1:s[2], %tmp0:s[0-1]
1407ec681f3Smrg   //! p_unit_test %tmp0_2:s[2-3]
1417ec681f3Smrg   Operand op(inputs[0]);
1427ec681f3Smrg   op.setFixed(PhysReg(2));
1437ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, op);
1447ec681f3Smrg
1457ec681f3Smrg   //! p_unit_test %tmp1_2:s[1], %tmp2_2:s[0]
1467ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]);
1477ec681f3Smrg
1487ec681f3Smrg   finish_ra_test(ra_test_policy());
1497ec681f3SmrgEND_TEST
1507ec681f3Smrg
1517ec681f3SmrgBEGIN_TEST(regalloc.scratch_sgpr.create_vector)
1527ec681f3Smrg   if (!setup_cs("v1 s1", GFX7))
1537ec681f3Smrg      return;
1547ec681f3Smrg
1557ec681f3Smrg   Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::zero());
1567ec681f3Smrg
1577ec681f3Smrg   //>> v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
1587ec681f3Smrg   //! s1: %0:s[1] = s_mov_b32 0x1000001
1597ec681f3Smrg   //! v1: %0:v[0] = v_mul_lo_u32 %0:s[1], %_:v[0][0:8]
1607ec681f3Smrg   bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand(v3b), Operand(tmp));
1617ec681f3Smrg
1627ec681f3Smrg   //! p_unit_test %_:s[0]
1637ec681f3Smrg   //! s_endpgm
1647ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, inputs[1]);
1657ec681f3Smrg
1667ec681f3Smrg   finish_ra_test(ra_test_policy(), true);
1677ec681f3SmrgEND_TEST
1687ec681f3Smrg
1697ec681f3SmrgBEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand)
1707ec681f3Smrg   if (!setup_cs("v2 s1", GFX7))
1717ec681f3Smrg      return;
1727ec681f3Smrg
1737ec681f3Smrg   Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(4u));
1747ec681f3Smrg
1757ec681f3Smrg   //>> v1: %0:v[0] = v_mov_b32 %_:s[0]
1767ec681f3Smrg   //! v3b: %0:v[1][0:24] = v_and_b32 0xffffff, %0:v[1][0:24]
1777ec681f3Smrg   //! s1: %0:s[1] = s_mov_b32 0x1000001
1787ec681f3Smrg   //! v1: %0:v[1] = v_mul_lo_u32 %0:s[1], %_:v[1][0:8]
1797ec681f3Smrg   bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[1], Operand(v3b), Operand(tmp));
1807ec681f3Smrg
1817ec681f3Smrg   //! p_unit_test %_:s[0]
1827ec681f3Smrg   //! s_endpgm
1837ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, inputs[1]);
1847ec681f3Smrg
1857ec681f3Smrg   finish_ra_test(ra_test_policy(), true);
1867ec681f3SmrgEND_TEST
1877ec681f3Smrg
1887ec681f3SmrgBEGIN_TEST(regalloc.linear_vgpr.live_range_split.fixed_def)
1897ec681f3Smrg   //>> p_startpgm
1907ec681f3Smrg   if (!setup_cs("", GFX10))
1917ec681f3Smrg      return;
1927ec681f3Smrg
1937ec681f3Smrg   PhysReg reg_v0{256};
1947ec681f3Smrg   PhysReg reg_v1{257};
1957ec681f3Smrg
1967ec681f3Smrg   //! lv1: %tmp1:v[0] = p_unit_test
1977ec681f3Smrg   Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0));
1987ec681f3Smrg
1997ec681f3Smrg   //! lv1: %tmp2:v[1] = p_parallelcopy %tmp1:v[0]
2007ec681f3Smrg   //! v1: %_:v[0] = p_unit_test
2017ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, Definition(reg_v0, v1));
2027ec681f3Smrg
2037ec681f3Smrg   //! p_unit_test %tmp2:v[1]
2047ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, tmp);
2057ec681f3Smrg
2067ec681f3Smrg   finish_ra_test(ra_test_policy());
2077ec681f3SmrgEND_TEST
2087ec681f3Smrg
2097ec681f3SmrgBEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
2107ec681f3Smrg   //>> p_startpgm
2117ec681f3Smrg   if (!setup_cs("", GFX10))
2127ec681f3Smrg      return;
2137ec681f3Smrg
2147ec681f3Smrg   program->dev.vgpr_limit = 3;
2157ec681f3Smrg
2167ec681f3Smrg   PhysReg reg_v1{257};
2177ec681f3Smrg
2187ec681f3Smrg   //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
2197ec681f3Smrg   Temp s0_tmp = bld.tmp(s1);
2207ec681f3Smrg   Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1));
2217ec681f3Smrg
2227ec681f3Smrg   //! lv1: %tmp1:v[1] = p_unit_test
2237ec681f3Smrg   Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
2247ec681f3Smrg
2257ec681f3Smrg   //! lv1: %tmp2:v[2] = p_parallelcopy %tmp1:v[1]
2267ec681f3Smrg   //! v2: %_:v[0-1] = p_unit_test
2277ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, bld.def(v2));
2287ec681f3Smrg
2297ec681f3Smrg   //! p_unit_test %tmp2:v[2], %scc_tmp:scc, %1:s[0]
2307ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, tmp, scc_tmp, s0_tmp);
2317ec681f3Smrg
2327ec681f3Smrg   finish_ra_test(ra_test_policy());
2337ec681f3Smrg
2347ec681f3Smrg   //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
2357ec681f3Smrg   Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
2367ec681f3Smrg   aco_print_instr(&parallelcopy, output);
2377ec681f3Smrg   fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg());
2387ec681f3SmrgEND_TEST
2397ec681f3Smrg
2407ec681f3SmrgBEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
2417ec681f3Smrg   //>> p_startpgm
2427ec681f3Smrg   if (!setup_cs("", GFX10))
2437ec681f3Smrg      return;
2447ec681f3Smrg
2457ec681f3Smrg   program->dev.vgpr_limit = 6;
2467ec681f3Smrg
2477ec681f3Smrg   PhysReg reg_v2{258};
2487ec681f3Smrg   PhysReg reg_v4{260};
2497ec681f3Smrg
2507ec681f3Smrg   //! lv1: %lin_tmp1:v[4] = p_unit_test
2517ec681f3Smrg   Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v4));
2527ec681f3Smrg   //! v2: %log_tmp1:v[2-3] = p_unit_test
2537ec681f3Smrg   Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, reg_v2));
2547ec681f3Smrg
2557ec681f3Smrg   //! lv1: %lin_tmp2:v[0], v2: %log_tmp2:v[4-5] = p_parallelcopy %lin_tmp1:v[4], %log_tmp1:v[2-3]
2567ec681f3Smrg   //! v3: %_:v[1-3] = p_unit_test
2577ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, bld.def(v3));
2587ec681f3Smrg
2597ec681f3Smrg   //! p_unit_test %log_tmp2:v[4-5], %lin_tmp2:v[0]
2607ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, log_tmp, lin_tmp);
2617ec681f3Smrg
2627ec681f3Smrg   finish_ra_test(ra_test_policy());
2637ec681f3SmrgEND_TEST
2647ec681f3Smrg
2657ec681f3SmrgBEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_create_vector)
2667ec681f3Smrg   //>> p_startpgm
2677ec681f3Smrg   if (!setup_cs("", GFX10))
2687ec681f3Smrg      return;
2697ec681f3Smrg
2707ec681f3Smrg   program->dev.vgpr_limit = 4;
2717ec681f3Smrg
2727ec681f3Smrg   PhysReg reg_v0{256};
2737ec681f3Smrg   PhysReg reg_v1{257};
2747ec681f3Smrg
2757ec681f3Smrg   //! lv1: %lin_tmp1:v[0] = p_unit_test
2767ec681f3Smrg   Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0));
2777ec681f3Smrg   //! v1: %log_tmp:v[1] = p_unit_test
2787ec681f3Smrg   Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, reg_v1));
2797ec681f3Smrg
2807ec681f3Smrg   //! lv1: %lin_tmp2:v[2] = p_parallelcopy %lin_tmp1:v[0]
2817ec681f3Smrg   //! v2: %_:v[0-1] = p_create_vector v1: undef, %log_tmp:v[1]
2827ec681f3Smrg   bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(v1), log_tmp);
2837ec681f3Smrg
2847ec681f3Smrg   //! p_unit_test %lin_tmp2:v[2]
2857ec681f3Smrg   bld.pseudo(aco_opcode::p_unit_test, lin_tmp);
2867ec681f3Smrg
2877ec681f3Smrg   finish_ra_test(ra_test_policy());
2887ec681f3SmrgEND_TEST
289