17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2020 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg */ 247ec681f3Smrg#include "helpers.h" 257ec681f3Smrg 267ec681f3Smrgusing namespace aco; 277ec681f3Smrg 287ec681f3SmrgBEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands) 297ec681f3Smrg /* Registers of operands should be "recycled" for the output. But if the 307ec681f3Smrg * input is smaller than the output, that's not generally possible. The 317ec681f3Smrg * first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0 327ec681f3Smrg * while the lower 16 bits are still live, so the output must be stored in 337ec681f3Smrg * a register other than v0. For the second v_cvt_f32_f16, the original 347ec681f3Smrg * value stored in v0 is no longer used and hence it's safe to store the 357ec681f3Smrg * result in v0. 367ec681f3Smrg */ 377ec681f3Smrg 387ec681f3Smrg for (chip_class cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (chip_class)((unsigned)cc + 1)) { 397ec681f3Smrg for (bool pessimistic : { false, true }) { 407ec681f3Smrg const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic"; 417ec681f3Smrg 427ec681f3Smrg //>> v1: %_:v[#a] = p_startpgm 437ec681f3Smrg if (!setup_cs("v1", (chip_class)cc, CHIP_UNKNOWN, subvariant)) 447ec681f3Smrg return; 457ec681f3Smrg 467ec681f3Smrg //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a] 477ec681f3Smrg Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]); 487ec681f3Smrg 497ec681f3Smrg //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1 507ec681f3Smrg //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16] 517ec681f3Smrg //; success = (b != a) 527ec681f3Smrg auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp()); 537ec681f3Smrg auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp()); 547ec681f3Smrg writeout(0, result1); 557ec681f3Smrg writeout(1, result2); 567ec681f3Smrg 577ec681f3Smrg finish_ra_test(ra_test_policy { pessimistic }); 587ec681f3Smrg } 597ec681f3Smrg } 607ec681f3SmrgEND_TEST 617ec681f3Smrg 627ec681f3SmrgBEGIN_TEST(regalloc.32bit_partial_write) 637ec681f3Smrg //>> v1: %_:v[0] = p_startpgm 647ec681f3Smrg if (!setup_cs("v1", GFX10)) 657ec681f3Smrg return; 667ec681f3Smrg 677ec681f3Smrg /* ensure high 16 bits are occupied */ 687ec681f3Smrg //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0] 697ec681f3Smrg Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp(); 707ec681f3Smrg 717ec681f3Smrg /* This test checks if this instruction uses SDWA. */ 727ec681f3Smrg //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword 737ec681f3Smrg Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero()); 747ec681f3Smrg 757ec681f3Smrg //! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32] 767ec681f3Smrg bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi); 777ec681f3Smrg 787ec681f3Smrg finish_ra_test(ra_test_policy()); 797ec681f3SmrgEND_TEST 807ec681f3Smrg 817ec681f3SmrgBEGIN_TEST(regalloc.precolor.swap) 827ec681f3Smrg //>> s2: %op0:s[0-1] = p_startpgm 837ec681f3Smrg if (!setup_cs("s2", GFX10)) 847ec681f3Smrg return; 857ec681f3Smrg 867ec681f3Smrg program->dev.sgpr_limit = 4; 877ec681f3Smrg 887ec681f3Smrg //! s2: %op1:s[2-3] = p_unit_test 897ec681f3Smrg Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2)); 907ec681f3Smrg 917ec681f3Smrg //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1] 927ec681f3Smrg //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1] 937ec681f3Smrg Operand op(inputs[0]); 947ec681f3Smrg op.setFixed(PhysReg(2)); 957ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, op, op1); 967ec681f3Smrg 977ec681f3Smrg finish_ra_test(ra_test_policy()); 987ec681f3SmrgEND_TEST 997ec681f3Smrg 1007ec681f3SmrgBEGIN_TEST(regalloc.precolor.blocking_vector) 1017ec681f3Smrg //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm 1027ec681f3Smrg if (!setup_cs("s2 s1", GFX10)) 1037ec681f3Smrg return; 1047ec681f3Smrg 1057ec681f3Smrg //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2] 1067ec681f3Smrg //! p_unit_test %tmp1_2:s[1] 1077ec681f3Smrg Operand op(inputs[1]); 1087ec681f3Smrg op.setFixed(PhysReg(1)); 1097ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, op); 1107ec681f3Smrg 1117ec681f3Smrg //! p_unit_test %tmp0_2:s[2-3] 1127ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, inputs[0]); 1137ec681f3Smrg 1147ec681f3Smrg finish_ra_test(ra_test_policy()); 1157ec681f3SmrgEND_TEST 1167ec681f3Smrg 1177ec681f3SmrgBEGIN_TEST(regalloc.precolor.vector.test) 1187ec681f3Smrg //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm 1197ec681f3Smrg if (!setup_cs("s2 s1 s1", GFX10)) 1207ec681f3Smrg return; 1217ec681f3Smrg 1227ec681f3Smrg //! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1] 1237ec681f3Smrg //! p_unit_test %tmp0_2:s[2-3] 1247ec681f3Smrg Operand op(inputs[0]); 1257ec681f3Smrg op.setFixed(PhysReg(2)); 1267ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, op); 1277ec681f3Smrg 1287ec681f3Smrg //! p_unit_test %tmp2_2:s[0] 1297ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, inputs[2]); 1307ec681f3Smrg 1317ec681f3Smrg finish_ra_test(ra_test_policy()); 1327ec681f3SmrgEND_TEST 1337ec681f3Smrg 1347ec681f3SmrgBEGIN_TEST(regalloc.precolor.vector.collect) 1357ec681f3Smrg //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm 1367ec681f3Smrg if (!setup_cs("s2 s1 s1", GFX10)) 1377ec681f3Smrg return; 1387ec681f3Smrg 1397ec681f3Smrg //! s1: %tmp2_2:s[0], s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp1:s[2], %tmp0:s[0-1] 1407ec681f3Smrg //! p_unit_test %tmp0_2:s[2-3] 1417ec681f3Smrg Operand op(inputs[0]); 1427ec681f3Smrg op.setFixed(PhysReg(2)); 1437ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, op); 1447ec681f3Smrg 1457ec681f3Smrg //! p_unit_test %tmp1_2:s[1], %tmp2_2:s[0] 1467ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]); 1477ec681f3Smrg 1487ec681f3Smrg finish_ra_test(ra_test_policy()); 1497ec681f3SmrgEND_TEST 1507ec681f3Smrg 1517ec681f3SmrgBEGIN_TEST(regalloc.scratch_sgpr.create_vector) 1527ec681f3Smrg if (!setup_cs("v1 s1", GFX7)) 1537ec681f3Smrg return; 1547ec681f3Smrg 1557ec681f3Smrg Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::zero()); 1567ec681f3Smrg 1577ec681f3Smrg //>> v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24] 1587ec681f3Smrg //! s1: %0:s[1] = s_mov_b32 0x1000001 1597ec681f3Smrg //! v1: %0:v[0] = v_mul_lo_u32 %0:s[1], %_:v[0][0:8] 1607ec681f3Smrg bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand(v3b), Operand(tmp)); 1617ec681f3Smrg 1627ec681f3Smrg //! p_unit_test %_:s[0] 1637ec681f3Smrg //! s_endpgm 1647ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, inputs[1]); 1657ec681f3Smrg 1667ec681f3Smrg finish_ra_test(ra_test_policy(), true); 1677ec681f3SmrgEND_TEST 1687ec681f3Smrg 1697ec681f3SmrgBEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand) 1707ec681f3Smrg if (!setup_cs("v2 s1", GFX7)) 1717ec681f3Smrg return; 1727ec681f3Smrg 1737ec681f3Smrg Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(4u)); 1747ec681f3Smrg 1757ec681f3Smrg //>> v1: %0:v[0] = v_mov_b32 %_:s[0] 1767ec681f3Smrg //! v3b: %0:v[1][0:24] = v_and_b32 0xffffff, %0:v[1][0:24] 1777ec681f3Smrg //! s1: %0:s[1] = s_mov_b32 0x1000001 1787ec681f3Smrg //! v1: %0:v[1] = v_mul_lo_u32 %0:s[1], %_:v[1][0:8] 1797ec681f3Smrg bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[1], Operand(v3b), Operand(tmp)); 1807ec681f3Smrg 1817ec681f3Smrg //! p_unit_test %_:s[0] 1827ec681f3Smrg //! s_endpgm 1837ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, inputs[1]); 1847ec681f3Smrg 1857ec681f3Smrg finish_ra_test(ra_test_policy(), true); 1867ec681f3SmrgEND_TEST 1877ec681f3Smrg 1887ec681f3SmrgBEGIN_TEST(regalloc.linear_vgpr.live_range_split.fixed_def) 1897ec681f3Smrg //>> p_startpgm 1907ec681f3Smrg if (!setup_cs("", GFX10)) 1917ec681f3Smrg return; 1927ec681f3Smrg 1937ec681f3Smrg PhysReg reg_v0{256}; 1947ec681f3Smrg PhysReg reg_v1{257}; 1957ec681f3Smrg 1967ec681f3Smrg //! lv1: %tmp1:v[0] = p_unit_test 1977ec681f3Smrg Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0)); 1987ec681f3Smrg 1997ec681f3Smrg //! lv1: %tmp2:v[1] = p_parallelcopy %tmp1:v[0] 2007ec681f3Smrg //! v1: %_:v[0] = p_unit_test 2017ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, Definition(reg_v0, v1)); 2027ec681f3Smrg 2037ec681f3Smrg //! p_unit_test %tmp2:v[1] 2047ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, tmp); 2057ec681f3Smrg 2067ec681f3Smrg finish_ra_test(ra_test_policy()); 2077ec681f3SmrgEND_TEST 2087ec681f3Smrg 2097ec681f3SmrgBEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl) 2107ec681f3Smrg //>> p_startpgm 2117ec681f3Smrg if (!setup_cs("", GFX10)) 2127ec681f3Smrg return; 2137ec681f3Smrg 2147ec681f3Smrg program->dev.vgpr_limit = 3; 2157ec681f3Smrg 2167ec681f3Smrg PhysReg reg_v1{257}; 2177ec681f3Smrg 2187ec681f3Smrg //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test 2197ec681f3Smrg Temp s0_tmp = bld.tmp(s1); 2207ec681f3Smrg Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1)); 2217ec681f3Smrg 2227ec681f3Smrg //! lv1: %tmp1:v[1] = p_unit_test 2237ec681f3Smrg Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1)); 2247ec681f3Smrg 2257ec681f3Smrg //! lv1: %tmp2:v[2] = p_parallelcopy %tmp1:v[1] 2267ec681f3Smrg //! v2: %_:v[0-1] = p_unit_test 2277ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, bld.def(v2)); 2287ec681f3Smrg 2297ec681f3Smrg //! p_unit_test %tmp2:v[2], %scc_tmp:scc, %1:s[0] 2307ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, tmp, scc_tmp, s0_tmp); 2317ec681f3Smrg 2327ec681f3Smrg finish_ra_test(ra_test_policy()); 2337ec681f3Smrg 2347ec681f3Smrg //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1 2357ec681f3Smrg Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo(); 2367ec681f3Smrg aco_print_instr(¶llelcopy, output); 2377ec681f3Smrg fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg()); 2387ec681f3SmrgEND_TEST 2397ec681f3Smrg 2407ec681f3SmrgBEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies) 2417ec681f3Smrg //>> p_startpgm 2427ec681f3Smrg if (!setup_cs("", GFX10)) 2437ec681f3Smrg return; 2447ec681f3Smrg 2457ec681f3Smrg program->dev.vgpr_limit = 6; 2467ec681f3Smrg 2477ec681f3Smrg PhysReg reg_v2{258}; 2487ec681f3Smrg PhysReg reg_v4{260}; 2497ec681f3Smrg 2507ec681f3Smrg //! lv1: %lin_tmp1:v[4] = p_unit_test 2517ec681f3Smrg Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v4)); 2527ec681f3Smrg //! v2: %log_tmp1:v[2-3] = p_unit_test 2537ec681f3Smrg Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, reg_v2)); 2547ec681f3Smrg 2557ec681f3Smrg //! lv1: %lin_tmp2:v[0], v2: %log_tmp2:v[4-5] = p_parallelcopy %lin_tmp1:v[4], %log_tmp1:v[2-3] 2567ec681f3Smrg //! v3: %_:v[1-3] = p_unit_test 2577ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, bld.def(v3)); 2587ec681f3Smrg 2597ec681f3Smrg //! p_unit_test %log_tmp2:v[4-5], %lin_tmp2:v[0] 2607ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, log_tmp, lin_tmp); 2617ec681f3Smrg 2627ec681f3Smrg finish_ra_test(ra_test_policy()); 2637ec681f3SmrgEND_TEST 2647ec681f3Smrg 2657ec681f3SmrgBEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_create_vector) 2667ec681f3Smrg //>> p_startpgm 2677ec681f3Smrg if (!setup_cs("", GFX10)) 2687ec681f3Smrg return; 2697ec681f3Smrg 2707ec681f3Smrg program->dev.vgpr_limit = 4; 2717ec681f3Smrg 2727ec681f3Smrg PhysReg reg_v0{256}; 2737ec681f3Smrg PhysReg reg_v1{257}; 2747ec681f3Smrg 2757ec681f3Smrg //! lv1: %lin_tmp1:v[0] = p_unit_test 2767ec681f3Smrg Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0)); 2777ec681f3Smrg //! v1: %log_tmp:v[1] = p_unit_test 2787ec681f3Smrg Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, reg_v1)); 2797ec681f3Smrg 2807ec681f3Smrg //! lv1: %lin_tmp2:v[2] = p_parallelcopy %lin_tmp1:v[0] 2817ec681f3Smrg //! v2: %_:v[0-1] = p_create_vector v1: undef, %log_tmp:v[1] 2827ec681f3Smrg bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(v1), log_tmp); 2837ec681f3Smrg 2847ec681f3Smrg //! p_unit_test %lin_tmp2:v[2] 2857ec681f3Smrg bld.pseudo(aco_opcode::p_unit_test, lin_tmp); 2867ec681f3Smrg 2877ec681f3Smrg finish_ra_test(ra_test_policy()); 2887ec681f3SmrgEND_TEST 289