17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2020 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg */ 247ec681f3Smrg#include "helpers.h" 257ec681f3Smrg 267ec681f3Smrgusing namespace aco; 277ec681f3Smrg 287ec681f3SmrgBEGIN_TEST(assembler.s_memtime) 297ec681f3Smrg for (unsigned i = GFX6; i <= GFX10; i++) { 307ec681f3Smrg if (!setup_cs(NULL, (chip_class)i)) 317ec681f3Smrg continue; 327ec681f3Smrg 337ec681f3Smrg //~gfx[6-7]>> c7800000 347ec681f3Smrg //~gfx[6-7]! bf810000 357ec681f3Smrg //~gfx[8-9]>> s_memtime s[0:1] ; c0900000 00000000 367ec681f3Smrg //~gfx10>> s_memtime s[0:1] ; f4900000 fa000000 377ec681f3Smrg bld.smem(aco_opcode::s_memtime, bld.def(s2)).def(0).setFixed(PhysReg{0}); 387ec681f3Smrg 397ec681f3Smrg finish_assembler_test(); 407ec681f3Smrg } 417ec681f3SmrgEND_TEST 427ec681f3Smrg 437ec681f3SmrgBEGIN_TEST(assembler.branch_3f) 447ec681f3Smrg if (!setup_cs(NULL, (chip_class)GFX10)) 457ec681f3Smrg return; 467ec681f3Smrg 477ec681f3Smrg //! BB0: 487ec681f3Smrg //! s_branch BB1 ; bf820040 497ec681f3Smrg //! s_nop 0 ; bf800000 507ec681f3Smrg bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1); 517ec681f3Smrg 527ec681f3Smrg for (unsigned i = 0; i < 0x3f; i++) 537ec681f3Smrg bld.vop1(aco_opcode::v_nop); 547ec681f3Smrg 557ec681f3Smrg bld.reset(program->create_and_insert_block()); 567ec681f3Smrg 577ec681f3Smrg program->blocks[1].linear_preds.push_back(0u); 587ec681f3Smrg 597ec681f3Smrg finish_assembler_test(); 607ec681f3SmrgEND_TEST 617ec681f3Smrg 627ec681f3SmrgBEGIN_TEST(assembler.long_jump.unconditional_forwards) 637ec681f3Smrg if (!setup_cs(NULL, (chip_class)GFX10)) 647ec681f3Smrg return; 657ec681f3Smrg 667ec681f3Smrg //!BB0: 677ec681f3Smrg //! s_getpc_b64 s[0:1] ; be801f00 687ec681f3Smrg //! s_addc_u32 s0, s0, 0x20018 ; 8200ff00 00020018 697ec681f3Smrg //! s_addc_u32 s1, s1, 0 ; 82018001 707ec681f3Smrg //! s_bitcmp1_b32 s0, 0 ; bf0d8000 717ec681f3Smrg //! s_bitset0_b32 s0, 0 ; be801b80 727ec681f3Smrg //! s_setpc_b64 s[0:1] ; be802000 737ec681f3Smrg bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 747ec681f3Smrg 757ec681f3Smrg bld.reset(program->create_and_insert_block()); 767ec681f3Smrg 777ec681f3Smrg //! s_nop 0 ; bf800000 787ec681f3Smrg //!(then repeated 32767 times) 797ec681f3Smrg for (unsigned i = 0; i < INT16_MAX + 1; i++) 807ec681f3Smrg bld.sopp(aco_opcode::s_nop, -1, 0); 817ec681f3Smrg 827ec681f3Smrg //! BB2: 837ec681f3Smrg //! s_endpgm ; bf810000 847ec681f3Smrg bld.reset(program->create_and_insert_block()); 857ec681f3Smrg 867ec681f3Smrg program->blocks[2].linear_preds.push_back(0u); 877ec681f3Smrg program->blocks[2].linear_preds.push_back(1u); 887ec681f3Smrg 897ec681f3Smrg finish_assembler_test(); 907ec681f3SmrgEND_TEST 917ec681f3Smrg 927ec681f3SmrgBEGIN_TEST(assembler.long_jump.conditional_forwards) 937ec681f3Smrg if (!setup_cs(NULL, (chip_class)GFX10)) 947ec681f3Smrg return; 957ec681f3Smrg 967ec681f3Smrg //! BB0: 977ec681f3Smrg //! s_cbranch_scc1 BB1 ; bf850007 987ec681f3Smrg //! s_getpc_b64 s[0:1] ; be801f00 997ec681f3Smrg //! s_addc_u32 s0, s0, 0x20018 ; 8200ff00 00020018 1007ec681f3Smrg //! s_addc_u32 s1, s1, 0 ; 82018001 1017ec681f3Smrg //! s_bitcmp1_b32 s0, 0 ; bf0d8000 1027ec681f3Smrg //! s_bitset0_b32 s0, 0 ; be801b80 1037ec681f3Smrg //! s_setpc_b64 s[0:1] ; be802000 1047ec681f3Smrg bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2); 1057ec681f3Smrg 1067ec681f3Smrg bld.reset(program->create_and_insert_block()); 1077ec681f3Smrg 1087ec681f3Smrg //! BB1: 1097ec681f3Smrg //! s_nop 0 ; bf800000 1107ec681f3Smrg //!(then repeated 32767 times) 1117ec681f3Smrg for (unsigned i = 0; i < INT16_MAX + 1; i++) 1127ec681f3Smrg bld.sopp(aco_opcode::s_nop, -1, 0); 1137ec681f3Smrg 1147ec681f3Smrg //! BB2: 1157ec681f3Smrg //! s_endpgm ; bf810000 1167ec681f3Smrg bld.reset(program->create_and_insert_block()); 1177ec681f3Smrg 1187ec681f3Smrg program->blocks[1].linear_preds.push_back(0u); 1197ec681f3Smrg program->blocks[2].linear_preds.push_back(0u); 1207ec681f3Smrg program->blocks[2].linear_preds.push_back(1u); 1217ec681f3Smrg 1227ec681f3Smrg finish_assembler_test(); 1237ec681f3SmrgEND_TEST 1247ec681f3Smrg 1257ec681f3SmrgBEGIN_TEST(assembler.long_jump.unconditional_backwards) 1267ec681f3Smrg if (!setup_cs(NULL, (chip_class)GFX10)) 1277ec681f3Smrg return; 1287ec681f3Smrg 1297ec681f3Smrg //!BB0: 1307ec681f3Smrg //! s_nop 0 ; bf800000 1317ec681f3Smrg //!(then repeated 32767 times) 1327ec681f3Smrg for (unsigned i = 0; i < INT16_MAX + 1; i++) 1337ec681f3Smrg bld.sopp(aco_opcode::s_nop, -1, 0); 1347ec681f3Smrg 1357ec681f3Smrg //! s_getpc_b64 s[0:1] ; be801f00 1367ec681f3Smrg //! s_addc_u32 s0, s0, 0xfffdfffc ; 8200ff00 fffdfffc 1377ec681f3Smrg //! s_addc_u32 s1, s1, -1 ; 8201c101 1387ec681f3Smrg //! s_bitcmp1_b32 s0, 0 ; bf0d8000 1397ec681f3Smrg //! s_bitset0_b32 s0, 0 ; be801b80 1407ec681f3Smrg //! s_setpc_b64 s[0:1] ; be802000 1417ec681f3Smrg bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 0); 1427ec681f3Smrg 1437ec681f3Smrg //! BB1: 1447ec681f3Smrg //! s_endpgm ; bf810000 1457ec681f3Smrg bld.reset(program->create_and_insert_block()); 1467ec681f3Smrg 1477ec681f3Smrg program->blocks[0].linear_preds.push_back(0u); 1487ec681f3Smrg program->blocks[1].linear_preds.push_back(0u); 1497ec681f3Smrg 1507ec681f3Smrg finish_assembler_test(); 1517ec681f3SmrgEND_TEST 1527ec681f3Smrg 1537ec681f3SmrgBEGIN_TEST(assembler.long_jump.conditional_backwards) 1547ec681f3Smrg if (!setup_cs(NULL, (chip_class)GFX10)) 1557ec681f3Smrg return; 1567ec681f3Smrg 1577ec681f3Smrg //!BB0: 1587ec681f3Smrg //! s_nop 0 ; bf800000 1597ec681f3Smrg //!(then repeated 32767 times) 1607ec681f3Smrg for (unsigned i = 0; i < INT16_MAX + 1; i++) 1617ec681f3Smrg bld.sopp(aco_opcode::s_nop, -1, 0); 1627ec681f3Smrg 1637ec681f3Smrg //! s_cbranch_execz BB1 ; bf880007 1647ec681f3Smrg //! s_getpc_b64 s[0:1] ; be801f00 1657ec681f3Smrg //! s_addc_u32 s0, s0, 0xfffdfff8 ; 8200ff00 fffdfff8 1667ec681f3Smrg //! s_addc_u32 s1, s1, -1 ; 8201c101 1677ec681f3Smrg //! s_bitcmp1_b32 s0, 0 ; bf0d8000 1687ec681f3Smrg //! s_bitset0_b32 s0, 0 ; be801b80 1697ec681f3Smrg //! s_setpc_b64 s[0:1] ; be802000 1707ec681f3Smrg bld.sopp(aco_opcode::s_cbranch_execnz, Definition(PhysReg(0), s2), 0); 1717ec681f3Smrg 1727ec681f3Smrg //! BB1: 1737ec681f3Smrg //! s_endpgm ; bf810000 1747ec681f3Smrg bld.reset(program->create_and_insert_block()); 1757ec681f3Smrg 1767ec681f3Smrg program->blocks[0].linear_preds.push_back(0u); 1777ec681f3Smrg program->blocks[1].linear_preds.push_back(0u); 1787ec681f3Smrg 1797ec681f3Smrg finish_assembler_test(); 1807ec681f3SmrgEND_TEST 1817ec681f3Smrg 1827ec681f3SmrgBEGIN_TEST(assembler.long_jump.3f) 1837ec681f3Smrg if (!setup_cs(NULL, (chip_class)GFX10)) 1847ec681f3Smrg return; 1857ec681f3Smrg 1867ec681f3Smrg //! BB0: 1877ec681f3Smrg //! s_branch BB1 ; bf820040 1887ec681f3Smrg //! s_nop 0 ; bf800000 1897ec681f3Smrg bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1); 1907ec681f3Smrg 1917ec681f3Smrg for (unsigned i = 0; i < 0x3f - 7; i++) // a unconditional long jump is 7 dwords 1927ec681f3Smrg bld.vop1(aco_opcode::v_nop); 1937ec681f3Smrg bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 1947ec681f3Smrg 1957ec681f3Smrg bld.reset(program->create_and_insert_block()); 1967ec681f3Smrg for (unsigned i = 0; i < INT16_MAX + 1; i++) 1977ec681f3Smrg bld.vop1(aco_opcode::v_nop); 1987ec681f3Smrg bld.reset(program->create_and_insert_block()); 1997ec681f3Smrg 2007ec681f3Smrg program->blocks[1].linear_preds.push_back(0u); 2017ec681f3Smrg program->blocks[2].linear_preds.push_back(0u); 2027ec681f3Smrg program->blocks[2].linear_preds.push_back(1u); 2037ec681f3Smrg 2047ec681f3Smrg finish_assembler_test(); 2057ec681f3SmrgEND_TEST 2067ec681f3Smrg 2077ec681f3SmrgBEGIN_TEST(assembler.long_jump.constaddr) 2087ec681f3Smrg if (!setup_cs(NULL, (chip_class)GFX10)) 2097ec681f3Smrg return; 2107ec681f3Smrg 2117ec681f3Smrg //>> s_getpc_b64 s[0:1] ; be801f00 2127ec681f3Smrg bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 2137ec681f3Smrg 2147ec681f3Smrg bld.reset(program->create_and_insert_block()); 2157ec681f3Smrg 2167ec681f3Smrg for (unsigned i = 0; i < INT16_MAX + 1; i++) 2177ec681f3Smrg bld.sopp(aco_opcode::s_nop, -1, 0); 2187ec681f3Smrg 2197ec681f3Smrg bld.reset(program->create_and_insert_block()); 2207ec681f3Smrg 2217ec681f3Smrg //>> s_getpc_b64 s[0:1] ; be801f00 2227ec681f3Smrg //! s_add_u32 s0, s0, 0xe0 ; 8000ff00 000000e0 2237ec681f3Smrg bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero()); 2247ec681f3Smrg bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc), 2257ec681f3Smrg Operand(PhysReg(0), s1), Operand::zero()); 2267ec681f3Smrg 2277ec681f3Smrg program->blocks[2].linear_preds.push_back(0u); 2287ec681f3Smrg program->blocks[2].linear_preds.push_back(1u); 2297ec681f3Smrg 2307ec681f3Smrg finish_assembler_test(); 2317ec681f3SmrgEND_TEST 2327ec681f3Smrg 2337ec681f3SmrgBEGIN_TEST(assembler.v_add3) 2347ec681f3Smrg for (unsigned i = GFX9; i <= GFX10; i++) { 2357ec681f3Smrg if (!setup_cs(NULL, (chip_class)i)) 2367ec681f3Smrg continue; 2377ec681f3Smrg 2387ec681f3Smrg //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080 2397ec681f3Smrg //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080 2407ec681f3Smrg aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)}; 2417ec681f3Smrg add3->operands[0] = Operand::zero(); 2427ec681f3Smrg add3->operands[1] = Operand::zero(); 2437ec681f3Smrg add3->operands[2] = Operand::zero(); 2447ec681f3Smrg add3->definitions[0] = Definition(PhysReg(0), v1); 2457ec681f3Smrg bld.insert(std::move(add3)); 2467ec681f3Smrg 2477ec681f3Smrg finish_assembler_test(); 2487ec681f3Smrg } 2497ec681f3SmrgEND_TEST 2507ec681f3Smrg 2517ec681f3SmrgBEGIN_TEST(assembler.v_add3_clamp) 2527ec681f3Smrg for (unsigned i = GFX9; i <= GFX10; i++) { 2537ec681f3Smrg if (!setup_cs(NULL, (chip_class)i)) 2547ec681f3Smrg continue; 2557ec681f3Smrg 2567ec681f3Smrg //~gfx9>> integer addition + clamp ; d1ff8000 02010080 2577ec681f3Smrg //~gfx10>> integer addition + clamp ; d76d8000 02010080 2587ec681f3Smrg aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)}; 2597ec681f3Smrg add3->operands[0] = Operand::zero(); 2607ec681f3Smrg add3->operands[1] = Operand::zero(); 2617ec681f3Smrg add3->operands[2] = Operand::zero(); 2627ec681f3Smrg add3->definitions[0] = Definition(PhysReg(0), v1); 2637ec681f3Smrg add3->clamp = 1; 2647ec681f3Smrg bld.insert(std::move(add3)); 2657ec681f3Smrg 2667ec681f3Smrg finish_assembler_test(); 2677ec681f3Smrg } 2687ec681f3SmrgEND_TEST 269