17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2020 Valve Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg *
237ec681f3Smrg */
247ec681f3Smrg#include "helpers.h"
257ec681f3Smrg
267ec681f3Smrgusing namespace aco;
277ec681f3Smrg
287ec681f3SmrgBEGIN_TEST(assembler.s_memtime)
297ec681f3Smrg   for (unsigned i = GFX6; i <= GFX10; i++) {
307ec681f3Smrg      if (!setup_cs(NULL, (chip_class)i))
317ec681f3Smrg         continue;
327ec681f3Smrg
337ec681f3Smrg      //~gfx[6-7]>> c7800000
347ec681f3Smrg      //~gfx[6-7]!  bf810000
357ec681f3Smrg      //~gfx[8-9]>> s_memtime s[0:1] ; c0900000 00000000
367ec681f3Smrg      //~gfx10>> s_memtime s[0:1] ; f4900000 fa000000
377ec681f3Smrg      bld.smem(aco_opcode::s_memtime, bld.def(s2)).def(0).setFixed(PhysReg{0});
387ec681f3Smrg
397ec681f3Smrg      finish_assembler_test();
407ec681f3Smrg   }
417ec681f3SmrgEND_TEST
427ec681f3Smrg
437ec681f3SmrgBEGIN_TEST(assembler.branch_3f)
447ec681f3Smrg   if (!setup_cs(NULL, (chip_class)GFX10))
457ec681f3Smrg      return;
467ec681f3Smrg
477ec681f3Smrg   //! BB0:
487ec681f3Smrg   //! s_branch BB1                                                ; bf820040
497ec681f3Smrg   //! s_nop 0                                                     ; bf800000
507ec681f3Smrg   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
517ec681f3Smrg
527ec681f3Smrg   for (unsigned i = 0; i < 0x3f; i++)
537ec681f3Smrg      bld.vop1(aco_opcode::v_nop);
547ec681f3Smrg
557ec681f3Smrg   bld.reset(program->create_and_insert_block());
567ec681f3Smrg
577ec681f3Smrg   program->blocks[1].linear_preds.push_back(0u);
587ec681f3Smrg
597ec681f3Smrg   finish_assembler_test();
607ec681f3SmrgEND_TEST
617ec681f3Smrg
627ec681f3SmrgBEGIN_TEST(assembler.long_jump.unconditional_forwards)
637ec681f3Smrg   if (!setup_cs(NULL, (chip_class)GFX10))
647ec681f3Smrg      return;
657ec681f3Smrg
667ec681f3Smrg   //!BB0:
677ec681f3Smrg   //! s_getpc_b64 s[0:1]                                          ; be801f00
687ec681f3Smrg   //! s_addc_u32 s0, s0, 0x20018                                  ; 8200ff00 00020018
697ec681f3Smrg   //! s_addc_u32 s1, s1, 0                                        ; 82018001
707ec681f3Smrg   //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
717ec681f3Smrg   //! s_bitset0_b32 s0, 0                                         ; be801b80
727ec681f3Smrg   //! s_setpc_b64 s[0:1]                                          ; be802000
737ec681f3Smrg   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
747ec681f3Smrg
757ec681f3Smrg   bld.reset(program->create_and_insert_block());
767ec681f3Smrg
777ec681f3Smrg   //! s_nop 0                                                     ; bf800000
787ec681f3Smrg   //!(then repeated 32767 times)
797ec681f3Smrg   for (unsigned i = 0; i < INT16_MAX + 1; i++)
807ec681f3Smrg      bld.sopp(aco_opcode::s_nop, -1, 0);
817ec681f3Smrg
827ec681f3Smrg   //! BB2:
837ec681f3Smrg   //! s_endpgm                                                    ; bf810000
847ec681f3Smrg   bld.reset(program->create_and_insert_block());
857ec681f3Smrg
867ec681f3Smrg   program->blocks[2].linear_preds.push_back(0u);
877ec681f3Smrg   program->blocks[2].linear_preds.push_back(1u);
887ec681f3Smrg
897ec681f3Smrg   finish_assembler_test();
907ec681f3SmrgEND_TEST
917ec681f3Smrg
927ec681f3SmrgBEGIN_TEST(assembler.long_jump.conditional_forwards)
937ec681f3Smrg   if (!setup_cs(NULL, (chip_class)GFX10))
947ec681f3Smrg      return;
957ec681f3Smrg
967ec681f3Smrg   //! BB0:
977ec681f3Smrg   //! s_cbranch_scc1 BB1                                          ; bf850007
987ec681f3Smrg   //! s_getpc_b64 s[0:1]                                          ; be801f00
997ec681f3Smrg   //! s_addc_u32 s0, s0, 0x20018                                  ; 8200ff00 00020018
1007ec681f3Smrg   //! s_addc_u32 s1, s1, 0                                        ; 82018001
1017ec681f3Smrg   //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
1027ec681f3Smrg   //! s_bitset0_b32 s0, 0                                         ; be801b80
1037ec681f3Smrg   //! s_setpc_b64 s[0:1]                                          ; be802000
1047ec681f3Smrg   bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2);
1057ec681f3Smrg
1067ec681f3Smrg   bld.reset(program->create_and_insert_block());
1077ec681f3Smrg
1087ec681f3Smrg   //! BB1:
1097ec681f3Smrg   //! s_nop 0 ; bf800000
1107ec681f3Smrg   //!(then repeated 32767 times)
1117ec681f3Smrg   for (unsigned i = 0; i < INT16_MAX + 1; i++)
1127ec681f3Smrg      bld.sopp(aco_opcode::s_nop, -1, 0);
1137ec681f3Smrg
1147ec681f3Smrg   //! BB2:
1157ec681f3Smrg   //! s_endpgm                                                    ; bf810000
1167ec681f3Smrg   bld.reset(program->create_and_insert_block());
1177ec681f3Smrg
1187ec681f3Smrg   program->blocks[1].linear_preds.push_back(0u);
1197ec681f3Smrg   program->blocks[2].linear_preds.push_back(0u);
1207ec681f3Smrg   program->blocks[2].linear_preds.push_back(1u);
1217ec681f3Smrg
1227ec681f3Smrg   finish_assembler_test();
1237ec681f3SmrgEND_TEST
1247ec681f3Smrg
1257ec681f3SmrgBEGIN_TEST(assembler.long_jump.unconditional_backwards)
1267ec681f3Smrg   if (!setup_cs(NULL, (chip_class)GFX10))
1277ec681f3Smrg      return;
1287ec681f3Smrg
1297ec681f3Smrg   //!BB0:
1307ec681f3Smrg   //! s_nop 0                                                     ; bf800000
1317ec681f3Smrg   //!(then repeated 32767 times)
1327ec681f3Smrg   for (unsigned i = 0; i < INT16_MAX + 1; i++)
1337ec681f3Smrg      bld.sopp(aco_opcode::s_nop, -1, 0);
1347ec681f3Smrg
1357ec681f3Smrg   //! s_getpc_b64 s[0:1]                                          ; be801f00
1367ec681f3Smrg   //! s_addc_u32 s0, s0, 0xfffdfffc                               ; 8200ff00 fffdfffc
1377ec681f3Smrg   //! s_addc_u32 s1, s1, -1                                       ; 8201c101
1387ec681f3Smrg   //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
1397ec681f3Smrg   //! s_bitset0_b32 s0, 0                                         ; be801b80
1407ec681f3Smrg   //! s_setpc_b64 s[0:1]                                          ; be802000
1417ec681f3Smrg   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 0);
1427ec681f3Smrg
1437ec681f3Smrg   //! BB1:
1447ec681f3Smrg   //! s_endpgm                                                    ; bf810000
1457ec681f3Smrg   bld.reset(program->create_and_insert_block());
1467ec681f3Smrg
1477ec681f3Smrg   program->blocks[0].linear_preds.push_back(0u);
1487ec681f3Smrg   program->blocks[1].linear_preds.push_back(0u);
1497ec681f3Smrg
1507ec681f3Smrg   finish_assembler_test();
1517ec681f3SmrgEND_TEST
1527ec681f3Smrg
1537ec681f3SmrgBEGIN_TEST(assembler.long_jump.conditional_backwards)
1547ec681f3Smrg   if (!setup_cs(NULL, (chip_class)GFX10))
1557ec681f3Smrg      return;
1567ec681f3Smrg
1577ec681f3Smrg   //!BB0:
1587ec681f3Smrg   //! s_nop 0                                                     ; bf800000
1597ec681f3Smrg   //!(then repeated 32767 times)
1607ec681f3Smrg   for (unsigned i = 0; i < INT16_MAX + 1; i++)
1617ec681f3Smrg      bld.sopp(aco_opcode::s_nop, -1, 0);
1627ec681f3Smrg
1637ec681f3Smrg   //! s_cbranch_execz BB1                                         ; bf880007
1647ec681f3Smrg   //! s_getpc_b64 s[0:1]                                          ; be801f00
1657ec681f3Smrg   //! s_addc_u32 s0, s0, 0xfffdfff8                               ; 8200ff00 fffdfff8
1667ec681f3Smrg   //! s_addc_u32 s1, s1, -1                                       ; 8201c101
1677ec681f3Smrg   //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
1687ec681f3Smrg   //! s_bitset0_b32 s0, 0                                         ; be801b80
1697ec681f3Smrg   //! s_setpc_b64 s[0:1]                                          ; be802000
1707ec681f3Smrg   bld.sopp(aco_opcode::s_cbranch_execnz, Definition(PhysReg(0), s2), 0);
1717ec681f3Smrg
1727ec681f3Smrg   //! BB1:
1737ec681f3Smrg   //! s_endpgm                                                    ; bf810000
1747ec681f3Smrg   bld.reset(program->create_and_insert_block());
1757ec681f3Smrg
1767ec681f3Smrg   program->blocks[0].linear_preds.push_back(0u);
1777ec681f3Smrg   program->blocks[1].linear_preds.push_back(0u);
1787ec681f3Smrg
1797ec681f3Smrg   finish_assembler_test();
1807ec681f3SmrgEND_TEST
1817ec681f3Smrg
1827ec681f3SmrgBEGIN_TEST(assembler.long_jump.3f)
1837ec681f3Smrg   if (!setup_cs(NULL, (chip_class)GFX10))
1847ec681f3Smrg      return;
1857ec681f3Smrg
1867ec681f3Smrg   //! BB0:
1877ec681f3Smrg   //! s_branch BB1                                                ; bf820040
1887ec681f3Smrg   //! s_nop 0                                                     ; bf800000
1897ec681f3Smrg   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
1907ec681f3Smrg
1917ec681f3Smrg   for (unsigned i = 0; i < 0x3f - 7; i++) // a unconditional long jump is 7 dwords
1927ec681f3Smrg      bld.vop1(aco_opcode::v_nop);
1937ec681f3Smrg   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
1947ec681f3Smrg
1957ec681f3Smrg   bld.reset(program->create_and_insert_block());
1967ec681f3Smrg   for (unsigned i = 0; i < INT16_MAX + 1; i++)
1977ec681f3Smrg      bld.vop1(aco_opcode::v_nop);
1987ec681f3Smrg   bld.reset(program->create_and_insert_block());
1997ec681f3Smrg
2007ec681f3Smrg   program->blocks[1].linear_preds.push_back(0u);
2017ec681f3Smrg   program->blocks[2].linear_preds.push_back(0u);
2027ec681f3Smrg   program->blocks[2].linear_preds.push_back(1u);
2037ec681f3Smrg
2047ec681f3Smrg   finish_assembler_test();
2057ec681f3SmrgEND_TEST
2067ec681f3Smrg
2077ec681f3SmrgBEGIN_TEST(assembler.long_jump.constaddr)
2087ec681f3Smrg   if (!setup_cs(NULL, (chip_class)GFX10))
2097ec681f3Smrg      return;
2107ec681f3Smrg
2117ec681f3Smrg   //>> s_getpc_b64 s[0:1]                                          ; be801f00
2127ec681f3Smrg   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
2137ec681f3Smrg
2147ec681f3Smrg   bld.reset(program->create_and_insert_block());
2157ec681f3Smrg
2167ec681f3Smrg   for (unsigned i = 0; i < INT16_MAX + 1; i++)
2177ec681f3Smrg      bld.sopp(aco_opcode::s_nop, -1, 0);
2187ec681f3Smrg
2197ec681f3Smrg   bld.reset(program->create_and_insert_block());
2207ec681f3Smrg
2217ec681f3Smrg   //>> s_getpc_b64 s[0:1]                                          ; be801f00
2227ec681f3Smrg   //! s_add_u32 s0, s0, 0xe0                                      ; 8000ff00 000000e0
2237ec681f3Smrg   bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero());
2247ec681f3Smrg   bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
2257ec681f3Smrg            Operand(PhysReg(0), s1), Operand::zero());
2267ec681f3Smrg
2277ec681f3Smrg   program->blocks[2].linear_preds.push_back(0u);
2287ec681f3Smrg   program->blocks[2].linear_preds.push_back(1u);
2297ec681f3Smrg
2307ec681f3Smrg   finish_assembler_test();
2317ec681f3SmrgEND_TEST
2327ec681f3Smrg
2337ec681f3SmrgBEGIN_TEST(assembler.v_add3)
2347ec681f3Smrg   for (unsigned i = GFX9; i <= GFX10; i++) {
2357ec681f3Smrg      if (!setup_cs(NULL, (chip_class)i))
2367ec681f3Smrg         continue;
2377ec681f3Smrg
2387ec681f3Smrg      //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
2397ec681f3Smrg      //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
2407ec681f3Smrg      aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
2417ec681f3Smrg      add3->operands[0] = Operand::zero();
2427ec681f3Smrg      add3->operands[1] = Operand::zero();
2437ec681f3Smrg      add3->operands[2] = Operand::zero();
2447ec681f3Smrg      add3->definitions[0] = Definition(PhysReg(0), v1);
2457ec681f3Smrg      bld.insert(std::move(add3));
2467ec681f3Smrg
2477ec681f3Smrg      finish_assembler_test();
2487ec681f3Smrg   }
2497ec681f3SmrgEND_TEST
2507ec681f3Smrg
2517ec681f3SmrgBEGIN_TEST(assembler.v_add3_clamp)
2527ec681f3Smrg   for (unsigned i = GFX9; i <= GFX10; i++) {
2537ec681f3Smrg      if (!setup_cs(NULL, (chip_class)i))
2547ec681f3Smrg         continue;
2557ec681f3Smrg
2567ec681f3Smrg      //~gfx9>> integer addition + clamp ; d1ff8000 02010080
2577ec681f3Smrg      //~gfx10>> integer addition + clamp ; d76d8000 02010080
2587ec681f3Smrg      aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
2597ec681f3Smrg      add3->operands[0] = Operand::zero();
2607ec681f3Smrg      add3->operands[1] = Operand::zero();
2617ec681f3Smrg      add3->operands[2] = Operand::zero();
2627ec681f3Smrg      add3->definitions[0] = Definition(PhysReg(0), v1);
2637ec681f3Smrg      add3->clamp = 1;
2647ec681f3Smrg      bld.insert(std::move(add3));
2657ec681f3Smrg
2667ec681f3Smrg      finish_assembler_test();
2677ec681f3Smrg   }
2687ec681f3SmrgEND_TEST
269