17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2020 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg */ 247ec681f3Smrg#include "helpers.h" 257ec681f3Smrg 267ec681f3Smrgusing namespace aco; 277ec681f3Smrg 287ec681f3SmrgBEGIN_TEST(builder.v_mul_imm) 297ec681f3Smrg for (unsigned i = GFX8; i <= GFX10; i++) { 307ec681f3Smrg //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm 317ec681f3Smrg if (!setup_cs("v1 v1 s1 s1", (chip_class)i)) 327ec681f3Smrg continue; 337ec681f3Smrg 347ec681f3Smrg /* simple optimizations */ 357ec681f3Smrg 367ec681f3Smrg //! p_unit_test 0, 0 377ec681f3Smrg writeout(0, bld.v_mul_imm(bld.def(v1), inputs[0], 0)); 387ec681f3Smrg 397ec681f3Smrg //! p_unit_test 1, %a 407ec681f3Smrg writeout(1, bld.v_mul_imm(bld.def(v1), inputs[0], 1)); 417ec681f3Smrg 427ec681f3Smrg //! v1: %res2 = v_lshlrev_b32 2, %a 437ec681f3Smrg //! p_unit_test 2, %res2 447ec681f3Smrg writeout(2, bld.v_mul_imm(bld.def(v1), inputs[0], 4)); 457ec681f3Smrg 467ec681f3Smrg //! v1: %res3 = v_lshlrev_b32 31, %a 477ec681f3Smrg //! p_unit_test 3, %res3 487ec681f3Smrg writeout(3, bld.v_mul_imm(bld.def(v1), inputs[0], 2147483648u)); 497ec681f3Smrg 507ec681f3Smrg /* single lshl+add/sub */ 517ec681f3Smrg 527ec681f3Smrg //~gfx8! v1: %res4_tmp = v_lshlrev_b32 3, %a 537ec681f3Smrg //~gfx8! v1: %res4, s2: %_ = v_add_co_u32 %res4_tmp, %a 547ec681f3Smrg //~gfx(9|10)! v1: %res4 = v_lshl_add_u32 %a, 3, %a 557ec681f3Smrg //! p_unit_test 4, %res4 567ec681f3Smrg writeout(4, bld.v_mul_imm(bld.def(v1), inputs[0], 9)); 577ec681f3Smrg 587ec681f3Smrg //~gfx[89]! v1: %res5_tmp = v_lshlrev_b32 3, %a 597ec681f3Smrg //~gfx8! v1: %res5, s2: %_ = v_sub_co_u32 %res5_tmp, %a 607ec681f3Smrg //~gfx9! v1: %res5 = v_sub_u32 %res5_tmp, %a 617ec681f3Smrg //~gfx10! v1: %res5 = v_mul_lo_u32 7, %a 627ec681f3Smrg //! p_unit_test 5, %res5 637ec681f3Smrg writeout(5, bld.v_mul_imm(bld.def(v1), inputs[0], 7)); 647ec681f3Smrg 657ec681f3Smrg /* lshl+add optimization with literal */ 667ec681f3Smrg 677ec681f3Smrg //~gfx8! v1: %res6_tmp0 = v_lshlrev_b32 2, %a 687ec681f3Smrg //~gfx8! v1: %res6_tmp1 = v_lshlrev_b32 6, %a 697ec681f3Smrg //~gfx8! v1: %res6, s2: %_ = v_add_co_u32 %res6_tmp1, %res6_tmp0 707ec681f3Smrg //~gfx9! v1: %res6_tmp = v_lshlrev_b32 2, %a 717ec681f3Smrg //~gfx9! v1: %res6 = v_lshl_add_u32 %a, 6, %res6_tmp 727ec681f3Smrg //~gfx10! v1: %res6 = v_mul_lo_u32 0x44, %a 737ec681f3Smrg //! p_unit_test 6, %res6 747ec681f3Smrg writeout(6, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64)); 757ec681f3Smrg 767ec681f3Smrg //~gfx8! s1: %res7_tmp = p_parallelcopy 0x144 777ec681f3Smrg //~gfx8! v1: %res7 = v_mul_lo_u32 %res7_tmp, %a 787ec681f3Smrg //~gfx9! v1: %res7_tmp0 = v_lshlrev_b32 2, %a 797ec681f3Smrg //~gfx9! v1: %res7_tmp1 = v_lshl_add_u32 %a, 6, %res7_tmp0 807ec681f3Smrg //~gfx9! v1: %res7 = v_lshl_add_u32 %a, 8, %res7_tmp1 817ec681f3Smrg //~gfx10! v1: %res7 = v_mul_lo_u32 0x144, %a 827ec681f3Smrg //! p_unit_test 7, %res7 837ec681f3Smrg writeout(7, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64 | 256)); 847ec681f3Smrg 857ec681f3Smrg //~gfx8! s1: %res8_tmp = p_parallelcopy 0x944 867ec681f3Smrg //~gfx8! v1: %res8 = v_mul_lo_u32 %res8_tmp, %a 877ec681f3Smrg //~gfx9! v1: %res8_tmp0 = v_lshlrev_b32 2, %a 887ec681f3Smrg //~gfx9! v1: %res8_tmp1 = v_lshl_add_u32 %a, 6, %res8_tmp0 897ec681f3Smrg //~gfx9! v1: %res8_tmp2 = v_lshl_add_u32 %a, 8, %res8_tmp1 907ec681f3Smrg //~gfx9! v1: %res8 = v_lshl_add_u32 %a, 11, %res8_tmp2 917ec681f3Smrg //~gfx10! v1: %res8 = v_mul_lo_u32 0x944, %a 927ec681f3Smrg //! p_unit_test 8, %res8 937ec681f3Smrg writeout(8, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64 | 256 | 2048)); 947ec681f3Smrg 957ec681f3Smrg /* lshl+add optimization with inline constant */ 967ec681f3Smrg 977ec681f3Smrg //~gfx8! v1: %res9_tmp0 = v_lshlrev_b32 1, %a 987ec681f3Smrg //~gfx8! v1: %res9_tmp1 = v_lshlrev_b32 2, %a 997ec681f3Smrg //~gfx8! v1: %res9, s2: %_ = v_add_co_u32 %res9_tmp1, %res9_tmp0 1007ec681f3Smrg //~gfx9! v1: %res9_tmp0 = v_lshlrev_b32 1, %a 1017ec681f3Smrg //~gfx9! v1: %res9 = v_lshl_add_u32 %a, 2, %res9_tmp0 1027ec681f3Smrg //~gfx10! v1: %res9 = v_mul_lo_u32 6, %a 1037ec681f3Smrg //! p_unit_test 9, %res9 1047ec681f3Smrg writeout(9, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4)); 1057ec681f3Smrg 1067ec681f3Smrg //~gfx(8|10)! v1: %res10 = v_mul_lo_u32 14, %a 1077ec681f3Smrg //~gfx9! v1: %res10_tmp0 = v_lshlrev_b32 1, %a 1087ec681f3Smrg //~gfx9! v1: %res10_tmp1 = v_lshl_add_u32 %a, 2, %res10_tmp0 1097ec681f3Smrg //~gfx9! v1: %res10 = v_lshl_add_u32 %a, 3, %res10_tmp1 1107ec681f3Smrg //! p_unit_test 10, %res10 1117ec681f3Smrg writeout(10, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4 | 8)); 1127ec681f3Smrg 1137ec681f3Smrg //! v1: %res11 = v_mul_lo_u32 30, %a 1147ec681f3Smrg //! p_unit_test 11, %res11 1157ec681f3Smrg writeout(11, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4 | 8 | 16)); 1167ec681f3Smrg 1177ec681f3Smrg finish_opt_test(); 1187ec681f3Smrg } 1197ec681f3SmrgEND_TEST 120