#!/bin/bash
#
# Test various instructions to check whether half<->full widening/narrowing
# works.  The basic premise is to perform the same instruction with and
# without the widening/narrowing folded in and check if the results match.
#
# Note this doesn't currently differentiate between signed/unsigned/bool,
# and just assumes int is signed (since unsigned is basically(ish) like
# signed but without sign extension)
#
# TODO probably good to pick numeric src values that are better at triggering
# edge cases, while still not losing precision in a full->half->full
# sequence.. but some instructions like absneg don't even appear to be
# subtly wrong when you try to fold in a precision conversion.
#
# Usage: add '-v' arg to see the result values
#
# Environment:
#   COMPUTERATOR  path to the computerator binary (default: ./computerator)
#
# Mismatches are reported as "FAIL:" lines on stdout; they do not change
# the exit status of the script.

set -eu

# Set to "true" by the '-v' argument; read by check_results.
verbose="false"

# Binary the generated assembly is piped into.
computerator="${COMPUTERATOR:-./computerator}"

#
# Templates for float->float instructions:
#
# In every template the literal tokens $dst, $src1, $src2 and $src3 are
# placeholders filled in by expand().  The backslashes in front of
# parentheses are escapes that expand() strips again.
#
f2f_instrs=(
  'add.f $dst, $src1, $src2'
  'min.f $dst, $src1, $src2'
  'min.f $dst, $src2, $src1'
  'max.f $dst, $src1, $src2'
  'max.f $dst, $src2, $src1'
  'mul.f $dst, $src1, $src2'
  'sign.f $dst, $src1'
  'absneg.f $dst, \(neg\)$src1'
  'absneg.f $dst, \(abs\)$src1'
  'floor.f $dst, $src1'
  'ceil.f $dst, $src1'
  'rndne.f $dst, $src1'
  'rndaz.f $dst, $src1'
  'trunc.f $dst, $src1'
)

#
# Templates for float->int instructions:
#
f2i_instrs=(
  'cmps.f.gt $dst, $src1, $src2'
  'cmps.f.lt $dst, $src1, $src2'
  'cmpv.f.gt $dst, $src1, $src2'
  'cmpv.f.lt $dst, $src1, $src2'
)

#
# Templates for int->int instructions:
#
i2i_instrs=(
  'add.u $dst, $src1, $src2'
  'add.s $dst, $src1, $src2'
  'sub.u $dst, $src1, $src2'
  'sub.s $dst, $src1, $src2'
  'cmps.f.gt $dst, $src1, $src2'
  'cmps.f.lt $dst, $src1, $src2'
  'min.u $dst, $src1, $src2'
  'min.u $dst, $src2, $src1'
  'min.s $dst, $src1, $src2'
  'min.s $dst, $src2, $src1'
  'max.u $dst, $src1, $src2'
  'max.u $dst, $src2, $src1'
  'max.s $dst, $src1, $src2'
  'max.s $dst, $src2, $src1'
  'absneg.s $dst, \(neg\)$src1'
  'absneg.s $dst, \(abs\)$src1'
  'and.b $dst, $src2, $src3'
  'or.b $dst, $src1, $src2'
  'not.b $dst, $src1'
  'xor.b $dst, $src1, $src2'
  'cmpv.u.gt $dst, $src1, $src2'
  'cmpv.u.lt $dst, $src1, $src2'
  'cmpv.s.gt $dst, $src1, $src2'
  'cmpv.s.lt $dst, $src1, $src2'
  'mul.u24 $dst, $src1, $src2'
  'mul.s24 $dst, $src1, $src2'
  'mull.u $dst, $src1, $src2'
  'bfrev.b $dst, $src1'
  'clz.s $dst, $src2'
  'clz.b $dst, $src2'
  'shl.b $dst, $src1, $src2'
  'shr.b $dst, $src3, $src1'
  'ashr.b $dst, $src3, $src1'
  'mgen.b $dst, $src1, $src2'
  'getbit.b $dst, $src3, $src2'
  'setrm $dst, $src1'
  'cbits.b $dst, $src3'
  'shb $dst, $src1, $src2'
  'msad $dst, $src1, $src2'
)

#
# Helper to expand instruction template:
#
# Arguments: $1 - template, $2 - dst register, $3..$5 - src1..src3 registers
# Outputs:   the template with its placeholders replaced, on stdout
#
expand() {
  local text=$1
  local dst=$2 src1=$3 src2=$4 src3=$5

  # Drop the escaping backslashes, then substitute each placeholder
  # textually; this avoids running the template through 'eval'.
  text=${text//\\/}
  text=${text//\$dst/$dst}
  text=${text//\$src1/$src1}
  text=${text//\$src2/$src2}
  text=${text//\$src3/$src3}

  printf '%s\n' "$text"
}

#
# Emit the four variants of one instruction: a control and a test for
# each direction (half dst hr1.x/hr1.y, full dst r2.x/r2.y).
#
# Arguments: $1 - instruction template
#
expand_test() {
  local instr=$1

  echo '; control, half->half:'
  expand "$instr" "hr1.x" "hr0.x" "hr0.y" "hr0.z"
  echo '; test, full->half:'
  expand "$instr" "hr1.y" "r1.x" "r1.y" "r1.z"

  echo '; control, full->full:'
  expand "$instr" "r2.x" "r1.x" "r1.y" "r1.z"
  echo '; test, half->full:'
  expand "$instr" "r2.y" "hr0.x" "hr0.y" "hr0.z"

  echo "(rpt5)nop"
}

#
# Helpers to construct test program assembly:
#
header_asm() {
  cat <<'EOF'
@localsize 1, 1, 1
@buf 4 ; g[0]
EOF
}

# Stores r2.x..r2.w into the result buffer at offsets 0..3.
footer_asm() {
  cat <<'EOF'
; dest offsets:
mov.u32u32 r3.x, 0
mov.u32u32 r3.y, 1
mov.u32u32 r3.z, 2
mov.u32u32 r3.w, 3
(rpt5)nop

; and store results:
stib.untyped.1d.u32.1 r2.x, r3.x, 0 ; control: full->full
stib.untyped.1d.u32.1 r2.y, r3.y, 0 ; test: half->full
stib.untyped.1d.u32.1 r2.z, r3.z, 0 ; control: half->half
stib.untyped.1d.u32.1 r2.w, r3.w, 0 ; test: full->half
(sy)nop
end
EOF
}

# Loads the float source values 1.0, 2.0, 3.0 into hr0.xyz and r1.xyz.
setup_asm_float() {
  cat <<'EOF'
; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
; r1->r2 avail for full, r1 for src, r2 for dst
cov.f32f16 hr0.x, (1.0)
cov.f32f16 hr0.y, (2.0)
cov.f32f16 hr0.z, (3.0)
mov.f32f32 r1.x, (1.0)
mov.f32f32 r1.y, (2.0)
mov.f32f32 r1.z, (3.0)
(rpt5)nop
EOF
}

# Loads the integer source values 1, -2, 3 into hr0.xyz and r1.xyz.
setup_asm_int() {
  cat <<'EOF'
; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
; r1->r2 avail for full, r1 for src, r2 for dst
cov.s32s16 hr0.x, 1
cov.s32s16 hr0.y, -2
cov.s32s16 hr0.z, 3
mov.s32s32 r1.x, 1
mov.s32s32 r1.y, -2
mov.s32s32 r1.z, 3
(rpt5)nop
EOF
}

#
# Shared body of the three generators below.
#
# Arguments: $1 - setup function to call (setup_asm_float or setup_asm_int)
#            $2 - opcode used to convert the half results back to full
#            $3 - instruction template
#
emit_test_program() {
  local setup_fn=$1 cov_op=$2 instr=$3

  header_asm
  "$setup_fn"
  expand_test "$instr"

  cat <<EOF
; convert half results back to full:
$cov_op r2.z, hr1.x
$cov_op r2.w, hr1.y
EOF

  footer_asm
}

#
# Generate assembly code to test float->float opcode
#
f2f_asm() {
  emit_test_program setup_asm_float cov.f16f32 "$1"
}

#
# Generate assembly code to test float->int opcode
#
f2i_asm() {
  emit_test_program setup_asm_float cov.s16s32 "$1"
}

#
# Generate assembly code to test int->int opcode
#
i2i_asm() {
  emit_test_program setup_asm_int cov.s16s32 "$1"
}

#
# Helper to parse computerator output and print results:
#
# Reads computerator output on stdin, takes the first line containing a
# space, and treats its first four whitespace-separated fields as
# (full control, half->full test, half control, full->half test), which
# is the order footer_asm stores them in.
#
check_results() {
  local str cf tf ch th

  # NOTE(review): the whitespace inside this grep pattern may have been
  # altered when this file was extracted -- confirm against upstream.
  str=$(grep " " | head -1 | xargs)

  if [[ "$verbose" == "true" ]]; then
    printf '%s\n' "$str"
  fi

  # Split components of result buffer:
  read -r cf tf ch th _ <<<"$str" || true

  if [[ -z "$th" ]]; then
    # Fewer than four values: comparing them would be meaningless.
    echo " FAIL: could not parse four result values from computerator output!"
    echo " output='$str'"
  else
    # Sanity test, make sure the control results match:
    if [[ "$cf" != "$ch" ]]; then
      echo " FAIL: control results do not match! Half vs full op is not equivalent!"
      echo " full=$cf half=$ch"
    fi

    # Compare test (with conversion folded) to control:
    if [[ "$cf" != "$tf" ]]; then
      echo " FAIL: half -> full widening result does not match control!"
      echo " control=$cf result=$tf"
    fi
    if [[ "$ch" != "$th" ]]; then
      echo " FAIL: full -> half narrowing result does not match control!"
      echo " control=$ch result=$th"
    fi
  fi

  # HACK without a delay different invocations
  # of computerator seem to somehow clobber each
  # other.. which isn't great..
  sleep 0.1
}

#
# Run one generator over a list of instruction templates.
#
# Arguments: $1 - generator function (f2f_asm, f2i_asm or i2i_asm)
#            $2.. - instruction templates
#
run_tests() {
  local gen=$1
  shift

  local instr
  for instr in "$@"; do
    echo "TEST: $instr"
    "$gen" "$instr" | "$computerator" -g 1,1,1 | check_results
  done
}

#
# Run the tests!
#
main() {
  if [[ "${1:-}" == "-v" ]]; then
    verbose="true"
  fi

  run_tests f2f_asm "${f2f_instrs[@]}"
  run_tests f2i_asm "${f2i_instrs[@]}"
  run_tests i2i_asm "${i2i_instrs[@]}"
}

main "$@"