17ec681f3Smrg#!/bin/bash
27ec681f3Smrg#
37ec681f3Smrg# Test various instructions to check whether half<->full widening/narrowing
47ec681f3Smrg# works.  The basic premise is to perform the same instruction with and
57ec681f3Smrg# without the widening/narrowing folded in and check if the results match.
67ec681f3Smrg#
# Note this doesn't currently differentiate between signed/unsigned/bool,
# and just assumes int is signed (since unsigned is basically(ish) like
# signed but without sign extension)
107ec681f3Smrg#
# TODO it would probably be good to pick numeric src values that are better
# at triggering edge cases, while still not losing precision in a
# full->half->full sequence.. but some instructions like absneg don't even
# appear to be subtly wrong when you try to fold in a precision conversion.
157ec681f3Smrg#
167ec681f3Smrg# add '-v' arg to see the result values
177ec681f3Smrg
187ec681f3Smrgset -e
197ec681f3Smrg
207ec681f3Smrg#
217ec681f3Smrg# Templates for float->float instructions:
227ec681f3Smrg#
# Each entry is a single-quoted template: the $dst/$src1/$src2 placeholders
# and the backslash-escaped parentheses stay literal here and are only
# resolved when the template is handed to expand().  Order is preserved
# because the tests run in array order.
f2f_instrs=()

# two-source arithmetic (min/max are listed with both operand orders):
f2f_instrs+=(
	'add.f $dst, $src1, $src2'
	'min.f $dst, $src1, $src2'
	'min.f $dst, $src2, $src1'
	'max.f $dst, $src1, $src2'
	'max.f $dst, $src2, $src1'
	'mul.f $dst, $src1, $src2'
)

# single-source sign handling:
f2f_instrs+=(
	'sign.f $dst, $src1'
	'absneg.f $dst, \(neg\)$src1'
	'absneg.f $dst, \(abs\)$src1'
)

# single-source rounding:
f2f_instrs+=(
	'floor.f $dst, $src1'
	'ceil.f $dst, $src1'
	'rndne.f $dst, $src1'
	'rndaz.f $dst, $src1'
	'trunc.f $dst, $src1'
)
397ec681f3Smrg
407ec681f3Smrg#
417ec681f3Smrg# Templates for float->int instructions:
427ec681f3Smrg#
# Float compares: sources are float registers, the result is treated as an
# integer by f2i_asm.  Templates stay single-quoted so that the $dst/$srcN
# placeholders are only filled in by expand().
f2i_instrs=()
f2i_instrs+=('cmps.f.gt $dst, $src1, $src2' 'cmps.f.lt $dst, $src1, $src2')
f2i_instrs+=('cmpv.f.gt $dst, $src1, $src2' 'cmpv.f.lt $dst, $src1, $src2')
497ec681f3Smrg
507ec681f3Smrg#
517ec681f3Smrg# Templates for int->int instructions:
527ec681f3Smrg#
# Integer templates, appended group by group.  The overall order matches the
# order in which the tests are run.  Placeholders ($dst, $src1..$src3) are
# kept literal by the single quotes and filled in later by expand().
i2i_instrs=()

# add/sub:
i2i_instrs+=(
	'add.u $dst, $src1, $src2'
	'add.s $dst, $src1, $src2'
	'sub.u $dst, $src1, $src2'
	'sub.s $dst, $src1, $src2'
)

# NOTE(review): these two use the float compare mnemonic although they live
# in the int->int list -- confirm whether that is intentional.
i2i_instrs+=(
	'cmps.f.gt $dst, $src1, $src2'
	'cmps.f.lt $dst, $src1, $src2'
)

# min/max, each with both operand orders:
i2i_instrs+=(
	'min.u $dst, $src1, $src2'
	'min.u $dst, $src2, $src1'
	'min.s $dst, $src1, $src2'
	'min.s $dst, $src2, $src1'
	'max.u $dst, $src1, $src2'
	'max.u $dst, $src2, $src1'
	'max.s $dst, $src1, $src2'
	'max.s $dst, $src2, $src1'
)

# sign handling:
i2i_instrs+=(
	'absneg.s $dst, \(neg\)$src1'
	'absneg.s $dst, \(abs\)$src1'
)

# bitwise logic:
i2i_instrs+=(
	'and.b $dst, $src2, $src3'
	'or.b $dst, $src1, $src2'
	'not.b $dst, $src1'
	'xor.b $dst, $src1, $src2'
)

# integer compares (cmpv):
i2i_instrs+=(
	'cmpv.u.gt $dst, $src1, $src2'
	'cmpv.u.lt $dst, $src1, $src2'
	'cmpv.s.gt $dst, $src1, $src2'
	'cmpv.s.lt $dst, $src1, $src2'
)

# multiplies:
i2i_instrs+=(
	'mul.u24 $dst, $src1, $src2'
	'mul.s24 $dst, $src1, $src2'
	'mull.u $dst, $src1, $src2'
)

# single-source bit manipulation:
i2i_instrs+=(
	'bfrev.b $dst, $src1'
	'clz.s $dst, $src2'
	'clz.b $dst, $src2'
)

# shifts:
i2i_instrs+=(
	'shl.b $dst, $src1, $src2'
	'shr.b $dst, $src3, $src1'
	'ashr.b $dst, $src3, $src1'
)

# remaining opcodes:
i2i_instrs+=(
	'mgen.b $dst, $src1, $src2'
	'getbit.b $dst, $src3, $src2'
	'setrm $dst, $src1'
	'cbits.b $dst, $src3'
	'shb $dst, $src1, $src2'
	'msad $dst, $src1, $src2'
)
947ec681f3Smrg
957ec681f3Smrg#
967ec681f3Smrg# Helper to expand instruction template:
977ec681f3Smrg#
# expand TEMPLATE DST SRC1 SRC2 SRC3
#
# Print TEMPLATE on stdout with the literal placeholders $dst, $src1, $src2
# and $src3 replaced by the corresponding arguments.  A placeholder whose
# argument is missing is replaced by the empty string.
#
# The templates escape parentheses as \( and \) (a leftover from when this
# helper re-parsed the template with eval); those backslashes are dropped so
# the emitted assembly contains plain "(neg)" / "(abs)".
#
# Substitution is purely textual: nothing in TEMPLATE is ever executed as
# shell code, and no variables leak into the caller's scope.
expand() {
	local instr=$1
	local dst=$2 src1=$3 src2=$4 src3=$5

	# \$name matches the literal placeholder text; the replacement is quoted
	# so it is always inserted verbatim.
	instr=${instr//\$dst/"$dst"}
	instr=${instr//\$src1/"$src1"}
	instr=${instr//\$src2/"$src2"}
	instr=${instr//\$src3/"$src3"}

	# strip the escaping backslashes (e.g. "\(neg\)" -> "(neg)")
	instr=${instr//\\/}

	printf '%s\n' "$instr"
}
1067ec681f3Smrg
# expand_test TEMPLATE
#
# Emit the four variants of one instruction that make up a test:
#   hr1.x <- half sources   (control, half->half)
#   hr1.y <- full sources   (test, narrowing folded into the instruction)
#   r2.x  <- full sources   (control, full->full)
#   r2.y  <- half sources   (test, widening folded into the instruction)
# followed by a "(rpt5)nop".
#
# TEMPLATE is passed on quoted, so it reaches expand() as a single argument
# regardless of the current IFS setting.
expand_test() {
	local instr=$1

	echo '; control, half->half:'
	expand "$instr" "hr1.x" "hr0.x" "hr0.y" "hr0.z"
	echo '; test, full->half:'
	expand "$instr" "hr1.y" "r1.x" "r1.y" "r1.z"

	echo '; control, full->full:'
	expand "$instr" "r2.x" "r1.x" "r1.y" "r1.z"
	echo '; test, half->full:'
	expand "$instr" "r2.y" "hr0.x" "hr0.y" "hr0.z"

	echo "(rpt5)nop"
}
1227ec681f3Smrg
1237ec681f3Smrg#
1247ec681f3Smrg# Helpers to construct test program assembly:
1257ec681f3Smrg#
# Print the two directive lines that start every generated test program:
# the local size and the declaration of the result buffer g[0].
header_asm() {
	printf '%s\n' \
		'@localsize 1, 1, 1' \
		'@buf 4  ; g[0]'
}
1327ec681f3Smrg
# Print the common tail of every test program: load the four destination
# offsets 0..3 into r3.x..r3.w, store r2.x..r2.w to the result buffer at
# those offsets, then end the program.  The slot order (control full->full,
# test half->full, control half->half, test full->half) is the order in
# which check_results reads the values back.
footer_asm() {
	echo '; dest offsets:'
	# printf re-applies the format for every (component, offset) pair
	printf 'mov.u32u32 r3.%s, %s\n' x 0 y 1 z 2 w 3
	echo '(rpt5)nop'
	echo
	echo '; and store results:'
	printf 'stib.untyped.1d.u32.1 r2.%s, r3.%s, 0   ; %s\n' \
		x x 'control: full->full' \
		y y 'test:    half->full' \
		z z 'control: half->half' \
		w w 'test:    full->half'
	echo '(sy)nop'
	echo 'end'
}
1517ec681f3Smrg
# Print the source-register setup for the float tests: the constants
# 1.0/2.0/3.0 are loaded into hr0.x..z (via a f32->f16 cov) and into
# r1.x..z (plain mov), followed by a "(rpt5)nop".
setup_asm_float() {
	printf '%s\n' \
		'; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst' \
		'; r1->r2 avail for full, r1 for src, r2 for dst'
	# each format is re-applied for every (component, value) pair
	printf 'cov.f32f16 hr0.%s, (%s)\n' x 1.0 y 2.0 z 3.0
	printf 'mov.f32f32 r1.%s,  (%s)\n' x 1.0 y 2.0 z 3.0
	printf '%s\n' '(rpt5)nop'
}
1657ec681f3Smrg
# Print the source-register setup for the integer tests: the constants
# 1/-2/3 are loaded into hr0.x..z (via a s32->s16 cov) and into r1.x..z
# (plain mov), followed by a "(rpt5)nop".
setup_asm_int() {
	printf '%s\n' \
		'; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst' \
		'; r1->r2 avail for full, r1 for src, r2 for dst'
	# %2s right-aligns the value so positive and negative constants line up
	printf 'cov.s32s16 hr0.%s, %2s\n' x 1 y -2 z 3
	printf 'mov.s32s32 r1.%s,  %2s\n' x 1 y -2 z 3
	printf '%s\n' '(rpt5)nop'
}
1797ec681f3Smrg
1807ec681f3Smrg#
1817ec681f3Smrg# Generate assembly code to test float->float opcode
1827ec681f3Smrg#
# f2f_asm TEMPLATE
#
# Print a complete test program for one float->float instruction template:
# header, float source setup, the four expanded instruction variants, a
# f16->f32 conversion of the two half results (hr1.x, hr1.y) into r2.z/r2.w,
# and the common footer that stores r2.x..w.
#
# TEMPLATE is forwarded quoted so it stays a single argument whatever the
# caller's IFS is, and it is held in a local so the caller's variables are
# not overwritten.
f2f_asm() {
	local instr=$1

	header_asm
	setup_asm_float
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.f16f32 r2.z, hr1.x
cov.f16f32 r2.w, hr1.y
EOF

	footer_asm
}
1987ec681f3Smrg
1997ec681f3Smrg#
2007ec681f3Smrg# Generate assembly code to test float->int opcode
2017ec681f3Smrg#
# f2i_asm TEMPLATE
#
# Print a complete test program for one float->int instruction template:
# header, float source setup, the four expanded instruction variants, a
# s16->s32 conversion of the two half results (hr1.x, hr1.y) into r2.z/r2.w,
# and the common footer that stores r2.x..w.
#
# TEMPLATE is forwarded quoted so it stays a single argument whatever the
# caller's IFS is, and it is held in a local so the caller's variables are
# not overwritten.
f2i_asm() {
	local instr=$1

	header_asm
	setup_asm_float
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.s16s32 r2.z, hr1.x
cov.s16s32 r2.w, hr1.y
EOF

	footer_asm
}
2177ec681f3Smrg
2187ec681f3Smrg#
2197ec681f3Smrg# Generate assembly code to test int->int opcode
2207ec681f3Smrg#
# i2i_asm TEMPLATE
#
# Print a complete test program for one int->int instruction template:
# header, integer source setup, the four expanded instruction variants, a
# s16->s32 conversion of the two half results (hr1.x, hr1.y) into r2.z/r2.w,
# and the common footer that stores r2.x..w.
#
# TEMPLATE is forwarded quoted so it stays a single argument whatever the
# caller's IFS is, and it is held in a local so the caller's variables are
# not overwritten.
i2i_asm() {
	local instr=$1

	header_asm
	setup_asm_int
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.s16s32 r2.z, hr1.x
cov.s16s32 r2.w, hr1.y
EOF

	footer_asm
}
2367ec681f3Smrg
2377ec681f3Smrg
2387ec681f3Smrg#
2397ec681f3Smrg# Helper to parse computerator output and print results:
2407ec681f3Smrg#
2417ec681f3Smrgcheck_results() {
2427ec681f3Smrg	str=`cat - | grep "	" | head -1 | xargs`
2437ec681f3Smrg
2447ec681f3Smrg	if [ "$verbose" = "true" ]; then
2457ec681f3Smrg		echo $str
2467ec681f3Smrg	fi
2477ec681f3Smrg
2487ec681f3Smrg	# Split components of result buffer:
2497ec681f3Smrg	cf=$(echo $str | cut -f1 -d' ')
2507ec681f3Smrg	tf=$(echo $str | cut -f2 -d' ')
2517ec681f3Smrg	ch=$(echo $str | cut -f3 -d' ')
2527ec681f3Smrg	th=$(echo $str | cut -f4 -d' ')
2537ec681f3Smrg
2547ec681f3Smrg	# Sanity test, make sure the control results match:
2557ec681f3Smrg	if [ $cf != $ch ]; then
2567ec681f3Smrg		echo "    FAIL: control results do not match!  Half vs full op is not equivalent!"
2577ec681f3Smrg		echo "    full=$cf half=$ch"
2587ec681f3Smrg	fi
2597ec681f3Smrg
2607ec681f3Smrg	# Compare test (with conversion folded) to control:
2617ec681f3Smrg	if [ $cf != $tf ]; then
2627ec681f3Smrg		echo "    FAIL: half -> full widening result does not match control!"
2637ec681f3Smrg		echo "    control=$cf result=$tf"
2647ec681f3Smrg	fi
2657ec681f3Smrg	if [ $ch != $th ]; then
2667ec681f3Smrg		echo "    FAIL: full -> half narrowing result does not match control!"
2677ec681f3Smrg		echo "    control=$ch result=$th"
2687ec681f3Smrg	fi
2697ec681f3Smrg
2707ec681f3Smrg	# HACK without a delay different invocations
2717ec681f3Smrg	# of computerator seem to somehow clobber each
2727ec681f3Smrg	# other.. which isn't great..
2737ec681f3Smrg	sleep 0.1
2747ec681f3Smrg}
2757ec681f3Smrg
2767ec681f3Smrg#
2777ec681f3Smrg# Run the tests!
2787ec681f3Smrg#
2797ec681f3Smrg
# "-v" as the first argument makes check_results print the result values.
if [ "$1" = "-v" ]; then
	verbose="true"
fi

# The helper functions have historically passed instruction templates
# around unquoted and relied on an empty IFS to keep each template in one
# piece, so the empty IFS is kept for them.  Everything below is quoted and
# does not depend on it.
IFS=""

# run_suite GENERATOR TEMPLATE...
#
# For every TEMPLATE: print a "TEST:" banner, build the test program with
# GENERATOR (one of f2f_asm, f2i_asm, i2i_asm), run it through
# ./computerator (looked up relative to the current directory) and hand the
# output to check_results.
run_suite() {
	local generator=$1
	local instr
	shift

	for instr in "$@"; do
		echo "TEST: $instr"
		"$generator" "$instr" | ./computerator -g 1,1,1 | check_results
	done
}

run_suite f2f_asm "${f2f_instrs[@]}"
run_suite f2i_asm "${f2i_instrs[@]}"
run_suite i2i_asm "${i2i_instrs[@]}"
2977ec681f3Smrg
298