17ec681f3Smrg#!/bin/bash
27ec681f3Smrg
37ec681f3Smrg# This tests for the size of the register file. We do this by launching a
47ec681f3Smrg# lot of workgroups with only one invocation, which causes the GPU to be
57ec681f3Smrg# saturated with in-flight waves. Each thread records its wave id using "getwid"
67ec681f3Smrg# (only available in a6xx+!) and stores it in the buffer. We then vary the
77ec681f3Smrg# register footprint by introducing uses of higher and higher registers. This
87ec681f3Smrg# lets us determine:
97ec681f3Smrg# 1. The total number of waves available (always 16 for known models)
107ec681f3Smrg# 2. The wave granularity (how many waves are always launched together, always 2
117ec681f3Smrg# for known models).
127ec681f3Smrg# 3. The total size of the register file that is divvied up between the waves.
137ec681f3Smrg
147ec681f3Smrgset -e
157ec681f3Smrg
#######################################
# Emit the probe shader's assembly source on stdout.
#
# The shader records its wave id (getwid) and stores it into the buffer at
# the index given by its workgroup id. The only part that varies between
# invocations is the source operand of the final add.f, which references
# register r<N>.w and thereby sets the highest register the shader uses.
#
# Arguments: $1 - register number N to reference (as r<N>.w)
# Outputs:   shader assembly text on stdout
#######################################
gen_shader() {
	# Keep the register number local so that calling this function in the
	# current shell does not clobber a caller's variable of the same name.
	local n=$1
	# Unquoted delimiter: $n is expanded inside the here-document.
	cat <<EOF
@localsize 1, 1, 1
@buf 128  ; g[0]
@wgid(r48.x)
getwid.u32 r1.x
mov.u32u32 r0.x, r48.x

; busy loop to make sure it actually uses all possible waves
mov.u32u32 r0.y, 16
(rpt2)nop
loop:
cmps.u.gt p0.x, r0.y, 0
sub.u r0.y, r0.y, 1
(rpt5)nop
br p0.x, #loop
add.f r1.y, r1.x, r$n.w

(ss)(sy)(rpt5)nop
stib.b.untyped.1d.u32.1.imm r1.x, r0.x, 0
end
nop
EOF
}
417ec681f3Smrg
# Generate the reference output with the lowest register footprint (r1).
gen_shader 1 | ./computerator -g 128,1,1 | tee reference.log

# Re-run with successively higher registers and stop at the first register
# number whose output differs from the reference.
for n in {2..32}; do
	echo "Trying max reg: r$n"
	gen_shader "$n" | ./computerator -g 128,1,1 | tee new.log
	# diff must be tested directly in the condition: the script runs under
	# "set -e", so a bare failing diff would abort the script before the
	# result could be reported.
	if ! diff reference.log new.log; then
		echo "Changes at r$n"
		break
	fi
done
54