17ec681f3Smrg-- Parse cmdstream dump and analyse blits and batches
27ec681f3Smrg
37ec681f3Smrg--local posix = require "posix"
47ec681f3Smrg
-- C-style printf: expand `fmt` with the remaining arguments via
-- string.format() and write the result to the default output file.
-- Returns whatever io.write() returns.
function printf(fmt, ...)
	local text = string.format(fmt, ...)
	return io.write(text)
end
87ec681f3Smrg
-- Debug trace hook.  Tracing is disabled: the body is a no-op and the
-- arguments are ignored.  Uncomment the printf() call below to get
-- verbose output.
function dbg(fmt, ...)
	-- printf(fmt, ...)
end
127ec681f3Smrg
-- Banner emitted once, when the script is loaded:
io.write("Analyzing Data...\n")

-- Handle returned by rnn.init() for the "a630" database; the functions
-- below read register values and bitfields through it (r.<REG>.<FIELD>):
local r = rnn.init("a630")
167ec681f3Smrg
-- Within a submit, all draws target the same N MRTs.  Both tables are
-- keyed by the `base` value passed to push_mrt():
--   mrts    - targets recorded since the last reset()
--   allmrts - every target recorded so far (includes historical render
--             targets; reset() does not clear it)
local mrts = {}
local allmrts = {}

-- Record a render target in both `mrts` and `allmrts`.
--   fmt, w, h, samples - format, width, height and sample count to report
--   base               - key under which the entry is stored
--   flag               - flag value stored alongside the target
--   gmem               - GMEM value stored alongside the target
-- An existing entry with the same `base` is overwritten.
function push_mrt(fmt, w, h, samples, base, flag, gmem)
	dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base)

	local entry = {
		format = fmt,
		w = w,
		h = h,
		samples = samples,
		base = base,
		flag = flag,
		gmem = gmem,
	}

	mrts[base] = entry
	allmrts[base] = entry
end
357ec681f3Smrg
-- And each draw will read from M sources/textures.  The table is keyed by
-- the `base` value passed to push_source() and is emptied by reset().
local sources = {}

-- Record a source (texture or blit source).
--   fmt, w, h, samples - format, width, height and sample count to report
--   base               - key under which the entry is stored
--   flag               - flag value stored alongside the source
-- An existing entry with the same `base` is overwritten.
function push_source(fmt, w, h, samples, base, flag)
	dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base)

	sources[base] = {
		format = fmt,
		w = w,
		h = h,
		samples = samples,
		base = base,
		flag = flag,
	}
end
517ec681f3Smrg
-- Per-batch analysis state.  reset() reinitializes all of it except the
-- bin geometry (binw/binh/nbins) and `nullbatch`.
local binw, binh, nbins             -- bin size/count, captured by draw()
local blits = 0                     -- blits seen since the last reset()
local draws = 0                     -- draws counted since the last reset()
local drawmode                      -- mode of the current batch, nil if none
local cleared, restored, resolved   -- sets (key -> 1) filled by blit events
local nullbatch                     -- true until a draw/blit event is seen
local depthtest, depthwrite         -- depth state seen on any draw
local stenciltest, stencilwrite     -- stencil state seen on any draw

-- Start a fresh batch: forget the current MRTs and sources, zero the
-- counters, empty the cleared/restored/resolved sets and clear the
-- depth/stencil flags and the draw mode.  `allmrts` is kept on purpose so
-- that historical render targets can still be recognised.
function reset()
	dbg("reset\n")
	mrts = {}
	sources = {}
	draws = 0
	blits = 0
	cleared = {}
	restored = {}
	resolved = {}
	depthtest = false
	depthwrite = false
	stenciltest = false
	stencilwrite = false
	-- Lua keywords are case-sensitive: the original `Nil` was a lookup of
	-- an (undefined) global variable that merely happened to yield nil.
	drawmode = nil
end
827ec681f3Smrg
-- Begin a new submit: clear all per-batch state, then mark the submit as
-- a potential null batch.  draw() and CP_EVENT_WRITE() clear the mark as
-- soon as they see a draw or blit.
function start_submit()
	dbg("start_submit\n")

	reset()

	-- Must follow reset(): nothing has been drawn or blitted yet.
	nullbatch = true
end
887ec681f3Smrg
-- Print a summary of the batch (or blit) accumulated since the last
-- reset(), then reset the per-batch state.
--
-- If neither a draw nor a blit was counted, only "NULL BATCH!" is printed
-- (when `nullbatch` is still set) and the function returns WITHOUT
-- calling reset().
function finish()
	dbg("finish\n")

	printf("\n")

	-- TODO we get false-positives for 'NULL BATCH!' because we don't have
	-- a really good way to differentiate between submits and cmds.  Ie.
	-- with growable cmdstream, and a large # of tiles, IB1 can get split
	-- across multiple buffers.  Since we ignore GMEM draws for window-
	-- offset != 0,0, the later cmds will appear as null batches
	if draws == 0 and blits == 0 then
		if nullbatch then
			printf("NULL BATCH!\n")
		end
		return
	end

	local is_gmem = (drawmode == "RM6_GMEM")

	if draws > 0 then
		printf("Batch:\n")
		printf("-------\n")
		printf("  # of draws: %u\n", draws)
		printf("  mode: %s\n", drawmode)
		if is_gmem then
			printf("  bin size: %ux%u (%u bins)\n", binw, binh, nbins)
		end
		if depthtest or depthwrite then
			printf("  ")
			if depthtest then
				printf("DEPTHTEST ")
			end
			if depthwrite then
				printf("DEPTHWRITE")
			end
			printf("\n")
		end
		if stenciltest or stencilwrite then
			printf("  ")
			if stenciltest then
				printf("STENCILTEST ")
			end
			if stencilwrite then
				printf("STENCILWRITE")
			end
			printf("\n")
		end
	else
		printf("Blit:\n")
		printf("-----\n")
	end

	for base, mrt in pairs(mrts) do
		printf("  MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples)
		if is_gmem then
			-- In GMEM mode the blit events are tracked by GMEM value:
			if cleared[mrt.gmem] then
				printf("\tCLEARED")
			end
			if restored[mrt.gmem] then
				printf("\tRESTORED")
			end
			if resolved[mrt.gmem] then
				printf("\tRESOLVED")
			end
		elseif cleared[mrt.base] then
			-- Otherwise clears are tracked by the target's base key:
			printf("\tCLEARED")
		end
		printf("\n")
	end

	-- Kept local: the original declared this with a plain `function`
	-- statement, which (re)defined a global on every call to finish().
	local function print_source(source)
		printf("  SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples)
	end

	for base, source in pairs(sources) do
		-- For batches with fewer than 10 draws (which includes blits, where
		-- draws == 0) show every source.  For larger batches only show
		-- sources that have been previously rendered to (matched by base
		-- or by flag against `allmrts`); other textures are less
		-- interesting.  Possibly this should be an option somehow
		if draws < 10
			or allmrts[base]
			or (source.flag and allmrts[source.flag]) then
			print_source(source)
		end
	end

	reset()
end
1777ec681f3Smrg
-- End of a submit: report whatever batch is still pending via finish().
function end_submit()
	dbg("end_submit\n")

	finish()
end
1827ec681f3Smrg
-- Track the current mode.  Holds the MARKER value of the most recent
-- CP_SET_MARKER packet; consumers convert it with tostring() before
-- comparing it against mode names.
local mode = ""

-- Packet hook: remember the MARKER field of the packet's first dword as
-- the current mode.  `size` is not used.
function CP_SET_MARKER(pkt, size)
	local marker = pkt[0].MARKER
	mode = marker
	dbg("mode: %s\n", marker)
end
1897ec681f3Smrg
-- Packet hook for event writes.  Only events whose name is "BLIT" are of
-- interest; what the blit means depends on the current mode:
--   RM6_GMEM    - a clear (CLEAR_MASK ~= 0) or a restore (CLEAR_MASK == 0)
--                 of the buffer at RB_BLIT_BASE_GMEM
--   RM6_RESOLVE - a resolve of the buffer at RB_BLIT_BASE_GMEM
-- Any other mode is reported as unexpected.  `size` is not used.
function CP_EVENT_WRITE(pkt, size)
	local event = tostring(pkt[0].EVENT)
	if event ~= "BLIT" then
		return
	end

	nullbatch = false

	local m = tostring(mode)

	if m == "RM6_RESOLVE" then
		resolved[r.RB_BLIT_BASE_GMEM] = 1
		return
	end

	if m ~= "RM6_GMEM" then
		printf("I am confused!!!\n")
		return
	end

	-- GMEM mode: either clear or restore:
	local is_restore = (r.RB_BLIT_INFO.CLEAR_MASK == 0)
	local gmem_base = r.RB_BLIT_BASE_GMEM
	if is_restore then
		restored[gmem_base] = 1
	else
		cleared[gmem_base] = 1
	end

	-- push_mrt() because we could have GMEM
	-- passes with only a clear and no draws:
	local sysmem, flag = 0, 0

	-- try to match up the GMEM addr with the MRT/DEPTH state,
	-- to avoid relying on RB_BLIT_DST also getting written:
	for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
		if r.RB_MRT[n].BASE_GMEM == gmem_base then
			sysmem = r.RB_MRT[n].BASE
			flag = r.RB_MRT_FLAG_BUFFER[n].ADDR
			break
		end
	end

	-- No color target matched; maybe it is the depth buffer:
	if sysmem == 0 and gmem_base == r.RB_DEPTH_BUFFER_BASE_GMEM then
		sysmem = r.RB_DEPTH_BUFFER_BASE
		flag = r.RB_DEPTH_FLAG_BUFFER_BASE
	end

	--NOTE this can get confused by previous blits:
	--if sysmem == 0 then
	--	-- fallback:
	--	sysmem = r.RB_BLIT_DST
	--	flag = r.RB_BLIT_FLAG_DST
	--end

	-- Ignore the flag value when the blit destination has FLAGS unset:
	if not r.RB_BLIT_DST_INFO.FLAGS then
		flag = 0
	end

	-- TODO maybe just emit RB_BLIT_DST/HI for clears.. otherwise
	-- we get confused by stale values in registers.. not sure
	-- if this is a problem w/ blob
	local dst_info = r.RB_BLIT_DST_INFO
	local scissor_br = r.RB_BLIT_SCISSOR_BR
	push_mrt(dst_info.COLOR_FORMAT,
		scissor_br.X + 1,
		scissor_br.Y + 1,
		dst_info.SAMPLES,
		sysmem,
		flag,
		gmem_base)
end
2467ec681f3Smrg
-- Hook for a texture descriptor: record it as a source.  The 64-bit base
-- and flag values are each assembled from a LO/HI pair of dwords.
-- `size` is not used.
function A6XX_TEX_CONST(pkt, size)
	local fmt = pkt[0].FMT
	local width = pkt[1].WIDTH
	local height = pkt[1].HEIGHT
	local samples = pkt[0].SAMPLES
	local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32)
	local flag = pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32)

	push_source(fmt, width, height, samples, base, flag)
end
2547ec681f3Smrg
-- Handle a 2D blit (called from draw() for "BLIT_OP_SCALE").  A blit is
-- reported as its own entry: any pending batch with draws is finished
-- first, then the blit's destination (and source, unless it is a solid
-- color fill) is recorded and printed immediately.
function handle_blit()
	-- TODO it would be nice to not hard-code GMEM addr:
	-- TODO I guess the src can be an offset from GMEM addr..
	local GMEM_ADDR = 0x100000

	local src = r.SP_PS_2D_SRC

	-- blob sometimes uses CP_BLIT for resolves, so filter those out:
	if src == GMEM_ADDR and not r.RB_2D_BLIT_CNTL.SOLID_COLOR then
		resolved[0] = 1
		return
	end

	-- Flush a pending batch so the blit is reported separately:
	if draws > 0 then
		finish()
	end
	reset()
	drawmode = "BLIT"

	-- This kinda assumes that we are doing full img blits, which is maybe
	-- Not completely legit.  We could perhaps instead just track pitch and
	-- size/pitch??  Or maybe the size doesn't matter much
	local dst = r.RB_2D_DST
	local dst_br = r.GRAS_2D_DST_BR
	push_mrt(r.RB_2D_DST_INFO.COLOR_FORMAT,
		dst_br.X + 1,
		dst_br.Y + 1,
		"MSAA_ONE",
		dst,
		r.RB_2D_DST_FLAGS,
		-1)

	if r.RB_2D_BLIT_CNTL.SOLID_COLOR then
		-- Solid color fill: a clear of the destination, no source.
		dbg("CLEAR=%x\n", dst)
		cleared[dst] = 1
	else
		push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT,
			r.GRAS_2D_SRC_BR_X.X + 1,
			r.GRAS_2D_SRC_BR_Y.Y + 1,
			"MSAA_ONE",
			src,
			r.SP_PS_2D_SRC_FLAGS)
	end

	blits = blits + 1
	finish()
end
2927ec681f3Smrg
-- Tell whether switching from `curmode` to `newmode` stays within the
-- same render pass.  Only two transitions qualify:
--   RM6_BINNING -> RM6_GMEM
--   RM6_GMEM    -> RM6_RESOLVE
-- Returns true for those, false for everything else.
function valid_transition(curmode, newmode)
	local binning_to_gmem = (curmode == "RM6_BINNING" and newmode == "RM6_GMEM")
	local gmem_to_resolve = (curmode == "RM6_GMEM" and newmode == "RM6_RESOLVE")
	return binning_to_gmem or gmem_to_resolve
end
3027ec681f3Smrg
-- Hook called for each draw-like event.
--   primtype - name of the primitive/event; "BLIT_OP_SCALE" is routed to
--              handle_blit() and "EVENT:BLIT" is ignored here (blit
--              events are handled by CP_EVENT_WRITE())
--   nindx    - not used
--
-- For real draws in RM6_GMEM or RM6_BYPASS mode this records the bound
-- color/depth targets and the depth/stencil state, and counts the draw.
-- A mode change that is not a valid_transition() ends the current batch.
function draw(primtype, nindx)
	dbg("draw: %s (%s)\n", primtype, mode)
	nullbatch = false
	if primtype == "BLIT_OP_SCALE" then
		handle_blit()
		return
	elseif primtype == "EVENT:BLIT" then
		return
	end

	local m = tostring(mode)

	-- detect changes in drawmode which indicate a different
	-- pass..  BINNING->GMEM means same pass, but other
	-- transitions mean different pass:
	if drawmode and m ~= drawmode then
		dbg("%s -> %s transition\n", drawmode, m)
		if not valid_transition(drawmode, m) then
			dbg("invalid transition, new render pass!\n")
			finish()
			-- finish() returns early without resetting when nothing was
			-- counted, so reset explicitly:
			reset()
		end
	end

	-- Binning-pass draws only establish the mode; they are not counted:
	if m == "RM6_BINNING" then
		drawmode = m
		return
	end

	-- Anything other than GMEM/BYPASS is unexpected at this point.  (The
	-- original also tested for primtype "EVENT:BLIT" here, but that case
	-- has already returned above, so the test could never succeed.)
	if m ~= "RM6_GMEM" and m ~= "RM6_BYPASS" then
		printf("unknown MODE %s for primtype %s\n", m, primtype)
		return
	end

	-- Only count the first tile for GMEM mode to avoid counting
	-- each draw for each tile
	if m == "RM6_GMEM" then
		if r.RB_WINDOW_OFFSET.X ~= 0 or r.RB_WINDOW_OFFSET.Y ~= 0 then
			return
		end
	end

	drawmode = m

	-- Per-target component masks RT0..RT7, indexed by MRT number:
	local render_components = {}
	local components = r.RB_RENDER_COMPONENTS
	for n = 0, 7 do
		render_components[n] = components["RT" .. n]
	end

	-- Record every color target that has at least one component enabled:
	for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
		if render_components[n] ~= 0 then
			push_mrt(r.RB_MRT[n].BUF_INFO.COLOR_FORMAT,
				r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
				r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
				r.RB_MSAA_CNTL.SAMPLES,
				r.RB_MRT[n].BASE,
				r.RB_MRT_FLAG_BUFFER[n].ADDR,
				r.RB_MRT[n].BASE_GMEM)
		end
	end

	-- Record the depth buffer, if one is bound:
	local depthbase = r.RB_DEPTH_BUFFER_BASE

	if depthbase ~= 0 then
		push_mrt(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT,
			r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
			r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
			r.RB_MSAA_CNTL.SAMPLES,
			depthbase,
			r.RB_DEPTH_FLAG_BUFFER_BASE,
			r.RB_DEPTH_BUFFER_BASE_GMEM)
	end

	-- The depth/stencil flags are sticky: once any draw in the batch
	-- enables them they stay set until reset().
	if r.RB_DEPTH_CNTL.Z_WRITE_ENABLE then
		depthwrite = true
	end

	if r.RB_DEPTH_CNTL.Z_TEST_ENABLE then
		depthtest = true
	end

	-- clearly 0 != false.. :-/
	-- (0 is truthy in Lua, so the mask needs an explicit comparison)
	if r.RB_STENCILWRMASK.WRMASK ~= 0 then
		stencilwrite = true
	end

	if r.RB_STENCIL_CONTROL.STENCIL_ENABLE then
		stenciltest = true
	end

	-- TODO should also check for stencil buffer for z32+s8 case

	if m == "RM6_GMEM" then
		binw = r.VSC_BIN_SIZE.WIDTH
		binh = r.VSC_BIN_SIZE.HEIGHT
		nbins = r.VSC_BIN_COUNT.NX * r.VSC_BIN_COUNT.NY
	end

	draws = draws + 1
end
4087ec681f3Smrg
409