-- Parse cmdstream dump and analyse blits and batches

--local posix = require "posix"

-- printf-style helper: formats the arguments with string.format and hands
-- the resulting text to io.write, returning whatever io.write returns.
function printf(fmt, ...)
  local text = string.format(fmt, ...)
  return io.write(text)
end

-- Debug trace hook.  Intentionally a no-op; uncomment the body to get
-- verbose tracing of every handler.
function dbg(fmt, ...)
  --printf(fmt, ...)
end

printf("Analyzing Data...\n")

-- Register database handle; rnn is not defined in this file (presumably
-- supplied by the host tool that runs the script).
local r = rnn.init("a630")

-- Each submit, all draws will target the same N MRTs:
local mrts = {}
local allmrts = {}  -- includes historical render targets

-- Record a render target, keyed by its base address, both in the current
-- set (mrts) and in the historical set (allmrts).
function push_mrt(fmt, w, h, samples, base, flag, gmem)
  dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base)

  local target = {
    format  = fmt,
    w       = w,
    h       = h,
    samples = samples,
    base    = base,
    flag    = flag,
    gmem    = gmem,
  }

  mrts[base] = target
  allmrts[base] = target
end

-- And each draw will read from M sources/textures:
local sources = {}

-- Record a source (texture or blit source), keyed by its base address.
function push_source(fmt, w, h, samples, base, flag)
  dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base)

  sources[base] = {
    format  = fmt,
    w       = w,
    h       = h,
    samples = samples,
    base    = base,
    flag    = flag,
  }
end

-- Per-pass tracking state.  Everything except the bin geometry is
-- (re)initialised by reset().
local binw, binh, nbins
local blits, draws = 0, 0
local drawmode
local cleared, restored, resolved
local nullbatch
local depthtest
local depthwrite
local stenciltest
local stencilwrite

-- Clear all per-pass tracking state: current render targets and sources,
-- draw/blit counters, the cleared/restored/resolved sets, the depth and
-- stencil flags, and the current drawmode.  allmrts (historical render
-- targets), the bin geometry and nullbatch are left untouched.
function reset()
  dbg("reset\n")
  mrts = {}
  sources = {}
  draws = 0
  blits = 0
  cleared = {}
  restored = {}
  resolved = {}
  depthtest = false
  depthwrite = false
  stenciltest = false
  stencilwrite = false
  -- was "Nil": a read of an undefined global that only happened to be nil
  drawmode = nil
end

-- Begin tracking a new submit: reset state and assume the submit is a
-- null batch until a draw or BLIT event proves otherwise.
function start_submit()
  dbg("start_submit\n")
  reset()
  nullbatch = true
end

-- Print a summary of the pass accumulated so far (batch or blit header,
-- render targets, interesting sources) and then reset the tracking state.
-- When nothing was drawn or blitted, only the optional "NULL BATCH!" line
-- is printed and the function returns early WITHOUT calling reset().
function finish()
  dbg("finish\n")

  printf("\n")

  -- TODO we get false-positives for 'NULL BATCH!' because we don't have
  -- a really good way to differentiate between submits and cmds.  Ie.
  -- with growable cmdstream, and a large # of tiles, IB1 can get split
  -- across multiple buffers.  Since we ignore GMEM draws for window-
  -- offset != 0,0, the later cmds will appear as null batches
  if draws == 0 and blits == 0 then
    if nullbatch then
      printf("NULL BATCH!\n")
    end
    return
  end

  if draws > 0 then
    printf("Batch:\n")
    printf("-------\n")
    printf(" # of draws: %u\n", draws)
    printf(" mode: %s\n", drawmode)
    if drawmode == "RM6_GMEM" then
      printf(" bin size: %ux%u (%u bins)\n", binw, binh, nbins)
    end
    if depthtest or depthwrite then
      printf(" ")
      if depthtest then
        printf("DEPTHTEST ")
      end
      if depthwrite then
        printf("DEPTHWRITE")
      end
      printf("\n")
    end
    if stenciltest or stencilwrite then
      printf(" ")
      if stenciltest then
        printf("STENCILTEST ")
      end
      if stencilwrite then
        printf("STENCILWRITE")
      end
      printf("\n")
    end
  else
    printf("Blit:\n")
    printf("-----\n")
  end

  for base, mrt in pairs(mrts) do
    printf(" MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples)
    if drawmode == "RM6_GMEM" then
      -- in GMEM mode the event sets are keyed by GMEM base address
      if cleared[mrt.gmem] then
        printf("\tCLEARED")
      end
      if restored[mrt.gmem] then
        printf("\tRESTORED")
      end
      if resolved[mrt.gmem] then
        printf("\tRESOLVED")
      end
    else
      -- otherwise clears are keyed by the target's base address
      if cleared[mrt.base] then
        printf("\tCLEARED")
      end
    end
    printf("\n")
  end

  -- Kept local: the original defined this as a global from inside
  -- finish(), re-creating the global on every call.
  local function print_source(source)
    printf(" SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples)
  end

  for base, source in pairs(sources) do
    -- only show sources that have been previously rendered to, other
    -- textures are less interesting.  Possibly this should be an
    -- option somehow.  Passes with fewer than 10 draws (which includes
    -- pure blits, draws == 0) show every source.
    if draws < 10
        or allmrts[base]
        or (source.flag and allmrts[source.flag]) then
      print_source(source)
    end
  end
  reset()
end

-- End of a submit: flush whatever pass is still pending.
function end_submit()
  dbg("end_submit\n")
  finish()
end

-- Track the current mode:
local mode = ""

-- Packet handler: remember the marker carried in the first dword as the
-- current mode.  (Presumably looked up by packet name by the host tool.)
function CP_SET_MARKER(pkt, size)
  mode = pkt[0].MARKER
  dbg("mode: %s\n", mode)
end

-- Packet handler: only BLIT events are of interest.  In RM6_GMEM mode the
-- event is recorded as a clear or restore of the GMEM buffer and the
-- matching render target is pushed; in RM6_RESOLVE mode it is recorded as
-- a resolve.  Any other mode is reported as unexpected.
function CP_EVENT_WRITE(pkt, size)
  if tostring(pkt[0].EVENT) ~= "BLIT" then
    return
  end
  nullbatch = false
  local m = tostring(mode)
  if m == "RM6_GMEM" then
    -- either clear or restore:
    if r.RB_BLIT_INFO.CLEAR_MASK == 0 then
      restored[r.RB_BLIT_BASE_GMEM] = 1
    else
      cleared[r.RB_BLIT_BASE_GMEM] = 1
    end
    -- push_mrt() because we could have GMEM
    -- passes with only a clear and no draws:
    local flag = 0
    local sysmem = 0
    -- try to match up the GMEM addr with the MRT/DEPTH state,
    -- to avoid relying on RB_BLIT_DST also getting written:
    for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
      if r.RB_MRT[n].BASE_GMEM == r.RB_BLIT_BASE_GMEM then
        sysmem = r.RB_MRT[n].BASE
        flag = r.RB_MRT_FLAG_BUFFER[n].ADDR
        break
      end
    end
    if sysmem == 0 and r.RB_BLIT_BASE_GMEM == r.RB_DEPTH_BUFFER_BASE_GMEM then
      sysmem = r.RB_DEPTH_BUFFER_BASE
      flag = r.RB_DEPTH_FLAG_BUFFER_BASE
    end
    --NOTE this can get confused by previous blits:
    --if sysmem == 0 then
    --  -- fallback:
    --  sysmem = r.RB_BLIT_DST
    --  flag = r.RB_BLIT_FLAG_DST
    --end
    if not r.RB_BLIT_DST_INFO.FLAGS then
      flag = 0
    end
    -- TODO maybe just emit RB_BLIT_DST/HI for clears.. otherwise
    -- we get confused by stale values in registers.. not sure
    -- if this is a problem w/ blob
    push_mrt(r.RB_BLIT_DST_INFO.COLOR_FORMAT,
      r.RB_BLIT_SCISSOR_BR.X + 1,
      r.RB_BLIT_SCISSOR_BR.Y + 1,
      r.RB_BLIT_DST_INFO.SAMPLES,
      sysmem,
      flag,
      r.RB_BLIT_BASE_GMEM)
  elseif m == "RM6_RESOLVE" then
    resolved[r.RB_BLIT_BASE_GMEM] = 1
  else
    printf("I am confused!!!\n")
  end
end

-- Texture-state handler: record the described texture as a source.  The
-- base and flag addresses are assembled from their LO/HI halves.
function A6XX_TEX_CONST(pkt, size)
  push_source(pkt[0].FMT,
    pkt[1].WIDTH, pkt[1].HEIGHT,
    pkt[0].SAMPLES,
    pkt[4].BASE_LO | (pkt[5].BASE_HI << 32),
    pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32))
end

-- Account for a 2D blit.  A blit that looks like a resolve is only noted
-- in resolved[]; anything else flushes the pending draw pass (if any) and
-- is reported immediately as its own "Blit" entry.
function handle_blit()
  -- blob sometimes uses CP_BLIT for resolves, so filter those out:
  -- TODO it would be nice to not hard-code GMEM addr:
  -- TODO I guess the src can be an offset from GMEM addr..
  if r.SP_PS_2D_SRC == 0x100000 and not r.RB_2D_BLIT_CNTL.SOLID_COLOR then
    resolved[0] = 1
    return
  end
  if draws > 0 then
    finish()
  end
  reset()
  drawmode = "BLIT"
  -- This kinda assumes that we are doing full img blits, which is maybe
  -- not completely legit.  We could perhaps instead just track pitch and
  -- size/pitch??  Or maybe the size doesn't matter much
  push_mrt(r.RB_2D_DST_INFO.COLOR_FORMAT,
    r.GRAS_2D_DST_BR.X + 1,
    r.GRAS_2D_DST_BR.Y + 1,
    "MSAA_ONE",
    r.RB_2D_DST,
    r.RB_2D_DST_FLAGS,
    -1)
  if r.RB_2D_BLIT_CNTL.SOLID_COLOR then
    -- solid-color blit: treated as a clear of the destination
    dbg("CLEAR=%x\n", r.RB_2D_DST)
    cleared[r.RB_2D_DST] = 1
  else
    push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT,
      r.GRAS_2D_SRC_BR_X.X + 1,
      r.GRAS_2D_SRC_BR_Y.Y + 1,
      "MSAA_ONE",
      r.SP_PS_2D_SRC,
      r.SP_PS_2D_SRC_FLAGS)
  end
  blits = blits + 1
  finish()
end

-- Returns true when going from curmode to newmode stays within the same
-- render pass (BINNING -> GMEM, GMEM -> RESOLVE); false otherwise.
function valid_transition(curmode, newmode)
  if curmode == "RM6_BINNING" and newmode == "RM6_GMEM" then
    return true
  end
  if curmode == "RM6_GMEM" and newmode == "RM6_RESOLVE" then
    return true
  end
  return false
end

-- Account for one draw.
--   primtype: primitive/event name; "BLIT_OP_SCALE" is routed to
--             handle_blit() and "EVENT:BLIT" is ignored here.
--   nindx:    unused by this script.
-- For real draws in RM6_GMEM or RM6_BYPASS mode this records the active
-- color and depth targets plus the depth/stencil state, and bumps the
-- draw counter.
function draw(primtype, nindx)
  dbg("draw: %s (%s)\n", primtype, mode)
  nullbatch = false
  if primtype == "BLIT_OP_SCALE" then
    handle_blit()
    return
  elseif primtype == "EVENT:BLIT" then
    return
  end

  local m = tostring(mode)

  -- detect changes in drawmode which indicate a different
  -- pass..  BINNING->GMEM means same pass, but other
  -- transitions mean different pass:
  if drawmode and m ~= drawmode then
    dbg("%s -> %s transition\n", drawmode, m)
    if not valid_transition(drawmode, m) then
      dbg("invalid transition, new render pass!\n")
      finish()
      reset()
    end
  end

  -- Binning-pass draws only update the tracked mode; they are not counted.
  if m == "RM6_BINNING" then
    drawmode = m
    return
  end

  -- Every "EVENT:BLIT" has already returned above, so the original
  -- RM6_RESOLVE/"EVENT:BLIT" special case here could never run and has
  -- been dropped.
  if m ~= "RM6_GMEM" and m ~= "RM6_BYPASS" then
    printf("unknown MODE %s for primtype %s\n", m, primtype)
    return
  end

  -- Only count the first tile for GMEM mode to avoid counting
  -- each draw for each tile
  if m == "RM6_GMEM" then
    if r.RB_WINDOW_OFFSET.X ~= 0 or r.RB_WINDOW_OFFSET.Y ~= 0 then
      return
    end
  end

  drawmode = m

  -- Per-render-target component masks, indexed 0..7 like RB_MRT[n].
  local render_components = {
    [0] = r.RB_RENDER_COMPONENTS.RT0,
    [1] = r.RB_RENDER_COMPONENTS.RT1,
    [2] = r.RB_RENDER_COMPONENTS.RT2,
    [3] = r.RB_RENDER_COMPONENTS.RT3,
    [4] = r.RB_RENDER_COMPONENTS.RT4,
    [5] = r.RB_RENDER_COMPONENTS.RT5,
    [6] = r.RB_RENDER_COMPONENTS.RT6,
    [7] = r.RB_RENDER_COMPONENTS.RT7,
  }
  for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
    -- skip targets whose component mask is zero
    if render_components[n] ~= 0 then
      push_mrt(r.RB_MRT[n].BUF_INFO.COLOR_FORMAT,
        r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
        r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
        r.RB_MSAA_CNTL.SAMPLES,
        r.RB_MRT[n].BASE,
        r.RB_MRT_FLAG_BUFFER[n].ADDR,
        r.RB_MRT[n].BASE_GMEM)
    end
  end

  local depthbase = r.RB_DEPTH_BUFFER_BASE

  if depthbase ~= 0 then
    push_mrt(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT,
      r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
      r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
      r.RB_MSAA_CNTL.SAMPLES,
      depthbase,
      r.RB_DEPTH_FLAG_BUFFER_BASE,
      r.RB_DEPTH_BUFFER_BASE_GMEM)
  end

  if r.RB_DEPTH_CNTL.Z_WRITE_ENABLE then
    depthwrite = true
  end

  if r.RB_DEPTH_CNTL.Z_TEST_ENABLE then
    depthtest = true
  end

  -- clearly 0 != false.. :-/
  if r.RB_STENCILWRMASK.WRMASK ~= 0 then
    stencilwrite = true
  end

  if r.RB_STENCIL_CONTROL.STENCIL_ENABLE then
    stenciltest = true
  end

  -- TODO should also check for stencil buffer for z32+s8 case

  if m == "RM6_GMEM" then
    binw = r.VSC_BIN_SIZE.WIDTH
    binh = r.VSC_BIN_SIZE.HEIGHT
    nbins = r.VSC_BIN_COUNT.NX * r.VSC_BIN_COUNT.NY
  end

  draws = draws + 1
end