17ec681f3Smrg#!/usr/bin/env python3 27ec681f3Smrg 37ec681f3Smrgimport re 47ec681f3Smrgimport sys 57ec681f3Smrgimport gzip 67ec681f3Smrgimport io 77ec681f3Smrg 87ec681f3Smrg# Captures per-frame state, including all the renderpasses, and 97ec681f3Smrg# time spent in blits and compute jobs: 107ec681f3Smrgclass Frame: 117ec681f3Smrg def __init__(self): 127ec681f3Smrg self.frame_nr = None 137ec681f3Smrg self.renderpasses = [] 147ec681f3Smrg # Times in ns: 157ec681f3Smrg self.times_sysmem = [] 167ec681f3Smrg self.times_gmem = [] 177ec681f3Smrg self.times_compute = [] 187ec681f3Smrg self.times_blit = [] 197ec681f3Smrg 207ec681f3Smrg def print(self): 217ec681f3Smrg print("FRAME[{}]: {} blits ({:,} ns), {} SYSMEM ({:,} ns), {} GMEM ({:,} ns), {} COMPUTE ({:,} ns)".format( 227ec681f3Smrg self.frame_nr, 237ec681f3Smrg len(self.times_blit), sum(self.times_blit), 247ec681f3Smrg len(self.times_sysmem), sum(self.times_sysmem), 257ec681f3Smrg len(self.times_gmem), sum(self.times_gmem), 267ec681f3Smrg len(self.times_compute), sum(self.times_compute) 277ec681f3Smrg )) 287ec681f3Smrg 297ec681f3Smrg i = 0 307ec681f3Smrg prologue_time = 0 317ec681f3Smrg binning_time = 0 327ec681f3Smrg restore_clear_time = 0 337ec681f3Smrg draw_time = 0 347ec681f3Smrg resolve_time = 0 357ec681f3Smrg elapsed_time = 0 367ec681f3Smrg total_time = sum(self.times_blit) + sum(self.times_sysmem) + sum(self.times_gmem) + sum(self.times_compute) 377ec681f3Smrg 387ec681f3Smrg for renderpass in self.renderpasses: 397ec681f3Smrg renderpass.print(i) 407ec681f3Smrg prologue_time += renderpass.prologue_time 417ec681f3Smrg binning_time += renderpass.binning_time 427ec681f3Smrg restore_clear_time += renderpass.restore_clear_time 437ec681f3Smrg draw_time += renderpass.draw_time 447ec681f3Smrg resolve_time += renderpass.resolve_time 457ec681f3Smrg elapsed_time += renderpass.elapsed_time 467ec681f3Smrg i += 1 477ec681f3Smrg 487ec681f3Smrg print(" TOTAL: prologue: {:,} ns ({}%), binning: {:,} ns ({}%), restore/clear: {:,} ns ({}%), draw: {:,} ns ({}%), resolve: {:,} ns ({}%), blit: {:,} ns ({}%), compute: {:,} ns ({}%), GMEM: {:,} ns ({}%), sysmem: {:,} ns ({}%), total: {:,} ns\n".format( 497ec681f3Smrg prologue_time, 100.0 * prologue_time / total_time, 507ec681f3Smrg binning_time, 100.0 * binning_time / total_time, 517ec681f3Smrg restore_clear_time, 100.0 * restore_clear_time / total_time, 527ec681f3Smrg draw_time, 100.0 * draw_time / total_time, 537ec681f3Smrg resolve_time, 100.0 * resolve_time / total_time, 547ec681f3Smrg sum(self.times_blit), 100.0 * sum(self.times_blit) / total_time, 557ec681f3Smrg sum(self.times_compute), 100.0 * sum(self.times_compute) / total_time, 567ec681f3Smrg sum(self.times_gmem), 100.0 * sum(self.times_gmem) / total_time, 577ec681f3Smrg sum(self.times_sysmem), 100.0 * sum(self.times_sysmem) / total_time, 587ec681f3Smrg total_time 597ec681f3Smrg )) 607ec681f3Smrg 617ec681f3Smrgclass FramebufferState: 627ec681f3Smrg def __init__(self, width, height, layers, samples, nr_cbufs): 637ec681f3Smrg self.width = width 647ec681f3Smrg self.height = height 657ec681f3Smrg self.layers = layers 667ec681f3Smrg self.samples = samples 677ec681f3Smrg self.nr_cbufs = nr_cbufs 687ec681f3Smrg self.surfaces = [] # per MRT + zsbuf 697ec681f3Smrg 707ec681f3Smrg def get_formats(self): 717ec681f3Smrg formats = [] 727ec681f3Smrg for surface in self.surfaces: 737ec681f3Smrg formats.append(surface.format) 747ec681f3Smrg return formats 757ec681f3Smrg 767ec681f3Smrgclass SurfaceState: 777ec681f3Smrg def __init__(self, width, height, samples, format): 787ec681f3Smrg self.width = width 797ec681f3Smrg self.height = height 807ec681f3Smrg self.samples = samples 817ec681f3Smrg self.format = format 827ec681f3Smrg 837ec681f3Smrgclass BinningState: 847ec681f3Smrg def __init__(self, nbins_x, nbins_y, bin_w, bin_h): 857ec681f3Smrg self.nbins_x = nbins_x 867ec681f3Smrg self.nbins_y = nbins_y 877ec681f3Smrg self.bin_w = bin_w 887ec681f3Smrg self.bin_h = bin_h 897ec681f3Smrg 907ec681f3Smrg# Captures per-renderpass state, which can be either a binning or 917ec681f3Smrg# sysmem pass. Blits and compute jobs are not tracked separately 927ec681f3Smrg# but have their time their times accounted for in the Frame state 937ec681f3Smrgclass RenderPass: 947ec681f3Smrg def __init__(self, cleared, gmem_reason, num_draws): 957ec681f3Smrg self.cleared = cleared 967ec681f3Smrg self.gmem_reason = gmem_reason 977ec681f3Smrg self.num_draws = num_draws 987ec681f3Smrg 997ec681f3Smrg # The rest of the parameters aren't known until we see a later trace: 1007ec681f3Smrg self.binning_state = None # None for sysmem passes, else BinningState 1017ec681f3Smrg self.fb = None 1027ec681f3Smrg self.fast_cleared = None 1037ec681f3Smrg 1047ec681f3Smrg self.elapsed_time = 0 1057ec681f3Smrg self.state_restore_time = 0 1067ec681f3Smrg self.prologue_time = 0 1077ec681f3Smrg self.draw_time = 0 1087ec681f3Smrg self.restore_clear_time = 0 1097ec681f3Smrg 1107ec681f3Smrg # Specific to GMEM passes: 1117ec681f3Smrg self.binning_time = 0 1127ec681f3Smrg self.vsc_overflow_test_time = 0 1137ec681f3Smrg self.resolve_time = 0 1147ec681f3Smrg 1157ec681f3Smrg def print_gmem_pass(self, nr): 1167ec681f3Smrg print(" GMEM[{}]: {}x{} ({}x{} tiles), {} draws, prologue: {:,} ns, binning: {:,} ns, restore/clear: {:,} ns, draw: {:,} ns, resolve: {:,} ns, total: {:,} ns, rt/zs: {}".format( 1177ec681f3Smrg nr, self.fb.width, self.fb.height, 1187ec681f3Smrg self.binning_state.nbins_x, self.binning_state.nbins_y, 1197ec681f3Smrg self.num_draws, self.prologue_time, self.binning_time, 1207ec681f3Smrg self.restore_clear_time, self.draw_time, self.resolve_time, 1217ec681f3Smrg self.elapsed_time, 1227ec681f3Smrg ", ".join(self.fb.get_formats()) 1237ec681f3Smrg )) 1247ec681f3Smrg 1257ec681f3Smrg def print_sysmem_pass(self, nr): 1267ec681f3Smrg print(" SYSMEM[{}]: {}x{}, {} draws, prologue: {:,} ns, clear: {:,} ns, draw: {:,} ns, total: {:,} ns, rt/zs: {}".format( 1277ec681f3Smrg nr, self.fb.width, self.fb.height, 1287ec681f3Smrg self.num_draws, self.prologue_time, 1297ec681f3Smrg self.restore_clear_time, self.draw_time, 1307ec681f3Smrg self.elapsed_time, 1317ec681f3Smrg ", ".join(self.fb.get_formats()) 1327ec681f3Smrg )) 1337ec681f3Smrg 1347ec681f3Smrg def print(self, nr): 1357ec681f3Smrg if self.binning_state: 1367ec681f3Smrg self.print_gmem_pass(nr) 1377ec681f3Smrg else: 1387ec681f3Smrg self.print_sysmem_pass(nr) 1397ec681f3Smrg 1407ec681f3Smrgdef main(): 1417ec681f3Smrg filename = sys.argv[1] 1427ec681f3Smrg if filename.endswith(".gz"): 1437ec681f3Smrg file = gzip.open(filename, "r") 1447ec681f3Smrg file = io.TextIOWrapper(file) 1457ec681f3Smrg else: 1467ec681f3Smrg file = open(filename, "r") 1477ec681f3Smrg lines = file.read().split('\n') 1487ec681f3Smrg 1497ec681f3Smrg flush_batch_match = re.compile(r": flush_batch: (\S+): cleared=(\S+), gmem_reason=(\S+), num_draws=(\S+)") 1507ec681f3Smrg framebuffer_match = re.compile(r": framebuffer: (\S+)x(\S+)x(\S+)@(\S+), nr_cbufs: (\S+)") 1517ec681f3Smrg surface_match = re.compile(r": surface: (\S+)x(\S+)@(\S+), fmt=(\S+)") 1527ec681f3Smrg 1537ec681f3Smrg # draw/renderpass passes: 1547ec681f3Smrg gmem_match = re.compile(r": render_gmem: (\S+)x(\S+) bins of (\S+)x(\S+)") 1557ec681f3Smrg sysmem_match = re.compile(r": render_sysmem") 1567ec681f3Smrg state_restore_match = re.compile(r"\+(\S+): end_state_restore") 1577ec681f3Smrg prologue_match = re.compile(r"\+(\S+): end_prologue") 1587ec681f3Smrg binning_ib_match = re.compile(r"\+(\S+): end_binning_ib") 1597ec681f3Smrg vsc_overflow_match = re.compile(r"\+(\S+): end_vsc_overflow_test") 1607ec681f3Smrg draw_ib_match = re.compile(r"\+(\S+): end_draw_ib") 1617ec681f3Smrg resolve_match = re.compile(r"\+(\S+): end_resolve") 1627ec681f3Smrg 1637ec681f3Smrg start_clear_restore_match = re.compile(r"start_clear_restore: fast_cleared: (\S+)") 1647ec681f3Smrg end_clear_restore_match = re.compile(r"\+(\S+): end_clear_restore") 1657ec681f3Smrg 1667ec681f3Smrg # Non-draw passes: 1677ec681f3Smrg compute_match = re.compile(r": start_compute") 1687ec681f3Smrg blit_match = re.compile(r": start_blit") 1697ec681f3Smrg 1707ec681f3Smrg # End of pass/frame markers: 1717ec681f3Smrg elapsed_match = re.compile(r"ELAPSED: (\S+) ns") 1727ec681f3Smrg eof_match = re.compile(r"END OF FRAME (\S+)") 1737ec681f3Smrg 1747ec681f3Smrg frame = Frame() # current frame state 1757ec681f3Smrg renderpass = None # current renderpass state 1767ec681f3Smrg times = None 1777ec681f3Smrg 1787ec681f3Smrg # Helper to set the appropriate times table for the current pass, 1797ec681f3Smrg # which is expected to only happen once for a given render pass 1807ec681f3Smrg def set_times(t): 1817ec681f3Smrg nonlocal times 1827ec681f3Smrg if times is not None: 1837ec681f3Smrg print("expected times to not be set yet") 1847ec681f3Smrg times = t 1857ec681f3Smrg 1867ec681f3Smrg for line in lines: 1877ec681f3Smrg # Note, we only expect the flush_batch trace for !nondraw: 1887ec681f3Smrg match = re.search(flush_batch_match, line) 1897ec681f3Smrg if match is not None: 1907ec681f3Smrg assert(renderpass is None) 1917ec681f3Smrg renderpass = RenderPass(cleared=match.group(2), 1927ec681f3Smrg gmem_reason=match.group(3), 1937ec681f3Smrg num_draws=match.group(4)) 1947ec681f3Smrg frame.renderpasses.append(renderpass) 1957ec681f3Smrg continue 1967ec681f3Smrg 1977ec681f3Smrg match = re.search(framebuffer_match, line) 1987ec681f3Smrg if match is not None: 1997ec681f3Smrg assert(renderpass.fb is None) 2007ec681f3Smrg renderpass.fb = FramebufferState(width=match.group(1), 2017ec681f3Smrg height=match.group(2), 2027ec681f3Smrg layers=match.group(3), 2037ec681f3Smrg samples=match.group(4), 2047ec681f3Smrg nr_cbufs=match.group(5)) 2057ec681f3Smrg continue 2067ec681f3Smrg 2077ec681f3Smrg match = re.search(surface_match, line) 2087ec681f3Smrg if match is not None: 2097ec681f3Smrg surface = SurfaceState(width=match.group(1), 2107ec681f3Smrg height=match.group(2), 2117ec681f3Smrg samples=match.group(3), 2127ec681f3Smrg format=match.group(4)) 2137ec681f3Smrg renderpass.fb.surfaces.append(surface) 2147ec681f3Smrg continue 2157ec681f3Smrg 2167ec681f3Smrg match = re.search(gmem_match, line) 2177ec681f3Smrg if match is not None: 2187ec681f3Smrg assert(renderpass.binning_state is None) 2197ec681f3Smrg renderpass.binning_state = BinningState(nbins_x=match.group(1), 2207ec681f3Smrg nbins_y=match.group(2), 2217ec681f3Smrg bin_w=match.group(3), 2227ec681f3Smrg bin_h=match.group(4)) 2237ec681f3Smrg set_times(frame.times_gmem) 2247ec681f3Smrg continue 2257ec681f3Smrg 2267ec681f3Smrg match = re.search(sysmem_match, line) 2277ec681f3Smrg if match is not None: 2287ec681f3Smrg assert(renderpass.binning_state is None) 2297ec681f3Smrg set_times(frame.times_sysmem) 2307ec681f3Smrg continue 2317ec681f3Smrg 2327ec681f3Smrg match = re.search(state_restore_match, line) 2337ec681f3Smrg if match is not None: 2347ec681f3Smrg renderpass.state_restore_time += int(match.group(1)) 2357ec681f3Smrg continue 2367ec681f3Smrg 2377ec681f3Smrg match = re.search(prologue_match, line) 2387ec681f3Smrg if match is not None: 2397ec681f3Smrg renderpass.prologue_time += int(match.group(1)) 2407ec681f3Smrg continue 2417ec681f3Smrg 2427ec681f3Smrg match = re.search(binning_ib_match, line) 2437ec681f3Smrg if match is not None: 2447ec681f3Smrg assert(renderpass.binning_state is not None) 2457ec681f3Smrg renderpass.binning_time += int(match.group(1)) 2467ec681f3Smrg continue 2477ec681f3Smrg 2487ec681f3Smrg match = re.search(vsc_overflow_match, line) 2497ec681f3Smrg if match is not None: 2507ec681f3Smrg assert(renderpass.binning_state is not None) 2517ec681f3Smrg renderpass.vsc_overflow_test_time += int(match.group(1)) 2527ec681f3Smrg continue 2537ec681f3Smrg 2547ec681f3Smrg match = re.search(draw_ib_match, line) 2557ec681f3Smrg if match is not None: 2567ec681f3Smrg renderpass.draw_time += int(match.group(1)) 2577ec681f3Smrg continue 2587ec681f3Smrg 2597ec681f3Smrg match = re.search(resolve_match, line) 2607ec681f3Smrg if match is not None: 2617ec681f3Smrg assert(renderpass.binning_state is not None) 2627ec681f3Smrg renderpass.resolve_time += int(match.group(1)) 2637ec681f3Smrg continue 2647ec681f3Smrg 2657ec681f3Smrg match = re.search(start_clear_restore_match, line) 2667ec681f3Smrg if match is not None: 2677ec681f3Smrg renderpass.fast_cleared = match.group(1) 2687ec681f3Smrg continue 2697ec681f3Smrg 2707ec681f3Smrg match = re.search(end_clear_restore_match, line) 2717ec681f3Smrg if match is not None: 2727ec681f3Smrg renderpass.restore_clear_time += int(match.group(1)) 2737ec681f3Smrg continue 2747ec681f3Smrg 2757ec681f3Smrg match = re.search(compute_match, line) 2767ec681f3Smrg if match is not None: 2777ec681f3Smrg set_times(frame.times_compute) 2787ec681f3Smrg continue 2797ec681f3Smrg 2807ec681f3Smrg match = re.search(blit_match, line) 2817ec681f3Smrg if match is not None: 2827ec681f3Smrg set_times(frame.times_blit) 2837ec681f3Smrg continue 2847ec681f3Smrg 2857ec681f3Smrg match = re.search(eof_match, line) 2867ec681f3Smrg if match is not None: 2877ec681f3Smrg frame.frame_nr = int(match.group(1)) 2887ec681f3Smrg frame.print() 2897ec681f3Smrg frame = Frame() 2907ec681f3Smrg times = None 2917ec681f3Smrg renderpass = None 2927ec681f3Smrg continue 2937ec681f3Smrg 2947ec681f3Smrg match = re.search(elapsed_match, line) 2957ec681f3Smrg if match is not None: 2967ec681f3Smrg time = int(match.group(1)) 2977ec681f3Smrg #print("ELAPSED: " + str(time) + " ns") 2987ec681f3Smrg if renderpass is not None: 2997ec681f3Smrg renderpass.elapsed_time = time 3007ec681f3Smrg times.append(time) 3017ec681f3Smrg times = None 3027ec681f3Smrg renderpass = None 3037ec681f3Smrg continue 3047ec681f3Smrg 3057ec681f3Smrg 3067ec681f3Smrgif __name__ == "__main__": 3077ec681f3Smrg main() 3087ec681f3Smrg 309