17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg * Authors: 247ec681f3Smrg * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 257ec681f3Smrg */ 267ec681f3Smrg 277ec681f3Smrg#include "util/u_math.h" 287ec681f3Smrg#include "util/macros.h" 297ec681f3Smrg#include "pan_device.h" 307ec681f3Smrg#include "pan_encoder.h" 317ec681f3Smrg#include "panfrost-quirks.h" 327ec681f3Smrg 337ec681f3Smrg/* Mali GPUs are tiled-mode renderers, rather than immediate-mode. 347ec681f3Smrg * Conceptually, the screen is divided into 16x16 tiles. Vertex shaders run. 
357ec681f3Smrg * Then, a fixed-function hardware block (the tiler) consumes the gl_Position 367ec681f3Smrg * results. For each triangle specified, it marks each containing tile as 377ec681f3Smrg * containing that triangle. This set of "triangles per tile" form the "polygon 387ec681f3Smrg * list". Finally, the rasterization unit consumes the polygon list to invoke 397ec681f3Smrg * the fragment shader. 407ec681f3Smrg * 417ec681f3Smrg * In practice, it's a bit more complicated than this. On Midgard chips with an 427ec681f3Smrg * "advanced tiling unit" (all except T720/T820/T830), 16x16 is the logical 437ec681f3Smrg * tile size, but Midgard features "hierarchical tiling", where power-of-two 447ec681f3Smrg * multiples of the base tile size can be used: hierarchy level 0 (16x16), 457ec681f3Smrg * level 1 (32x32), level 2 (64x64), per public information about Midgard's 467ec681f3Smrg * tiling. In fact, tiling goes up to 4096x4096 (!), although in practice 477ec681f3Smrg * 128x128 is the largest usually used (though higher modes are enabled). The 487ec681f3Smrg * idea behind hierarchical tiling is to use low tiling levels for small 497ec681f3Smrg * triangles and high levels for large triangles, to minimize memory bandwidth 507ec681f3Smrg * and repeated fragment shader invocations (the former issue inherent to 517ec681f3Smrg * immediate-mode rendering and the latter common in traditional tilers). 527ec681f3Smrg * 537ec681f3Smrg * The tiler itself works by reading varyings in and writing a polygon list 547ec681f3Smrg * out. Unfortunately (for us), both of these buffers are managed in main 557ec681f3Smrg * memory; although they ideally will be cached, it is the drivers' 567ec681f3Smrg * responsibility to allocate these buffers. Varying buffer allocation is 577ec681f3Smrg * handled elsewhere, as it is not tiler specific; the real issue is allocating 587ec681f3Smrg * the polygon list. 
597ec681f3Smrg * 607ec681f3Smrg * This is hard, because from the driver's perspective, we have no information 617ec681f3Smrg * about what geometry will actually look like on screen; that information is 627ec681f3Smrg * only gained from running the vertex shader. (Theoretically, we could run the 637ec681f3Smrg * vertex shaders in software as a prepass, or in hardware with transform 647ec681f3Smrg * feedback as a prepass, but either idea is ludicrous on so many levels). 657ec681f3Smrg * 667ec681f3Smrg * Instead, Mali uses a bit of a hybrid approach, splitting the polygon list 677ec681f3Smrg * into three distinct pieces. First, the driver statically determines which 687ec681f3Smrg * tile hierarchy levels to use (more on that later). At this point, we know the 697ec681f3Smrg * framebuffer dimensions and all the possible tilings of the framebuffer, so 707ec681f3Smrg * we know exactly how many tiles exist across all hierarchy levels. The first 717ec681f3Smrg * piece of the polygon list is the header, which is exactly 8 bytes per tile, 727ec681f3Smrg * plus padding and a small 64-byte prologue. (If that doesn't remind you of 737ec681f3Smrg * AFBC, it should. See pan_afbc.c for some fun parallels). The next part is 747ec681f3Smrg * the polygon list body, which seems to contain 512 bytes per tile, again 757ec681f3Smrg * across every level of the hierarchy. These two parts form the polygon list 767ec681f3Smrg * buffer. This buffer has a statically determinable size, approximately equal 777ec681f3Smrg * to the # of tiles across all hierarchy levels * (8 bytes + 512 bytes), plus 787ec681f3Smrg * alignment / minimum restrictions / etc. 797ec681f3Smrg * 807ec681f3Smrg * The third piece is the easy one (for us): the tiler heap. In essence, the 817ec681f3Smrg * tiler heap is a gigantic slab that's as big as could possibly be necessary 827ec681f3Smrg * in the worst case imaginable. Just... a gigantic allocation that we give a 837ec681f3Smrg * start and end pointer to. 
What's the catch? The tiler heap is lazily 847ec681f3Smrg * allocated; that is, a huge amount of memory is _reserved_, but only a tiny 857ec681f3Smrg * bit is actually allocated upfront. The GPU just keeps using the 867ec681f3Smrg * unallocated-but-reserved portions as it goes along, generating page faults 877ec681f3Smrg * if it goes beyond the allocation, and then the kernel is instructed to 887ec681f3Smrg * expand the allocation on page fault (known in the vendor kernel as growable 897ec681f3Smrg * memory). This is quite a bit of bookkeeping of its own, but that task is 907ec681f3Smrg * pushed to kernel space and we can mostly ignore it here, just remembering to 917ec681f3Smrg * set the GROWABLE flag so the kernel actually uses this path rather than 927ec681f3Smrg * allocating a gigantic amount up front and burning a hole in RAM. 937ec681f3Smrg * 947ec681f3Smrg * As far as determining which hierarchy levels to use, the simple answer is 957ec681f3Smrg * that right now, we don't. In the tiler configuration fields (consistent from 967ec681f3Smrg * the earliest Midgard's SFBD through the latest Bifrost traces we have), 977ec681f3Smrg * there is a hierarchy_mask field, controlling which levels (tile sizes) are 987ec681f3Smrg * enabled. Ideally, the hierarchical tiling dream -- mapping big polygons to 997ec681f3Smrg * big tiles and small polygons to small tiles -- would be realized here as 1007ec681f3Smrg * well. As long as there are polygons at all needing tiling, we always have to 1017ec681f3Smrg * have big tiles available, in case there are big polygons. But we don't 1027ec681f3Smrg * necessarily need small tiles available. 
 * Ideally, when there are small
 * polygons, small tiles are enabled (to avoid waste from putting small
 * triangles in the big tiles); when there are not, small tiles are disabled to
 * avoid enabling more levels than necessary, which potentially costs in memory
 * bandwidth / power / tiler performance.
 *
 * Of course, the driver has to figure this out statically. When tile
 * hierarchies are actually established, this is done by the tiler in
 * fixed-function hardware, after the vertex shaders have run and there is
 * sufficient information to figure out the size of triangles. The driver has
 * no such luxury, again barring insane hacks like additionally running the
 * vertex shaders in software or in hardware via transform feedback. Thus, for
 * the driver, we need a heuristic approach.
 *
 * There are lots of heuristics to guess triangle size statically you could
 * imagine, but one approach shines as particularly simple-stupid: assume all
 * on-screen triangles are equal size and spread equidistantly throughout the
 * screen. Let's be clear, this is NOT A VALID ASSUMPTION.
 * But if we roll with it, then we see:
 *
 *      Triangle Area   = (Screen Area / # of triangles)
 *                      = (Width * Height) / (# of triangles)
 *
 * Or if you prefer, we can also make a third CRAZY assumption that we only draw
 * right triangles with edges parallel/perpendicular to the sides of the screen
 * with no overdraw, forming a triangle grid across the screen:
 *
 *  |--w--|
 *   _____   |
 *  | /| /|  |
 *  |/_|/_|  h
 *  | /| /|  |
 *  |/_|/_|  |
 *
 * Then you can use some middle school geometry and algebra to work out the
 * triangle dimensions. I started working on this, but realised I didn't need
 * to in order to make my point, but couldn't bear to erase that ASCII art.
 * Anyway.
 *
 * POINT IS, by considering the ratio of screen area and triangle count, we can
 * estimate the triangle size. For a small size, use small bins; for a large
 * size, use large bins. Intuitively, this metric makes sense: when there are
 * few triangles on a large screen, you're probably compositing a UI and
 * therefore the triangles are large; when there are a lot of triangles on a
 * small screen, you're probably rendering a 3D mesh and therefore the
 * triangles are tiny. (Or better said -- there will be tiny triangles, even if
 * there are also large triangles. There have to be unless you expect crazy
 * overdraw. Generally, it's better to allow more small bin sizes than
 * necessary than not allow enough.)
 *
 * From this heuristic (or whatever), we determine the minimum allowable tile
 * size, and we use that to decide the hierarchy masking, selecting from the
 * minimum "ideal" tile size to the maximum tile size (2048x2048 in practice).
 *
 * Once we have that mask and the framebuffer dimensions, we can compute the
 * size of the statically-sized polygon list structures, allocate them, and go!
 *
 * -----
 *
 * On T720, T820, and T830, there is no support for hierarchical tiling.
 * Instead, the hardware allows the driver to select the tile size dynamically
 * on a per-framebuffer basis, including allowing rectangular/non-square tiles.
 * Rules for tile size selection are as follows:
 *
 *  - Dimensions must be powers-of-two.
 *  - The smallest tile is 16x16.
 *  - The tile width/height is at most the framebuffer w/h (clamp up to 16 pix)
 *  - There must be no more than 64 tiles in either dimension.
 *
 * Within these constraints, the driver is free to pick a tile size according
 * to some heuristic, similar to units with an advanced tiling unit.
 *
 * To pick a size without any heuristics, we may satisfy the constraints by
 * defaulting to 16x16 (a power-of-two). This fits the minimum. For the size
 * constraint, consider:
 *
 *      # of tiles < 64
 *      ceil (fb / tile) < 64
 *      (fb / tile) <= (64 - 1)
 *      tile >= fb / (64 - 1)
 *
 * With exact division, next_power_of_two(fb / (64 - 1)) >= fb / (64 - 1), so
 * it is a power-of-two tile size satisfying this bound.
 *
 * Hence we clamp up to align_pot(fb / (64 - 1)).
 *
 * Extending to use a selection heuristic is left for future work.
 *
 * Once the tile size (w, h) is chosen, we compute the hierarchy "mask":
 *
 *      hierarchy_mask = (log2(h / 16) << 6) | log2(w / 16)
 *
 * Of course with no hierarchical tiling, this is not a mask; it's just a field
 * specifying the tile size. But I digress.
 *
 * We also compute the polygon list sizes (with framebuffer size W, H) as:
 *
 *      full_size = 0x200 + 0x200 * ceil(W / w) * ceil(H / h)
 *      offset = 8 * ceil(W / w) * ceil(H / h)
 *
 * It further appears necessary to round down offset to the nearest 0x200.
 * Possibly we would also round down full_size to the nearest 0x200 but
 * full_size/0x200 = (1 + ceil(W / w) * ceil(H / h)) is an integer so there's
 * nothing to do.
*/

/* Hierarchical tiling spans from 16x16 to 4096x4096 tiles. Note that the
 * level loop in panfrost_hierarchy_size() iterates shifts in the half-open
 * range [MIN_TILE_SHIFT, MAX_TILE_SHIFT), so the largest tile size it
 * actually visits is 2048x2048. */

#define MIN_TILE_SIZE 16
#define MAX_TILE_SIZE 4096

/* Constants as shifts for easier power-of-two iteration. These expand to a
 * util_logbase2() call at every use site. */

#define MIN_TILE_SHIFT util_logbase2(MIN_TILE_SIZE)
#define MAX_TILE_SHIFT util_logbase2(MAX_TILE_SIZE)

/* The hierarchy has a 64-byte prologue */
#define PROLOGUE_SIZE 0x40

/* For each tile (across all hierarchy levels), there is 8 bytes of header */
#define HEADER_BYTES_PER_TILE 0x8

/* Likewise, each tile per level has 512 bytes of body */
#define FULL_BYTES_PER_TILE 0x200

/* If the width-x-height framebuffer is divided into tile_size-x-tile_size
 * tiles, how many tiles are there? Rounding up in each direction. For the
 * special case of tile_size=16, this aligns with the usual Midgard count.
 * tile_size must be a power-of-two.
Not really repeat code from AFBC/checksum, 2277ec681f3Smrg * because those care about the stride (not just the overall count) and only at 2287ec681f3Smrg * a a fixed-tile size (not any of a number of power-of-twos) */ 2297ec681f3Smrg 2307ec681f3Smrgstatic unsigned 2317ec681f3Smrgpan_tile_count(unsigned width, unsigned height, unsigned tile_width, unsigned tile_height) 2327ec681f3Smrg{ 2337ec681f3Smrg unsigned aligned_width = ALIGN_POT(width, tile_width); 2347ec681f3Smrg unsigned aligned_height = ALIGN_POT(height, tile_height); 2357ec681f3Smrg 2367ec681f3Smrg unsigned tile_count_x = aligned_width / tile_width; 2377ec681f3Smrg unsigned tile_count_y = aligned_height / tile_height; 2387ec681f3Smrg 2397ec681f3Smrg return tile_count_x * tile_count_y; 2407ec681f3Smrg} 2417ec681f3Smrg 2427ec681f3Smrg/* For `masked_count` of the smallest tile sizes masked out, computes how the 2437ec681f3Smrg * size of the polygon list header. We iterate the tile sizes (16x16 through 2447ec681f3Smrg * 2048x2048). For each tile size, we figure out how many tiles there are at 2457ec681f3Smrg * this hierarchy level and therefore many bytes this level is, leaving us with 2467ec681f3Smrg * a byte count for each level. We then just sum up the byte counts across the 2477ec681f3Smrg * levels to find a byte count for all levels. 
*/ 2487ec681f3Smrg 2497ec681f3Smrgstatic unsigned 2507ec681f3Smrgpanfrost_hierarchy_size( 2517ec681f3Smrg unsigned width, 2527ec681f3Smrg unsigned height, 2537ec681f3Smrg unsigned mask, 2547ec681f3Smrg unsigned bytes_per_tile) 2557ec681f3Smrg{ 2567ec681f3Smrg unsigned size = PROLOGUE_SIZE; 2577ec681f3Smrg 2587ec681f3Smrg /* Iterate hierarchy levels */ 2597ec681f3Smrg 2607ec681f3Smrg for (unsigned b = 0; b < (MAX_TILE_SHIFT - MIN_TILE_SHIFT); ++b) { 2617ec681f3Smrg /* Check if this level is enabled */ 2627ec681f3Smrg if (!(mask & (1 << b))) 2637ec681f3Smrg continue; 2647ec681f3Smrg 2657ec681f3Smrg /* Shift from a level to a tile size */ 2667ec681f3Smrg unsigned tile_size = (1 << b) * MIN_TILE_SIZE; 2677ec681f3Smrg 2687ec681f3Smrg unsigned tile_count = pan_tile_count(width, height, tile_size, tile_size); 2697ec681f3Smrg unsigned level_count = bytes_per_tile * tile_count; 2707ec681f3Smrg 2717ec681f3Smrg size += level_count; 2727ec681f3Smrg } 2737ec681f3Smrg 2747ec681f3Smrg /* This size will be used as an offset, so ensure it's aligned */ 2757ec681f3Smrg return ALIGN_POT(size, 0x200); 2767ec681f3Smrg} 2777ec681f3Smrg 2787ec681f3Smrg/* Implement the formula: 2797ec681f3Smrg * 2807ec681f3Smrg * 0x200 + bytes_per_tile * ceil(W / w) * ceil(H / h) 2817ec681f3Smrg * 2827ec681f3Smrg * rounding down the answer to the nearest 0x200. This is used to compute both 2837ec681f3Smrg * header and body sizes for GPUs without hierarchical tiling. Essentially, 2847ec681f3Smrg * computing a single hierarchy level, since there isn't any hierarchy! 
2857ec681f3Smrg */ 2867ec681f3Smrg 2877ec681f3Smrgstatic unsigned 2887ec681f3Smrgpanfrost_flat_size(unsigned width, unsigned height, unsigned dim, unsigned bytes_per_tile) 2897ec681f3Smrg{ 2907ec681f3Smrg /* First, extract the tile dimensions */ 2917ec681f3Smrg 2927ec681f3Smrg unsigned tw = (1 << (dim & 0b111)) * 8; 2937ec681f3Smrg unsigned th = (1 << ((dim & (0b111 << 6)) >> 6)) * 8; 2947ec681f3Smrg 2957ec681f3Smrg /* tile_count is ceil(W/w) * ceil(H/h) */ 2967ec681f3Smrg unsigned raw = pan_tile_count(width, height, tw, th) * bytes_per_tile; 2977ec681f3Smrg 2987ec681f3Smrg /* Round down and add offset */ 2997ec681f3Smrg return 0x200 + ((raw / 0x200) * 0x200); 3007ec681f3Smrg} 3017ec681f3Smrg 3027ec681f3Smrg/* Given a hierarchy mask and a framebuffer size, compute the header size */ 3037ec681f3Smrg 3047ec681f3Smrgunsigned 3057ec681f3Smrgpanfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy) 3067ec681f3Smrg{ 3077ec681f3Smrg if (hierarchy) 3087ec681f3Smrg return panfrost_hierarchy_size(width, height, mask, HEADER_BYTES_PER_TILE); 3097ec681f3Smrg else 3107ec681f3Smrg return panfrost_flat_size(width, height, mask, HEADER_BYTES_PER_TILE); 3117ec681f3Smrg} 3127ec681f3Smrg 3137ec681f3Smrg/* The combined header/body is sized similarly (but it is significantly 3147ec681f3Smrg * larger), except that it can be empty when the tiler disabled, rather than 3157ec681f3Smrg * getting clamped to a minimum size. 
3167ec681f3Smrg */ 3177ec681f3Smrg 3187ec681f3Smrgunsigned 3197ec681f3Smrgpanfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy) 3207ec681f3Smrg{ 3217ec681f3Smrg if (hierarchy) 3227ec681f3Smrg return panfrost_hierarchy_size(width, height, mask, FULL_BYTES_PER_TILE); 3237ec681f3Smrg else 3247ec681f3Smrg return panfrost_flat_size(width, height, mask, FULL_BYTES_PER_TILE); 3257ec681f3Smrg} 3267ec681f3Smrg 3277ec681f3Smrg/* On GPUs without hierarchical tiling, we choose a tile size directly and 3287ec681f3Smrg * stuff it into the field otherwise known as hierarchy mask (not a mask). */ 3297ec681f3Smrg 3307ec681f3Smrgstatic unsigned 3317ec681f3Smrgpanfrost_choose_tile_size( 3327ec681f3Smrg unsigned width, unsigned height, unsigned vertex_count) 3337ec681f3Smrg{ 3347ec681f3Smrg /* Figure out the ideal tile size. Eventually a heuristic should be 3357ec681f3Smrg * used for this */ 3367ec681f3Smrg 3377ec681f3Smrg unsigned best_w = 16; 3387ec681f3Smrg unsigned best_h = 16; 3397ec681f3Smrg 3407ec681f3Smrg /* Clamp so there are less than 64 tiles in each direction */ 3417ec681f3Smrg 3427ec681f3Smrg best_w = MAX2(best_w, util_next_power_of_two(width / 63)); 3437ec681f3Smrg best_h = MAX2(best_h, util_next_power_of_two(height / 63)); 3447ec681f3Smrg 3457ec681f3Smrg /* We have our ideal tile size, so encode */ 3467ec681f3Smrg 3477ec681f3Smrg unsigned exp_w = util_logbase2(best_w / 16); 3487ec681f3Smrg unsigned exp_h = util_logbase2(best_h / 16); 3497ec681f3Smrg 3507ec681f3Smrg return exp_w | (exp_h << 6); 3517ec681f3Smrg} 3527ec681f3Smrg 3537ec681f3Smrg/* In the future, a heuristic to choose a tiler hierarchy mask would go here. 3547ec681f3Smrg * At the moment, we just default to 0xFF, which enables all possible hierarchy 3557ec681f3Smrg * levels. 
Overall this yields good performance but presumably incurs a cost in 3567ec681f3Smrg * memory bandwidth / power consumption / etc, at least on smaller scenes that 3577ec681f3Smrg * don't really need all the smaller levels enabled */ 3587ec681f3Smrg 3597ec681f3Smrgunsigned 3607ec681f3Smrgpanfrost_choose_hierarchy_mask( 3617ec681f3Smrg unsigned width, unsigned height, 3627ec681f3Smrg unsigned vertex_count, bool hierarchy) 3637ec681f3Smrg{ 3647ec681f3Smrg /* If there is no geometry, we don't bother enabling anything */ 3657ec681f3Smrg 3667ec681f3Smrg if (!vertex_count) 3677ec681f3Smrg return 0x00; 3687ec681f3Smrg 3697ec681f3Smrg if (!hierarchy) 3707ec681f3Smrg return panfrost_choose_tile_size(width, height, vertex_count); 3717ec681f3Smrg 3727ec681f3Smrg /* Otherwise, default everything on. TODO: Proper tests */ 3737ec681f3Smrg 3747ec681f3Smrg return 0xFF; 3757ec681f3Smrg} 376