17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org> 37ec681f3Smrg * Copyright © 2018-2019 Google, Inc. 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 217ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 227ec681f3Smrg * SOFTWARE. 237ec681f3Smrg * 247ec681f3Smrg * Authors: 257ec681f3Smrg * Rob Clark <robclark@freedesktop.org> 267ec681f3Smrg */ 277ec681f3Smrg 287ec681f3Smrg#include <stdio.h> 297ec681f3Smrg 307ec681f3Smrg#include "freedreno_layout.h" 317ec681f3Smrg 327ec681f3Smrgstatic bool 337ec681f3Smrgis_r8g8(struct fdl_layout *layout) 347ec681f3Smrg{ 357ec681f3Smrg return layout->cpp == 2 && 367ec681f3Smrg util_format_get_nr_components(layout->format) == 2; 377ec681f3Smrg} 387ec681f3Smrg 397ec681f3Smrgvoid 407ec681f3Smrgfdl6_get_ubwc_blockwidth(struct fdl_layout *layout, uint32_t *blockwidth, 417ec681f3Smrg uint32_t *blockheight) 427ec681f3Smrg{ 437ec681f3Smrg static const struct { 447ec681f3Smrg uint8_t width; 457ec681f3Smrg uint8_t height; 467ec681f3Smrg } blocksize[] = { 477ec681f3Smrg { 16, 4 }, /* cpp = 1 */ 487ec681f3Smrg { 16, 4 }, /* cpp = 2 */ 497ec681f3Smrg { 16, 4 }, /* cpp = 4 */ 507ec681f3Smrg { 8, 4 }, /* cpp = 8 */ 517ec681f3Smrg { 4, 4 }, /* cpp = 16 */ 527ec681f3Smrg { 4, 2 }, /* cpp = 32 */ 537ec681f3Smrg { 0, 0 }, /* cpp = 64 (TODO) */ 547ec681f3Smrg }; 557ec681f3Smrg 567ec681f3Smrg /* special case for r8g8: */ 577ec681f3Smrg if (is_r8g8(layout)) { 587ec681f3Smrg *blockwidth = 16; 597ec681f3Smrg *blockheight = 8; 607ec681f3Smrg return; 617ec681f3Smrg } 627ec681f3Smrg 637ec681f3Smrg uint32_t cpp = fdl_cpp_shift(layout); 647ec681f3Smrg assert(cpp < ARRAY_SIZE(blocksize)); 657ec681f3Smrg *blockwidth = blocksize[cpp].width; 667ec681f3Smrg *blockheight = blocksize[cpp].height; 677ec681f3Smrg} 687ec681f3Smrg 697ec681f3Smrgstatic void 707ec681f3Smrgfdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign) 717ec681f3Smrg{ 727ec681f3Smrg layout->pitchalign = fdl_cpp_shift(layout); 737ec681f3Smrg *heightalign = 16; 747ec681f3Smrg 757ec681f3Smrg if (is_r8g8(layout) || layout->cpp == 1) { 767ec681f3Smrg layout->pitchalign = 1; 777ec681f3Smrg *heightalign = 32; 787ec681f3Smrg } else if (layout->cpp == 2) { 797ec681f3Smrg layout->pitchalign = 2; 807ec681f3Smrg } 817ec681f3Smrg 827ec681f3Smrg /* note: this base_align is *probably* not always right, 837ec681f3Smrg * it doesn't really get tested. for example with UBWC we might 847ec681f3Smrg * want 4k alignment, since we align UBWC levels to 4k 857ec681f3Smrg */ 867ec681f3Smrg if (layout->cpp == 1) 877ec681f3Smrg layout->base_align = 64; 887ec681f3Smrg else if (layout->cpp == 2) 897ec681f3Smrg layout->base_align = 128; 907ec681f3Smrg else 917ec681f3Smrg layout->base_align = 256; 927ec681f3Smrg} 937ec681f3Smrg 947ec681f3Smrg/* NOTE: good way to test this is: (for example) 957ec681f3Smrg * piglit/bin/texelFetch fs sampler3D 100x100x8 967ec681f3Smrg */ 977ec681f3Smrgbool 987ec681f3Smrgfdl6_layout(struct fdl_layout *layout, enum pipe_format format, 997ec681f3Smrg uint32_t nr_samples, uint32_t width0, uint32_t height0, 1007ec681f3Smrg uint32_t depth0, uint32_t mip_levels, uint32_t array_size, 1017ec681f3Smrg bool is_3d, struct fdl_explicit_layout *explicit_layout) 1027ec681f3Smrg{ 1037ec681f3Smrg uint32_t offset = 0, heightalign; 1047ec681f3Smrg uint32_t ubwc_blockwidth, ubwc_blockheight; 1057ec681f3Smrg 1067ec681f3Smrg assert(nr_samples > 0); 1077ec681f3Smrg layout->width0 = width0; 1087ec681f3Smrg layout->height0 = height0; 1097ec681f3Smrg layout->depth0 = depth0; 1107ec681f3Smrg 1117ec681f3Smrg layout->cpp = util_format_get_blocksize(format); 1127ec681f3Smrg layout->cpp *= nr_samples; 1137ec681f3Smrg layout->cpp_shift = ffs(layout->cpp) - 1; 1147ec681f3Smrg 1157ec681f3Smrg layout->format = format; 1167ec681f3Smrg layout->nr_samples = nr_samples; 1177ec681f3Smrg layout->layer_first = !is_3d; 1187ec681f3Smrg 1197ec681f3Smrg fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight); 1207ec681f3Smrg 1217ec681f3Smrg if (depth0 > 1 || ubwc_blockwidth == 0) 1227ec681f3Smrg layout->ubwc = false; 1237ec681f3Smrg 1247ec681f3Smrg if (layout->ubwc || util_format_is_depth_or_stencil(format)) 1257ec681f3Smrg layout->tile_all = true; 1267ec681f3Smrg 1277ec681f3Smrg /* in layer_first layout, the level (slice) contains just one 1287ec681f3Smrg * layer (since in fact the layer contains the slices) 1297ec681f3Smrg */ 1307ec681f3Smrg uint32_t layers_in_level = layout->layer_first ? 1 : array_size; 1317ec681f3Smrg 1327ec681f3Smrg /* note: for tiled+noubwc layouts, we can use a lower pitchalign 1337ec681f3Smrg * which will affect the linear levels only, (the hardware will still 1347ec681f3Smrg * expect the tiled alignment on the tiled levels) 1357ec681f3Smrg */ 1367ec681f3Smrg if (layout->tile_mode) { 1377ec681f3Smrg fdl6_tile_alignment(layout, &heightalign); 1387ec681f3Smrg } else { 1397ec681f3Smrg layout->base_align = 64; 1407ec681f3Smrg layout->pitchalign = 0; 1417ec681f3Smrg /* align pitch to at least 16 pixels: 1427ec681f3Smrg * both turnip and galium assume there is enough alignment for 16x4 1437ec681f3Smrg * aligned gmem store. turnip can use CP_BLIT to work without this 1447ec681f3Smrg * extra alignment, but gallium driver doesn't implement it yet 1457ec681f3Smrg */ 1467ec681f3Smrg if (layout->cpp > 4) 1477ec681f3Smrg layout->pitchalign = fdl_cpp_shift(layout) - 2; 1487ec681f3Smrg 1497ec681f3Smrg /* when possible, use a bit more alignment than necessary 1507ec681f3Smrg * presumably this is better for performance? 1517ec681f3Smrg */ 1527ec681f3Smrg if (!explicit_layout) 1537ec681f3Smrg layout->pitchalign = fdl_cpp_shift(layout); 1547ec681f3Smrg 1557ec681f3Smrg /* not used, avoid "may be used uninitialized" warning */ 1567ec681f3Smrg heightalign = 1; 1577ec681f3Smrg } 1587ec681f3Smrg 1597ec681f3Smrg fdl_set_pitchalign(layout, layout->pitchalign + 6); 1607ec681f3Smrg 1617ec681f3Smrg if (explicit_layout) { 1627ec681f3Smrg offset = explicit_layout->offset; 1637ec681f3Smrg layout->pitch0 = explicit_layout->pitch; 1647ec681f3Smrg if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0) 1657ec681f3Smrg return false; 1667ec681f3Smrg } 1677ec681f3Smrg 1687ec681f3Smrg uint32_t ubwc_width0 = width0; 1697ec681f3Smrg uint32_t ubwc_height0 = height0; 1707ec681f3Smrg uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT; 1717ec681f3Smrg if (mip_levels > 1) { 1727ec681f3Smrg /* With mipmapping enabled, UBWC layout is power-of-two sized, 1737ec681f3Smrg * specified in log2 width/height in the descriptors. The height 1747ec681f3Smrg * alignment is 64 for mipmapping, but for buffer sharing (always 1757ec681f3Smrg * single level) other participants expect 16. 1767ec681f3Smrg */ 1777ec681f3Smrg ubwc_width0 = util_next_power_of_two(width0); 1787ec681f3Smrg ubwc_height0 = util_next_power_of_two(height0); 1797ec681f3Smrg ubwc_tile_height_alignment = 64; 1807ec681f3Smrg } 1817ec681f3Smrg layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth), 1827ec681f3Smrg RGB_TILE_WIDTH_ALIGNMENT); 1837ec681f3Smrg ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight), 1847ec681f3Smrg ubwc_tile_height_alignment); 1857ec681f3Smrg 1867ec681f3Smrg for (uint32_t level = 0; level < mip_levels; level++) { 1877ec681f3Smrg uint32_t depth = u_minify(depth0, level); 1887ec681f3Smrg struct fdl_slice *slice = &layout->slices[level]; 1897ec681f3Smrg struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level]; 1907ec681f3Smrg uint32_t tile_mode = fdl_tile_mode(layout, level); 1917ec681f3Smrg uint32_t pitch = fdl_pitch(layout, level); 1927ec681f3Smrg uint32_t height; 1937ec681f3Smrg 1947ec681f3Smrg /* tiled levels of 3D textures are rounded up to PoT dimensions: */ 1957ec681f3Smrg if (is_3d && tile_mode) { 1967ec681f3Smrg height = u_minify(util_next_power_of_two(height0), level); 1977ec681f3Smrg } else { 1987ec681f3Smrg height = u_minify(height0, level); 1997ec681f3Smrg } 2007ec681f3Smrg 2017ec681f3Smrg uint32_t nblocksy = util_format_get_nblocksy(format, height); 2027ec681f3Smrg if (tile_mode) 2037ec681f3Smrg nblocksy = align(nblocksy, heightalign); 2047ec681f3Smrg 2057ec681f3Smrg /* The blits used for mem<->gmem work at a granularity of 2067ec681f3Smrg * 16x4, which can cause faults due to over-fetch on the 2077ec681f3Smrg * last level. The simple solution is to over-allocate a 2087ec681f3Smrg * bit the last level to ensure any over-fetch is harmless. 2097ec681f3Smrg * The pitch is already sufficiently aligned, but height 2107ec681f3Smrg * may not be. note this only matters if last level is linear 2117ec681f3Smrg */ 2127ec681f3Smrg if (level == mip_levels - 1) 2137ec681f3Smrg nblocksy = align(nblocksy, 4); 2147ec681f3Smrg 2157ec681f3Smrg slice->offset = offset + layout->size; 2167ec681f3Smrg 2177ec681f3Smrg /* 1d array and 2d array textures must all have the same layer size 2187ec681f3Smrg * for each miplevel on a6xx. 3d textures can have different layer 2197ec681f3Smrg * sizes for high levels, but the hw auto-sizer is buggy (or at least 2207ec681f3Smrg * different than what this code does), so as soon as the layer size 2217ec681f3Smrg * range gets into range, we stop reducing it. 2227ec681f3Smrg */ 2237ec681f3Smrg if (is_3d) { 2247ec681f3Smrg if (level < 1 || layout->slices[level - 1].size0 > 0xf000) { 2257ec681f3Smrg slice->size0 = align(nblocksy * pitch, 4096); 2267ec681f3Smrg } else { 2277ec681f3Smrg slice->size0 = layout->slices[level - 1].size0; 2287ec681f3Smrg } 2297ec681f3Smrg } else { 2307ec681f3Smrg slice->size0 = nblocksy * pitch; 2317ec681f3Smrg } 2327ec681f3Smrg 2337ec681f3Smrg layout->size += slice->size0 * depth * layers_in_level; 2347ec681f3Smrg 2357ec681f3Smrg if (layout->ubwc) { 2367ec681f3Smrg /* with UBWC every level is aligned to 4K */ 2377ec681f3Smrg layout->size = align(layout->size, 4096); 2387ec681f3Smrg 2397ec681f3Smrg uint32_t meta_pitch = fdl_ubwc_pitch(layout, level); 2407ec681f3Smrg uint32_t meta_height = 2417ec681f3Smrg align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment); 2427ec681f3Smrg 2437ec681f3Smrg ubwc_slice->size0 = 2447ec681f3Smrg align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT); 2457ec681f3Smrg ubwc_slice->offset = offset + layout->ubwc_layer_size; 2467ec681f3Smrg layout->ubwc_layer_size += ubwc_slice->size0; 2477ec681f3Smrg } 2487ec681f3Smrg } 2497ec681f3Smrg 2507ec681f3Smrg if (layout->layer_first) { 2517ec681f3Smrg layout->layer_size = align(layout->size, 4096); 2527ec681f3Smrg layout->size = layout->layer_size * array_size; 2537ec681f3Smrg } 2547ec681f3Smrg 2557ec681f3Smrg /* Place the UBWC slices before the uncompressed slices, because the 2567ec681f3Smrg * kernel expects UBWC to be at the start of the buffer. In the HW, we 2577ec681f3Smrg * get to program the UBWC and non-UBWC offset/strides 2587ec681f3Smrg * independently. 2597ec681f3Smrg */ 2607ec681f3Smrg if (layout->ubwc) { 2617ec681f3Smrg for (uint32_t level = 0; level < mip_levels; level++) 2627ec681f3Smrg layout->slices[level].offset += layout->ubwc_layer_size * array_size; 2637ec681f3Smrg layout->size += layout->ubwc_layer_size * array_size; 2647ec681f3Smrg } 2657ec681f3Smrg 2667ec681f3Smrg /* include explicit offset in size */ 2677ec681f3Smrg layout->size += offset; 2687ec681f3Smrg 2697ec681f3Smrg return true; 2707ec681f3Smrg} 271