17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
37ec681f3Smrg * Copyright © 2018-2019 Google, Inc.
47ec681f3Smrg *
57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
67ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
77ec681f3Smrg * to deal in the Software without restriction, including without limitation
87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
107ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * The above copyright notice and this permission notice (including the next
137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
147ec681f3Smrg * Software.
157ec681f3Smrg *
167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
217ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
227ec681f3Smrg * SOFTWARE.
237ec681f3Smrg *
247ec681f3Smrg * Authors:
257ec681f3Smrg *    Rob Clark <robclark@freedesktop.org>
267ec681f3Smrg */
277ec681f3Smrg
287ec681f3Smrg#include <stdio.h>
297ec681f3Smrg
307ec681f3Smrg#include "freedreno_layout.h"
317ec681f3Smrg
327ec681f3Smrgstatic bool
337ec681f3Smrgis_r8g8(struct fdl_layout *layout)
347ec681f3Smrg{
357ec681f3Smrg   return layout->cpp == 2 &&
367ec681f3Smrg          util_format_get_nr_components(layout->format) == 2;
377ec681f3Smrg}
387ec681f3Smrg
/* Looks up the UBWC block dimensions for a layout.
 *
 * layout:      layout whose cpp/format select the block size
 * blockwidth:  out, block width (0 when the size is not supported yet)
 * blockheight: out, block height (0 when the size is not supported yet)
 *
 * Two-component, 2-byte formats (see is_r8g8()) always use 16x8; everything
 * else is looked up by the layout's cpp shift.
 */
void
fdl6_get_ubwc_blockwidth(struct fdl_layout *layout, uint32_t *blockwidth,
                         uint32_t *blockheight)
{
   /* Indexed by fdl_cpp_shift(), i.e. entry N is used for cpp = 1 << N. */
   static const struct {
      uint8_t width;
      uint8_t height;
   } ubwc_block[] = {
      { 16, 4 }, /* cpp = 1 */
      { 16, 4 }, /* cpp = 2 */
      { 16, 4 }, /* cpp = 4 */
      {  8, 4 }, /* cpp = 8 */
      {  4, 4 }, /* cpp = 16 */
      {  4, 2 }, /* cpp = 32 */
      {  0, 0 }, /* cpp = 64 (TODO) */
   };

   if (is_r8g8(layout)) {
      /* special case for r8g8: */
      *blockwidth = 16;
      *blockheight = 8;
   } else {
      const uint32_t shift = fdl_cpp_shift(layout);

      assert(shift < ARRAY_SIZE(ubwc_block));
      *blockwidth = ubwc_block[shift].width;
      *blockheight = ubwc_block[shift].height;
   }
}
687ec681f3Smrg
697ec681f3Smrgstatic void
707ec681f3Smrgfdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
717ec681f3Smrg{
727ec681f3Smrg   layout->pitchalign = fdl_cpp_shift(layout);
737ec681f3Smrg   *heightalign = 16;
747ec681f3Smrg
757ec681f3Smrg   if (is_r8g8(layout) || layout->cpp == 1) {
767ec681f3Smrg      layout->pitchalign = 1;
777ec681f3Smrg      *heightalign = 32;
787ec681f3Smrg   } else if (layout->cpp == 2) {
797ec681f3Smrg      layout->pitchalign = 2;
807ec681f3Smrg   }
817ec681f3Smrg
827ec681f3Smrg   /* note: this base_align is *probably* not always right,
837ec681f3Smrg    * it doesn't really get tested. for example with UBWC we might
847ec681f3Smrg    * want 4k alignment, since we align UBWC levels to 4k
857ec681f3Smrg    */
867ec681f3Smrg   if (layout->cpp == 1)
877ec681f3Smrg      layout->base_align = 64;
887ec681f3Smrg   else if (layout->cpp == 2)
897ec681f3Smrg      layout->base_align = 128;
907ec681f3Smrg   else
917ec681f3Smrg      layout->base_align = 256;
927ec681f3Smrg}
937ec681f3Smrg
947ec681f3Smrg/* NOTE: good way to test this is:  (for example)
957ec681f3Smrg *  piglit/bin/texelFetch fs sampler3D 100x100x8
967ec681f3Smrg */
977ec681f3Smrgbool
987ec681f3Smrgfdl6_layout(struct fdl_layout *layout, enum pipe_format format,
997ec681f3Smrg            uint32_t nr_samples, uint32_t width0, uint32_t height0,
1007ec681f3Smrg            uint32_t depth0, uint32_t mip_levels, uint32_t array_size,
1017ec681f3Smrg            bool is_3d, struct fdl_explicit_layout *explicit_layout)
1027ec681f3Smrg{
1037ec681f3Smrg   uint32_t offset = 0, heightalign;
1047ec681f3Smrg   uint32_t ubwc_blockwidth, ubwc_blockheight;
1057ec681f3Smrg
1067ec681f3Smrg   assert(nr_samples > 0);
1077ec681f3Smrg   layout->width0 = width0;
1087ec681f3Smrg   layout->height0 = height0;
1097ec681f3Smrg   layout->depth0 = depth0;
1107ec681f3Smrg
1117ec681f3Smrg   layout->cpp = util_format_get_blocksize(format);
1127ec681f3Smrg   layout->cpp *= nr_samples;
1137ec681f3Smrg   layout->cpp_shift = ffs(layout->cpp) - 1;
1147ec681f3Smrg
1157ec681f3Smrg   layout->format = format;
1167ec681f3Smrg   layout->nr_samples = nr_samples;
1177ec681f3Smrg   layout->layer_first = !is_3d;
1187ec681f3Smrg
1197ec681f3Smrg   fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
1207ec681f3Smrg
1217ec681f3Smrg   if (depth0 > 1 || ubwc_blockwidth == 0)
1227ec681f3Smrg      layout->ubwc = false;
1237ec681f3Smrg
1247ec681f3Smrg   if (layout->ubwc || util_format_is_depth_or_stencil(format))
1257ec681f3Smrg      layout->tile_all = true;
1267ec681f3Smrg
1277ec681f3Smrg   /* in layer_first layout, the level (slice) contains just one
1287ec681f3Smrg    * layer (since in fact the layer contains the slices)
1297ec681f3Smrg    */
1307ec681f3Smrg   uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
1317ec681f3Smrg
1327ec681f3Smrg   /* note: for tiled+noubwc layouts, we can use a lower pitchalign
1337ec681f3Smrg    * which will affect the linear levels only, (the hardware will still
1347ec681f3Smrg    * expect the tiled alignment on the tiled levels)
1357ec681f3Smrg    */
1367ec681f3Smrg   if (layout->tile_mode) {
1377ec681f3Smrg      fdl6_tile_alignment(layout, &heightalign);
1387ec681f3Smrg   } else {
1397ec681f3Smrg      layout->base_align = 64;
1407ec681f3Smrg      layout->pitchalign = 0;
1417ec681f3Smrg      /* align pitch to at least 16 pixels:
1427ec681f3Smrg       * both turnip and galium assume there is enough alignment for 16x4
1437ec681f3Smrg       * aligned gmem store. turnip can use CP_BLIT to work without this
1447ec681f3Smrg       * extra alignment, but gallium driver doesn't implement it yet
1457ec681f3Smrg       */
1467ec681f3Smrg      if (layout->cpp > 4)
1477ec681f3Smrg         layout->pitchalign = fdl_cpp_shift(layout) - 2;
1487ec681f3Smrg
1497ec681f3Smrg      /* when possible, use a bit more alignment than necessary
1507ec681f3Smrg       * presumably this is better for performance?
1517ec681f3Smrg       */
1527ec681f3Smrg      if (!explicit_layout)
1537ec681f3Smrg         layout->pitchalign = fdl_cpp_shift(layout);
1547ec681f3Smrg
1557ec681f3Smrg      /* not used, avoid "may be used uninitialized" warning */
1567ec681f3Smrg      heightalign = 1;
1577ec681f3Smrg   }
1587ec681f3Smrg
1597ec681f3Smrg   fdl_set_pitchalign(layout, layout->pitchalign + 6);
1607ec681f3Smrg
1617ec681f3Smrg   if (explicit_layout) {
1627ec681f3Smrg      offset = explicit_layout->offset;
1637ec681f3Smrg      layout->pitch0 = explicit_layout->pitch;
1647ec681f3Smrg      if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)
1657ec681f3Smrg         return false;
1667ec681f3Smrg   }
1677ec681f3Smrg
1687ec681f3Smrg   uint32_t ubwc_width0 = width0;
1697ec681f3Smrg   uint32_t ubwc_height0 = height0;
1707ec681f3Smrg   uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
1717ec681f3Smrg   if (mip_levels > 1) {
1727ec681f3Smrg      /* With mipmapping enabled, UBWC layout is power-of-two sized,
1737ec681f3Smrg       * specified in log2 width/height in the descriptors.  The height
1747ec681f3Smrg       * alignment is 64 for mipmapping, but for buffer sharing (always
1757ec681f3Smrg       * single level) other participants expect 16.
1767ec681f3Smrg       */
1777ec681f3Smrg      ubwc_width0 = util_next_power_of_two(width0);
1787ec681f3Smrg      ubwc_height0 = util_next_power_of_two(height0);
1797ec681f3Smrg      ubwc_tile_height_alignment = 64;
1807ec681f3Smrg   }
1817ec681f3Smrg   layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
1827ec681f3Smrg                               RGB_TILE_WIDTH_ALIGNMENT);
1837ec681f3Smrg   ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
1847ec681f3Smrg                        ubwc_tile_height_alignment);
1857ec681f3Smrg
1867ec681f3Smrg   for (uint32_t level = 0; level < mip_levels; level++) {
1877ec681f3Smrg      uint32_t depth = u_minify(depth0, level);
1887ec681f3Smrg      struct fdl_slice *slice = &layout->slices[level];
1897ec681f3Smrg      struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
1907ec681f3Smrg      uint32_t tile_mode = fdl_tile_mode(layout, level);
1917ec681f3Smrg      uint32_t pitch = fdl_pitch(layout, level);
1927ec681f3Smrg      uint32_t height;
1937ec681f3Smrg
1947ec681f3Smrg      /* tiled levels of 3D textures are rounded up to PoT dimensions: */
1957ec681f3Smrg      if (is_3d && tile_mode) {
1967ec681f3Smrg         height = u_minify(util_next_power_of_two(height0), level);
1977ec681f3Smrg      } else {
1987ec681f3Smrg         height = u_minify(height0, level);
1997ec681f3Smrg      }
2007ec681f3Smrg
2017ec681f3Smrg      uint32_t nblocksy = util_format_get_nblocksy(format, height);
2027ec681f3Smrg      if (tile_mode)
2037ec681f3Smrg         nblocksy = align(nblocksy, heightalign);
2047ec681f3Smrg
2057ec681f3Smrg      /* The blits used for mem<->gmem work at a granularity of
2067ec681f3Smrg       * 16x4, which can cause faults due to over-fetch on the
2077ec681f3Smrg       * last level.  The simple solution is to over-allocate a
2087ec681f3Smrg       * bit the last level to ensure any over-fetch is harmless.
2097ec681f3Smrg       * The pitch is already sufficiently aligned, but height
2107ec681f3Smrg       * may not be. note this only matters if last level is linear
2117ec681f3Smrg       */
2127ec681f3Smrg      if (level == mip_levels - 1)
2137ec681f3Smrg         nblocksy = align(nblocksy, 4);
2147ec681f3Smrg
2157ec681f3Smrg      slice->offset = offset + layout->size;
2167ec681f3Smrg
2177ec681f3Smrg      /* 1d array and 2d array textures must all have the same layer size
2187ec681f3Smrg       * for each miplevel on a6xx. 3d textures can have different layer
2197ec681f3Smrg       * sizes for high levels, but the hw auto-sizer is buggy (or at least
2207ec681f3Smrg       * different than what this code does), so as soon as the layer size
2217ec681f3Smrg       * range gets into range, we stop reducing it.
2227ec681f3Smrg       */
2237ec681f3Smrg      if (is_3d) {
2247ec681f3Smrg         if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
2257ec681f3Smrg            slice->size0 = align(nblocksy * pitch, 4096);
2267ec681f3Smrg         } else {
2277ec681f3Smrg            slice->size0 = layout->slices[level - 1].size0;
2287ec681f3Smrg         }
2297ec681f3Smrg      } else {
2307ec681f3Smrg         slice->size0 = nblocksy * pitch;
2317ec681f3Smrg      }
2327ec681f3Smrg
2337ec681f3Smrg      layout->size += slice->size0 * depth * layers_in_level;
2347ec681f3Smrg
2357ec681f3Smrg      if (layout->ubwc) {
2367ec681f3Smrg         /* with UBWC every level is aligned to 4K */
2377ec681f3Smrg         layout->size = align(layout->size, 4096);
2387ec681f3Smrg
2397ec681f3Smrg         uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);
2407ec681f3Smrg         uint32_t meta_height =
2417ec681f3Smrg            align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment);
2427ec681f3Smrg
2437ec681f3Smrg         ubwc_slice->size0 =
2447ec681f3Smrg            align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
2457ec681f3Smrg         ubwc_slice->offset = offset + layout->ubwc_layer_size;
2467ec681f3Smrg         layout->ubwc_layer_size += ubwc_slice->size0;
2477ec681f3Smrg      }
2487ec681f3Smrg   }
2497ec681f3Smrg
2507ec681f3Smrg   if (layout->layer_first) {
2517ec681f3Smrg      layout->layer_size = align(layout->size, 4096);
2527ec681f3Smrg      layout->size = layout->layer_size * array_size;
2537ec681f3Smrg   }
2547ec681f3Smrg
2557ec681f3Smrg   /* Place the UBWC slices before the uncompressed slices, because the
2567ec681f3Smrg    * kernel expects UBWC to be at the start of the buffer.  In the HW, we
2577ec681f3Smrg    * get to program the UBWC and non-UBWC offset/strides
2587ec681f3Smrg    * independently.
2597ec681f3Smrg    */
2607ec681f3Smrg   if (layout->ubwc) {
2617ec681f3Smrg      for (uint32_t level = 0; level < mip_levels; level++)
2627ec681f3Smrg         layout->slices[level].offset += layout->ubwc_layer_size * array_size;
2637ec681f3Smrg      layout->size += layout->ubwc_layer_size * array_size;
2647ec681f3Smrg   }
2657ec681f3Smrg
2667ec681f3Smrg   /* include explicit offset in size */
2677ec681f3Smrg   layout->size += offset;
2687ec681f3Smrg
2697ec681f3Smrg   return true;
2707ec681f3Smrg}
271