1/* 2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org> 3 * Copyright © 2018-2019 Google, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 * Authors: 25 * Rob Clark <robclark@freedesktop.org> 26 */ 27 28#include <stdio.h> 29 30#include "freedreno_layout.h" 31 32static bool 33is_r8g8(struct fdl_layout *layout) 34{ 35 return layout->cpp == 2 && 36 util_format_get_nr_components(layout->format) == 2; 37} 38 39void 40fdl6_get_ubwc_blockwidth(struct fdl_layout *layout, uint32_t *blockwidth, 41 uint32_t *blockheight) 42{ 43 static const struct { 44 uint8_t width; 45 uint8_t height; 46 } blocksize[] = { 47 { 16, 4 }, /* cpp = 1 */ 48 { 16, 4 }, /* cpp = 2 */ 49 { 16, 4 }, /* cpp = 4 */ 50 { 8, 4 }, /* cpp = 8 */ 51 { 4, 4 }, /* cpp = 16 */ 52 { 4, 2 }, /* cpp = 32 */ 53 { 0, 0 }, /* cpp = 64 (TODO) */ 54 }; 55 56 /* special case for r8g8: */ 57 if (is_r8g8(layout)) { 58 *blockwidth = 16; 59 *blockheight = 8; 60 return; 61 } 62 63 uint32_t cpp = fdl_cpp_shift(layout); 64 assert(cpp < ARRAY_SIZE(blocksize)); 65 *blockwidth = blocksize[cpp].width; 66 *blockheight = blocksize[cpp].height; 67} 68 69static void 70fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign) 71{ 72 layout->pitchalign = fdl_cpp_shift(layout); 73 *heightalign = 16; 74 75 if (is_r8g8(layout) || layout->cpp == 1) { 76 layout->pitchalign = 1; 77 *heightalign = 32; 78 } else if (layout->cpp == 2) { 79 layout->pitchalign = 2; 80 } 81 82 /* note: this base_align is *probably* not always right, 83 * it doesn't really get tested. for example with UBWC we might 84 * want 4k alignment, since we align UBWC levels to 4k 85 */ 86 if (layout->cpp == 1) 87 layout->base_align = 64; 88 else if (layout->cpp == 2) 89 layout->base_align = 128; 90 else 91 layout->base_align = 256; 92} 93 94/* NOTE: good way to test this is: (for example) 95 * piglit/bin/texelFetch fs sampler3D 100x100x8 96 */ 97bool 98fdl6_layout(struct fdl_layout *layout, enum pipe_format format, 99 uint32_t nr_samples, uint32_t width0, uint32_t height0, 100 uint32_t depth0, uint32_t mip_levels, uint32_t array_size, 101 bool is_3d, struct fdl_explicit_layout *explicit_layout) 102{ 103 uint32_t offset = 0, heightalign; 104 uint32_t ubwc_blockwidth, ubwc_blockheight; 105 106 assert(nr_samples > 0); 107 layout->width0 = width0; 108 layout->height0 = height0; 109 layout->depth0 = depth0; 110 111 layout->cpp = util_format_get_blocksize(format); 112 layout->cpp *= nr_samples; 113 layout->cpp_shift = ffs(layout->cpp) - 1; 114 115 layout->format = format; 116 layout->nr_samples = nr_samples; 117 layout->layer_first = !is_3d; 118 119 fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight); 120 121 if (depth0 > 1 || ubwc_blockwidth == 0) 122 layout->ubwc = false; 123 124 if (layout->ubwc || util_format_is_depth_or_stencil(format)) 125 layout->tile_all = true; 126 127 /* in layer_first layout, the level (slice) contains just one 128 * layer (since in fact the layer contains the slices) 129 */ 130 uint32_t layers_in_level = layout->layer_first ? 1 : array_size; 131 132 /* note: for tiled+noubwc layouts, we can use a lower pitchalign 133 * which will affect the linear levels only, (the hardware will still 134 * expect the tiled alignment on the tiled levels) 135 */ 136 if (layout->tile_mode) { 137 fdl6_tile_alignment(layout, &heightalign); 138 } else { 139 layout->base_align = 64; 140 layout->pitchalign = 0; 141 /* align pitch to at least 16 pixels: 142 * both turnip and galium assume there is enough alignment for 16x4 143 * aligned gmem store. turnip can use CP_BLIT to work without this 144 * extra alignment, but gallium driver doesn't implement it yet 145 */ 146 if (layout->cpp > 4) 147 layout->pitchalign = fdl_cpp_shift(layout) - 2; 148 149 /* when possible, use a bit more alignment than necessary 150 * presumably this is better for performance? 151 */ 152 if (!explicit_layout) 153 layout->pitchalign = fdl_cpp_shift(layout); 154 155 /* not used, avoid "may be used uninitialized" warning */ 156 heightalign = 1; 157 } 158 159 fdl_set_pitchalign(layout, layout->pitchalign + 6); 160 161 if (explicit_layout) { 162 offset = explicit_layout->offset; 163 layout->pitch0 = explicit_layout->pitch; 164 if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0) 165 return false; 166 } 167 168 uint32_t ubwc_width0 = width0; 169 uint32_t ubwc_height0 = height0; 170 uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT; 171 if (mip_levels > 1) { 172 /* With mipmapping enabled, UBWC layout is power-of-two sized, 173 * specified in log2 width/height in the descriptors. The height 174 * alignment is 64 for mipmapping, but for buffer sharing (always 175 * single level) other participants expect 16. 176 */ 177 ubwc_width0 = util_next_power_of_two(width0); 178 ubwc_height0 = util_next_power_of_two(height0); 179 ubwc_tile_height_alignment = 64; 180 } 181 layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth), 182 RGB_TILE_WIDTH_ALIGNMENT); 183 ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight), 184 ubwc_tile_height_alignment); 185 186 for (uint32_t level = 0; level < mip_levels; level++) { 187 uint32_t depth = u_minify(depth0, level); 188 struct fdl_slice *slice = &layout->slices[level]; 189 struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level]; 190 uint32_t tile_mode = fdl_tile_mode(layout, level); 191 uint32_t pitch = fdl_pitch(layout, level); 192 uint32_t height; 193 194 /* tiled levels of 3D textures are rounded up to PoT dimensions: */ 195 if (is_3d && tile_mode) { 196 height = u_minify(util_next_power_of_two(height0), level); 197 } else { 198 height = u_minify(height0, level); 199 } 200 201 uint32_t nblocksy = util_format_get_nblocksy(format, height); 202 if (tile_mode) 203 nblocksy = align(nblocksy, heightalign); 204 205 /* The blits used for mem<->gmem work at a granularity of 206 * 16x4, which can cause faults due to over-fetch on the 207 * last level. The simple solution is to over-allocate a 208 * bit the last level to ensure any over-fetch is harmless. 209 * The pitch is already sufficiently aligned, but height 210 * may not be. note this only matters if last level is linear 211 */ 212 if (level == mip_levels - 1) 213 nblocksy = align(nblocksy, 4); 214 215 slice->offset = offset + layout->size; 216 217 /* 1d array and 2d array textures must all have the same layer size 218 * for each miplevel on a6xx. 3d textures can have different layer 219 * sizes for high levels, but the hw auto-sizer is buggy (or at least 220 * different than what this code does), so as soon as the layer size 221 * range gets into range, we stop reducing it. 222 */ 223 if (is_3d) { 224 if (level < 1 || layout->slices[level - 1].size0 > 0xf000) { 225 slice->size0 = align(nblocksy * pitch, 4096); 226 } else { 227 slice->size0 = layout->slices[level - 1].size0; 228 } 229 } else { 230 slice->size0 = nblocksy * pitch; 231 } 232 233 layout->size += slice->size0 * depth * layers_in_level; 234 235 if (layout->ubwc) { 236 /* with UBWC every level is aligned to 4K */ 237 layout->size = align(layout->size, 4096); 238 239 uint32_t meta_pitch = fdl_ubwc_pitch(layout, level); 240 uint32_t meta_height = 241 align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment); 242 243 ubwc_slice->size0 = 244 align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT); 245 ubwc_slice->offset = offset + layout->ubwc_layer_size; 246 layout->ubwc_layer_size += ubwc_slice->size0; 247 } 248 } 249 250 if (layout->layer_first) { 251 layout->layer_size = align(layout->size, 4096); 252 layout->size = layout->layer_size * array_size; 253 } 254 255 /* Place the UBWC slices before the uncompressed slices, because the 256 * kernel expects UBWC to be at the start of the buffer. In the HW, we 257 * get to program the UBWC and non-UBWC offset/strides 258 * independently. 259 */ 260 if (layout->ubwc) { 261 for (uint32_t level = 0; level < mip_levels; level++) 262 layout->slices[level].offset += layout->ubwc_layer_size * array_size; 263 layout->size += layout->ubwc_layer_size * array_size; 264 } 265 266 /* include explicit offset in size */ 267 layout->size += offset; 268 269 return true; 270} 271