17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2021 Raspberry Pi 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "v3dv_private.h" 257ec681f3Smrg#include "broadcom/common/v3d_macros.h" 267ec681f3Smrg#include "broadcom/cle/v3dx_pack.h" 277ec681f3Smrg#include "broadcom/compiler/v3d_compiler.h" 287ec681f3Smrg 297ec681f3Smrg#include "vk_format_info.h" 307ec681f3Smrg 317ec681f3Smrg/* 327ec681f3Smrg * This method translates pipe_swizzle to the swizzle values used at the 337ec681f3Smrg * packet TEXTURE_SHADER_STATE 347ec681f3Smrg * 357ec681f3Smrg * FIXME: C&P from v3d, common place? 367ec681f3Smrg */ 377ec681f3Smrgstatic uint32_t 387ec681f3Smrgtranslate_swizzle(unsigned char pipe_swizzle) 397ec681f3Smrg{ 407ec681f3Smrg switch (pipe_swizzle) { 417ec681f3Smrg case PIPE_SWIZZLE_0: 427ec681f3Smrg return 0; 437ec681f3Smrg case PIPE_SWIZZLE_1: 447ec681f3Smrg return 1; 457ec681f3Smrg case PIPE_SWIZZLE_X: 467ec681f3Smrg case PIPE_SWIZZLE_Y: 477ec681f3Smrg case PIPE_SWIZZLE_Z: 487ec681f3Smrg case PIPE_SWIZZLE_W: 497ec681f3Smrg return 2 + pipe_swizzle; 507ec681f3Smrg default: 517ec681f3Smrg unreachable("unknown swizzle"); 527ec681f3Smrg } 537ec681f3Smrg} 547ec681f3Smrg 557ec681f3Smrg/* 567ec681f3Smrg * Packs and ensure bo for the shader state (the latter can be temporal). 577ec681f3Smrg */ 587ec681f3Smrgstatic void 597ec681f3Smrgpack_texture_shader_state_helper(struct v3dv_device *device, 607ec681f3Smrg struct v3dv_image_view *image_view, 617ec681f3Smrg bool for_cube_map_array_storage) 627ec681f3Smrg{ 637ec681f3Smrg assert(!for_cube_map_array_storage || 647ec681f3Smrg image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY); 657ec681f3Smrg const uint32_t index = for_cube_map_array_storage ? 1 : 0; 667ec681f3Smrg 677ec681f3Smrg assert(image_view->vk.image); 687ec681f3Smrg const struct v3dv_image *image = (struct v3dv_image *) image_view->vk.image; 697ec681f3Smrg 707ec681f3Smrg assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT || 717ec681f3Smrg image->vk.samples == VK_SAMPLE_COUNT_4_BIT); 727ec681f3Smrg const uint32_t msaa_scale = image->vk.samples == VK_SAMPLE_COUNT_1_BIT ? 1 : 2; 737ec681f3Smrg 747ec681f3Smrg v3dvx_pack(image_view->texture_shader_state[index], TEXTURE_SHADER_STATE, tex) { 757ec681f3Smrg 767ec681f3Smrg tex.level_0_is_strictly_uif = 777ec681f3Smrg (image->slices[0].tiling == V3D_TILING_UIF_XOR || 787ec681f3Smrg image->slices[0].tiling == V3D_TILING_UIF_NO_XOR); 797ec681f3Smrg 807ec681f3Smrg tex.level_0_xor_enable = (image->slices[0].tiling == V3D_TILING_UIF_XOR); 817ec681f3Smrg 827ec681f3Smrg if (tex.level_0_is_strictly_uif) 837ec681f3Smrg tex.level_0_ub_pad = image->slices[0].ub_pad; 847ec681f3Smrg 857ec681f3Smrg /* FIXME: v3d never sets uif_xor_disable, but uses it on the following 867ec681f3Smrg * check so let's set the default value 877ec681f3Smrg */ 887ec681f3Smrg tex.uif_xor_disable = false; 897ec681f3Smrg if (tex.uif_xor_disable || 907ec681f3Smrg tex.level_0_is_strictly_uif) { 917ec681f3Smrg tex.extended = true; 927ec681f3Smrg } 937ec681f3Smrg 947ec681f3Smrg tex.base_level = image_view->vk.base_mip_level; 957ec681f3Smrg tex.max_level = image_view->vk.base_mip_level + 967ec681f3Smrg image_view->vk.level_count - 1; 977ec681f3Smrg 987ec681f3Smrg tex.swizzle_r = translate_swizzle(image_view->swizzle[0]); 997ec681f3Smrg tex.swizzle_g = translate_swizzle(image_view->swizzle[1]); 1007ec681f3Smrg tex.swizzle_b = translate_swizzle(image_view->swizzle[2]); 1017ec681f3Smrg tex.swizzle_a = translate_swizzle(image_view->swizzle[3]); 1027ec681f3Smrg 1037ec681f3Smrg tex.texture_type = image_view->format->tex_type; 1047ec681f3Smrg 1057ec681f3Smrg if (image->vk.image_type == VK_IMAGE_TYPE_3D) { 1067ec681f3Smrg tex.image_depth = image->vk.extent.depth; 1077ec681f3Smrg } else { 1087ec681f3Smrg tex.image_depth = image_view->vk.layer_count; 1097ec681f3Smrg } 1107ec681f3Smrg 1117ec681f3Smrg /* Empirical testing with CTS shows that when we are sampling from cube 1127ec681f3Smrg * arrays we want to set image depth to layers / 6, but not when doing 1137ec681f3Smrg * image load/store. 1147ec681f3Smrg */ 1157ec681f3Smrg if (image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY && 1167ec681f3Smrg !for_cube_map_array_storage) { 1177ec681f3Smrg assert(tex.image_depth % 6 == 0); 1187ec681f3Smrg tex.image_depth /= 6; 1197ec681f3Smrg } 1207ec681f3Smrg 1217ec681f3Smrg tex.image_height = image->vk.extent.height * msaa_scale; 1227ec681f3Smrg tex.image_width = image->vk.extent.width * msaa_scale; 1237ec681f3Smrg 1247ec681f3Smrg /* On 4.x, the height of a 1D texture is redefined to be the 1257ec681f3Smrg * upper 14 bits of the width (which is only usable with txf). 1267ec681f3Smrg */ 1277ec681f3Smrg if (image->vk.image_type == VK_IMAGE_TYPE_1D) { 1287ec681f3Smrg tex.image_height = tex.image_width >> 14; 1297ec681f3Smrg } 1307ec681f3Smrg tex.image_width &= (1 << 14) - 1; 1317ec681f3Smrg tex.image_height &= (1 << 14) - 1; 1327ec681f3Smrg 1337ec681f3Smrg tex.array_stride_64_byte_aligned = image->cube_map_stride / 64; 1347ec681f3Smrg 1357ec681f3Smrg tex.srgb = vk_format_is_srgb(image_view->vk.format); 1367ec681f3Smrg 1377ec681f3Smrg /* At this point we don't have the job. That's the reason the first 1387ec681f3Smrg * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to 1397ec681f3Smrg * add the bo to the job. This also means that we need to add manually 1407ec681f3Smrg * the image bo to the job using the texture. 1417ec681f3Smrg */ 1427ec681f3Smrg const uint32_t base_offset = 1437ec681f3Smrg image->mem->bo->offset + 1447ec681f3Smrg v3dv_layer_offset(image, 0, image_view->vk.base_array_layer); 1457ec681f3Smrg tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); 1467ec681f3Smrg } 1477ec681f3Smrg} 1487ec681f3Smrg 1497ec681f3Smrgvoid 1507ec681f3Smrgv3dX(pack_texture_shader_state)(struct v3dv_device *device, 1517ec681f3Smrg struct v3dv_image_view *iview) 1527ec681f3Smrg{ 1537ec681f3Smrg pack_texture_shader_state_helper(device, iview, false); 1547ec681f3Smrg if (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) 1557ec681f3Smrg pack_texture_shader_state_helper(device, iview, true); 1567ec681f3Smrg} 1577ec681f3Smrg 1587ec681f3Smrgvoid 1597ec681f3Smrgv3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device, 1607ec681f3Smrg struct v3dv_buffer_view *buffer_view) 1617ec681f3Smrg{ 1627ec681f3Smrg assert(buffer_view->buffer); 1637ec681f3Smrg const struct v3dv_buffer *buffer = buffer_view->buffer; 1647ec681f3Smrg 1657ec681f3Smrg v3dvx_pack(buffer_view->texture_shader_state, TEXTURE_SHADER_STATE, tex) { 1667ec681f3Smrg tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); 1677ec681f3Smrg tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); 1687ec681f3Smrg tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); 1697ec681f3Smrg tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); 1707ec681f3Smrg 1717ec681f3Smrg tex.image_depth = 1; 1727ec681f3Smrg 1737ec681f3Smrg /* On 4.x, the height of a 1D texture is redefined to be the upper 14 1747ec681f3Smrg * bits of the width (which is only usable with txf) (or in other words, 1757ec681f3Smrg * we are providing a 28 bit field for size, but split on the usual 1767ec681f3Smrg * 14bit height/width). 1777ec681f3Smrg */ 1787ec681f3Smrg tex.image_width = buffer_view->num_elements; 1797ec681f3Smrg tex.image_height = tex.image_width >> 14; 1807ec681f3Smrg tex.image_width &= (1 << 14) - 1; 1817ec681f3Smrg tex.image_height &= (1 << 14) - 1; 1827ec681f3Smrg 1837ec681f3Smrg tex.texture_type = buffer_view->format->tex_type; 1847ec681f3Smrg tex.srgb = vk_format_is_srgb(buffer_view->vk_format); 1857ec681f3Smrg 1867ec681f3Smrg /* At this point we don't have the job. That's the reason the first 1877ec681f3Smrg * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to 1887ec681f3Smrg * add the bo to the job. This also means that we need to add manually 1897ec681f3Smrg * the image bo to the job using the texture. 1907ec681f3Smrg */ 1917ec681f3Smrg const uint32_t base_offset = 1927ec681f3Smrg buffer->mem->bo->offset + 1937ec681f3Smrg buffer->mem_offset + 1947ec681f3Smrg buffer_view->offset; 1957ec681f3Smrg 1967ec681f3Smrg tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); 1977ec681f3Smrg } 1987ec681f3Smrg} 199