/*
 * Copyright © 2019 Raspberry Pi
 *
 * Based in part on v3d driver which is:
 *
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vk_format_info.h"

/* The only version specific structure that we need is
 * TMU_CONFIG_PARAMETER_1. This didn't seem to change significantly from
 * previous V3D versions and we don't expect that to change, so for now let's
 * just hardcode the V3D version here.
 */
#define V3D_VERSION 41
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"

/* Our Vulkan resource indices represent indices in descriptor maps which
 * include all shader stages, so we need to size the arrays below
 * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
 */
#define MAX_STAGES 3

#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct texture_bo_list {
   struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
};

/* This tracks state BOs for both textures and samplers, so we
 * multiply by 2.
 */
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct state_bo_list {
   uint32_t count;
   struct v3dv_bo *states[MAX_TOTAL_STATES];
};

#define MAX_TOTAL_UNIFORM_BUFFERS (1 + MAX_UNIFORM_BUFFERS * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
struct buffer_bo_list {
   struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
   struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
};

static bool
state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
{
   for (int i = 0; i < list->count; i++) {
      if (list->states[i] == bo)
         return true;
   }
   return false;
}

/*
 * This method checks whether the UBO used for push constants needs to be
 * updated, and updates it if so.
 *
 * The push constants UBO is only used for push constants accessed with a
 * non-constant index.
 *
 * FIXME: right now, for these cases we upload the full push_constants_data.
 * An improvement would be to upload only the data that actually needs to go
 * through a UBO.
 */
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_pipeline *pipeline)
{
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS) ||
       pipeline->layout->push_constant_size == 0)
      return;

   if (cmd_buffer->push_constants_resource.bo == NULL) {
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE,
                       "push constants", true);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            MAX_PUSH_CONSTANTS_SIZE);
      if (!ok) {
         fprintf(stderr, "failed to map push constants buffer\n");
         abort();
      }
   } else {
      if (cmd_buffer->push_constants_resource.offset + MAX_PUSH_CONSTANTS_SIZE <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset += MAX_PUSH_CONSTANTS_SIZE;
      } else {
         /* FIXME: we ran out of space for push constants. Should we create
          * a new BO? This could be easier with an uploader.
          */
      }
   }

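   /* Upload the push constant data at the current offset. Uniform streams
    * emitted earlier keep pointing at the previous offsets, so they still
    * see the values that were in effect when they were recorded.
    */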
   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->push_constants_data,
          MAX_PUSH_CONSTANTS_SIZE);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS;
}

/** V3D 4.x TMU configuration parameter 0 (texture) */
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct texture_bo_list *tex_bos,
             struct state_bo_list *state_bos)
{
   uint32_t texture_idx = v3d_unit_data_get_unit(data);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
   tex_bos->tex[texture_idx] = texture_bo;

   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
                                                   &pipeline->shared_data->maps[stage]->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   cl_aligned_u32(uniforms, state_reloc.bo->offset +
                            state_reloc.offset +
                            v3d_unit_data_get_offset(data));

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = state_reloc.bo;
   }
}

/** V3D 4.x TMU configuration parameter 1 (sampler) */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct state_bo_list *state_bos)
{
   uint32_t sampler_idx = v3d_unit_data_get_unit(data);
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
          sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
                                            &pipeline->shared_data->maps[stage]->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state,
                                      &pipeline->shared_data->maps[stage]->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
      V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
      p1_unpacked.unnormalized_coordinates = true;
      V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
                                        &p1_unpacked);
   }

   cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
                            sampler_state_reloc.offset +
                            p1_packed);

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
   }
}

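/* Emits the uniform for a UBO/SSBO address or buffer-size request. For
 * addresses, the value written is the buffer BO's offset plus the
 * descriptor, dynamic and unit-data offsets, and the BO is recorded in
 * buffer_bos so it can later be added to the job. UBO index 0 is reserved
 * for the push constants buffer.
 */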
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        enum broadcom_shader_stage stage,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data,
                        struct buffer_bo_list *buffer_bos)
{
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->shared_data->maps[stage]->ubo_map :
      &pipeline->shared_data->maps[stage]->ssbo_map;

   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

   uint32_t dynamic_offset = 0;

   /* For UBOs, the index is shifted, as index 0 is reserved for push
    * constants.
    */
   if (content == QUNIFORM_UBO_ADDR &&
       v3d_unit_data_get_unit(data) == 0) {
      /* This call ensures that the push constants UBO is updated. It
       * already takes into account whether the update is actually needed.
       */
      check_push_constants_ubo(cmd_buffer, pipeline);

      struct v3dv_cl_reloc *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_u32(uniforms, resource->bo->offset +
                               resource->offset +
                               offset + dynamic_offset);
      buffer_bos->ubo[0] = resource->bo;
   } else {
      uint32_t index =
         content == QUNIFORM_UBO_ADDR ?
         v3d_unit_data_get_unit(data) - 1 :
         data;

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);
      assert(descriptor);
      assert(descriptor->buffer);
      assert(descriptor->buffer->mem);
      assert(descriptor->buffer->mem->bo);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         cl_aligned_u32(uniforms, descriptor->buffer->mem->bo->offset +
                                  descriptor->buffer->mem_offset +
                                  descriptor->offset +
                                  offset + dynamic_offset);

         if (content == QUNIFORM_UBO_ADDR) {
            assert(index + 1 < MAX_TOTAL_UNIFORM_BUFFERS);
            buffer_bos->ubo[index + 1] = descriptor->buffer->mem->bo;
         } else {
            assert(index < MAX_TOTAL_STORAGE_BUFFERS);
            buffer_bos->ssbo[index] = descriptor->buffer->mem->bo;
         }
      }
   }
}

static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
                                 enum quniform_contents contents,
                                 uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      /* We don't u_minify the values, as we are using the image_view
       * extents.
       */
      return image_view->vk.extent.width;
   case QUNIFORM_IMAGE_HEIGHT:
   case QUNIFORM_TEXTURE_HEIGHT:
      return image_view->vk.extent.height;
   case QUNIFORM_IMAGE_DEPTH:
   case QUNIFORM_TEXTURE_DEPTH:
      return image_view->vk.extent.depth;
   case QUNIFORM_IMAGE_ARRAY_SIZE:
   case QUNIFORM_TEXTURE_ARRAY_SIZE:
      if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         return image_view->vk.layer_count;
      } else {
         assert(image_view->vk.layer_count % 6 == 0);
         return image_view->vk.layer_count / 6;
      }
   case QUNIFORM_TEXTURE_LEVELS:
      return image_view->vk.level_count;
   case QUNIFORM_TEXTURE_SAMPLES:
      assert(image_view->vk.image);
      return image_view->vk.image->samples;
   default:
      unreachable("Bad texture size field");
   }
}

static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
                                  enum quniform_contents contents,
                                  uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      return buffer_view->num_elements;
   /* Only size can be queried for texel buffers */
   default:
      unreachable("Bad texture size field for texel buffers");
   }
}

static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
                 struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 enum quniform_contents contents,
                 uint32_t data)
{
   uint32_t texture_idx = data;

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor *descriptor =
      v3dv_descriptor_map_get_descriptor(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout,
                                         texture_idx, NULL);

   assert(descriptor);

   switch (descriptor->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      return get_texture_size_from_image_view(descriptor->image_view,
                                              contents, data);
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return get_texture_size_from_buffer_view(descriptor->buffer_view,
                                               contents, data);
   default:
      unreachable("Wrong descriptor for getting texture size");
   }
}

struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline *pipeline,
                               struct v3dv_shader_variant *variant,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);
   assert(job->cmd_buffer == cmd_buffer);

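   /* These lists gather every BO referenced by the uniforms we are about to
    * emit (texture BOs, texture/sampler shader state, UBOs and SSBOs), so
    * that they can all be registered with the job once the uniform stream
    * has been written.
    */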
   struct texture_bo_list tex_bos = { 0 };
   struct state_bo_list state_bos = { 0 };
   struct buffer_bo_list buffer_bos = { 0 };

   /* The hardware always pre-fetches the next uniform (even when there
    * aren't any left), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);

   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

      case QUNIFORM_UNIFORM:
         cl_aligned_u32(&uniforms, cmd_buffer->push_constants_data[data]);
         break;

      case QUNIFORM_VIEWPORT_X_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Y_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Z_OFFSET:
         cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
         break;

      case QUNIFORM_VIEWPORT_Z_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
         break;

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
                                 uinfo->contents[i], data, &buffer_bos);
         break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &tex_bos, &state_bos);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &state_bos);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         variant->stage,
                                         uinfo->contents[i],
                                         data));
         break;

      /* We generate this from geometry shaders to cap the generated gl_Layer
       * to the number of layers in the framebuffer, so we prevent the binner
       * from trying to access tile state memory out of bounds (for layers
       * that don't exist).
       *
       * Unfortunately, for secondary command buffers we may not know the
       * number of layers in the framebuffer at this stage. Since we are
       * only using this to sanitize the shader and it should not have any
       * impact on correct shaders that emit valid values for gl_Layer,
       * we just work around it by using the largest number of layers we
       * support.
       *
       * FIXME: we could do better than this by recording in the job that
       * the value at this uniform offset is not correct, and patching it when
       * we execute the secondary command buffer into a primary, since we do
       * have the correct number of layers at that point. But again, since
       * this is only for sanitizing the shader and it only affects the
       * specific case of secondary command buffers without framebuffer info
       * available, it might not be worth the trouble.
       *
       * With multiview the number of layers is dictated by the view mask
       * and not by the framebuffer layers. We do set the job's frame tiling
       * information correctly from the view mask in that case, however,
       * secondary command buffers may not have valid frame tiling data,
       * so when multiview is enabled, we always set the number of layers
       * from the subpass view mask.
       */
      case QUNIFORM_FB_LAYERS: {
         const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
         const uint32_t view_mask =
            state->pass->subpasses[state->subpass_idx].view_mask;

         uint32_t num_layers;
         if (view_mask != 0) {
            num_layers = util_last_bit(view_mask);
         } else if (job->frame_tiling.layers != 0) {
            num_layers = job->frame_tiling.layers;
         } else if (cmd_buffer->state.framebuffer) {
            num_layers = cmd_buffer->state.framebuffer->layers;
         } else {
            assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
            num_layers = 2048;
#if DEBUG
            fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
                            "secondary command buffer\n");
#endif
         }
         cl_aligned_u32(&uniforms, num_layers);
         break;
      }

      case QUNIFORM_VIEW_INDEX:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
         break;

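      /* Record where in the CPU-mapped uniform stream each workgroup count
       * is written, so the caller can patch the actual counts later when
       * they are not known at recording time.
       */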
      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

      case QUNIFORM_WORK_GROUP_BASE:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
      if (tex_bos.tex[i])
         v3dv_job_add_bo(job, tex_bos.tex[i]);
   }

   for (int i = 0; i < state_bos.count; i++)
      v3dv_job_add_bo(job, state_bos.states[i]);

   for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
      if (buffer_bos.ubo[i])
         v3dv_job_add_bo(job, buffer_bos.ubo[i]);
   }

   for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
      if (buffer_bos.ssbo[i])
         v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
   }

   if (job->csd.shared_memory)
      v3dv_job_add_bo(job, job->csd.shared_memory);

   if (pipeline->spill.bo)
      v3dv_job_add_bo(job, pipeline->spill.bo);

   return uniform_stream;
}

struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                    struct v3dv_pipeline *pipeline,
                    struct v3dv_shader_variant *variant)
{
   return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
}