/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#ifndef __PAN_ENCODER_H
#define __PAN_ENCODER_H

#include "util/macros.h"
#include "panfrost-quirks.h"

#include <stdbool.h>
#include "util/format/u_format.h"
#include "pan_bo.h"
#include "genxml/gen_macros.h"
#include "pan_device.h"

/* Tiler structure size computation */

unsigned
panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy);

unsigned
panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy);

unsigned
panfrost_choose_hierarchy_mask(
        unsigned width, unsigned height,
        unsigned vertex_count, bool hierarchy);

#if defined(PAN_ARCH) && PAN_ARCH <= 5
static inline unsigned
panfrost_tiler_get_polygon_list_size(const struct panfrost_device *dev,
                                     unsigned fb_width, unsigned fb_height,
                                     bool has_draws)
{
        if (!has_draws)
                return MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE + 4;

        bool hierarchy = !(dev->quirks & MIDGARD_NO_HIER_TILING);
        unsigned hierarchy_mask =
                panfrost_choose_hierarchy_mask(fb_width, fb_height, 1, hierarchy);

        return panfrost_tiler_full_size(fb_width, fb_height, hierarchy_mask, hierarchy) +
               panfrost_tiler_header_size(fb_width, fb_height, hierarchy_mask, hierarchy);
}
#endif

/* Stack sizes */

unsigned
panfrost_get_stack_shift(unsigned stack_size);

unsigned
panfrost_get_total_stack_size(
        unsigned thread_size,
        unsigned threads_per_core,
        unsigned core_count);

const char * panfrost_model_name(unsigned gpu_id);

/* Attributes / instancing */

unsigned
panfrost_padded_vertex_count(unsigned vertex_count);

unsigned
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags);

#ifdef PAN_ARCH
/* Records for gl_VertexID and gl_InstanceID use special encodings on Midgard */

#if PAN_ARCH <= 5
static inline void
panfrost_vertex_id(unsigned padded_count,
                   struct mali_attribute_buffer_packed *attr,
                   bool instanced)
{
        pan_pack(attr, ATTRIBUTE_VERTEX_ID, cfg) {
                if (instanced) {
                        cfg.divisor_r = __builtin_ctz(padded_count);
                        cfg.divisor_p = padded_count >> (cfg.divisor_r + 1);
                } else {
                        /* Large values so the modulo is a no-op */
                        cfg.divisor_r = 0x1F;
                        cfg.divisor_p = 0x4;
                }
        }
}

static inline void
panfrost_instance_id(unsigned padded_count,
                     struct mali_attribute_buffer_packed *attr,
                     bool instanced)
{
        pan_pack(attr, ATTRIBUTE_INSTANCE_ID, cfg) {
                if (!instanced || padded_count <= 1) {
                        /* Divide by large number to force to 0 */
                        cfg.divisor_p = ((1u << 31) - 1);
                        cfg.divisor_r = 0x1F;
                        cfg.divisor_e = 0x1;
                } else if (util_is_power_of_two_or_zero(padded_count)) {
                        /* Can't underflow since padded_count >= 2 */
                        cfg.divisor_r = __builtin_ctz(padded_count) - 1;
                } else {
                        cfg.divisor_p =
                                panfrost_compute_magic_divisor(padded_count,
                                        &cfg.divisor_r, &cfg.divisor_e);
                }
        }
}
#endif /* PAN_ARCH <= 5 */

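/* Illustrative sketch, not part of the driver (the vertex count of 100 is a
 * hypothetical example value): a caller packing the gl_InstanceID record for
 * an instanced draw might first pad the vertex count, then encode the divisor:
 *
 *    unsigned padded = panfrost_padded_vertex_count(100);
 *    struct mali_attribute_buffer_packed buf;
 *    panfrost_instance_id(padded, &buf, true);
 */
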
/* Sampler comparison functions are flipped in OpenGL from the hardware, so we
 * need to be able to flip accordingly */

static inline enum mali_func
panfrost_flip_compare_func(enum mali_func f)
{
        switch (f) {
        case MALI_FUNC_LESS: return MALI_FUNC_GREATER;
        case MALI_FUNC_GREATER: return MALI_FUNC_LESS;
        case MALI_FUNC_LEQUAL: return MALI_FUNC_GEQUAL;
        case MALI_FUNC_GEQUAL: return MALI_FUNC_LEQUAL;
        default: return f;
        }
}

/* Compute shaders are invoked with a gl_NumWorkGroups X/Y/Z triplet. Vertex
 * shaders are invoked as (1, vertex_count, instance_count). Compute shaders
 * also have a gl_WorkGroupSize X/Y/Z triplet. These 6 values are packed
 * together in a dynamic bitfield, packed by this routine. */

static inline void
panfrost_pack_work_groups_compute(
        struct mali_invocation_packed *out,
        unsigned num_x, unsigned num_y, unsigned num_z,
        unsigned size_x, unsigned size_y, unsigned size_z,
        bool quirk_graphics, bool indirect_dispatch)
{
        /* The values needing packing, in order, and the corresponding shifts.
         * Indices into shifts are off-by-one to make the logic easier */

        unsigned values[6] = { size_x, size_y, size_z, num_x, num_y, num_z };
        unsigned shifts[7] = { 0 };
        uint32_t packed = 0;

        for (unsigned i = 0; i < 6; ++i) {
                /* Must be positive, otherwise we underflow */
                assert(values[i] >= 1);

                /* OR it in, shifting as required */
                packed |= ((values[i] - 1) << shifts[i]);

                /* How many bits did we use? */
                unsigned bit_count = util_logbase2_ceil(values[i]);

                /* Set the next shift accordingly */
                shifts[i + 1] = shifts[i] + bit_count;
        }

        pan_pack(out, INVOCATION, cfg) {
                cfg.invocations = packed;
                cfg.size_y_shift = shifts[1];
                cfg.size_z_shift = shifts[2];
                cfg.workgroups_x_shift = shifts[3];

                if (!indirect_dispatch) {
                        /* Leave zero for the dispatch shader */
                        cfg.workgroups_y_shift = shifts[4];
                        cfg.workgroups_z_shift = shifts[5];
                }

                /* Quirk: for non-instanced graphics, the blob sets
                 * workgroups_z_shift = 32. This doesn't appear to matter to
                 * the hardware, but it's good to be bit-identical. */

                if (quirk_graphics && (num_z <= 1))
                        cfg.workgroups_z_shift = 32;

                /* For graphics, set to the minimum efficient value. For
                 * compute, must equal the workgroup X shift for barriers to
                 * function correctly */

                cfg.thread_group_split = quirk_graphics ?
                        MALI_SPLIT_MIN_EFFICIENT : cfg.workgroups_x_shift;
        }
}

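/* Illustrative sketch, not part of the driver (all values hypothetical): a
 * direct compute dispatch of a 16x16x1 grid of 8x8x1 workgroups, with no
 * graphics quirk, could be packed as:
 *
 *    struct mali_invocation_packed invocation;
 *    panfrost_pack_work_groups_compute(&invocation,
 *                                      16, 16, 1,   // gl_NumWorkGroups
 *                                      8, 8, 1,     // gl_WorkGroupSize
 *                                      false,       // quirk_graphics
 *                                      false);      // indirect_dispatch
 */
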
#if PAN_ARCH >= 5
/* Format conversion */
static inline enum mali_z_internal_format
panfrost_get_z_internal_format(enum pipe_format fmt)
{
        switch (fmt) {
        case PIPE_FORMAT_Z16_UNORM:
        case PIPE_FORMAT_Z16_UNORM_S8_UINT:
                return MALI_Z_INTERNAL_FORMAT_D16;
        case PIPE_FORMAT_Z24_UNORM_S8_UINT:
        case PIPE_FORMAT_Z24X8_UNORM:
                return MALI_Z_INTERNAL_FORMAT_D24;
        case PIPE_FORMAT_Z32_FLOAT:
        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
                return MALI_Z_INTERNAL_FORMAT_D32;
        default:
                unreachable("Unsupported depth/stencil format.");
        }
}
#endif

#endif /* PAN_ARCH */

#endif