/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#ifndef __PAN_ENCODER_H
#define __PAN_ENCODER_H

#include "util/macros.h"
#include "panfrost-quirks.h"

#include <stdbool.h>
#include "util/format/u_format.h"
#include "pan_bo.h"
#include "genxml/gen_macros.h"
#include "pan_device.h"

/* Tiler structure size computation */

unsigned
panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy);

unsigned
panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy);

unsigned
panfrost_choose_hierarchy_mask(
        unsigned width, unsigned height,
        unsigned vertex_count, bool hierarchy);

#if defined(PAN_ARCH) && PAN_ARCH <= 5
/* Size, in bytes, to allocate for the Midgard tiler polygon list for a
 * framebuffer of the given dimensions. With no draws, only a minimal
 * header (plus a trailing 4 bytes) is needed; otherwise the size is the
 * header plus the body for the hierarchy mask chosen for this framebuffer.
 * Hierarchical tiling is disabled on devices with the
 * MIDGARD_NO_HIER_TILING quirk. */
static inline unsigned
panfrost_tiler_get_polygon_list_size(const struct panfrost_device *dev,
                                     unsigned fb_width, unsigned fb_height,
                                     bool has_draws)
{
        if (!has_draws)
                return MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE + 4;

        bool hierarchy = !(dev->quirks & MIDGARD_NO_HIER_TILING);

        /* vertex_count = 1: assumes the minimum vertex count is enough to
         * size the hierarchy when draws are present — TODO confirm against
         * panfrost_choose_hierarchy_mask's contract */
        unsigned hierarchy_mask =
                panfrost_choose_hierarchy_mask(fb_width, fb_height, 1, hierarchy);

        return panfrost_tiler_full_size(fb_width, fb_height, hierarchy_mask, hierarchy) +
                panfrost_tiler_header_size(fb_width, fb_height, hierarchy_mask, hierarchy);
}
#endif

/* Stack sizes */

unsigned
panfrost_get_stack_shift(unsigned stack_size);

unsigned
panfrost_get_total_stack_size(
                unsigned thread_size,
                unsigned threads_per_core,
                unsigned core_count);

const char * panfrost_model_name(unsigned gpu_id);

/* Attributes / instancing */

unsigned
panfrost_padded_vertex_count(unsigned vertex_count);

unsigned
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags);

#ifdef PAN_ARCH
/* Records for gl_VertexID and gl_InstanceID use special encodings on Midgard */

#if PAN_ARCH <= 5
/* Pack the attribute buffer record used to source gl_VertexID.
 * padded_count must be a padded (power-of-two-multiple) instance size as
 * produced by panfrost_padded_vertex_count; for the instanced case it must
 * be nonzero (ctz(0) is undefined). */
static inline void
panfrost_vertex_id(unsigned padded_count,
                   struct mali_attribute_buffer_packed *attr,
                   bool instanced)
{
        pan_pack(attr, ATTRIBUTE_VERTEX_ID, cfg) {
                if (instanced) {
                        cfg.divisor_r = __builtin_ctz(padded_count);
                        cfg.divisor_p = padded_count >> (cfg.divisor_r + 1);
                } else {
                        /* Large values so the modulo is a no-op */
                        cfg.divisor_r = 0x1F;
                        cfg.divisor_p = 0x4;
                }
        }
}

/* Pack the attribute buffer record used to source gl_InstanceID.
 * Three encodings: force-to-zero for non-instanced draws, a cheap
 * shift-only divisor for power-of-two padded counts, and a magic
 * (multiplicative) divisor otherwise. */
static inline void
panfrost_instance_id(unsigned padded_count,
                     struct mali_attribute_buffer_packed *attr,
                     bool instanced)
{
        pan_pack(attr, ATTRIBUTE_INSTANCE_ID, cfg) {
                if (!instanced || padded_count <= 1) {
                        /* Divide by large number to force to 0 */
                        cfg.divisor_p = ((1u << 31) - 1);
                        cfg.divisor_r = 0x1F;
                        cfg.divisor_e = 0x1;
                } else if(util_is_power_of_two_or_zero(padded_count)) {
                        /* Can't underflow since padded_count >= 2 */
                        cfg.divisor_r = __builtin_ctz(padded_count) - 1;
                } else {
                        cfg.divisor_p =
                                panfrost_compute_magic_divisor(padded_count,
                                        &cfg.divisor_r, &cfg.divisor_e);
                }
        }
}
#endif /* PAN_ARCH <= 5 */

/* Sampler comparison functions are flipped in OpenGL from the hardware, so we
 * need to be able to flip accordingly. Non-ordering functions (EQUAL,
 * NEVER, ALWAYS, ...) are their own flip and pass through unchanged. */

static inline enum mali_func
panfrost_flip_compare_func(enum mali_func f)
{
        switch (f) {
        case MALI_FUNC_LESS: return MALI_FUNC_GREATER;
        case MALI_FUNC_GREATER: return MALI_FUNC_LESS;
        case MALI_FUNC_LEQUAL: return MALI_FUNC_GEQUAL;
        case MALI_FUNC_GEQUAL: return MALI_FUNC_LEQUAL;
        default: return f;
        }

}

/* Compute shaders are invoked with a gl_NumWorkGroups X/Y/Z triplet. Vertex
 * shaders are invoked as (1, vertex_count, instance_count). Compute shaders
 * also have a gl_WorkGroupSize X/Y/Z triplet. These 6 values are packed
 * together in a dynamic bitfield, packed by this routine.
 */

/* Pack the 6 invocation values (workgroup size XYZ, then workgroup count
 * XYZ) into the dynamic bitfield of a mali_invocation descriptor. Each
 * value is biased by -1 and given just enough bits to represent it; the
 * running bit offsets become the *_shift fields. All six values must be
 * >= 1. With indirect_dispatch, the workgroup Y/Z shifts are left zero for
 * the dispatch shader to fill in. */
static inline void
panfrost_pack_work_groups_compute(
                struct mali_invocation_packed *out,
                unsigned num_x, unsigned num_y, unsigned num_z,
                unsigned size_x, unsigned size_y, unsigned size_z,
                bool quirk_graphics, bool indirect_dispatch)
{
        /* The values needing packing, in order, and the corresponding shifts.
         * Indicies into shift are off-by-one to make the logic easier
         * (shifts[i] is where values[i] lands; shifts[6] is only a
         * scratch slot for the final loop iteration). */

        unsigned values[6] = { size_x, size_y, size_z, num_x, num_y, num_z };
        unsigned shifts[7] = { 0 };
        uint32_t packed = 0;

        for (unsigned i = 0; i < 6; ++i) {
                /* Must be positive, otherwise we underflow */
                assert(values[i] >= 1);

                /* OR it in, shifting as required */
                packed |= ((values[i] - 1) << shifts[i]);

                /* How many bits did we use? (A value of 1 encodes as 0 and
                 * consumes no bits.) */
                unsigned bit_count = util_logbase2_ceil(values[i]);

                /* Set the next shift accordingly */
                shifts[i + 1] = shifts[i] + bit_count;
        }

        pan_pack(out, INVOCATION, cfg) {
                cfg.invocations = packed;
                cfg.size_y_shift = shifts[1];
                cfg.size_z_shift = shifts[2];
                cfg.workgroups_x_shift = shifts[3];

                if (!indirect_dispatch) {
                        /* Leave zero for the dispatch shader */
                        cfg.workgroups_y_shift = shifts[4];
                        cfg.workgroups_z_shift = shifts[5];
                }

                /* Quirk: for non-instanced graphics, the blob sets
                 * workgroups_z_shift = 32. This doesn't appear to matter to
                 * the hardware, but it's good to be bit-identical. */

                if (quirk_graphics && (num_z <= 1))
                        cfg.workgroups_z_shift = 32;

                /* For graphics, set to the minimum efficient value. For
                 * compute, must equal the workgroup X shift for barriers to
                 * function correctly */

                cfg.thread_group_split = quirk_graphics ?
                        MALI_SPLIT_MIN_EFFICIENT : cfg.workgroups_x_shift;
        }
}

#if PAN_ARCH >= 5
/* Format conversion: map a Gallium depth(-stencil) pipe_format to the
 * hardware's internal depth format enum. Only D16/D24/D32 variants are
 * supported; anything else is a caller bug. */
static inline enum mali_z_internal_format
panfrost_get_z_internal_format(enum pipe_format fmt)
{
        switch (fmt) {
        case PIPE_FORMAT_Z16_UNORM:
        case PIPE_FORMAT_Z16_UNORM_S8_UINT:
                return MALI_Z_INTERNAL_FORMAT_D16;
        case PIPE_FORMAT_Z24_UNORM_S8_UINT:
        case PIPE_FORMAT_Z24X8_UNORM:
                return MALI_Z_INTERNAL_FORMAT_D24;
        case PIPE_FORMAT_Z32_FLOAT:
        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
                return MALI_Z_INTERNAL_FORMAT_D32;
        default:
                unreachable("Unsupported depth/stencil format.");
        }
}
#endif

#endif /* PAN_ARCH */

#endif