1848b8605Smrg/************************************************************************** 2848b8605Smrg * 3848b8605Smrg * Copyright 2011 Marek Olšák <maraeo@gmail.com> 4848b8605Smrg * All Rights Reserved. 5848b8605Smrg * 6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7848b8605Smrg * copy of this software and associated documentation files (the 8848b8605Smrg * "Software"), to deal in the Software without restriction, including 9848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish, 10848b8605Smrg * distribute, sub license, and/or sell copies of the Software, and to 11848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to 12848b8605Smrg * the following conditions: 13848b8605Smrg * 14848b8605Smrg * The above copyright notice and this permission notice (including the 15848b8605Smrg * next paragraph) shall be included in all copies or substantial portions 16848b8605Smrg * of the Software. 17848b8605Smrg * 18848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21848b8605Smrg * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR 22848b8605Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23848b8605Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24848b8605Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25848b8605Smrg * 26848b8605Smrg **************************************************************************/ 27848b8605Smrg 28848b8605Smrg/** 29848b8605Smrg * This module uploads user buffers and translates the vertex buffers which 30848b8605Smrg * contain incompatible vertices (i.e. not supported by the driver/hardware) 31848b8605Smrg * into compatible ones, based on the Gallium CAPs. 32848b8605Smrg * 33848b8605Smrg * It does not upload index buffers. 34848b8605Smrg * 35848b8605Smrg * The module heavily uses bitmasks to represent per-buffer and 36848b8605Smrg * per-vertex-element flags to avoid looping over the list of buffers just 37848b8605Smrg * to see if there's a non-zero stride, or user buffer, or unsupported format, 38848b8605Smrg * etc. 39848b8605Smrg * 40848b8605Smrg * There are 3 categories of vertex elements, which are processed separately: 41848b8605Smrg * - per-vertex attribs (stride != 0, instance_divisor == 0) 42848b8605Smrg * - instanced attribs (stride != 0, instance_divisor > 0) 43848b8605Smrg * - constant attribs (stride == 0) 44848b8605Smrg * 45848b8605Smrg * All needed uploads and translations are performed every draw command, but 46848b8605Smrg * only the subset of vertices needed for that draw command is uploaded or 47848b8605Smrg * translated. (the module never translates whole buffers) 48848b8605Smrg * 49848b8605Smrg * 50848b8605Smrg * The module consists of two main parts: 51848b8605Smrg * 52848b8605Smrg * 53848b8605Smrg * 1) Translate (u_vbuf_translate_begin/end) 54848b8605Smrg * 55848b8605Smrg * This is pretty much a vertex fetch fallback. It translates vertices from 56848b8605Smrg * one vertex buffer to another in an unused vertex buffer slot. It does 57848b8605Smrg * whatever is needed to make the vertices readable by the hardware (changes 58848b8605Smrg * vertex formats and aligns offsets and strides). The translate module is 59848b8605Smrg * used here. 60848b8605Smrg * 61848b8605Smrg * Each of the 3 categories is translated to a separate buffer. 62848b8605Smrg * Only the [min_index, max_index] range is translated. For instanced attribs, 63848b8605Smrg * the range is [start_instance, start_instance+instance_count]. For constant 64848b8605Smrg * attribs, the range is [0, 1]. 65848b8605Smrg * 66848b8605Smrg * 67848b8605Smrg * 2) User buffer uploading (u_vbuf_upload_buffers) 68848b8605Smrg * 69848b8605Smrg * Only the [min_index, max_index] range is uploaded (just like Translate) 70848b8605Smrg * with a single memcpy. 71848b8605Smrg * 72848b8605Smrg * This method works best for non-indexed draw operations or indexed draw 73848b8605Smrg * operations where the [min_index, max_index] range is not being way bigger 74848b8605Smrg * than the vertex count. 75848b8605Smrg * 76848b8605Smrg * If the range is too big (e.g. one triangle with indices {0, 1, 10000}), 77848b8605Smrg * the per-vertex attribs are uploaded via the translate module, all packed 78848b8605Smrg * into one vertex buffer, and the indexed draw call is turned into 79848b8605Smrg * a non-indexed one in the process. This adds additional complexity 80848b8605Smrg * to the translate part, but it prevents bad apps from bringing your frame 81848b8605Smrg * rate down. 82848b8605Smrg * 83848b8605Smrg * 84848b8605Smrg * If there is nothing to do, it forwards every command to the driver. 85848b8605Smrg * The module also has its own CSO cache of vertex element states. 86848b8605Smrg */ 87848b8605Smrg 88848b8605Smrg#include "util/u_vbuf.h" 89848b8605Smrg 90848b8605Smrg#include "util/u_dump.h" 91848b8605Smrg#include "util/u_format.h" 92848b8605Smrg#include "util/u_inlines.h" 93848b8605Smrg#include "util/u_memory.h" 94848b8605Smrg#include "util/u_upload_mgr.h" 95848b8605Smrg#include "translate/translate.h" 96848b8605Smrg#include "translate/translate_cache.h" 97848b8605Smrg#include "cso_cache/cso_cache.h" 98848b8605Smrg#include "cso_cache/cso_hash.h" 99848b8605Smrg 100848b8605Smrgstruct u_vbuf_elements { 101848b8605Smrg unsigned count; 102848b8605Smrg struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; 103848b8605Smrg 104848b8605Smrg unsigned src_format_size[PIPE_MAX_ATTRIBS]; 105848b8605Smrg 106848b8605Smrg /* If (velem[i].src_format != native_format[i]), the vertex buffer 107848b8605Smrg * referenced by the vertex element cannot be used for rendering and 108848b8605Smrg * its vertex data must be translated to native_format[i]. */ 109848b8605Smrg enum pipe_format native_format[PIPE_MAX_ATTRIBS]; 110848b8605Smrg unsigned native_format_size[PIPE_MAX_ATTRIBS]; 111848b8605Smrg 112848b8605Smrg /* Which buffers are used by the vertex element state. */ 113848b8605Smrg uint32_t used_vb_mask; 114848b8605Smrg /* This might mean two things: 115848b8605Smrg * - src_format != native_format, as discussed above. 116848b8605Smrg * - src_offset % 4 != 0 (if the caps don't allow such an offset). */ 117848b8605Smrg uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */ 118848b8605Smrg /* Which buffer has at least one vertex element referencing it 119848b8605Smrg * incompatible. */ 120848b8605Smrg uint32_t incompatible_vb_mask_any; 121848b8605Smrg /* Which buffer has all vertex elements referencing it incompatible. */ 122848b8605Smrg uint32_t incompatible_vb_mask_all; 123848b8605Smrg /* Which buffer has at least one vertex element referencing it 124848b8605Smrg * compatible. */ 125848b8605Smrg uint32_t compatible_vb_mask_any; 126848b8605Smrg /* Which buffer has all vertex elements referencing it compatible. */ 127848b8605Smrg uint32_t compatible_vb_mask_all; 128848b8605Smrg 129848b8605Smrg /* Which buffer has at least one vertex element referencing it 130848b8605Smrg * non-instanced. */ 131848b8605Smrg uint32_t noninstance_vb_mask_any; 132848b8605Smrg 133848b8605Smrg void *driver_cso; 134848b8605Smrg}; 135848b8605Smrg 136848b8605Smrgenum { 137848b8605Smrg VB_VERTEX = 0, 138848b8605Smrg VB_INSTANCE = 1, 139848b8605Smrg VB_CONST = 2, 140848b8605Smrg VB_NUM = 3 141848b8605Smrg}; 142848b8605Smrg 143848b8605Smrgstruct u_vbuf { 144848b8605Smrg struct u_vbuf_caps caps; 145b8e80941Smrg bool has_signed_vb_offset; 146848b8605Smrg 147848b8605Smrg struct pipe_context *pipe; 148848b8605Smrg struct translate_cache *translate_cache; 149848b8605Smrg struct cso_cache *cso_cache; 150848b8605Smrg 151848b8605Smrg /* This is what was set in set_vertex_buffers. 152848b8605Smrg * May contain user buffers. */ 153848b8605Smrg struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; 154848b8605Smrg uint32_t enabled_vb_mask; 155848b8605Smrg 156848b8605Smrg /* Saved vertex buffer. */ 157b8e80941Smrg struct pipe_vertex_buffer vertex_buffer0_saved; 158848b8605Smrg 159848b8605Smrg /* Vertex buffers for the driver. 160848b8605Smrg * There are usually no user buffers. */ 161848b8605Smrg struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS]; 162848b8605Smrg uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last 163848b8605Smrg call of set_vertex_buffers */ 164848b8605Smrg 165848b8605Smrg /* Vertex elements. */ 166848b8605Smrg struct u_vbuf_elements *ve, *ve_saved; 167848b8605Smrg 168848b8605Smrg /* Vertex elements used for the translate fallback. */ 169848b8605Smrg struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS]; 170848b8605Smrg /* If non-NULL, this is a vertex element state used for the translate 171848b8605Smrg * fallback and therefore used for rendering too. */ 172848b8605Smrg boolean using_translate; 173848b8605Smrg /* The vertex buffer slot index where translated vertices have been 174848b8605Smrg * stored in. */ 175848b8605Smrg unsigned fallback_vbs[VB_NUM]; 176848b8605Smrg 177848b8605Smrg /* Which buffer is a user buffer. */ 178848b8605Smrg uint32_t user_vb_mask; /* each bit describes a corresp. buffer */ 179848b8605Smrg /* Which buffer is incompatible (unaligned). */ 180848b8605Smrg uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */ 181848b8605Smrg /* Which buffer has a non-zero stride. */ 182848b8605Smrg uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */ 183848b8605Smrg}; 184848b8605Smrg 185848b8605Smrgstatic void * 186848b8605Smrgu_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, 187848b8605Smrg const struct pipe_vertex_element *attribs); 188848b8605Smrgstatic void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso); 189848b8605Smrg 190b8e80941Smrgstatic const struct { 191b8e80941Smrg enum pipe_format from, to; 192b8e80941Smrg} vbuf_format_fallbacks[] = { 193b8e80941Smrg { PIPE_FORMAT_R32_FIXED, PIPE_FORMAT_R32_FLOAT }, 194b8e80941Smrg { PIPE_FORMAT_R32G32_FIXED, PIPE_FORMAT_R32G32_FLOAT }, 195b8e80941Smrg { PIPE_FORMAT_R32G32B32_FIXED, PIPE_FORMAT_R32G32B32_FLOAT }, 196b8e80941Smrg { PIPE_FORMAT_R32G32B32A32_FIXED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 197b8e80941Smrg { PIPE_FORMAT_R16_FLOAT, PIPE_FORMAT_R32_FLOAT }, 198b8e80941Smrg { PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R32G32_FLOAT }, 199b8e80941Smrg { PIPE_FORMAT_R16G16B16_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT }, 200b8e80941Smrg { PIPE_FORMAT_R16G16B16A16_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT }, 201b8e80941Smrg { PIPE_FORMAT_R64_FLOAT, PIPE_FORMAT_R32_FLOAT }, 202b8e80941Smrg { PIPE_FORMAT_R64G64_FLOAT, PIPE_FORMAT_R32G32_FLOAT }, 203b8e80941Smrg { PIPE_FORMAT_R64G64B64_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT }, 204b8e80941Smrg { PIPE_FORMAT_R64G64B64A64_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT }, 205b8e80941Smrg { PIPE_FORMAT_R32_UNORM, PIPE_FORMAT_R32_FLOAT }, 206b8e80941Smrg { PIPE_FORMAT_R32G32_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 207b8e80941Smrg { PIPE_FORMAT_R32G32B32_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 208b8e80941Smrg { PIPE_FORMAT_R32G32B32A32_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 209b8e80941Smrg { PIPE_FORMAT_R32_SNORM, PIPE_FORMAT_R32_FLOAT }, 210b8e80941Smrg { PIPE_FORMAT_R32G32_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 211b8e80941Smrg { PIPE_FORMAT_R32G32B32_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 212b8e80941Smrg { PIPE_FORMAT_R32G32B32A32_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 213b8e80941Smrg { PIPE_FORMAT_R32_USCALED, PIPE_FORMAT_R32_FLOAT }, 214b8e80941Smrg { PIPE_FORMAT_R32G32_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 215b8e80941Smrg { PIPE_FORMAT_R32G32B32_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 216b8e80941Smrg { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 217b8e80941Smrg { PIPE_FORMAT_R32_SSCALED, PIPE_FORMAT_R32_FLOAT }, 218b8e80941Smrg { PIPE_FORMAT_R32G32_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 219b8e80941Smrg { PIPE_FORMAT_R32G32B32_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 220b8e80941Smrg { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 221b8e80941Smrg { PIPE_FORMAT_R16_UNORM, PIPE_FORMAT_R32_FLOAT }, 222b8e80941Smrg { PIPE_FORMAT_R16G16_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 223b8e80941Smrg { PIPE_FORMAT_R16G16B16_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 224b8e80941Smrg { PIPE_FORMAT_R16G16B16A16_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 225b8e80941Smrg { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R32_FLOAT }, 226b8e80941Smrg { PIPE_FORMAT_R16G16_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 227b8e80941Smrg { PIPE_FORMAT_R16G16B16_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 228b8e80941Smrg { PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 229b8e80941Smrg { PIPE_FORMAT_R16_USCALED, PIPE_FORMAT_R32_FLOAT }, 230b8e80941Smrg { PIPE_FORMAT_R16G16_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 231b8e80941Smrg { PIPE_FORMAT_R16G16B16_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 232b8e80941Smrg { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 233b8e80941Smrg { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R32_FLOAT }, 234b8e80941Smrg { PIPE_FORMAT_R16G16_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 235b8e80941Smrg { PIPE_FORMAT_R16G16B16_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 236b8e80941Smrg { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 237b8e80941Smrg { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R32_FLOAT }, 238b8e80941Smrg { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 239b8e80941Smrg { PIPE_FORMAT_R8G8B8_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 240b8e80941Smrg { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 241b8e80941Smrg { PIPE_FORMAT_R8_SNORM, PIPE_FORMAT_R32_FLOAT }, 242b8e80941Smrg { PIPE_FORMAT_R8G8_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 243b8e80941Smrg { PIPE_FORMAT_R8G8B8_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 244b8e80941Smrg { PIPE_FORMAT_R8G8B8A8_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 245b8e80941Smrg { PIPE_FORMAT_R8_USCALED, PIPE_FORMAT_R32_FLOAT }, 246b8e80941Smrg { PIPE_FORMAT_R8G8_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 247b8e80941Smrg { PIPE_FORMAT_R8G8B8_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 248b8e80941Smrg { PIPE_FORMAT_R8G8B8A8_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 249b8e80941Smrg { PIPE_FORMAT_R8_SSCALED, PIPE_FORMAT_R32_FLOAT }, 250b8e80941Smrg { PIPE_FORMAT_R8G8_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 251b8e80941Smrg { PIPE_FORMAT_R8G8B8_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 252b8e80941Smrg { PIPE_FORMAT_R8G8B8A8_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 253b8e80941Smrg}; 254848b8605Smrg 255b8e80941Smrgboolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, 256b8e80941Smrg unsigned flags) 257848b8605Smrg{ 258b8e80941Smrg unsigned i; 259b8e80941Smrg boolean fallback = FALSE; 260b8e80941Smrg 261b8e80941Smrg /* I'd rather have a bitfield of which formats are supported and a static 262b8e80941Smrg * table of the translations indexed by format, but since we don't have C99 263b8e80941Smrg * we can't easily make a sparsely-populated table indexed by format. So, 264b8e80941Smrg * we construct the sparse table here. 265b8e80941Smrg */ 266b8e80941Smrg for (i = 0; i < PIPE_FORMAT_COUNT; i++) 267b8e80941Smrg caps->format_translation[i] = i; 268b8e80941Smrg 269b8e80941Smrg for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) { 270b8e80941Smrg enum pipe_format format = vbuf_format_fallbacks[i].from; 271b8e80941Smrg 272b8e80941Smrg if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0, 273b8e80941Smrg PIPE_BIND_VERTEX_BUFFER)) { 274b8e80941Smrg caps->format_translation[format] = vbuf_format_fallbacks[i].to; 275b8e80941Smrg fallback = TRUE; 276b8e80941Smrg } 277b8e80941Smrg } 278848b8605Smrg 279848b8605Smrg caps->buffer_offset_unaligned = 280848b8605Smrg !screen->get_param(screen, 281b8e80941Smrg PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY); 282848b8605Smrg caps->buffer_stride_unaligned = 283b8e80941Smrg !screen->get_param(screen, 284848b8605Smrg PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY); 285848b8605Smrg caps->velem_src_offset_unaligned = 286848b8605Smrg !screen->get_param(screen, 287b8e80941Smrg PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY); 288848b8605Smrg caps->user_vertex_buffers = 289848b8605Smrg screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS); 290b8e80941Smrg 291b8e80941Smrg if (!caps->buffer_offset_unaligned || 292b8e80941Smrg !caps->buffer_stride_unaligned || 293b8e80941Smrg !caps->velem_src_offset_unaligned || 294b8e80941Smrg (!(flags & U_VBUF_FLAG_NO_USER_VBOS) && !caps->user_vertex_buffers)) { 295b8e80941Smrg fallback = TRUE; 296b8e80941Smrg } 297b8e80941Smrg 298b8e80941Smrg return fallback; 299848b8605Smrg} 300848b8605Smrg 301848b8605Smrgstruct u_vbuf * 302b8e80941Smrgu_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps) 303848b8605Smrg{ 304848b8605Smrg struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf); 305848b8605Smrg 306848b8605Smrg mgr->caps = *caps; 307848b8605Smrg mgr->pipe = pipe; 308848b8605Smrg mgr->cso_cache = cso_cache_create(); 309848b8605Smrg mgr->translate_cache = translate_cache_create(); 310848b8605Smrg memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs)); 311848b8605Smrg 312b8e80941Smrg mgr->has_signed_vb_offset = 313b8e80941Smrg pipe->screen->get_param(pipe->screen, 314b8e80941Smrg PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET); 315848b8605Smrg 316848b8605Smrg return mgr; 317848b8605Smrg} 318848b8605Smrg 319848b8605Smrg/* u_vbuf uses its own caching for vertex elements, because it needs to keep 320848b8605Smrg * its own preprocessed state per vertex element CSO. */ 321848b8605Smrgstatic struct u_vbuf_elements * 322848b8605Smrgu_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count, 323848b8605Smrg const struct pipe_vertex_element *states) 324848b8605Smrg{ 325848b8605Smrg struct pipe_context *pipe = mgr->pipe; 326848b8605Smrg unsigned key_size, hash_key; 327848b8605Smrg struct cso_hash_iter iter; 328848b8605Smrg struct u_vbuf_elements *ve; 329848b8605Smrg struct cso_velems_state velems_state; 330848b8605Smrg 331848b8605Smrg /* need to include the count into the stored state data too. */ 332848b8605Smrg key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned); 333848b8605Smrg velems_state.count = count; 334848b8605Smrg memcpy(velems_state.velems, states, 335848b8605Smrg sizeof(struct pipe_vertex_element) * count); 336848b8605Smrg hash_key = cso_construct_key((void*)&velems_state, key_size); 337848b8605Smrg iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS, 338848b8605Smrg (void*)&velems_state, key_size); 339848b8605Smrg 340848b8605Smrg if (cso_hash_iter_is_null(iter)) { 341848b8605Smrg struct cso_velements *cso = MALLOC_STRUCT(cso_velements); 342848b8605Smrg memcpy(&cso->state, &velems_state, key_size); 343848b8605Smrg cso->data = u_vbuf_create_vertex_elements(mgr, count, states); 344848b8605Smrg cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements; 345848b8605Smrg cso->context = (void*)mgr; 346848b8605Smrg 347848b8605Smrg iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso); 348848b8605Smrg ve = cso->data; 349848b8605Smrg } else { 350848b8605Smrg ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data; 351848b8605Smrg } 352848b8605Smrg 353848b8605Smrg assert(ve); 354848b8605Smrg 355848b8605Smrg if (ve != mgr->ve) 356b8e80941Smrg pipe->bind_vertex_elements_state(pipe, ve->driver_cso); 357b8e80941Smrg 358848b8605Smrg return ve; 359848b8605Smrg} 360848b8605Smrg 361848b8605Smrgvoid u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count, 362848b8605Smrg const struct pipe_vertex_element *states) 363848b8605Smrg{ 364848b8605Smrg mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states); 365848b8605Smrg} 366848b8605Smrg 367848b8605Smrgvoid u_vbuf_destroy(struct u_vbuf *mgr) 368848b8605Smrg{ 369848b8605Smrg struct pipe_screen *screen = mgr->pipe->screen; 370848b8605Smrg unsigned i; 371b8e80941Smrg const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX, 372b8e80941Smrg PIPE_SHADER_CAP_MAX_INPUTS); 373848b8605Smrg 374848b8605Smrg mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL); 375848b8605Smrg 376b8e80941Smrg for (i = 0; i < PIPE_MAX_ATTRIBS; i++) 377b8e80941Smrg pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]); 378b8e80941Smrg for (i = 0; i < PIPE_MAX_ATTRIBS; i++) 379b8e80941Smrg pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]); 380b8e80941Smrg 381b8e80941Smrg pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved); 382848b8605Smrg 383848b8605Smrg translate_cache_destroy(mgr->translate_cache); 384848b8605Smrg cso_cache_delete(mgr->cso_cache); 385848b8605Smrg FREE(mgr); 386848b8605Smrg} 387848b8605Smrg 388848b8605Smrgstatic enum pipe_error 389848b8605Smrgu_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, 390b8e80941Smrg const struct pipe_draw_info *info, 391848b8605Smrg unsigned vb_mask, unsigned out_vb, 392848b8605Smrg int start_vertex, unsigned num_vertices, 393b8e80941Smrg int min_index, boolean unroll_indices) 394848b8605Smrg{ 395848b8605Smrg struct translate *tr; 396848b8605Smrg struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; 397848b8605Smrg struct pipe_resource *out_buffer = NULL; 398848b8605Smrg uint8_t *out_map; 399848b8605Smrg unsigned out_offset, mask; 400848b8605Smrg 401848b8605Smrg /* Get a translate object. */ 402848b8605Smrg tr = translate_cache_find(mgr->translate_cache, key); 403848b8605Smrg 404848b8605Smrg /* Map buffers we want to translate. */ 405848b8605Smrg mask = vb_mask; 406848b8605Smrg while (mask) { 407848b8605Smrg struct pipe_vertex_buffer *vb; 408848b8605Smrg unsigned offset; 409848b8605Smrg uint8_t *map; 410848b8605Smrg unsigned i = u_bit_scan(&mask); 411848b8605Smrg 412848b8605Smrg vb = &mgr->vertex_buffer[i]; 413848b8605Smrg offset = vb->buffer_offset + vb->stride * start_vertex; 414848b8605Smrg 415b8e80941Smrg if (vb->is_user_buffer) { 416b8e80941Smrg map = (uint8_t*)vb->buffer.user + offset; 417848b8605Smrg } else { 418848b8605Smrg unsigned size = vb->stride ? num_vertices * vb->stride 419848b8605Smrg : sizeof(double)*4; 420848b8605Smrg 421b8e80941Smrg if (!vb->buffer.resource) 422b8e80941Smrg continue; 423b8e80941Smrg 424b8e80941Smrg if (offset + size > vb->buffer.resource->width0) { 425b8e80941Smrg /* Don't try to map past end of buffer. This often happens when 426b8e80941Smrg * we're translating an attribute that's at offset > 0 from the 427b8e80941Smrg * start of the vertex. If we'd subtract attrib's offset from 428b8e80941Smrg * the size, this probably wouldn't happen. 429b8e80941Smrg */ 430b8e80941Smrg size = vb->buffer.resource->width0 - offset; 431b8e80941Smrg 432b8e80941Smrg /* Also adjust num_vertices. A common user error is to call 433b8e80941Smrg * glDrawRangeElements() with incorrect 'end' argument. The 'end 434b8e80941Smrg * value should be the max index value, but people often 435b8e80941Smrg * accidentally add one to this value. This adjustment avoids 436b8e80941Smrg * crashing (by reading past the end of a hardware buffer mapping) 437b8e80941Smrg * when people do that. 438b8e80941Smrg */ 439b8e80941Smrg num_vertices = (size + vb->stride - 1) / vb->stride; 440848b8605Smrg } 441848b8605Smrg 442b8e80941Smrg map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size, 443848b8605Smrg PIPE_TRANSFER_READ, &vb_transfer[i]); 444848b8605Smrg } 445848b8605Smrg 446848b8605Smrg /* Subtract min_index so that indexing with the index buffer works. */ 447848b8605Smrg if (unroll_indices) { 448848b8605Smrg map -= (ptrdiff_t)vb->stride * min_index; 449848b8605Smrg } 450848b8605Smrg 451b8e80941Smrg tr->set_buffer(tr, i, map, vb->stride, info->max_index); 452848b8605Smrg } 453848b8605Smrg 454848b8605Smrg /* Translate. */ 455848b8605Smrg if (unroll_indices) { 456848b8605Smrg struct pipe_transfer *transfer = NULL; 457b8e80941Smrg const unsigned offset = info->start * info->index_size; 458848b8605Smrg uint8_t *map; 459848b8605Smrg 460848b8605Smrg /* Create and map the output buffer. */ 461b8e80941Smrg u_upload_alloc(mgr->pipe->stream_uploader, 0, 462b8e80941Smrg key->output_stride * info->count, 4, 463b8e80941Smrg &out_offset, &out_buffer, 464b8e80941Smrg (void**)&out_map); 465b8e80941Smrg if (!out_buffer) 466b8e80941Smrg return PIPE_ERROR_OUT_OF_MEMORY; 467b8e80941Smrg 468b8e80941Smrg if (info->has_user_indices) { 469b8e80941Smrg map = (uint8_t*)info->index.user + offset; 470848b8605Smrg } else { 471b8e80941Smrg map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset, 472b8e80941Smrg info->count * info->index_size, 473848b8605Smrg PIPE_TRANSFER_READ, &transfer); 474848b8605Smrg } 475848b8605Smrg 476b8e80941Smrg switch (info->index_size) { 477848b8605Smrg case 4: 478b8e80941Smrg tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map); 479848b8605Smrg break; 480848b8605Smrg case 2: 481b8e80941Smrg tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map); 482848b8605Smrg break; 483848b8605Smrg case 1: 484b8e80941Smrg tr->run_elts8(tr, map, info->count, 0, 0, out_map); 485848b8605Smrg break; 486848b8605Smrg } 487848b8605Smrg 488848b8605Smrg if (transfer) { 489848b8605Smrg pipe_buffer_unmap(mgr->pipe, transfer); 490848b8605Smrg } 491848b8605Smrg } else { 492848b8605Smrg /* Create and map the output buffer. */ 493b8e80941Smrg u_upload_alloc(mgr->pipe->stream_uploader, 494b8e80941Smrg mgr->has_signed_vb_offset ? 495b8e80941Smrg 0 : key->output_stride * start_vertex, 496b8e80941Smrg key->output_stride * num_vertices, 4, 497b8e80941Smrg &out_offset, &out_buffer, 498b8e80941Smrg (void**)&out_map); 499b8e80941Smrg if (!out_buffer) 500b8e80941Smrg return PIPE_ERROR_OUT_OF_MEMORY; 501848b8605Smrg 502848b8605Smrg out_offset -= key->output_stride * start_vertex; 503848b8605Smrg 504848b8605Smrg tr->run(tr, 0, num_vertices, 0, 0, out_map); 505848b8605Smrg } 506848b8605Smrg 507848b8605Smrg /* Unmap all buffers. */ 508848b8605Smrg mask = vb_mask; 509848b8605Smrg while (mask) { 510848b8605Smrg unsigned i = u_bit_scan(&mask); 511848b8605Smrg 512848b8605Smrg if (vb_transfer[i]) { 513848b8605Smrg pipe_buffer_unmap(mgr->pipe, vb_transfer[i]); 514848b8605Smrg } 515848b8605Smrg } 516848b8605Smrg 517848b8605Smrg /* Setup the new vertex buffer. */ 518848b8605Smrg mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset; 519848b8605Smrg mgr->real_vertex_buffer[out_vb].stride = key->output_stride; 520848b8605Smrg 521848b8605Smrg /* Move the buffer reference. */ 522b8e80941Smrg pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]); 523b8e80941Smrg mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer; 524b8e80941Smrg mgr->real_vertex_buffer[out_vb].is_user_buffer = false; 525848b8605Smrg 526848b8605Smrg return PIPE_OK; 527848b8605Smrg} 528848b8605Smrg 529848b8605Smrgstatic boolean 530848b8605Smrgu_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, 531848b8605Smrg unsigned mask[VB_NUM]) 532848b8605Smrg{ 533848b8605Smrg unsigned type; 534848b8605Smrg unsigned fallback_vbs[VB_NUM]; 535848b8605Smrg /* Set the bit for each buffer which is incompatible, or isn't set. */ 536848b8605Smrg uint32_t unused_vb_mask = 537848b8605Smrg mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | 538848b8605Smrg ~mgr->enabled_vb_mask; 539848b8605Smrg 540848b8605Smrg memset(fallback_vbs, ~0, sizeof(fallback_vbs)); 541848b8605Smrg 542848b8605Smrg /* Find free slots for each type if needed. */ 543848b8605Smrg for (type = 0; type < VB_NUM; type++) { 544848b8605Smrg if (mask[type]) { 545848b8605Smrg uint32_t index; 546848b8605Smrg 547848b8605Smrg if (!unused_vb_mask) { 548848b8605Smrg return FALSE; 549848b8605Smrg } 550848b8605Smrg 551848b8605Smrg index = ffs(unused_vb_mask) - 1; 552848b8605Smrg fallback_vbs[type] = index; 553b8e80941Smrg unused_vb_mask &= ~(1 << index); 554848b8605Smrg /*printf("found slot=%i for type=%i\n", index, type);*/ 555848b8605Smrg } 556848b8605Smrg } 557848b8605Smrg 558848b8605Smrg for (type = 0; type < VB_NUM; type++) { 559848b8605Smrg if (mask[type]) { 560848b8605Smrg mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type]; 561848b8605Smrg } 562848b8605Smrg } 563848b8605Smrg 564848b8605Smrg memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs)); 565848b8605Smrg return TRUE; 566848b8605Smrg} 567848b8605Smrg 568848b8605Smrgstatic boolean 569848b8605Smrgu_vbuf_translate_begin(struct u_vbuf *mgr, 570b8e80941Smrg const struct pipe_draw_info *info, 571848b8605Smrg int start_vertex, unsigned num_vertices, 572b8e80941Smrg int min_index, boolean unroll_indices) 573848b8605Smrg{ 574848b8605Smrg unsigned mask[VB_NUM] = {0}; 575848b8605Smrg struct translate_key key[VB_NUM]; 576848b8605Smrg unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */ 577848b8605Smrg unsigned i, type; 578b8e80941Smrg const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask & 579b8e80941Smrg mgr->ve->used_vb_mask; 580848b8605Smrg 581b8e80941Smrg const int start[VB_NUM] = { 582b8e80941Smrg start_vertex, /* VERTEX */ 583b8e80941Smrg info->start_instance, /* INSTANCE */ 584b8e80941Smrg 0 /* CONST */ 585848b8605Smrg }; 586848b8605Smrg 587b8e80941Smrg const unsigned num[VB_NUM] = { 588b8e80941Smrg num_vertices, /* VERTEX */ 589b8e80941Smrg info->instance_count, /* INSTANCE */ 590b8e80941Smrg 1 /* CONST */ 591848b8605Smrg }; 592848b8605Smrg 593848b8605Smrg memset(key, 0, sizeof(key)); 594848b8605Smrg memset(elem_index, ~0, sizeof(elem_index)); 595848b8605Smrg 596848b8605Smrg /* See if there are vertex attribs of each type to translate and 597848b8605Smrg * which ones. */ 598848b8605Smrg for (i = 0; i < mgr->ve->count; i++) { 599848b8605Smrg unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index; 600848b8605Smrg 601848b8605Smrg if (!mgr->vertex_buffer[vb_index].stride) { 602848b8605Smrg if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 603848b8605Smrg !(incompatible_vb_mask & (1 << vb_index))) { 604848b8605Smrg continue; 605848b8605Smrg } 606848b8605Smrg mask[VB_CONST] |= 1 << vb_index; 607848b8605Smrg } else if (mgr->ve->ve[i].instance_divisor) { 608848b8605Smrg if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 609848b8605Smrg !(incompatible_vb_mask & (1 << vb_index))) { 610848b8605Smrg continue; 611848b8605Smrg } 612848b8605Smrg mask[VB_INSTANCE] |= 1 << vb_index; 613848b8605Smrg } else { 614848b8605Smrg if (!unroll_indices && 615848b8605Smrg !(mgr->ve->incompatible_elem_mask & (1 << i)) && 616848b8605Smrg !(incompatible_vb_mask & (1 << vb_index))) { 617848b8605Smrg continue; 618848b8605Smrg } 619848b8605Smrg mask[VB_VERTEX] |= 1 << vb_index; 620848b8605Smrg } 621848b8605Smrg } 622848b8605Smrg 623848b8605Smrg assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]); 624848b8605Smrg 625848b8605Smrg /* Find free vertex buffer slots. */ 626848b8605Smrg if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) { 627848b8605Smrg return FALSE; 628848b8605Smrg } 629848b8605Smrg 630848b8605Smrg /* Initialize the translate keys. */ 631848b8605Smrg for (i = 0; i < mgr->ve->count; i++) { 632848b8605Smrg struct translate_key *k; 633848b8605Smrg struct translate_element *te; 634b8e80941Smrg enum pipe_format output_format = mgr->ve->native_format[i]; 635848b8605Smrg unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index; 636848b8605Smrg bit = 1 << vb_index; 637848b8605Smrg 638848b8605Smrg if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 639848b8605Smrg !(incompatible_vb_mask & (1 << vb_index)) && 640848b8605Smrg (!unroll_indices || !(mask[VB_VERTEX] & bit))) { 641848b8605Smrg continue; 642848b8605Smrg } 643848b8605Smrg 644848b8605Smrg /* Set type to what we will translate. 645848b8605Smrg * Whether vertex, instance, or constant attribs. */ 646848b8605Smrg for (type = 0; type < VB_NUM; type++) { 647848b8605Smrg if (mask[type] & bit) { 648848b8605Smrg break; 649848b8605Smrg } 650848b8605Smrg } 651848b8605Smrg assert(type < VB_NUM); 652b8e80941Smrg if (mgr->ve->ve[i].src_format != output_format) 653b8e80941Smrg assert(translate_is_output_format_supported(output_format)); 654848b8605Smrg /*printf("velem=%i type=%i\n", i, type);*/ 655848b8605Smrg 656848b8605Smrg /* Add the vertex element. */ 657848b8605Smrg k = &key[type]; 658848b8605Smrg elem_index[type][i] = k->nr_elements; 659848b8605Smrg 660848b8605Smrg te = &k->element[k->nr_elements]; 661848b8605Smrg te->type = TRANSLATE_ELEMENT_NORMAL; 662848b8605Smrg te->instance_divisor = 0; 663848b8605Smrg te->input_buffer = vb_index; 664848b8605Smrg te->input_format = mgr->ve->ve[i].src_format; 665848b8605Smrg te->input_offset = mgr->ve->ve[i].src_offset; 666b8e80941Smrg te->output_format = output_format; 667848b8605Smrg te->output_offset = k->output_stride; 668848b8605Smrg 669848b8605Smrg k->output_stride += mgr->ve->native_format_size[i]; 670848b8605Smrg k->nr_elements++; 671848b8605Smrg } 672848b8605Smrg 673848b8605Smrg /* Translate buffers. */ 674848b8605Smrg for (type = 0; type < VB_NUM; type++) { 675848b8605Smrg if (key[type].nr_elements) { 676848b8605Smrg enum pipe_error err; 677b8e80941Smrg err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type], 678848b8605Smrg mgr->fallback_vbs[type], 679b8e80941Smrg start[type], num[type], min_index, 680848b8605Smrg unroll_indices && type == VB_VERTEX); 681848b8605Smrg if (err != PIPE_OK) 682848b8605Smrg return FALSE; 683848b8605Smrg 684848b8605Smrg /* Fixup the stride for constant attribs. */ 685848b8605Smrg if (type == VB_CONST) { 686848b8605Smrg mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0; 687848b8605Smrg } 688848b8605Smrg } 689848b8605Smrg } 690848b8605Smrg 691848b8605Smrg /* Setup new vertex elements. */ 692848b8605Smrg for (i = 0; i < mgr->ve->count; i++) { 693848b8605Smrg for (type = 0; type < VB_NUM; type++) { 694848b8605Smrg if (elem_index[type][i] < key[type].nr_elements) { 695848b8605Smrg struct translate_element *te = &key[type].element[elem_index[type][i]]; 696848b8605Smrg mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor; 697848b8605Smrg mgr->fallback_velems[i].src_format = te->output_format; 698848b8605Smrg mgr->fallback_velems[i].src_offset = te->output_offset; 699848b8605Smrg mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type]; 700848b8605Smrg 701848b8605Smrg /* elem_index[type][i] can only be set for one type. */ 702b8e80941Smrg assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u); 703b8e80941Smrg assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u); 704848b8605Smrg break; 705848b8605Smrg } 706848b8605Smrg } 707848b8605Smrg /* No translating, just copy the original vertex element over. */ 708848b8605Smrg if (type == VB_NUM) { 709848b8605Smrg memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i], 710848b8605Smrg sizeof(struct pipe_vertex_element)); 711848b8605Smrg } 712848b8605Smrg } 713848b8605Smrg 714848b8605Smrg u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count, 715848b8605Smrg mgr->fallback_velems); 716848b8605Smrg mgr->using_translate = TRUE; 717848b8605Smrg return TRUE; 718848b8605Smrg} 719848b8605Smrg 720848b8605Smrgstatic void u_vbuf_translate_end(struct u_vbuf *mgr) 721848b8605Smrg{ 722848b8605Smrg unsigned i; 723848b8605Smrg 724848b8605Smrg /* Restore vertex elements. */ 725848b8605Smrg mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso); 726848b8605Smrg mgr->using_translate = FALSE; 727848b8605Smrg 728848b8605Smrg /* Unreference the now-unused VBOs. */ 729848b8605Smrg for (i = 0; i < VB_NUM; i++) { 730848b8605Smrg unsigned vb = mgr->fallback_vbs[i]; 731b8e80941Smrg if (vb != ~0u) { 732b8e80941Smrg pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL); 733848b8605Smrg mgr->fallback_vbs[i] = ~0; 734848b8605Smrg 735848b8605Smrg /* This will cause the buffer to be unbound in the driver later. */ 736848b8605Smrg mgr->dirty_real_vb_mask |= 1 << vb; 737848b8605Smrg } 738848b8605Smrg } 739848b8605Smrg} 740848b8605Smrg 741848b8605Smrgstatic void * 742848b8605Smrgu_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, 743848b8605Smrg const struct pipe_vertex_element *attribs) 744848b8605Smrg{ 745848b8605Smrg struct pipe_context *pipe = mgr->pipe; 746848b8605Smrg unsigned i; 747848b8605Smrg struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS]; 748848b8605Smrg struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements); 749848b8605Smrg uint32_t used_buffers = 0; 750848b8605Smrg 751848b8605Smrg ve->count = count; 752848b8605Smrg 753848b8605Smrg memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count); 754848b8605Smrg memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count); 755848b8605Smrg 756848b8605Smrg /* Set the best native format in case the original format is not 757848b8605Smrg * supported. */ 758848b8605Smrg for (i = 0; i < count; i++) { 759848b8605Smrg enum pipe_format format = ve->ve[i].src_format; 760848b8605Smrg 761848b8605Smrg ve->src_format_size[i] = util_format_get_blocksize(format); 762848b8605Smrg 763848b8605Smrg used_buffers |= 1 << ve->ve[i].vertex_buffer_index; 764848b8605Smrg 765848b8605Smrg if (!ve->ve[i].instance_divisor) { 766848b8605Smrg ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 767848b8605Smrg } 768848b8605Smrg 769b8e80941Smrg format = mgr->caps.format_translation[format]; 770848b8605Smrg 771848b8605Smrg driver_attribs[i].src_format = format; 772848b8605Smrg ve->native_format[i] = format; 773848b8605Smrg ve->native_format_size[i] = 774848b8605Smrg util_format_get_blocksize(ve->native_format[i]); 775848b8605Smrg 776848b8605Smrg if (ve->ve[i].src_format != format || 777848b8605Smrg (!mgr->caps.velem_src_offset_unaligned && 778848b8605Smrg ve->ve[i].src_offset % 4 != 0)) { 779848b8605Smrg ve->incompatible_elem_mask |= 1 << i; 780848b8605Smrg ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 781848b8605Smrg } else { 782848b8605Smrg ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 783848b8605Smrg } 784848b8605Smrg } 785848b8605Smrg 786848b8605Smrg ve->used_vb_mask = used_buffers; 787848b8605Smrg ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers; 788848b8605Smrg ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers; 789848b8605Smrg 790b8e80941Smrg /* Align the formats and offsets to the size of DWORD if needed. */ 791848b8605Smrg if (!mgr->caps.velem_src_offset_unaligned) { 792848b8605Smrg for (i = 0; i < count; i++) { 793848b8605Smrg ve->native_format_size[i] = align(ve->native_format_size[i], 4); 794b8e80941Smrg driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4); 795848b8605Smrg } 796848b8605Smrg } 797848b8605Smrg 798848b8605Smrg ve->driver_cso = 799848b8605Smrg pipe->create_vertex_elements_state(pipe, count, driver_attribs); 800848b8605Smrg return ve; 801848b8605Smrg} 802848b8605Smrg 803848b8605Smrgstatic void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso) 804848b8605Smrg{ 805848b8605Smrg struct pipe_context *pipe = mgr->pipe; 806848b8605Smrg struct u_vbuf_elements *ve = cso; 807848b8605Smrg 808848b8605Smrg pipe->delete_vertex_elements_state(pipe, ve->driver_cso); 809848b8605Smrg FREE(ve); 810848b8605Smrg} 811848b8605Smrg 812848b8605Smrgvoid u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, 813848b8605Smrg unsigned start_slot, unsigned count, 814848b8605Smrg const struct pipe_vertex_buffer *bufs) 815848b8605Smrg{ 816848b8605Smrg unsigned i; 817848b8605Smrg /* which buffers are enabled */ 818848b8605Smrg uint32_t enabled_vb_mask = 0; 819848b8605Smrg /* which buffers are in user memory */ 820848b8605Smrg uint32_t user_vb_mask = 0; 821848b8605Smrg /* which buffers are incompatible with the driver */ 822848b8605Smrg uint32_t incompatible_vb_mask = 0; 823848b8605Smrg /* which buffers have a non-zero stride */ 824848b8605Smrg uint32_t nonzero_stride_vb_mask = 0; 825b8e80941Smrg const uint32_t mask = ~(((1ull << count) - 1) << start_slot); 826848b8605Smrg 827848b8605Smrg /* Zero out the bits we are going to rewrite completely. */ 828848b8605Smrg mgr->user_vb_mask &= mask; 829848b8605Smrg mgr->incompatible_vb_mask &= mask; 830848b8605Smrg mgr->nonzero_stride_vb_mask &= mask; 831848b8605Smrg mgr->enabled_vb_mask &= mask; 832848b8605Smrg 833848b8605Smrg if (!bufs) { 834848b8605Smrg struct pipe_context *pipe = mgr->pipe; 835848b8605Smrg /* Unbind. */ 836848b8605Smrg mgr->dirty_real_vb_mask &= mask; 837848b8605Smrg 838848b8605Smrg for (i = 0; i < count; i++) { 839848b8605Smrg unsigned dst_index = start_slot + i; 840848b8605Smrg 841b8e80941Smrg pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]); 842b8e80941Smrg pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]); 843848b8605Smrg } 844848b8605Smrg 845848b8605Smrg pipe->set_vertex_buffers(pipe, start_slot, count, NULL); 846848b8605Smrg return; 847848b8605Smrg } 848848b8605Smrg 849848b8605Smrg for (i = 0; i < count; i++) { 850848b8605Smrg unsigned dst_index = start_slot + i; 851848b8605Smrg const struct pipe_vertex_buffer *vb = &bufs[i]; 852848b8605Smrg struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index]; 853848b8605Smrg struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index]; 854848b8605Smrg 855b8e80941Smrg if (!vb->buffer.resource) { 856b8e80941Smrg pipe_vertex_buffer_unreference(orig_vb); 857b8e80941Smrg pipe_vertex_buffer_unreference(real_vb); 858848b8605Smrg continue; 859848b8605Smrg } 860848b8605Smrg 861b8e80941Smrg pipe_vertex_buffer_reference(orig_vb, vb); 862848b8605Smrg 863848b8605Smrg if (vb->stride) { 864848b8605Smrg nonzero_stride_vb_mask |= 1 << dst_index; 865848b8605Smrg } 866848b8605Smrg enabled_vb_mask |= 1 << dst_index; 867848b8605Smrg 868848b8605Smrg if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) || 869848b8605Smrg (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) { 870848b8605Smrg incompatible_vb_mask |= 1 << dst_index; 871b8e80941Smrg real_vb->buffer_offset = vb->buffer_offset; 872b8e80941Smrg real_vb->stride = vb->stride; 873b8e80941Smrg pipe_vertex_buffer_unreference(real_vb); 874b8e80941Smrg real_vb->is_user_buffer = false; 875848b8605Smrg continue; 876848b8605Smrg } 877848b8605Smrg 878b8e80941Smrg if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) { 879848b8605Smrg user_vb_mask |= 1 << dst_index; 880b8e80941Smrg real_vb->buffer_offset = vb->buffer_offset; 881b8e80941Smrg real_vb->stride = vb->stride; 882b8e80941Smrg pipe_vertex_buffer_unreference(real_vb); 883b8e80941Smrg real_vb->is_user_buffer = false; 884848b8605Smrg continue; 885848b8605Smrg } 886848b8605Smrg 887b8e80941Smrg pipe_vertex_buffer_reference(real_vb, vb); 888848b8605Smrg } 889848b8605Smrg 890848b8605Smrg mgr->user_vb_mask |= user_vb_mask; 891848b8605Smrg mgr->incompatible_vb_mask |= incompatible_vb_mask; 892848b8605Smrg mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask; 893848b8605Smrg mgr->enabled_vb_mask |= enabled_vb_mask; 894848b8605Smrg 895848b8605Smrg /* All changed buffers are marked as dirty, even the NULL ones, 896848b8605Smrg * which will cause the NULL buffers to be unbound in the driver later. */ 897848b8605Smrg mgr->dirty_real_vb_mask |= ~mask; 898848b8605Smrg} 899848b8605Smrg 900848b8605Smrgstatic enum pipe_error 901848b8605Smrgu_vbuf_upload_buffers(struct u_vbuf *mgr, 902848b8605Smrg int start_vertex, unsigned num_vertices, 903848b8605Smrg int start_instance, unsigned num_instances) 904848b8605Smrg{ 905848b8605Smrg unsigned i; 906848b8605Smrg unsigned nr_velems = mgr->ve->count; 907b8e80941Smrg const struct pipe_vertex_element *velems = 908848b8605Smrg mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve; 909848b8605Smrg unsigned start_offset[PIPE_MAX_ATTRIBS]; 910848b8605Smrg unsigned end_offset[PIPE_MAX_ATTRIBS]; 911848b8605Smrg uint32_t buffer_mask = 0; 912848b8605Smrg 913848b8605Smrg /* Determine how much data needs to be uploaded. */ 914848b8605Smrg for (i = 0; i < nr_velems; i++) { 915b8e80941Smrg const struct pipe_vertex_element *velem = &velems[i]; 916848b8605Smrg unsigned index = velem->vertex_buffer_index; 917848b8605Smrg struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index]; 918848b8605Smrg unsigned instance_div, first, size, index_bit; 919848b8605Smrg 920848b8605Smrg /* Skip the buffers generated by translate. */ 921848b8605Smrg if (index == mgr->fallback_vbs[VB_VERTEX] || 922848b8605Smrg index == mgr->fallback_vbs[VB_INSTANCE] || 923848b8605Smrg index == mgr->fallback_vbs[VB_CONST]) { 924848b8605Smrg continue; 925848b8605Smrg } 926848b8605Smrg 927b8e80941Smrg if (!vb->is_user_buffer) { 928848b8605Smrg continue; 929848b8605Smrg } 930848b8605Smrg 931848b8605Smrg instance_div = velem->instance_divisor; 932848b8605Smrg first = vb->buffer_offset + velem->src_offset; 933848b8605Smrg 934848b8605Smrg if (!vb->stride) { 935848b8605Smrg /* Constant attrib. */ 936848b8605Smrg size = mgr->ve->src_format_size[i]; 937848b8605Smrg } else if (instance_div) { 938848b8605Smrg /* Per-instance attrib. */ 939b8e80941Smrg 940b8e80941Smrg /* Figure out how many instances we'll render given instance_div. We 941b8e80941Smrg * can't use the typical div_round_up() pattern because the CTS uses 942b8e80941Smrg * instance_div = ~0 for a test, which overflows div_round_up()'s 943b8e80941Smrg * addition. 944b8e80941Smrg */ 945b8e80941Smrg unsigned count = num_instances / instance_div; 946b8e80941Smrg if (count * instance_div != num_instances) 947b8e80941Smrg count++; 948b8e80941Smrg 949848b8605Smrg first += vb->stride * start_instance; 950848b8605Smrg size = vb->stride * (count - 1) + mgr->ve->src_format_size[i]; 951848b8605Smrg } else { 952848b8605Smrg /* Per-vertex attrib. */ 953848b8605Smrg first += vb->stride * start_vertex; 954848b8605Smrg size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i]; 955848b8605Smrg } 956848b8605Smrg 957848b8605Smrg index_bit = 1 << index; 958848b8605Smrg 959848b8605Smrg /* Update offsets. */ 960848b8605Smrg if (!(buffer_mask & index_bit)) { 961848b8605Smrg start_offset[index] = first; 962848b8605Smrg end_offset[index] = first + size; 963848b8605Smrg } else { 964848b8605Smrg if (first < start_offset[index]) 965848b8605Smrg start_offset[index] = first; 966848b8605Smrg if (first + size > end_offset[index]) 967848b8605Smrg end_offset[index] = first + size; 968848b8605Smrg } 969848b8605Smrg 970848b8605Smrg buffer_mask |= index_bit; 971848b8605Smrg } 972848b8605Smrg 973848b8605Smrg /* Upload buffers. */ 974848b8605Smrg while (buffer_mask) { 975848b8605Smrg unsigned start, end; 976848b8605Smrg struct pipe_vertex_buffer *real_vb; 977848b8605Smrg const uint8_t *ptr; 978848b8605Smrg 979848b8605Smrg i = u_bit_scan(&buffer_mask); 980848b8605Smrg 981848b8605Smrg start = start_offset[i]; 982848b8605Smrg end = end_offset[i]; 983848b8605Smrg assert(start < end); 984848b8605Smrg 985848b8605Smrg real_vb = &mgr->real_vertex_buffer[i]; 986b8e80941Smrg ptr = mgr->vertex_buffer[i].buffer.user; 987848b8605Smrg 988b8e80941Smrg u_upload_data(mgr->pipe->stream_uploader, 989b8e80941Smrg mgr->has_signed_vb_offset ? 0 : start, 990b8e80941Smrg end - start, 4, 991b8e80941Smrg ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource); 992b8e80941Smrg if (!real_vb->buffer.resource) 993b8e80941Smrg return PIPE_ERROR_OUT_OF_MEMORY; 994848b8605Smrg 995848b8605Smrg real_vb->buffer_offset -= start; 996848b8605Smrg } 997848b8605Smrg 998848b8605Smrg return PIPE_OK; 999848b8605Smrg} 1000848b8605Smrg 1001b8e80941Smrgstatic boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr) 1002848b8605Smrg{ 1003848b8605Smrg /* See if there are any per-vertex attribs which will be uploaded or 1004848b8605Smrg * translated. Use bitmasks to get the info instead of looping over vertex 1005848b8605Smrg * elements. */ 1006848b8605Smrg return (mgr->ve->used_vb_mask & 1007b8e80941Smrg ((mgr->user_vb_mask | 1008b8e80941Smrg mgr->incompatible_vb_mask | 1009848b8605Smrg mgr->ve->incompatible_vb_mask_any) & 1010b8e80941Smrg mgr->ve->noninstance_vb_mask_any & 1011b8e80941Smrg mgr->nonzero_stride_vb_mask)) != 0; 1012848b8605Smrg} 1013848b8605Smrg 1014b8e80941Smrgstatic boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr) 1015848b8605Smrg{ 1016848b8605Smrg /* Return true if there are hw buffers which don't need to be translated. 1017848b8605Smrg * 1018848b8605Smrg * We could query whether each buffer is busy, but that would 1019848b8605Smrg * be way more costly than this. */ 1020848b8605Smrg return (mgr->ve->used_vb_mask & 1021b8e80941Smrg (~mgr->user_vb_mask & 1022b8e80941Smrg ~mgr->incompatible_vb_mask & 1023b8e80941Smrg mgr->ve->compatible_vb_mask_all & 1024b8e80941Smrg mgr->ve->noninstance_vb_mask_any & 1025848b8605Smrg mgr->nonzero_stride_vb_mask)) != 0; 1026848b8605Smrg} 1027848b8605Smrg 1028b8e80941Smrgstatic void 1029b8e80941Smrgu_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, 1030b8e80941Smrg const void *indices, unsigned *out_min_index, 1031b8e80941Smrg unsigned *out_max_index) 1032848b8605Smrg{ 1033b8e80941Smrg unsigned max = 0; 1034b8e80941Smrg unsigned min = ~0u; 1035848b8605Smrg 1036b8e80941Smrg switch (info->index_size) { 1037848b8605Smrg case 4: { 1038848b8605Smrg const unsigned *ui_indices = (const unsigned*)indices; 1039b8e80941Smrg if (info->primitive_restart) { 1040b8e80941Smrg for (unsigned i = 0; i < info->count; i++) { 1041b8e80941Smrg if (ui_indices[i] != info->restart_index) { 1042b8e80941Smrg if (ui_indices[i] > max) max = ui_indices[i]; 1043b8e80941Smrg if (ui_indices[i] < min) min = ui_indices[i]; 1044848b8605Smrg } 1045848b8605Smrg } 1046848b8605Smrg } 1047848b8605Smrg else { 1048b8e80941Smrg for (unsigned i = 0; i < info->count; i++) { 1049b8e80941Smrg if (ui_indices[i] > max) max = ui_indices[i]; 1050b8e80941Smrg if (ui_indices[i] < min) min = ui_indices[i]; 1051848b8605Smrg } 1052848b8605Smrg } 1053848b8605Smrg break; 1054848b8605Smrg } 1055848b8605Smrg case 2: { 1056848b8605Smrg const unsigned short *us_indices = (const unsigned short*)indices; 1057b8e80941Smrg if (info->primitive_restart) { 1058b8e80941Smrg for (unsigned i = 0; i < info->count; i++) { 1059b8e80941Smrg if (us_indices[i] != info->restart_index) { 1060b8e80941Smrg if (us_indices[i] > max) max = us_indices[i]; 1061b8e80941Smrg if (us_indices[i] < min) min = us_indices[i]; 1062848b8605Smrg } 1063848b8605Smrg } 1064848b8605Smrg } 1065848b8605Smrg else { 1066b8e80941Smrg for (unsigned i = 0; i < info->count; i++) { 1067b8e80941Smrg if (us_indices[i] > max) max = us_indices[i]; 1068b8e80941Smrg if (us_indices[i] < min) min = us_indices[i]; 1069848b8605Smrg } 1070848b8605Smrg } 1071848b8605Smrg break; 1072848b8605Smrg } 1073848b8605Smrg case 1: { 1074848b8605Smrg const unsigned char *ub_indices = (const unsigned char*)indices; 1075b8e80941Smrg if (info->primitive_restart) { 1076b8e80941Smrg for (unsigned i = 0; i < info->count; i++) { 1077b8e80941Smrg if (ub_indices[i] != info->restart_index) { 1078b8e80941Smrg if (ub_indices[i] > max) max = ub_indices[i]; 1079b8e80941Smrg if (ub_indices[i] < min) min = ub_indices[i]; 1080848b8605Smrg } 1081848b8605Smrg } 1082848b8605Smrg } 1083848b8605Smrg else { 1084b8e80941Smrg for (unsigned i = 0; i < info->count; i++) { 1085b8e80941Smrg if (ub_indices[i] > max) max = ub_indices[i]; 1086b8e80941Smrg if (ub_indices[i] < min) min = ub_indices[i]; 1087848b8605Smrg } 1088848b8605Smrg } 1089848b8605Smrg break; 1090848b8605Smrg } 1091848b8605Smrg default: 1092848b8605Smrg assert(0); 1093848b8605Smrg } 1094848b8605Smrg 1095b8e80941Smrg *out_min_index = min; 1096b8e80941Smrg *out_max_index = max; 1097b8e80941Smrg} 1098b8e80941Smrg 1099b8e80941Smrgvoid u_vbuf_get_minmax_index(struct pipe_context *pipe, 1100b8e80941Smrg const struct pipe_draw_info *info, 1101b8e80941Smrg unsigned *out_min_index, unsigned *out_max_index) 1102b8e80941Smrg{ 1103b8e80941Smrg struct pipe_transfer *transfer = NULL; 1104b8e80941Smrg const void *indices; 1105b8e80941Smrg 1106b8e80941Smrg if (info->has_user_indices) { 1107b8e80941Smrg indices = (uint8_t*)info->index.user + 1108b8e80941Smrg info->start * info->index_size; 1109b8e80941Smrg } else { 1110b8e80941Smrg indices = pipe_buffer_map_range(pipe, info->index.resource, 1111b8e80941Smrg info->start * info->index_size, 1112b8e80941Smrg info->count * info->index_size, 1113b8e80941Smrg PIPE_TRANSFER_READ, &transfer); 1114b8e80941Smrg } 1115b8e80941Smrg 1116b8e80941Smrg u_vbuf_get_minmax_index_mapped(info, indices, out_min_index, out_max_index); 1117b8e80941Smrg 1118848b8605Smrg if (transfer) { 1119848b8605Smrg pipe_buffer_unmap(pipe, transfer); 1120848b8605Smrg } 1121848b8605Smrg} 1122848b8605Smrg 1123848b8605Smrgstatic void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr) 1124848b8605Smrg{ 1125848b8605Smrg struct pipe_context *pipe = mgr->pipe; 1126848b8605Smrg unsigned start_slot, count; 1127848b8605Smrg 1128848b8605Smrg start_slot = ffs(mgr->dirty_real_vb_mask) - 1; 1129848b8605Smrg count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot); 1130848b8605Smrg 1131848b8605Smrg pipe->set_vertex_buffers(pipe, start_slot, count, 1132848b8605Smrg mgr->real_vertex_buffer + start_slot); 1133848b8605Smrg mgr->dirty_real_vb_mask = 0; 1134848b8605Smrg} 1135848b8605Smrg 1136b8e80941Smrgstatic void 1137b8e80941Smrgu_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info, 1138b8e80941Smrg unsigned *indirect_data, unsigned stride, 1139b8e80941Smrg unsigned draw_count) 1140b8e80941Smrg{ 1141b8e80941Smrg assert(info->index_size); 1142b8e80941Smrg info->indirect = NULL; 1143b8e80941Smrg 1144b8e80941Smrg for (unsigned i = 0; i < draw_count; i++) { 1145b8e80941Smrg unsigned offset = i * stride / 4; 1146b8e80941Smrg 1147b8e80941Smrg info->count = indirect_data[offset + 0]; 1148b8e80941Smrg info->instance_count = indirect_data[offset + 1]; 1149b8e80941Smrg 1150b8e80941Smrg if (!info->count || !info->instance_count) 1151b8e80941Smrg continue; 1152b8e80941Smrg 1153b8e80941Smrg info->start = indirect_data[offset + 2]; 1154b8e80941Smrg info->index_bias = indirect_data[offset + 3]; 1155b8e80941Smrg info->start_instance = indirect_data[offset + 4]; 1156b8e80941Smrg 1157b8e80941Smrg u_vbuf_draw_vbo(mgr, info); 1158b8e80941Smrg } 1159b8e80941Smrg} 1160b8e80941Smrg 1161848b8605Smrgvoid u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) 1162848b8605Smrg{ 1163848b8605Smrg struct pipe_context *pipe = mgr->pipe; 1164b8e80941Smrg int start_vertex; 1165b8e80941Smrg unsigned min_index; 1166848b8605Smrg unsigned num_vertices; 1167848b8605Smrg boolean unroll_indices = FALSE; 1168b8e80941Smrg const uint32_t used_vb_mask = mgr->ve->used_vb_mask; 1169848b8605Smrg uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask; 1170b8e80941Smrg const uint32_t incompatible_vb_mask = 1171b8e80941Smrg mgr->incompatible_vb_mask & used_vb_mask; 1172848b8605Smrg struct pipe_draw_info new_info; 1173848b8605Smrg 1174848b8605Smrg /* Normal draw. No fallback and no user buffers. */ 1175848b8605Smrg if (!incompatible_vb_mask && 1176848b8605Smrg !mgr->ve->incompatible_elem_mask && 1177848b8605Smrg !user_vb_mask) { 1178848b8605Smrg 1179848b8605Smrg /* Set vertex buffers if needed. */ 1180848b8605Smrg if (mgr->dirty_real_vb_mask & used_vb_mask) { 1181848b8605Smrg u_vbuf_set_driver_vertex_buffers(mgr); 1182848b8605Smrg } 1183848b8605Smrg 1184848b8605Smrg pipe->draw_vbo(pipe, info); 1185848b8605Smrg return; 1186848b8605Smrg } 1187848b8605Smrg 1188848b8605Smrg new_info = *info; 1189848b8605Smrg 1190b8e80941Smrg /* Handle indirect (multi)draws. */ 1191848b8605Smrg if (new_info.indirect) { 1192b8e80941Smrg const struct pipe_draw_indirect_info *indirect = new_info.indirect; 1193b8e80941Smrg unsigned draw_count = 0; 1194b8e80941Smrg 1195b8e80941Smrg /* Get the number of draws. */ 1196b8e80941Smrg if (indirect->indirect_draw_count) { 1197b8e80941Smrg pipe_buffer_read(pipe, indirect->indirect_draw_count, 1198b8e80941Smrg indirect->indirect_draw_count_offset, 1199b8e80941Smrg 4, &draw_count); 1200b8e80941Smrg } else { 1201b8e80941Smrg draw_count = indirect->draw_count; 1202848b8605Smrg } 1203848b8605Smrg 1204b8e80941Smrg if (!draw_count) 1205b8e80941Smrg return; 1206b8e80941Smrg 1207b8e80941Smrg unsigned data_size = (draw_count - 1) * indirect->stride + 1208b8e80941Smrg (new_info.index_size ? 20 : 16); 1209b8e80941Smrg unsigned *data = malloc(data_size); 1210b8e80941Smrg if (!data) 1211b8e80941Smrg return; /* report an error? */ 1212b8e80941Smrg 1213b8e80941Smrg /* Read the used buffer range only once, because the read can be 1214b8e80941Smrg * uncached. 1215b8e80941Smrg */ 1216b8e80941Smrg pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size, 1217b8e80941Smrg data); 1218b8e80941Smrg 1219b8e80941Smrg if (info->index_size) { 1220b8e80941Smrg /* Indexed multidraw. */ 1221b8e80941Smrg unsigned index_bias0 = data[3]; 1222b8e80941Smrg bool index_bias_same = true; 1223b8e80941Smrg 1224b8e80941Smrg /* If we invoke the translate path, we have to split the multidraw. */ 1225b8e80941Smrg if (incompatible_vb_mask || 1226b8e80941Smrg mgr->ve->incompatible_elem_mask) { 1227b8e80941Smrg u_vbuf_split_indexed_multidraw(mgr, &new_info, data, 1228b8e80941Smrg indirect->stride, draw_count); 1229b8e80941Smrg free(data); 1230b8e80941Smrg return; 1231b8e80941Smrg } 1232b8e80941Smrg 1233b8e80941Smrg /* See if index_bias is the same for all draws. */ 1234b8e80941Smrg for (unsigned i = 1; i < draw_count; i++) { 1235b8e80941Smrg if (data[i * indirect->stride / 4 + 3] != index_bias0) { 1236b8e80941Smrg index_bias_same = false; 1237b8e80941Smrg break; 1238b8e80941Smrg } 1239b8e80941Smrg } 1240b8e80941Smrg 1241b8e80941Smrg /* Split the multidraw if index_bias is different. */ 1242b8e80941Smrg if (!index_bias_same) { 1243b8e80941Smrg u_vbuf_split_indexed_multidraw(mgr, &new_info, data, 1244b8e80941Smrg indirect->stride, draw_count); 1245b8e80941Smrg free(data); 1246b8e80941Smrg return; 1247b8e80941Smrg } 1248b8e80941Smrg 1249b8e80941Smrg /* If we don't need to use the translate path and index_bias is 1250b8e80941Smrg * the same, we can process the multidraw with the time complexity 1251b8e80941Smrg * equal to 1 draw call (except for the index range computation). 1252b8e80941Smrg * We only need to compute the index range covering all draw calls 1253b8e80941Smrg * of the multidraw. 1254b8e80941Smrg * 1255b8e80941Smrg * The driver will not look at these values because indirect != NULL. 1256b8e80941Smrg * These values determine the user buffer bounds to upload. 1257b8e80941Smrg */ 1258b8e80941Smrg new_info.index_bias = index_bias0; 1259b8e80941Smrg new_info.min_index = ~0u; 1260b8e80941Smrg new_info.max_index = 0; 1261b8e80941Smrg new_info.start_instance = ~0u; 1262b8e80941Smrg unsigned end_instance = 0; 1263b8e80941Smrg 1264b8e80941Smrg struct pipe_transfer *transfer = NULL; 1265b8e80941Smrg const uint8_t *indices; 1266b8e80941Smrg 1267b8e80941Smrg if (info->has_user_indices) { 1268b8e80941Smrg indices = (uint8_t*)info->index.user; 1269b8e80941Smrg } else { 1270b8e80941Smrg indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource, 1271b8e80941Smrg PIPE_TRANSFER_READ, &transfer); 1272b8e80941Smrg } 1273b8e80941Smrg 1274b8e80941Smrg for (unsigned i = 0; i < draw_count; i++) { 1275b8e80941Smrg unsigned offset = i * indirect->stride / 4; 1276b8e80941Smrg unsigned start = data[offset + 2]; 1277b8e80941Smrg unsigned count = data[offset + 0]; 1278b8e80941Smrg unsigned start_instance = data[offset + 4]; 1279b8e80941Smrg unsigned instance_count = data[offset + 1]; 1280b8e80941Smrg 1281b8e80941Smrg if (!count || !instance_count) 1282b8e80941Smrg continue; 1283b8e80941Smrg 1284b8e80941Smrg /* Update the ranges of instances. */ 1285b8e80941Smrg new_info.start_instance = MIN2(new_info.start_instance, 1286b8e80941Smrg start_instance); 1287b8e80941Smrg end_instance = MAX2(end_instance, start_instance + instance_count); 1288b8e80941Smrg 1289b8e80941Smrg /* Update the index range. */ 1290b8e80941Smrg unsigned min, max; 1291b8e80941Smrg new_info.count = count; /* only used by get_minmax_index */ 1292b8e80941Smrg u_vbuf_get_minmax_index_mapped(&new_info, 1293b8e80941Smrg indices + 1294b8e80941Smrg new_info.index_size * start, 1295b8e80941Smrg &min, &max); 1296b8e80941Smrg 1297b8e80941Smrg new_info.min_index = MIN2(new_info.min_index, min); 1298b8e80941Smrg new_info.max_index = MAX2(new_info.max_index, max); 1299b8e80941Smrg } 1300b8e80941Smrg free(data); 1301b8e80941Smrg 1302b8e80941Smrg if (transfer) 1303b8e80941Smrg pipe_buffer_unmap(pipe, transfer); 1304b8e80941Smrg 1305b8e80941Smrg /* Set the final instance count. */ 1306b8e80941Smrg new_info.instance_count = end_instance - new_info.start_instance; 1307b8e80941Smrg 1308b8e80941Smrg if (new_info.start_instance == ~0u || !new_info.instance_count) 1309b8e80941Smrg return; 1310b8e80941Smrg } else { 1311b8e80941Smrg /* Non-indexed multidraw. 1312b8e80941Smrg * 1313b8e80941Smrg * Keep the draw call indirect and compute minimums & maximums, 1314b8e80941Smrg * which will determine the user buffer bounds to upload, but 1315b8e80941Smrg * the driver will not look at these values because indirect != NULL. 1316b8e80941Smrg * 1317b8e80941Smrg * This efficiently processes the multidraw with the time complexity 1318b8e80941Smrg * equal to 1 draw call. 1319b8e80941Smrg */ 1320b8e80941Smrg new_info.start = ~0u; 1321b8e80941Smrg new_info.start_instance = ~0u; 1322b8e80941Smrg unsigned end_vertex = 0; 1323b8e80941Smrg unsigned end_instance = 0; 1324b8e80941Smrg 1325b8e80941Smrg for (unsigned i = 0; i < draw_count; i++) { 1326b8e80941Smrg unsigned offset = i * indirect->stride / 4; 1327b8e80941Smrg unsigned start = data[offset + 2]; 1328b8e80941Smrg unsigned count = data[offset + 0]; 1329b8e80941Smrg unsigned start_instance = data[offset + 3]; 1330b8e80941Smrg unsigned instance_count = data[offset + 1]; 1331b8e80941Smrg 1332b8e80941Smrg new_info.start = MIN2(new_info.start, start); 1333b8e80941Smrg new_info.start_instance = MIN2(new_info.start_instance, 1334b8e80941Smrg start_instance); 1335b8e80941Smrg 1336b8e80941Smrg end_vertex = MAX2(end_vertex, start + count); 1337b8e80941Smrg end_instance = MAX2(end_instance, start_instance + instance_count); 1338b8e80941Smrg } 1339b8e80941Smrg free(data); 1340b8e80941Smrg 1341b8e80941Smrg /* Set the final counts. */ 1342b8e80941Smrg new_info.count = end_vertex - new_info.start; 1343b8e80941Smrg new_info.instance_count = end_instance - new_info.start_instance; 1344b8e80941Smrg 1345b8e80941Smrg if (new_info.start == ~0u || !new_info.count || !new_info.instance_count) 1346b8e80941Smrg return; 1347b8e80941Smrg } 1348848b8605Smrg } 1349848b8605Smrg 1350b8e80941Smrg if (new_info.index_size) { 1351848b8605Smrg /* See if anything needs to be done for per-vertex attribs. */ 1352848b8605Smrg if (u_vbuf_need_minmax_index(mgr)) { 1353b8e80941Smrg unsigned max_index; 1354848b8605Smrg 1355b8e80941Smrg if (new_info.max_index != ~0u) { 1356848b8605Smrg min_index = new_info.min_index; 1357848b8605Smrg max_index = new_info.max_index; 1358848b8605Smrg } else { 1359b8e80941Smrg u_vbuf_get_minmax_index(mgr->pipe, &new_info, 1360b8e80941Smrg &min_index, &max_index); 1361848b8605Smrg } 1362848b8605Smrg 1363848b8605Smrg assert(min_index <= max_index); 1364848b8605Smrg 1365848b8605Smrg start_vertex = min_index + new_info.index_bias; 1366848b8605Smrg num_vertices = max_index + 1 - min_index; 1367848b8605Smrg 1368848b8605Smrg /* Primitive restart doesn't work when unrolling indices. 1369848b8605Smrg * We would have to break this drawing operation into several ones. */ 1370848b8605Smrg /* Use some heuristic to see if unrolling indices improves 1371848b8605Smrg * performance. */ 1372b8e80941Smrg if (!info->indirect && 1373b8e80941Smrg !new_info.primitive_restart && 1374848b8605Smrg num_vertices > new_info.count*2 && 1375848b8605Smrg num_vertices - new_info.count > 32 && 1376848b8605Smrg !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { 1377848b8605Smrg unroll_indices = TRUE; 1378848b8605Smrg user_vb_mask &= ~(mgr->nonzero_stride_vb_mask & 1379848b8605Smrg mgr->ve->noninstance_vb_mask_any); 1380848b8605Smrg } 1381848b8605Smrg } else { 1382848b8605Smrg /* Nothing to do for per-vertex attribs. */ 1383848b8605Smrg start_vertex = 0; 1384848b8605Smrg num_vertices = 0; 1385848b8605Smrg min_index = 0; 1386848b8605Smrg } 1387848b8605Smrg } else { 1388848b8605Smrg start_vertex = new_info.start; 1389848b8605Smrg num_vertices = new_info.count; 1390848b8605Smrg min_index = 0; 1391848b8605Smrg } 1392848b8605Smrg 1393848b8605Smrg /* Translate vertices with non-native layouts or formats. */ 1394848b8605Smrg if (unroll_indices || 1395848b8605Smrg incompatible_vb_mask || 1396848b8605Smrg mgr->ve->incompatible_elem_mask) { 1397b8e80941Smrg if (!u_vbuf_translate_begin(mgr, &new_info, start_vertex, num_vertices, 1398b8e80941Smrg min_index, unroll_indices)) { 1399848b8605Smrg debug_warn_once("u_vbuf_translate_begin() failed"); 1400848b8605Smrg return; 1401848b8605Smrg } 1402848b8605Smrg 1403848b8605Smrg if (unroll_indices) { 1404b8e80941Smrg new_info.index_size = 0; 1405848b8605Smrg new_info.index_bias = 0; 1406848b8605Smrg new_info.min_index = 0; 1407848b8605Smrg new_info.max_index = new_info.count - 1; 1408848b8605Smrg new_info.start = 0; 1409848b8605Smrg } 1410848b8605Smrg 1411848b8605Smrg user_vb_mask &= ~(incompatible_vb_mask | 1412848b8605Smrg mgr->ve->incompatible_vb_mask_all); 1413848b8605Smrg } 1414848b8605Smrg 1415848b8605Smrg /* Upload user buffers. */ 1416848b8605Smrg if (user_vb_mask) { 1417848b8605Smrg if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, 1418848b8605Smrg new_info.start_instance, 1419848b8605Smrg new_info.instance_count) != PIPE_OK) { 1420848b8605Smrg debug_warn_once("u_vbuf_upload_buffers() failed"); 1421848b8605Smrg return; 1422848b8605Smrg } 1423848b8605Smrg 1424848b8605Smrg mgr->dirty_real_vb_mask |= user_vb_mask; 1425848b8605Smrg } 1426848b8605Smrg 1427848b8605Smrg /* 1428848b8605Smrg if (unroll_indices) { 1429848b8605Smrg printf("unrolling indices: start_vertex = %i, num_vertices = %i\n", 1430848b8605Smrg start_vertex, num_vertices); 1431848b8605Smrg util_dump_draw_info(stdout, info); 1432848b8605Smrg printf("\n"); 1433848b8605Smrg } 1434848b8605Smrg 1435848b8605Smrg unsigned i; 1436848b8605Smrg for (i = 0; i < mgr->nr_vertex_buffers; i++) { 1437848b8605Smrg printf("input %i: ", i); 1438848b8605Smrg util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i); 1439848b8605Smrg printf("\n"); 1440848b8605Smrg } 1441848b8605Smrg for (i = 0; i < mgr->nr_real_vertex_buffers; i++) { 1442848b8605Smrg printf("real %i: ", i); 1443848b8605Smrg util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i); 1444848b8605Smrg printf("\n"); 1445848b8605Smrg } 1446848b8605Smrg */ 1447848b8605Smrg 1448b8e80941Smrg u_upload_unmap(pipe->stream_uploader); 1449848b8605Smrg u_vbuf_set_driver_vertex_buffers(mgr); 1450848b8605Smrg 1451848b8605Smrg pipe->draw_vbo(pipe, &new_info); 1452848b8605Smrg 1453848b8605Smrg if (mgr->using_translate) { 1454848b8605Smrg u_vbuf_translate_end(mgr); 1455848b8605Smrg } 1456848b8605Smrg} 1457848b8605Smrg 1458848b8605Smrgvoid u_vbuf_save_vertex_elements(struct u_vbuf *mgr) 1459848b8605Smrg{ 1460848b8605Smrg assert(!mgr->ve_saved); 1461848b8605Smrg mgr->ve_saved = mgr->ve; 1462848b8605Smrg} 1463848b8605Smrg 1464848b8605Smrgvoid u_vbuf_restore_vertex_elements(struct u_vbuf *mgr) 1465848b8605Smrg{ 1466848b8605Smrg if (mgr->ve != mgr->ve_saved) { 1467848b8605Smrg struct pipe_context *pipe = mgr->pipe; 1468848b8605Smrg 1469848b8605Smrg mgr->ve = mgr->ve_saved; 1470848b8605Smrg pipe->bind_vertex_elements_state(pipe, 1471848b8605Smrg mgr->ve ? mgr->ve->driver_cso : NULL); 1472848b8605Smrg } 1473848b8605Smrg mgr->ve_saved = NULL; 1474848b8605Smrg} 1475848b8605Smrg 1476b8e80941Smrgvoid u_vbuf_save_vertex_buffer0(struct u_vbuf *mgr) 1477848b8605Smrg{ 1478b8e80941Smrg pipe_vertex_buffer_reference(&mgr->vertex_buffer0_saved, 1479b8e80941Smrg &mgr->vertex_buffer[0]); 1480848b8605Smrg} 1481848b8605Smrg 1482b8e80941Smrgvoid u_vbuf_restore_vertex_buffer0(struct u_vbuf *mgr) 1483848b8605Smrg{ 1484b8e80941Smrg u_vbuf_set_vertex_buffers(mgr, 0, 1, &mgr->vertex_buffer0_saved); 1485b8e80941Smrg pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved); 1486848b8605Smrg} 1487