u_vbuf.c revision 01e04c3f
/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * This module uploads user buffers and translates the vertex buffers which
 * contain incompatible vertices (i.e. not supported by the driver/hardware)
 * into compatible ones, based on the Gallium CAPs.
 *
 * It does not upload index buffers.
 *
 * The module heavily uses bitmasks to represent per-buffer and
 * per-vertex-element flags to avoid looping over the list of buffers just
 * to see if there's a non-zero stride, or user buffer, or unsupported format,
 * etc.
 *
 * There are 3 categories of vertex elements, which are processed separately:
 * - per-vertex attribs (stride != 0, instance_divisor == 0)
 * - instanced attribs (stride != 0, instance_divisor > 0)
 * - constant attribs (stride == 0)
 *
 * All needed uploads and translations are performed for every draw command,
 * but only the subset of vertices needed for that draw command is uploaded
 * or translated. (the module never translates whole buffers)
 *
 *
 * The module consists of two main parts:
 *
 *
 * 1) Translate (u_vbuf_translate_begin/end)
 *
 * This is pretty much a vertex fetch fallback. It translates vertices from
 * one vertex buffer to another in an unused vertex buffer slot. It does
 * whatever is needed to make the vertices readable by the hardware (changes
 * vertex formats and aligns offsets and strides). The translate module is
 * used here.
 *
 * Each of the 3 categories is translated to a separate buffer.
 * Only the [min_index, max_index] range is translated. For instanced attribs,
 * the range is [start_instance, start_instance+instance_count]. For constant
 * attribs, the range is [0, 1].
 *
 *
 * 2) User buffer uploading (u_vbuf_upload_buffers)
 *
 * Only the [min_index, max_index] range is uploaded (just like Translate)
 * with a single memcpy.
 *
 * This method works best for non-indexed draw operations, or for indexed
 * draw operations where the [min_index, max_index] range is not much bigger
 * than the vertex count.
 *
 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
 * the per-vertex attribs are uploaded via the translate module, all packed
 * into one vertex buffer, and the indexed draw call is turned into
 * a non-indexed one in the process. This adds additional complexity
 * to the translate part, but it prevents bad apps from bringing your frame
 * rate down.
 *
 *
 * If there is nothing to do, it forwards every command to the driver.
 * The module also has its own CSO cache of vertex element states.
 */

#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"

struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* This might mean two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
   /* Which buffer has at least one vertex element referencing it
    * incompatible. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffer has all vertex elements referencing it incompatible. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffer has at least one vertex element referencing it
    * compatible. */
   uint32_t compatible_vb_mask_any;
   /* Which buffer has all vertex elements referencing it compatible. */
   uint32_t compatible_vb_mask_all;

   /* Which buffer has at least one vertex element referencing it
    * non-instanced. */
   uint32_t noninstance_vb_mask_any;

   void *driver_cso;
};

enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};

struct u_vbuf {
   struct u_vbuf_caps caps;
   bool has_signed_vb_offset;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Saved vertex buffer. */
   struct pipe_vertex_buffer vertex_buffer0_saved;

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
   /* True if a vertex element state created for the translate fallback is
    * currently bound (and therefore used for rendering). */
   boolean using_translate;
   /* The vertex buffer slot indices where translated vertices are stored,
    * one per category. */
   unsigned fallback_vbs[VB_NUM];

   /* Which buffer is a user buffer. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer is incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer has a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
};

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);

static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED, PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED, PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
};

boolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
                        unsigned flags)
{
   unsigned i;
   boolean fallback = FALSE;

   /* I'd rather have a bitfield of which formats are supported and a static
    * table of the translations indexed by format, but since we don't have C99
    * we can't easily make a sparsely-populated table indexed by format. So,
    * we construct the sparse table here.
    */
   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
      caps->format_translation[i] = i;

   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
      enum pipe_format format = vbuf_format_fallbacks[i].from;

      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0,
                                       PIPE_BIND_VERTEX_BUFFER)) {
         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
         fallback = TRUE;
      }
   }

   caps->buffer_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->buffer_stride_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
   caps->velem_src_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->user_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);

   if (!caps->buffer_offset_unaligned ||
       !caps->buffer_stride_unaligned ||
       !caps->velem_src_offset_unaligned ||
       (!(flags & U_VBUF_FLAG_NO_USER_VBOS) && !caps->user_vertex_buffers)) {
      fallback = TRUE;
   }

   return fallback;
}

struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps)
{
   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);

   mgr->caps = *caps;
   mgr->pipe = pipe;
   mgr->cso_cache = cso_cache_create();
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));

   mgr->has_signed_vb_offset =
      pipe->screen->get_param(pipe->screen,
                              PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);

   return mgr;
}
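/* A rough sketch of how a driver might wire this module in. The ctx, screen,
 * pipe, velems, buffers and draw_info names below are placeholders for
 * illustration only; the u_vbuf_* calls are the entry points defined in this
 * file and declared in u_vbuf.h:
 *
 *    // at screen creation
 *    struct u_vbuf_caps caps;
 *    boolean need_vbuf = u_vbuf_get_caps(screen, &caps, 0);
 *
 *    // at context creation, only when some cap is missing
 *    if (need_vbuf)
 *       ctx->vbuf = u_vbuf_create(pipe, &caps);
 *
 *    // route vertex state and draws through the manager
 *    u_vbuf_set_vertex_elements(ctx->vbuf, count, velems);
 *    u_vbuf_set_vertex_buffers(ctx->vbuf, 0, nr_buffers, buffers);
 *    u_vbuf_draw_vbo(ctx->vbuf, &draw_info);  // instead of pipe->draw_vbo()
 *
 *    // at context destruction
 *    u_vbuf_destroy(ctx->vbuf);
 */
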
/* u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count,
                                    const struct pipe_vertex_element *states)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;
   struct cso_velems_state velems_state;

   /* need to include the count into the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
   velems_state.count = count;
   memcpy(velems_state.velems, states,
          sizeof(struct pipe_vertex_element) * count);
   hash_key = cso_construct_key((void*)&velems_state, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)&velems_state, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, &velems_state, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, count, states);
      cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
      cso->context = (void*)mgr;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   if (ve != mgr->ve)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);

   return ve;
}

void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
                                const struct pipe_vertex_element *states)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
}

void u_vbuf_destroy(struct u_vbuf *mgr)
{
   struct pipe_screen *screen = mgr->pipe->screen;
   unsigned i;
   const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                                    PIPE_SHADER_CAP_MAX_INPUTS);

   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);

   pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved);

   translate_cache_destroy(mgr->translate_cache);
   cso_cache_delete(mgr->cso_cache);
   FREE(mgr);
}

static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         const struct pipe_draw_info *info,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int min_index, boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   mask = vb_mask;
   while (mask) {
      struct pipe_vertex_buffer *vb;
      unsigned offset;
      uint8_t *map;
      unsigned i = u_bit_scan(&mask);

      vb = &mgr->vertex_buffer[i];
      offset = vb->buffer_offset + vb->stride * start_vertex;

      if (vb->is_user_buffer) {
         map = (uint8_t*)vb->buffer.user + offset;
      } else {
         unsigned size = vb->stride ? num_vertices * vb->stride
                                    : sizeof(double)*4;

         if (offset + size > vb->buffer.resource->width0) {
            /* Don't try to map past end of buffer. This often happens when
             * we're translating an attribute that's at offset > 0 from the
             * start of the vertex. If we'd subtract attrib's offset from
             * the size, this probably wouldn't happen.
             */
            size = vb->buffer.resource->width0 - offset;

            /* Also adjust num_vertices. A common user error is to call
             * glDrawRangeElements() with an incorrect 'end' argument. The
             * 'end' value should be the max index value, but people often
             * accidentally add one to this value. This adjustment avoids
             * crashing (by reading past the end of a hardware buffer mapping)
             * when people do that.
             */
            num_vertices = (size + vb->stride - 1) / vb->stride;
         }

         map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size,
                                     PIPE_TRANSFER_READ, &vb_transfer[i]);
      }

      /* Subtract min_index so that indexing with the index buffer works. */
      if (unroll_indices) {
         map -= (ptrdiff_t)vb->stride * min_index;
      }

      tr->set_buffer(tr, i, map, vb->stride, info->max_index);
   }

   /* Translate. */
   if (unroll_indices) {
      struct pipe_transfer *transfer = NULL;
      const unsigned offset = info->start * info->index_size;
      uint8_t *map;

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader, 0,
                     key->output_stride * info->count, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      if (info->has_user_indices) {
         map = (uint8_t*)info->index.user + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
                                     info->count * info->index_size,
                                     PIPE_TRANSFER_READ, &transfer);
      }

      switch (info->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, info->count, 0, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader,
                     mgr->has_signed_vb_offset ?
                        0 : key->output_stride * start_vertex,
                     key->output_stride * num_vertices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, 0, out_map);
   }

   /* Unmap all buffers. */
   mask = vb_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Setup the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
   mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
   mgr->real_vertex_buffer[out_vb].is_user_buffer = false;

   return PIPE_OK;
}
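/* Pick a free vertex buffer slot for each attrib category that needs the
 * translate fallback. A slot counts as free if it is disabled, or if every
 * element referencing it is incompatible (so its original contents can't be
 * used anyway). The chosen slots are stored in mgr->fallback_vbs and marked
 * dirty. Returns FALSE if there aren't enough free slots. */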
static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned type;
   unsigned fallback_vbs[VB_NUM];
   /* Set the bit for each buffer which is incompatible, or isn't enabled. */
   uint32_t unused_vb_mask =
      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
      ~mgr->enabled_vb_mask;

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));

   /* Find free slots for each type if needed. */
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         uint32_t index;

         if (!unused_vb_mask) {
            return FALSE;
         }

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }

   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}

static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       const struct pipe_draw_info *info,
                       int start_vertex, unsigned num_vertices,
                       int min_index, boolean unroll_indices)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;
   const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
                                         mgr->ve->used_vb_mask;

   const int start[VB_NUM] = {
      start_vertex,           /* VERTEX */
      info->start_instance,   /* INSTANCE */
      0                       /* CONST */
   };

   const unsigned num[VB_NUM] = {
      num_vertices,           /* VERTEX */
      info->instance_count,   /* INSTANCE */
      1                       /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      enum pipe_format output_format = mgr->ve->native_format[i];
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set type to what we will translate.
       * Whether vertex, instance, or constant attribs. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      if (mgr->ve->ve[i].src_format != output_format)
         assert(translate_is_output_format_supported(output_format));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = output_format;
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type],
                                        mgr->fallback_vbs[type],
                                        start[type], num[type], min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fixup the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Setup new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems[i].src_format = te->output_format;
            mgr->fallback_velems[i].src_offset = te->output_offset;
            mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
            assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count,
                                       mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}

static void u_vbuf_translate_end(struct u_vbuf *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
   mgr->using_translate = FALSE;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0u) {
         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
         mgr->fallback_vbs[i] = ~0;

         /* This will cause the buffer to be unbound in the driver later. */
         mgr->dirty_real_vb_mask |= 1 << vb;
      }
   }
}
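/* Create the driver CSO for a vertex element state and precompute the
 * per-element and per-buffer masks used by the draw-time fast paths.
 * Elements whose format or offset the hardware can't consume directly get
 * a native replacement format here and are flagged as incompatible. */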
static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned i;
   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
   uint32_t used_buffers = 0;

   ve->count = count;

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      used_buffers |= 1 << ve->ve[i].vertex_buffer_index;

      if (!ve->ve[i].instance_divisor) {
         ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }

      format = mgr->caps.format_translation[format];

      driver_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
         util_format_get_blocksize(ve->native_format[i]);

      if (ve->ve[i].src_format != format ||
          (!mgr->caps.velem_src_offset_unaligned &&
           ve->ve[i].src_offset % 4 != 0)) {
         ve->incompatible_elem_mask |= 1 << i;
         ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      } else {
         ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }
   }

   ve->used_vb_mask = used_buffers;
   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;

   /* Align the formats and offsets to the size of DWORD if needed. */
   if (!mgr->caps.velem_src_offset_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
      }
   }

   ve->driver_cso =
      pipe->create_vertex_elements_state(pipe, count, driver_attribs);
   return ve;
}

static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
{
   struct pipe_context *pipe = mgr->pipe;
   struct u_vbuf_elements *ve = cso;

   pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
   FREE(ve);
}

void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
                               unsigned start_slot, unsigned count,
                               const struct pipe_vertex_buffer *bufs)
{
   unsigned i;
   /* which buffers are enabled */
   uint32_t enabled_vb_mask = 0;
   /* which buffers are in user memory */
   uint32_t user_vb_mask = 0;
   /* which buffers are incompatible with the driver */
   uint32_t incompatible_vb_mask = 0;
   /* which buffers have a non-zero stride */
   uint32_t nonzero_stride_vb_mask = 0;
   const uint32_t mask = ~(((1ull << count) - 1) << start_slot);

   /* Zero out the bits we are going to rewrite completely. */
   mgr->user_vb_mask &= mask;
   mgr->incompatible_vb_mask &= mask;
   mgr->nonzero_stride_vb_mask &= mask;
   mgr->enabled_vb_mask &= mask;

   if (!bufs) {
      struct pipe_context *pipe = mgr->pipe;
      /* Unbind. */
      mgr->dirty_real_vb_mask &= mask;

      for (i = 0; i < count; i++) {
         unsigned dst_index = start_slot + i;

         pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
         pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
      }

      pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
      return;
   }

   for (i = 0; i < count; i++) {
      unsigned dst_index = start_slot + i;
      const struct pipe_vertex_buffer *vb = &bufs[i];
      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];

      if (!vb->buffer.resource) {
         pipe_vertex_buffer_unreference(orig_vb);
         pipe_vertex_buffer_unreference(real_vb);
         continue;
      }

      pipe_vertex_buffer_reference(orig_vb, vb);

      if (vb->stride) {
         nonzero_stride_vb_mask |= 1 << dst_index;
      }
      enabled_vb_mask |= 1 << dst_index;

      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
         incompatible_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
         user_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      pipe_vertex_buffer_reference(real_vb, vb);
   }

   mgr->user_vb_mask |= user_vb_mask;
   mgr->incompatible_vb_mask |= incompatible_vb_mask;
   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
   mgr->enabled_vb_mask |= enabled_vb_mask;

   /* All changed buffers are marked as dirty, even the NULL ones,
    * which will cause the NULL buffers to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= ~mask;
}
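/* Copy the ranges of all user vertex buffers referenced by the current
 * vertex elements into GPU-accessible memory via u_upload_mgr, and repoint
 * the corresponding real_vertex_buffer entries at the uploaded copies.
 * Only the range needed for this draw (per-vertex, per-instance or constant)
 * is uploaded. */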
static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   const struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS];
   uint32_t buffer_mask = 0;

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      const struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size, index_bit;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      if (!vb->is_user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */

         /* Figure out how many instances we'll render given instance_div. We
          * can't use the typical div_round_up() pattern because the CTS uses
          * instance_div = ~0 for a test, which overflows div_round_up()'s
          * addition.
          */
         unsigned count = num_instances / instance_div;
         if (count * instance_div != num_instances)
            count++;

         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      index_bit = 1 << index;

      /* Update offsets. */
      if (!(buffer_mask & index_bit)) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }

      buffer_mask |= index_bit;
   }

   /* Upload buffers. */
   while (buffer_mask) {
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      i = u_bit_scan(&buffer_mask);

      start = start_offset[i];
      end = end_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].buffer.user;

      u_upload_data(mgr->pipe->stream_uploader,
                    mgr->has_signed_vb_offset ? 0 : start,
                    end - start, 4,
                    ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource);
      if (!real_vb->buffer.resource)
         return PIPE_ERROR_OUT_OF_MEMORY;

      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}

static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
{
   /* See if there are any per-vertex attribs which will be uploaded or
    * translated. Use bitmasks to get the info instead of looping over vertex
    * elements. */
   return (mgr->ve->used_vb_mask &
           ((mgr->user_vb_mask |
             mgr->incompatible_vb_mask |
             mgr->ve->incompatible_vb_mask_any) &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
{
   /* Return true if there are hw buffers which don't need to be translated.
    *
    * We could query whether each buffer is busy, but that would
    * be way more costly than this. */
   return (mgr->ve->used_vb_mask &
           (~mgr->user_vb_mask &
            ~mgr->incompatible_vb_mask &
            mgr->ve->compatible_vb_mask_all &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}
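/* Scan a mapped index buffer and return the smallest and largest index,
 * skipping the restart index when primitive restart is enabled. */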
static void
u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info,
                               const void *indices, unsigned *out_min_index,
                               unsigned *out_max_index)
{
   unsigned max = 0;
   unsigned min = ~0u;

   switch (info->index_size) {
   case 4: {
      const unsigned *ui_indices = (const unsigned*)indices;
      if (info->primitive_restart) {
         for (unsigned i = 0; i < info->count; i++) {
            if (ui_indices[i] != info->restart_index) {
               if (ui_indices[i] > max) max = ui_indices[i];
               if (ui_indices[i] < min) min = ui_indices[i];
            }
         }
      }
      else {
         for (unsigned i = 0; i < info->count; i++) {
            if (ui_indices[i] > max) max = ui_indices[i];
            if (ui_indices[i] < min) min = ui_indices[i];
         }
      }
      break;
   }
   case 2: {
      const unsigned short *us_indices = (const unsigned short*)indices;
      if (info->primitive_restart) {
         for (unsigned i = 0; i < info->count; i++) {
            if (us_indices[i] != info->restart_index) {
               if (us_indices[i] > max) max = us_indices[i];
               if (us_indices[i] < min) min = us_indices[i];
            }
         }
      }
      else {
         for (unsigned i = 0; i < info->count; i++) {
            if (us_indices[i] > max) max = us_indices[i];
            if (us_indices[i] < min) min = us_indices[i];
         }
      }
      break;
   }
   case 1: {
      const unsigned char *ub_indices = (const unsigned char*)indices;
      if (info->primitive_restart) {
         for (unsigned i = 0; i < info->count; i++) {
            if (ub_indices[i] != info->restart_index) {
               if (ub_indices[i] > max) max = ub_indices[i];
               if (ub_indices[i] < min) min = ub_indices[i];
            }
         }
      }
      else {
         for (unsigned i = 0; i < info->count; i++) {
            if (ub_indices[i] > max) max = ub_indices[i];
            if (ub_indices[i] < min) min = ub_indices[i];
         }
      }
      break;
   }
   default:
      assert(0);
   }

   *out_min_index = min;
   *out_max_index = max;
}

static void
u_vbuf_get_minmax_index(struct pipe_context *pipe,
                        const struct pipe_draw_info *info,
                        unsigned *out_min_index, unsigned *out_max_index)
{
   struct pipe_transfer *transfer = NULL;
   const void *indices;

   if (info->has_user_indices) {
      indices = (uint8_t*)info->index.user +
                info->start * info->index_size;
   } else {
      indices = pipe_buffer_map_range(pipe, info->index.resource,
                                      info->start * info->index_size,
                                      info->count * info->index_size,
                                      PIPE_TRANSFER_READ, &transfer);
   }

   u_vbuf_get_minmax_index_mapped(info, indices, out_min_index, out_max_index);

   if (transfer) {
      pipe_buffer_unmap(pipe, transfer);
   }
}

static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned start_slot, count;

   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);

   pipe->set_vertex_buffers(pipe, start_slot, count,
                            mgr->real_vertex_buffer + start_slot);
   mgr->dirty_real_vb_mask = 0;
}
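/* Fallback for indexed indirect multidraws that can't be merged into one
 * draw: replay them as a sequence of direct draws, using the parameters
 * that were already read back from the indirect buffer. */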
static void
u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
                               unsigned *indirect_data, unsigned stride,
                               unsigned draw_count)
{
   assert(info->index_size);
   info->indirect = NULL;

   for (unsigned i = 0; i < draw_count; i++) {
      unsigned offset = i * stride / 4;

      info->count = indirect_data[offset + 0];
      info->instance_count = indirect_data[offset + 1];

      if (!info->count || !info->instance_count)
         continue;

      info->start = indirect_data[offset + 2];
      info->index_bias = indirect_data[offset + 3];
      info->start_instance = indirect_data[offset + 4];

      u_vbuf_draw_vbo(mgr, info);
   }
}

void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex;
   unsigned min_index;
   unsigned num_vertices;
   boolean unroll_indices = FALSE;
   const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
   const uint32_t incompatible_vb_mask =
      mgr->incompatible_vb_mask & used_vb_mask;
   struct pipe_draw_info new_info;

   /* Normal draw. No fallback and no user buffers. */
   if (!incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask) {

      /* Set vertex buffers if needed. */
      if (mgr->dirty_real_vb_mask & used_vb_mask) {
         u_vbuf_set_driver_vertex_buffers(mgr);
      }

      pipe->draw_vbo(pipe, info);
      return;
   }

   new_info = *info;

   /* Handle indirect (multi)draws. */
   if (new_info.indirect) {
      const struct pipe_draw_indirect_info *indirect = new_info.indirect;
      unsigned draw_count = 0;

      /* Get the number of draws. */
      if (indirect->indirect_draw_count) {
         pipe_buffer_read(pipe, indirect->indirect_draw_count,
                          indirect->indirect_draw_count_offset,
                          4, &draw_count);
      } else {
         draw_count = indirect->draw_count;
      }

      if (!draw_count)
         return;

      unsigned data_size = (draw_count - 1) * indirect->stride +
                           (new_info.index_size ? 20 : 16);
      unsigned *data = malloc(data_size);
      if (!data)
         return; /* report an error? */

      /* Read the used buffer range only once, because the read can be
       * uncached.
       */
      pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
                       data);

      if (info->index_size) {
         /* Indexed multidraw. */
         unsigned index_bias0 = data[3];
         bool index_bias_same = true;

         /* If we invoke the translate path, we have to split the multidraw. */
         if (incompatible_vb_mask ||
             mgr->ve->incompatible_elem_mask) {
            u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
                                           indirect->stride, draw_count);
            free(data);
            return;
         }

         /* See if index_bias is the same for all draws. */
         for (unsigned i = 1; i < draw_count; i++) {
            if (data[i * indirect->stride / 4 + 3] != index_bias0) {
               index_bias_same = false;
               break;
            }
         }

         /* Split the multidraw if index_bias is different. */
         if (!index_bias_same) {
            u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
                                           indirect->stride, draw_count);
            free(data);
            return;
         }

         /* If we don't need to use the translate path and index_bias is
          * the same, we can process the multidraw with the time complexity
          * equal to 1 draw call (except for the index range computation).
          * We only need to compute the index range covering all draw calls
          * of the multidraw.
          *
          * The driver will not look at these values because indirect != NULL.
          * These values determine the user buffer bounds to upload.
          */
         new_info.index_bias = index_bias0;
         new_info.min_index = ~0u;
         new_info.max_index = 0;
         new_info.start_instance = ~0u;
         unsigned end_instance = 0;

         struct pipe_transfer *transfer = NULL;
         const uint8_t *indices;

         if (info->has_user_indices) {
            indices = (uint8_t*)info->index.user;
         } else {
            indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
                                                PIPE_TRANSFER_READ, &transfer);
         }

         for (unsigned i = 0; i < draw_count; i++) {
            unsigned offset = i * indirect->stride / 4;
            unsigned start = data[offset + 2];
            unsigned count = data[offset + 0];
            unsigned start_instance = data[offset + 4];
            unsigned instance_count = data[offset + 1];

            if (!count || !instance_count)
               continue;

            /* Update the ranges of instances. */
            new_info.start_instance = MIN2(new_info.start_instance,
                                           start_instance);
            end_instance = MAX2(end_instance, start_instance + instance_count);

            /* Update the index range. */
            unsigned min, max;
            new_info.count = count; /* only used by get_minmax_index */
            u_vbuf_get_minmax_index_mapped(&new_info,
                                           indices +
                                           new_info.index_size * start,
                                           &min, &max);

            new_info.min_index = MIN2(new_info.min_index, min);
            new_info.max_index = MAX2(new_info.max_index, max);
         }
         free(data);

         if (transfer)
            pipe_buffer_unmap(pipe, transfer);

         /* Set the final instance count. */
         new_info.instance_count = end_instance - new_info.start_instance;

         if (new_info.start_instance == ~0u || !new_info.instance_count)
            return;
      } else {
         /* Non-indexed multidraw.
          *
          * Keep the draw call indirect and compute minimums & maximums,
          * which will determine the user buffer bounds to upload, but
          * the driver will not look at these values because indirect != NULL.
          *
          * This efficiently processes the multidraw with the time complexity
          * equal to 1 draw call.
          */
         new_info.start = ~0u;
         new_info.start_instance = ~0u;
         unsigned end_vertex = 0;
         unsigned end_instance = 0;

         for (unsigned i = 0; i < draw_count; i++) {
            unsigned offset = i * indirect->stride / 4;
            unsigned start = data[offset + 2];
            unsigned count = data[offset + 0];
            unsigned start_instance = data[offset + 3];
            unsigned instance_count = data[offset + 1];

            new_info.start = MIN2(new_info.start, start);
            new_info.start_instance = MIN2(new_info.start_instance,
                                           start_instance);

            end_vertex = MAX2(end_vertex, start + count);
            end_instance = MAX2(end_instance, start_instance + instance_count);
         }
         free(data);

         /* Set the final counts. */
         new_info.count = end_vertex - new_info.start;
         new_info.instance_count = end_instance - new_info.start_instance;

         if (new_info.start == ~0u || !new_info.count || !new_info.instance_count)
            return;
      }
   }

   if (new_info.index_size) {
      /* See if anything needs to be done for per-vertex attribs. */
      if (u_vbuf_need_minmax_index(mgr)) {
         unsigned max_index;

         if (new_info.max_index != ~0u) {
            min_index = new_info.min_index;
            max_index = new_info.max_index;
         } else {
            u_vbuf_get_minmax_index(mgr->pipe, &new_info,
                                    &min_index, &max_index);
         }

         assert(min_index <= max_index);

         start_vertex = min_index + new_info.index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to break this drawing operation into several ones. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
         if (!info->indirect &&
             !new_info.primitive_restart &&
             num_vertices > new_info.count*2 &&
             num_vertices - new_info.count > 32 &&
             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
            unroll_indices = TRUE;
            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                              mgr->ve->noninstance_vb_mask_any);
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      start_vertex = new_info.start;
      num_vertices = new_info.count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       incompatible_vb_mask ||
       mgr->ve->incompatible_elem_mask) {
      if (!u_vbuf_translate_begin(mgr, &new_info, start_vertex, num_vertices,
                                  min_index, unroll_indices)) {
         debug_warn_once("u_vbuf_translate_begin() failed");
         return;
      }

      if (unroll_indices) {
         new_info.index_size = 0;
         new_info.index_bias = 0;
         new_info.min_index = 0;
         new_info.max_index = new_info.count - 1;
         new_info.start = 0;
      }

      user_vb_mask &= ~(incompatible_vb_mask |
                        mgr->ve->incompatible_vb_mask_all);
   }

   /* Upload user buffers. */
   if (user_vb_mask) {
      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                new_info.start_instance,
                                new_info.instance_count) != PIPE_OK) {
         debug_warn_once("u_vbuf_upload_buffers() failed");
         return;
      }

      mgr->dirty_real_vb_mask |= user_vb_mask;
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
      printf("\n");
   }
   */

   u_upload_unmap(pipe->stream_uploader);
   u_vbuf_set_driver_vertex_buffers(mgr);

   pipe->draw_vbo(pipe, &new_info);

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
}

void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
{
   assert(!mgr->ve_saved);
   mgr->ve_saved = mgr->ve;
}

void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
{
   if (mgr->ve != mgr->ve_saved) {
      struct pipe_context *pipe = mgr->pipe;

      mgr->ve = mgr->ve_saved;
      pipe->bind_vertex_elements_state(pipe,
                                       mgr->ve ? mgr->ve->driver_cso : NULL);
   }
   mgr->ve_saved = NULL;
}

void u_vbuf_save_vertex_buffer0(struct u_vbuf *mgr)
{
   pipe_vertex_buffer_reference(&mgr->vertex_buffer0_saved,
                                &mgr->vertex_buffer[0]);
}

void u_vbuf_restore_vertex_buffer0(struct u_vbuf *mgr)
{
   u_vbuf_set_vertex_buffers(mgr, 0, 1, &mgr->vertex_buffer0_saved);
   pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved);
}