1/************************************************************************** 2 * 3 * Copyright 2011 Marek Olšák <maraeo@gmail.com> 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * This module uploads user buffers and translates the vertex buffers which 30 * contain incompatible vertices (i.e. not supported by the driver/hardware) 31 * into compatible ones, based on the Gallium CAPs. 32 * 33 * It does not upload index buffers. 34 * 35 * The module heavily uses bitmasks to represent per-buffer and 36 * per-vertex-element flags to avoid looping over the list of buffers just 37 * to see if there's a non-zero stride, or user buffer, or unsupported format, 38 * etc. 39 * 40 * There are 3 categories of vertex elements, which are processed separately: 41 * - per-vertex attribs (stride != 0, instance_divisor == 0) 42 * - instanced attribs (stride != 0, instance_divisor > 0) 43 * - constant attribs (stride == 0) 44 * 45 * All needed uploads and translations are performed every draw command, but 46 * only the subset of vertices needed for that draw command is uploaded or 47 * translated. (the module never translates whole buffers) 48 * 49 * 50 * The module consists of two main parts: 51 * 52 * 53 * 1) Translate (u_vbuf_translate_begin/end) 54 * 55 * This is pretty much a vertex fetch fallback. It translates vertices from 56 * one vertex buffer to another in an unused vertex buffer slot. It does 57 * whatever is needed to make the vertices readable by the hardware (changes 58 * vertex formats and aligns offsets and strides). The translate module is 59 * used here. 60 * 61 * Each of the 3 categories is translated to a separate buffer. 62 * Only the [min_index, max_index] range is translated. For instanced attribs, 63 * the range is [start_instance, start_instance+instance_count]. For constant 64 * attribs, the range is [0, 1]. 65 * 66 * 67 * 2) User buffer uploading (u_vbuf_upload_buffers) 68 * 69 * Only the [min_index, max_index] range is uploaded (just like Translate) 70 * with a single memcpy. 71 * 72 * This method works best for non-indexed draw operations or indexed draw 73 * operations where the [min_index, max_index] range is not being way bigger 74 * than the vertex count. 75 * 76 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}), 77 * the per-vertex attribs are uploaded via the translate module, all packed 78 * into one vertex buffer, and the indexed draw call is turned into 79 * a non-indexed one in the process. This adds additional complexity 80 * to the translate part, but it prevents bad apps from bringing your frame 81 * rate down. 82 * 83 * 84 * If there is nothing to do, it forwards every command to the driver. 85 * The module also has its own CSO cache of vertex element states. 86 */ 87 88#include "util/u_vbuf.h" 89 90#include "util/u_dump.h" 91#include "util/u_format.h" 92#include "util/u_inlines.h" 93#include "util/u_memory.h" 94#include "util/u_upload_mgr.h" 95#include "translate/translate.h" 96#include "translate/translate_cache.h" 97#include "cso_cache/cso_cache.h" 98#include "cso_cache/cso_hash.h" 99 100struct u_vbuf_elements { 101 unsigned count; 102 struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; 103 104 unsigned src_format_size[PIPE_MAX_ATTRIBS]; 105 106 /* If (velem[i].src_format != native_format[i]), the vertex buffer 107 * referenced by the vertex element cannot be used for rendering and 108 * its vertex data must be translated to native_format[i]. */ 109 enum pipe_format native_format[PIPE_MAX_ATTRIBS]; 110 unsigned native_format_size[PIPE_MAX_ATTRIBS]; 111 112 /* Which buffers are used by the vertex element state. */ 113 uint32_t used_vb_mask; 114 /* This might mean two things: 115 * - src_format != native_format, as discussed above. 116 * - src_offset % 4 != 0 (if the caps don't allow such an offset). */ 117 uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */ 118 /* Which buffer has at least one vertex element referencing it 119 * incompatible. */ 120 uint32_t incompatible_vb_mask_any; 121 /* Which buffer has all vertex elements referencing it incompatible. */ 122 uint32_t incompatible_vb_mask_all; 123 /* Which buffer has at least one vertex element referencing it 124 * compatible. */ 125 uint32_t compatible_vb_mask_any; 126 /* Which buffer has all vertex elements referencing it compatible. */ 127 uint32_t compatible_vb_mask_all; 128 129 /* Which buffer has at least one vertex element referencing it 130 * non-instanced. */ 131 uint32_t noninstance_vb_mask_any; 132 133 void *driver_cso; 134}; 135 136enum { 137 VB_VERTEX = 0, 138 VB_INSTANCE = 1, 139 VB_CONST = 2, 140 VB_NUM = 3 141}; 142 143struct u_vbuf { 144 struct u_vbuf_caps caps; 145 bool has_signed_vb_offset; 146 147 struct pipe_context *pipe; 148 struct translate_cache *translate_cache; 149 struct cso_cache *cso_cache; 150 151 /* This is what was set in set_vertex_buffers. 152 * May contain user buffers. */ 153 struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; 154 uint32_t enabled_vb_mask; 155 156 /* Saved vertex buffer. */ 157 struct pipe_vertex_buffer vertex_buffer0_saved; 158 159 /* Vertex buffers for the driver. 160 * There are usually no user buffers. */ 161 struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS]; 162 uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last 163 call of set_vertex_buffers */ 164 165 /* Vertex elements. */ 166 struct u_vbuf_elements *ve, *ve_saved; 167 168 /* Vertex elements used for the translate fallback. */ 169 struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS]; 170 /* If non-NULL, this is a vertex element state used for the translate 171 * fallback and therefore used for rendering too. */ 172 boolean using_translate; 173 /* The vertex buffer slot index where translated vertices have been 174 * stored in. */ 175 unsigned fallback_vbs[VB_NUM]; 176 177 /* Which buffer is a user buffer. */ 178 uint32_t user_vb_mask; /* each bit describes a corresp. buffer */ 179 /* Which buffer is incompatible (unaligned). */ 180 uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */ 181 /* Which buffer has a non-zero stride. */ 182 uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */ 183}; 184 185static void * 186u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, 187 const struct pipe_vertex_element *attribs); 188static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso); 189 190static const struct { 191 enum pipe_format from, to; 192} vbuf_format_fallbacks[] = { 193 { PIPE_FORMAT_R32_FIXED, PIPE_FORMAT_R32_FLOAT }, 194 { PIPE_FORMAT_R32G32_FIXED, PIPE_FORMAT_R32G32_FLOAT }, 195 { PIPE_FORMAT_R32G32B32_FIXED, PIPE_FORMAT_R32G32B32_FLOAT }, 196 { PIPE_FORMAT_R32G32B32A32_FIXED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 197 { PIPE_FORMAT_R16_FLOAT, PIPE_FORMAT_R32_FLOAT }, 198 { PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R32G32_FLOAT }, 199 { PIPE_FORMAT_R16G16B16_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT }, 200 { PIPE_FORMAT_R16G16B16A16_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT }, 201 { PIPE_FORMAT_R64_FLOAT, PIPE_FORMAT_R32_FLOAT }, 202 { PIPE_FORMAT_R64G64_FLOAT, PIPE_FORMAT_R32G32_FLOAT }, 203 { PIPE_FORMAT_R64G64B64_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT }, 204 { PIPE_FORMAT_R64G64B64A64_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT }, 205 { PIPE_FORMAT_R32_UNORM, PIPE_FORMAT_R32_FLOAT }, 206 { PIPE_FORMAT_R32G32_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 207 { PIPE_FORMAT_R32G32B32_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 208 { PIPE_FORMAT_R32G32B32A32_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 209 { PIPE_FORMAT_R32_SNORM, PIPE_FORMAT_R32_FLOAT }, 210 { PIPE_FORMAT_R32G32_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 211 { PIPE_FORMAT_R32G32B32_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 212 { PIPE_FORMAT_R32G32B32A32_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 213 { PIPE_FORMAT_R32_USCALED, PIPE_FORMAT_R32_FLOAT }, 214 { PIPE_FORMAT_R32G32_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 215 { PIPE_FORMAT_R32G32B32_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 216 { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 217 { PIPE_FORMAT_R32_SSCALED, PIPE_FORMAT_R32_FLOAT }, 218 { PIPE_FORMAT_R32G32_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 219 { PIPE_FORMAT_R32G32B32_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 220 { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 221 { PIPE_FORMAT_R16_UNORM, PIPE_FORMAT_R32_FLOAT }, 222 { PIPE_FORMAT_R16G16_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 223 { PIPE_FORMAT_R16G16B16_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 224 { PIPE_FORMAT_R16G16B16A16_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 225 { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R32_FLOAT }, 226 { PIPE_FORMAT_R16G16_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 227 { PIPE_FORMAT_R16G16B16_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 228 { PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 229 { PIPE_FORMAT_R16_USCALED, PIPE_FORMAT_R32_FLOAT }, 230 { PIPE_FORMAT_R16G16_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 231 { PIPE_FORMAT_R16G16B16_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 232 { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 233 { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R32_FLOAT }, 234 { PIPE_FORMAT_R16G16_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 235 { PIPE_FORMAT_R16G16B16_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 236 { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 237 { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R32_FLOAT }, 238 { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 239 { PIPE_FORMAT_R8G8B8_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 240 { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 241 { PIPE_FORMAT_R8_SNORM, PIPE_FORMAT_R32_FLOAT }, 242 { PIPE_FORMAT_R8G8_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 243 { PIPE_FORMAT_R8G8B8_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 244 { PIPE_FORMAT_R8G8B8A8_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 245 { PIPE_FORMAT_R8_USCALED, PIPE_FORMAT_R32_FLOAT }, 246 { PIPE_FORMAT_R8G8_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 247 { PIPE_FORMAT_R8G8B8_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 248 { PIPE_FORMAT_R8G8B8A8_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 249 { PIPE_FORMAT_R8_SSCALED, PIPE_FORMAT_R32_FLOAT }, 250 { PIPE_FORMAT_R8G8_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 251 { PIPE_FORMAT_R8G8B8_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 252 { PIPE_FORMAT_R8G8B8A8_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 253}; 254 255boolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, 256 unsigned flags) 257{ 258 unsigned i; 259 boolean fallback = FALSE; 260 261 /* I'd rather have a bitfield of which formats are supported and a static 262 * table of the translations indexed by format, but since we don't have C99 263 * we can't easily make a sparsely-populated table indexed by format. So, 264 * we construct the sparse table here. 265 */ 266 for (i = 0; i < PIPE_FORMAT_COUNT; i++) 267 caps->format_translation[i] = i; 268 269 for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) { 270 enum pipe_format format = vbuf_format_fallbacks[i].from; 271 272 if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0, 273 PIPE_BIND_VERTEX_BUFFER)) { 274 caps->format_translation[format] = vbuf_format_fallbacks[i].to; 275 fallback = TRUE; 276 } 277 } 278 279 caps->buffer_offset_unaligned = 280 !screen->get_param(screen, 281 PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY); 282 caps->buffer_stride_unaligned = 283 !screen->get_param(screen, 284 PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY); 285 caps->velem_src_offset_unaligned = 286 !screen->get_param(screen, 287 PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY); 288 caps->user_vertex_buffers = 289 screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS); 290 291 if (!caps->buffer_offset_unaligned || 292 !caps->buffer_stride_unaligned || 293 !caps->velem_src_offset_unaligned || 294 (!(flags & U_VBUF_FLAG_NO_USER_VBOS) && !caps->user_vertex_buffers)) { 295 fallback = TRUE; 296 } 297 298 return fallback; 299} 300 301struct u_vbuf * 302u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps) 303{ 304 struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf); 305 306 mgr->caps = *caps; 307 mgr->pipe = pipe; 308 mgr->cso_cache = cso_cache_create(); 309 mgr->translate_cache = translate_cache_create(); 310 memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs)); 311 312 mgr->has_signed_vb_offset = 313 pipe->screen->get_param(pipe->screen, 314 PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET); 315 316 return mgr; 317} 318 319/* u_vbuf uses its own caching for vertex elements, because it needs to keep 320 * its own preprocessed state per vertex element CSO. */ 321static struct u_vbuf_elements * 322u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count, 323 const struct pipe_vertex_element *states) 324{ 325 struct pipe_context *pipe = mgr->pipe; 326 unsigned key_size, hash_key; 327 struct cso_hash_iter iter; 328 struct u_vbuf_elements *ve; 329 struct cso_velems_state velems_state; 330 331 /* need to include the count into the stored state data too. */ 332 key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned); 333 velems_state.count = count; 334 memcpy(velems_state.velems, states, 335 sizeof(struct pipe_vertex_element) * count); 336 hash_key = cso_construct_key((void*)&velems_state, key_size); 337 iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS, 338 (void*)&velems_state, key_size); 339 340 if (cso_hash_iter_is_null(iter)) { 341 struct cso_velements *cso = MALLOC_STRUCT(cso_velements); 342 memcpy(&cso->state, &velems_state, key_size); 343 cso->data = u_vbuf_create_vertex_elements(mgr, count, states); 344 cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements; 345 cso->context = (void*)mgr; 346 347 iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso); 348 ve = cso->data; 349 } else { 350 ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data; 351 } 352 353 assert(ve); 354 355 if (ve != mgr->ve) 356 pipe->bind_vertex_elements_state(pipe, ve->driver_cso); 357 358 return ve; 359} 360 361void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count, 362 const struct pipe_vertex_element *states) 363{ 364 mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states); 365} 366 367void u_vbuf_destroy(struct u_vbuf *mgr) 368{ 369 struct pipe_screen *screen = mgr->pipe->screen; 370 unsigned i; 371 const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX, 372 PIPE_SHADER_CAP_MAX_INPUTS); 373 374 mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL); 375 376 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) 377 pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]); 378 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) 379 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]); 380 381 pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved); 382 383 translate_cache_destroy(mgr->translate_cache); 384 cso_cache_delete(mgr->cso_cache); 385 FREE(mgr); 386} 387 388static enum pipe_error 389u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, 390 const struct pipe_draw_info *info, 391 unsigned vb_mask, unsigned out_vb, 392 int start_vertex, unsigned num_vertices, 393 int min_index, boolean unroll_indices) 394{ 395 struct translate *tr; 396 struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; 397 struct pipe_resource *out_buffer = NULL; 398 uint8_t *out_map; 399 unsigned out_offset, mask; 400 401 /* Get a translate object. */ 402 tr = translate_cache_find(mgr->translate_cache, key); 403 404 /* Map buffers we want to translate. */ 405 mask = vb_mask; 406 while (mask) { 407 struct pipe_vertex_buffer *vb; 408 unsigned offset; 409 uint8_t *map; 410 unsigned i = u_bit_scan(&mask); 411 412 vb = &mgr->vertex_buffer[i]; 413 offset = vb->buffer_offset + vb->stride * start_vertex; 414 415 if (vb->is_user_buffer) { 416 map = (uint8_t*)vb->buffer.user + offset; 417 } else { 418 unsigned size = vb->stride ? num_vertices * vb->stride 419 : sizeof(double)*4; 420 421 if (!vb->buffer.resource) 422 continue; 423 424 if (offset + size > vb->buffer.resource->width0) { 425 /* Don't try to map past end of buffer. This often happens when 426 * we're translating an attribute that's at offset > 0 from the 427 * start of the vertex. If we'd subtract attrib's offset from 428 * the size, this probably wouldn't happen. 429 */ 430 size = vb->buffer.resource->width0 - offset; 431 432 /* Also adjust num_vertices. A common user error is to call 433 * glDrawRangeElements() with incorrect 'end' argument. The 'end 434 * value should be the max index value, but people often 435 * accidentally add one to this value. This adjustment avoids 436 * crashing (by reading past the end of a hardware buffer mapping) 437 * when people do that. 438 */ 439 num_vertices = (size + vb->stride - 1) / vb->stride; 440 } 441 442 map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size, 443 PIPE_TRANSFER_READ, &vb_transfer[i]); 444 } 445 446 /* Subtract min_index so that indexing with the index buffer works. */ 447 if (unroll_indices) { 448 map -= (ptrdiff_t)vb->stride * min_index; 449 } 450 451 tr->set_buffer(tr, i, map, vb->stride, info->max_index); 452 } 453 454 /* Translate. */ 455 if (unroll_indices) { 456 struct pipe_transfer *transfer = NULL; 457 const unsigned offset = info->start * info->index_size; 458 uint8_t *map; 459 460 /* Create and map the output buffer. */ 461 u_upload_alloc(mgr->pipe->stream_uploader, 0, 462 key->output_stride * info->count, 4, 463 &out_offset, &out_buffer, 464 (void**)&out_map); 465 if (!out_buffer) 466 return PIPE_ERROR_OUT_OF_MEMORY; 467 468 if (info->has_user_indices) { 469 map = (uint8_t*)info->index.user + offset; 470 } else { 471 map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset, 472 info->count * info->index_size, 473 PIPE_TRANSFER_READ, &transfer); 474 } 475 476 switch (info->index_size) { 477 case 4: 478 tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map); 479 break; 480 case 2: 481 tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map); 482 break; 483 case 1: 484 tr->run_elts8(tr, map, info->count, 0, 0, out_map); 485 break; 486 } 487 488 if (transfer) { 489 pipe_buffer_unmap(mgr->pipe, transfer); 490 } 491 } else { 492 /* Create and map the output buffer. */ 493 u_upload_alloc(mgr->pipe->stream_uploader, 494 mgr->has_signed_vb_offset ? 495 0 : key->output_stride * start_vertex, 496 key->output_stride * num_vertices, 4, 497 &out_offset, &out_buffer, 498 (void**)&out_map); 499 if (!out_buffer) 500 return PIPE_ERROR_OUT_OF_MEMORY; 501 502 out_offset -= key->output_stride * start_vertex; 503 504 tr->run(tr, 0, num_vertices, 0, 0, out_map); 505 } 506 507 /* Unmap all buffers. */ 508 mask = vb_mask; 509 while (mask) { 510 unsigned i = u_bit_scan(&mask); 511 512 if (vb_transfer[i]) { 513 pipe_buffer_unmap(mgr->pipe, vb_transfer[i]); 514 } 515 } 516 517 /* Setup the new vertex buffer. */ 518 mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset; 519 mgr->real_vertex_buffer[out_vb].stride = key->output_stride; 520 521 /* Move the buffer reference. */ 522 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]); 523 mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer; 524 mgr->real_vertex_buffer[out_vb].is_user_buffer = false; 525 526 return PIPE_OK; 527} 528 529static boolean 530u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, 531 unsigned mask[VB_NUM]) 532{ 533 unsigned type; 534 unsigned fallback_vbs[VB_NUM]; 535 /* Set the bit for each buffer which is incompatible, or isn't set. */ 536 uint32_t unused_vb_mask = 537 mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | 538 ~mgr->enabled_vb_mask; 539 540 memset(fallback_vbs, ~0, sizeof(fallback_vbs)); 541 542 /* Find free slots for each type if needed. */ 543 for (type = 0; type < VB_NUM; type++) { 544 if (mask[type]) { 545 uint32_t index; 546 547 if (!unused_vb_mask) { 548 return FALSE; 549 } 550 551 index = ffs(unused_vb_mask) - 1; 552 fallback_vbs[type] = index; 553 unused_vb_mask &= ~(1 << index); 554 /*printf("found slot=%i for type=%i\n", index, type);*/ 555 } 556 } 557 558 for (type = 0; type < VB_NUM; type++) { 559 if (mask[type]) { 560 mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type]; 561 } 562 } 563 564 memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs)); 565 return TRUE; 566} 567 568static boolean 569u_vbuf_translate_begin(struct u_vbuf *mgr, 570 const struct pipe_draw_info *info, 571 int start_vertex, unsigned num_vertices, 572 int min_index, boolean unroll_indices) 573{ 574 unsigned mask[VB_NUM] = {0}; 575 struct translate_key key[VB_NUM]; 576 unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */ 577 unsigned i, type; 578 const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask & 579 mgr->ve->used_vb_mask; 580 581 const int start[VB_NUM] = { 582 start_vertex, /* VERTEX */ 583 info->start_instance, /* INSTANCE */ 584 0 /* CONST */ 585 }; 586 587 const unsigned num[VB_NUM] = { 588 num_vertices, /* VERTEX */ 589 info->instance_count, /* INSTANCE */ 590 1 /* CONST */ 591 }; 592 593 memset(key, 0, sizeof(key)); 594 memset(elem_index, ~0, sizeof(elem_index)); 595 596 /* See if there are vertex attribs of each type to translate and 597 * which ones. */ 598 for (i = 0; i < mgr->ve->count; i++) { 599 unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index; 600 601 if (!mgr->vertex_buffer[vb_index].stride) { 602 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 603 !(incompatible_vb_mask & (1 << vb_index))) { 604 continue; 605 } 606 mask[VB_CONST] |= 1 << vb_index; 607 } else if (mgr->ve->ve[i].instance_divisor) { 608 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 609 !(incompatible_vb_mask & (1 << vb_index))) { 610 continue; 611 } 612 mask[VB_INSTANCE] |= 1 << vb_index; 613 } else { 614 if (!unroll_indices && 615 !(mgr->ve->incompatible_elem_mask & (1 << i)) && 616 !(incompatible_vb_mask & (1 << vb_index))) { 617 continue; 618 } 619 mask[VB_VERTEX] |= 1 << vb_index; 620 } 621 } 622 623 assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]); 624 625 /* Find free vertex buffer slots. */ 626 if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) { 627 return FALSE; 628 } 629 630 /* Initialize the translate keys. */ 631 for (i = 0; i < mgr->ve->count; i++) { 632 struct translate_key *k; 633 struct translate_element *te; 634 enum pipe_format output_format = mgr->ve->native_format[i]; 635 unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index; 636 bit = 1 << vb_index; 637 638 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 639 !(incompatible_vb_mask & (1 << vb_index)) && 640 (!unroll_indices || !(mask[VB_VERTEX] & bit))) { 641 continue; 642 } 643 644 /* Set type to what we will translate. 645 * Whether vertex, instance, or constant attribs. */ 646 for (type = 0; type < VB_NUM; type++) { 647 if (mask[type] & bit) { 648 break; 649 } 650 } 651 assert(type < VB_NUM); 652 if (mgr->ve->ve[i].src_format != output_format) 653 assert(translate_is_output_format_supported(output_format)); 654 /*printf("velem=%i type=%i\n", i, type);*/ 655 656 /* Add the vertex element. */ 657 k = &key[type]; 658 elem_index[type][i] = k->nr_elements; 659 660 te = &k->element[k->nr_elements]; 661 te->type = TRANSLATE_ELEMENT_NORMAL; 662 te->instance_divisor = 0; 663 te->input_buffer = vb_index; 664 te->input_format = mgr->ve->ve[i].src_format; 665 te->input_offset = mgr->ve->ve[i].src_offset; 666 te->output_format = output_format; 667 te->output_offset = k->output_stride; 668 669 k->output_stride += mgr->ve->native_format_size[i]; 670 k->nr_elements++; 671 } 672 673 /* Translate buffers. */ 674 for (type = 0; type < VB_NUM; type++) { 675 if (key[type].nr_elements) { 676 enum pipe_error err; 677 err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type], 678 mgr->fallback_vbs[type], 679 start[type], num[type], min_index, 680 unroll_indices && type == VB_VERTEX); 681 if (err != PIPE_OK) 682 return FALSE; 683 684 /* Fixup the stride for constant attribs. */ 685 if (type == VB_CONST) { 686 mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0; 687 } 688 } 689 } 690 691 /* Setup new vertex elements. */ 692 for (i = 0; i < mgr->ve->count; i++) { 693 for (type = 0; type < VB_NUM; type++) { 694 if (elem_index[type][i] < key[type].nr_elements) { 695 struct translate_element *te = &key[type].element[elem_index[type][i]]; 696 mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor; 697 mgr->fallback_velems[i].src_format = te->output_format; 698 mgr->fallback_velems[i].src_offset = te->output_offset; 699 mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type]; 700 701 /* elem_index[type][i] can only be set for one type. */ 702 assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u); 703 assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u); 704 break; 705 } 706 } 707 /* No translating, just copy the original vertex element over. */ 708 if (type == VB_NUM) { 709 memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i], 710 sizeof(struct pipe_vertex_element)); 711 } 712 } 713 714 u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count, 715 mgr->fallback_velems); 716 mgr->using_translate = TRUE; 717 return TRUE; 718} 719 720static void u_vbuf_translate_end(struct u_vbuf *mgr) 721{ 722 unsigned i; 723 724 /* Restore vertex elements. */ 725 mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso); 726 mgr->using_translate = FALSE; 727 728 /* Unreference the now-unused VBOs. */ 729 for (i = 0; i < VB_NUM; i++) { 730 unsigned vb = mgr->fallback_vbs[i]; 731 if (vb != ~0u) { 732 pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL); 733 mgr->fallback_vbs[i] = ~0; 734 735 /* This will cause the buffer to be unbound in the driver later. */ 736 mgr->dirty_real_vb_mask |= 1 << vb; 737 } 738 } 739} 740 741static void * 742u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, 743 const struct pipe_vertex_element *attribs) 744{ 745 struct pipe_context *pipe = mgr->pipe; 746 unsigned i; 747 struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS]; 748 struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements); 749 uint32_t used_buffers = 0; 750 751 ve->count = count; 752 753 memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count); 754 memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count); 755 756 /* Set the best native format in case the original format is not 757 * supported. */ 758 for (i = 0; i < count; i++) { 759 enum pipe_format format = ve->ve[i].src_format; 760 761 ve->src_format_size[i] = util_format_get_blocksize(format); 762 763 used_buffers |= 1 << ve->ve[i].vertex_buffer_index; 764 765 if (!ve->ve[i].instance_divisor) { 766 ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 767 } 768 769 format = mgr->caps.format_translation[format]; 770 771 driver_attribs[i].src_format = format; 772 ve->native_format[i] = format; 773 ve->native_format_size[i] = 774 util_format_get_blocksize(ve->native_format[i]); 775 776 if (ve->ve[i].src_format != format || 777 (!mgr->caps.velem_src_offset_unaligned && 778 ve->ve[i].src_offset % 4 != 0)) { 779 ve->incompatible_elem_mask |= 1 << i; 780 ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 781 } else { 782 ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 783 } 784 } 785 786 ve->used_vb_mask = used_buffers; 787 ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers; 788 ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers; 789 790 /* Align the formats and offsets to the size of DWORD if needed. */ 791 if (!mgr->caps.velem_src_offset_unaligned) { 792 for (i = 0; i < count; i++) { 793 ve->native_format_size[i] = align(ve->native_format_size[i], 4); 794 driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4); 795 } 796 } 797 798 ve->driver_cso = 799 pipe->create_vertex_elements_state(pipe, count, driver_attribs); 800 return ve; 801} 802 803static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso) 804{ 805 struct pipe_context *pipe = mgr->pipe; 806 struct u_vbuf_elements *ve = cso; 807 808 pipe->delete_vertex_elements_state(pipe, ve->driver_cso); 809 FREE(ve); 810} 811 812void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, 813 unsigned start_slot, unsigned count, 814 const struct pipe_vertex_buffer *bufs) 815{ 816 unsigned i; 817 /* which buffers are enabled */ 818 uint32_t enabled_vb_mask = 0; 819 /* which buffers are in user memory */ 820 uint32_t user_vb_mask = 0; 821 /* which buffers are incompatible with the driver */ 822 uint32_t incompatible_vb_mask = 0; 823 /* which buffers have a non-zero stride */ 824 uint32_t nonzero_stride_vb_mask = 0; 825 const uint32_t mask = ~(((1ull << count) - 1) << start_slot); 826 827 /* Zero out the bits we are going to rewrite completely. */ 828 mgr->user_vb_mask &= mask; 829 mgr->incompatible_vb_mask &= mask; 830 mgr->nonzero_stride_vb_mask &= mask; 831 mgr->enabled_vb_mask &= mask; 832 833 if (!bufs) { 834 struct pipe_context *pipe = mgr->pipe; 835 /* Unbind. */ 836 mgr->dirty_real_vb_mask &= mask; 837 838 for (i = 0; i < count; i++) { 839 unsigned dst_index = start_slot + i; 840 841 pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]); 842 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]); 843 } 844 845 pipe->set_vertex_buffers(pipe, start_slot, count, NULL); 846 return; 847 } 848 849 for (i = 0; i < count; i++) { 850 unsigned dst_index = start_slot + i; 851 const struct pipe_vertex_buffer *vb = &bufs[i]; 852 struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index]; 853 struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index]; 854 855 if (!vb->buffer.resource) { 856 pipe_vertex_buffer_unreference(orig_vb); 857 pipe_vertex_buffer_unreference(real_vb); 858 continue; 859 } 860 861 pipe_vertex_buffer_reference(orig_vb, vb); 862 863 if (vb->stride) { 864 nonzero_stride_vb_mask |= 1 << dst_index; 865 } 866 enabled_vb_mask |= 1 << dst_index; 867 868 if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) || 869 (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) { 870 incompatible_vb_mask |= 1 << dst_index; 871 real_vb->buffer_offset = vb->buffer_offset; 872 real_vb->stride = vb->stride; 873 pipe_vertex_buffer_unreference(real_vb); 874 real_vb->is_user_buffer = false; 875 continue; 876 } 877 878 if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) { 879 user_vb_mask |= 1 << dst_index; 880 real_vb->buffer_offset = vb->buffer_offset; 881 real_vb->stride = vb->stride; 882 pipe_vertex_buffer_unreference(real_vb); 883 real_vb->is_user_buffer = false; 884 continue; 885 } 886 887 pipe_vertex_buffer_reference(real_vb, vb); 888 } 889 890 mgr->user_vb_mask |= user_vb_mask; 891 mgr->incompatible_vb_mask |= incompatible_vb_mask; 892 mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask; 893 mgr->enabled_vb_mask |= enabled_vb_mask; 894 895 /* All changed buffers are marked as dirty, even the NULL ones, 896 * which will cause the NULL buffers to be unbound in the driver later. */ 897 mgr->dirty_real_vb_mask |= ~mask; 898} 899 900static enum pipe_error 901u_vbuf_upload_buffers(struct u_vbuf *mgr, 902 int start_vertex, unsigned num_vertices, 903 int start_instance, unsigned num_instances) 904{ 905 unsigned i; 906 unsigned nr_velems = mgr->ve->count; 907 const struct pipe_vertex_element *velems = 908 mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve; 909 unsigned start_offset[PIPE_MAX_ATTRIBS]; 910 unsigned end_offset[PIPE_MAX_ATTRIBS]; 911 uint32_t buffer_mask = 0; 912 913 /* Determine how much data needs to be uploaded. */ 914 for (i = 0; i < nr_velems; i++) { 915 const struct pipe_vertex_element *velem = &velems[i]; 916 unsigned index = velem->vertex_buffer_index; 917 struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index]; 918 unsigned instance_div, first, size, index_bit; 919 920 /* Skip the buffers generated by translate. */ 921 if (index == mgr->fallback_vbs[VB_VERTEX] || 922 index == mgr->fallback_vbs[VB_INSTANCE] || 923 index == mgr->fallback_vbs[VB_CONST]) { 924 continue; 925 } 926 927 if (!vb->is_user_buffer) { 928 continue; 929 } 930 931 instance_div = velem->instance_divisor; 932 first = vb->buffer_offset + velem->src_offset; 933 934 if (!vb->stride) { 935 /* Constant attrib. */ 936 size = mgr->ve->src_format_size[i]; 937 } else if (instance_div) { 938 /* Per-instance attrib. */ 939 940 /* Figure out how many instances we'll render given instance_div. We 941 * can't use the typical div_round_up() pattern because the CTS uses 942 * instance_div = ~0 for a test, which overflows div_round_up()'s 943 * addition. 944 */ 945 unsigned count = num_instances / instance_div; 946 if (count * instance_div != num_instances) 947 count++; 948 949 first += vb->stride * start_instance; 950 size = vb->stride * (count - 1) + mgr->ve->src_format_size[i]; 951 } else { 952 /* Per-vertex attrib. */ 953 first += vb->stride * start_vertex; 954 size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i]; 955 } 956 957 index_bit = 1 << index; 958 959 /* Update offsets. */ 960 if (!(buffer_mask & index_bit)) { 961 start_offset[index] = first; 962 end_offset[index] = first + size; 963 } else { 964 if (first < start_offset[index]) 965 start_offset[index] = first; 966 if (first + size > end_offset[index]) 967 end_offset[index] = first + size; 968 } 969 970 buffer_mask |= index_bit; 971 } 972 973 /* Upload buffers. */ 974 while (buffer_mask) { 975 unsigned start, end; 976 struct pipe_vertex_buffer *real_vb; 977 const uint8_t *ptr; 978 979 i = u_bit_scan(&buffer_mask); 980 981 start = start_offset[i]; 982 end = end_offset[i]; 983 assert(start < end); 984 985 real_vb = &mgr->real_vertex_buffer[i]; 986 ptr = mgr->vertex_buffer[i].buffer.user; 987 988 u_upload_data(mgr->pipe->stream_uploader, 989 mgr->has_signed_vb_offset ? 0 : start, 990 end - start, 4, 991 ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource); 992 if (!real_vb->buffer.resource) 993 return PIPE_ERROR_OUT_OF_MEMORY; 994 995 real_vb->buffer_offset -= start; 996 } 997 998 return PIPE_OK; 999} 1000 1001static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr) 1002{ 1003 /* See if there are any per-vertex attribs which will be uploaded or 1004 * translated. Use bitmasks to get the info instead of looping over vertex 1005 * elements. */ 1006 return (mgr->ve->used_vb_mask & 1007 ((mgr->user_vb_mask | 1008 mgr->incompatible_vb_mask | 1009 mgr->ve->incompatible_vb_mask_any) & 1010 mgr->ve->noninstance_vb_mask_any & 1011 mgr->nonzero_stride_vb_mask)) != 0; 1012} 1013 1014static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr) 1015{ 1016 /* Return true if there are hw buffers which don't need to be translated. 1017 * 1018 * We could query whether each buffer is busy, but that would 1019 * be way more costly than this. */ 1020 return (mgr->ve->used_vb_mask & 1021 (~mgr->user_vb_mask & 1022 ~mgr->incompatible_vb_mask & 1023 mgr->ve->compatible_vb_mask_all & 1024 mgr->ve->noninstance_vb_mask_any & 1025 mgr->nonzero_stride_vb_mask)) != 0; 1026} 1027 1028static void 1029u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, 1030 const void *indices, unsigned *out_min_index, 1031 unsigned *out_max_index) 1032{ 1033 unsigned max = 0; 1034 unsigned min = ~0u; 1035 1036 switch (info->index_size) { 1037 case 4: { 1038 const unsigned *ui_indices = (const unsigned*)indices; 1039 if (info->primitive_restart) { 1040 for (unsigned i = 0; i < info->count; i++) { 1041 if (ui_indices[i] != info->restart_index) { 1042 if (ui_indices[i] > max) max = ui_indices[i]; 1043 if (ui_indices[i] < min) min = ui_indices[i]; 1044 } 1045 } 1046 } 1047 else { 1048 for (unsigned i = 0; i < info->count; i++) { 1049 if (ui_indices[i] > max) max = ui_indices[i]; 1050 if (ui_indices[i] < min) min = ui_indices[i]; 1051 } 1052 } 1053 break; 1054 } 1055 case 2: { 1056 const unsigned short *us_indices = (const unsigned short*)indices; 1057 if (info->primitive_restart) { 1058 for (unsigned i = 0; i < info->count; i++) { 1059 if (us_indices[i] != info->restart_index) { 1060 if (us_indices[i] > max) max = us_indices[i]; 1061 if (us_indices[i] < min) min = us_indices[i]; 1062 } 1063 } 1064 } 1065 else { 1066 for (unsigned i = 0; i < info->count; i++) { 1067 if (us_indices[i] > max) max = us_indices[i]; 1068 if (us_indices[i] < min) min = us_indices[i]; 1069 } 1070 } 1071 break; 1072 } 1073 case 1: { 1074 const unsigned char *ub_indices = (const unsigned char*)indices; 1075 if (info->primitive_restart) { 1076 for (unsigned i = 0; i < info->count; i++) { 1077 if (ub_indices[i] != info->restart_index) { 1078 if (ub_indices[i] > max) max = ub_indices[i]; 1079 if (ub_indices[i] < min) min = ub_indices[i]; 1080 } 1081 } 1082 } 1083 else { 1084 for (unsigned i = 0; i < info->count; i++) { 1085 if (ub_indices[i] > max) max = ub_indices[i]; 1086 if (ub_indices[i] < min) min = ub_indices[i]; 1087 } 1088 } 1089 break; 1090 } 1091 default: 1092 assert(0); 1093 } 1094 1095 *out_min_index = min; 1096 *out_max_index = max; 1097} 1098 1099void u_vbuf_get_minmax_index(struct pipe_context *pipe, 1100 const struct pipe_draw_info *info, 1101 unsigned *out_min_index, unsigned *out_max_index) 1102{ 1103 struct pipe_transfer *transfer = NULL; 1104 const void *indices; 1105 1106 if (info->has_user_indices) { 1107 indices = (uint8_t*)info->index.user + 1108 info->start * info->index_size; 1109 } else { 1110 indices = pipe_buffer_map_range(pipe, info->index.resource, 1111 info->start * info->index_size, 1112 info->count * info->index_size, 1113 PIPE_TRANSFER_READ, &transfer); 1114 } 1115 1116 u_vbuf_get_minmax_index_mapped(info, indices, out_min_index, out_max_index); 1117 1118 if (transfer) { 1119 pipe_buffer_unmap(pipe, transfer); 1120 } 1121} 1122 1123static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr) 1124{ 1125 struct pipe_context *pipe = mgr->pipe; 1126 unsigned start_slot, count; 1127 1128 start_slot = ffs(mgr->dirty_real_vb_mask) - 1; 1129 count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot); 1130 1131 pipe->set_vertex_buffers(pipe, start_slot, count, 1132 mgr->real_vertex_buffer + start_slot); 1133 mgr->dirty_real_vb_mask = 0; 1134} 1135 1136static void 1137u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info, 1138 unsigned *indirect_data, unsigned stride, 1139 unsigned draw_count) 1140{ 1141 assert(info->index_size); 1142 info->indirect = NULL; 1143 1144 for (unsigned i = 0; i < draw_count; i++) { 1145 unsigned offset = i * stride / 4; 1146 1147 info->count = indirect_data[offset + 0]; 1148 info->instance_count = indirect_data[offset + 1]; 1149 1150 if (!info->count || !info->instance_count) 1151 continue; 1152 1153 info->start = indirect_data[offset + 2]; 1154 info->index_bias = indirect_data[offset + 3]; 1155 info->start_instance = indirect_data[offset + 4]; 1156 1157 u_vbuf_draw_vbo(mgr, info); 1158 } 1159} 1160 1161void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) 1162{ 1163 struct pipe_context *pipe = mgr->pipe; 1164 int start_vertex; 1165 unsigned min_index; 1166 unsigned num_vertices; 1167 boolean unroll_indices = FALSE; 1168 const uint32_t used_vb_mask = mgr->ve->used_vb_mask; 1169 uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask; 1170 const uint32_t incompatible_vb_mask = 1171 mgr->incompatible_vb_mask & used_vb_mask; 1172 struct pipe_draw_info new_info; 1173 1174 /* Normal draw. No fallback and no user buffers. */ 1175 if (!incompatible_vb_mask && 1176 !mgr->ve->incompatible_elem_mask && 1177 !user_vb_mask) { 1178 1179 /* Set vertex buffers if needed. */ 1180 if (mgr->dirty_real_vb_mask & used_vb_mask) { 1181 u_vbuf_set_driver_vertex_buffers(mgr); 1182 } 1183 1184 pipe->draw_vbo(pipe, info); 1185 return; 1186 } 1187 1188 new_info = *info; 1189 1190 /* Handle indirect (multi)draws. */ 1191 if (new_info.indirect) { 1192 const struct pipe_draw_indirect_info *indirect = new_info.indirect; 1193 unsigned draw_count = 0; 1194 1195 /* Get the number of draws. */ 1196 if (indirect->indirect_draw_count) { 1197 pipe_buffer_read(pipe, indirect->indirect_draw_count, 1198 indirect->indirect_draw_count_offset, 1199 4, &draw_count); 1200 } else { 1201 draw_count = indirect->draw_count; 1202 } 1203 1204 if (!draw_count) 1205 return; 1206 1207 unsigned data_size = (draw_count - 1) * indirect->stride + 1208 (new_info.index_size ? 20 : 16); 1209 unsigned *data = malloc(data_size); 1210 if (!data) 1211 return; /* report an error? */ 1212 1213 /* Read the used buffer range only once, because the read can be 1214 * uncached. 1215 */ 1216 pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size, 1217 data); 1218 1219 if (info->index_size) { 1220 /* Indexed multidraw. */ 1221 unsigned index_bias0 = data[3]; 1222 bool index_bias_same = true; 1223 1224 /* If we invoke the translate path, we have to split the multidraw. */ 1225 if (incompatible_vb_mask || 1226 mgr->ve->incompatible_elem_mask) { 1227 u_vbuf_split_indexed_multidraw(mgr, &new_info, data, 1228 indirect->stride, draw_count); 1229 free(data); 1230 return; 1231 } 1232 1233 /* See if index_bias is the same for all draws. */ 1234 for (unsigned i = 1; i < draw_count; i++) { 1235 if (data[i * indirect->stride / 4 + 3] != index_bias0) { 1236 index_bias_same = false; 1237 break; 1238 } 1239 } 1240 1241 /* Split the multidraw if index_bias is different. */ 1242 if (!index_bias_same) { 1243 u_vbuf_split_indexed_multidraw(mgr, &new_info, data, 1244 indirect->stride, draw_count); 1245 free(data); 1246 return; 1247 } 1248 1249 /* If we don't need to use the translate path and index_bias is 1250 * the same, we can process the multidraw with the time complexity 1251 * equal to 1 draw call (except for the index range computation). 1252 * We only need to compute the index range covering all draw calls 1253 * of the multidraw. 1254 * 1255 * The driver will not look at these values because indirect != NULL. 1256 * These values determine the user buffer bounds to upload. 1257 */ 1258 new_info.index_bias = index_bias0; 1259 new_info.min_index = ~0u; 1260 new_info.max_index = 0; 1261 new_info.start_instance = ~0u; 1262 unsigned end_instance = 0; 1263 1264 struct pipe_transfer *transfer = NULL; 1265 const uint8_t *indices; 1266 1267 if (info->has_user_indices) { 1268 indices = (uint8_t*)info->index.user; 1269 } else { 1270 indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource, 1271 PIPE_TRANSFER_READ, &transfer); 1272 } 1273 1274 for (unsigned i = 0; i < draw_count; i++) { 1275 unsigned offset = i * indirect->stride / 4; 1276 unsigned start = data[offset + 2]; 1277 unsigned count = data[offset + 0]; 1278 unsigned start_instance = data[offset + 4]; 1279 unsigned instance_count = data[offset + 1]; 1280 1281 if (!count || !instance_count) 1282 continue; 1283 1284 /* Update the ranges of instances. */ 1285 new_info.start_instance = MIN2(new_info.start_instance, 1286 start_instance); 1287 end_instance = MAX2(end_instance, start_instance + instance_count); 1288 1289 /* Update the index range. */ 1290 unsigned min, max; 1291 new_info.count = count; /* only used by get_minmax_index */ 1292 u_vbuf_get_minmax_index_mapped(&new_info, 1293 indices + 1294 new_info.index_size * start, 1295 &min, &max); 1296 1297 new_info.min_index = MIN2(new_info.min_index, min); 1298 new_info.max_index = MAX2(new_info.max_index, max); 1299 } 1300 free(data); 1301 1302 if (transfer) 1303 pipe_buffer_unmap(pipe, transfer); 1304 1305 /* Set the final instance count. */ 1306 new_info.instance_count = end_instance - new_info.start_instance; 1307 1308 if (new_info.start_instance == ~0u || !new_info.instance_count) 1309 return; 1310 } else { 1311 /* Non-indexed multidraw. 1312 * 1313 * Keep the draw call indirect and compute minimums & maximums, 1314 * which will determine the user buffer bounds to upload, but 1315 * the driver will not look at these values because indirect != NULL. 1316 * 1317 * This efficiently processes the multidraw with the time complexity 1318 * equal to 1 draw call. 1319 */ 1320 new_info.start = ~0u; 1321 new_info.start_instance = ~0u; 1322 unsigned end_vertex = 0; 1323 unsigned end_instance = 0; 1324 1325 for (unsigned i = 0; i < draw_count; i++) { 1326 unsigned offset = i * indirect->stride / 4; 1327 unsigned start = data[offset + 2]; 1328 unsigned count = data[offset + 0]; 1329 unsigned start_instance = data[offset + 3]; 1330 unsigned instance_count = data[offset + 1]; 1331 1332 new_info.start = MIN2(new_info.start, start); 1333 new_info.start_instance = MIN2(new_info.start_instance, 1334 start_instance); 1335 1336 end_vertex = MAX2(end_vertex, start + count); 1337 end_instance = MAX2(end_instance, start_instance + instance_count); 1338 } 1339 free(data); 1340 1341 /* Set the final counts. */ 1342 new_info.count = end_vertex - new_info.start; 1343 new_info.instance_count = end_instance - new_info.start_instance; 1344 1345 if (new_info.start == ~0u || !new_info.count || !new_info.instance_count) 1346 return; 1347 } 1348 } 1349 1350 if (new_info.index_size) { 1351 /* See if anything needs to be done for per-vertex attribs. */ 1352 if (u_vbuf_need_minmax_index(mgr)) { 1353 unsigned max_index; 1354 1355 if (new_info.max_index != ~0u) { 1356 min_index = new_info.min_index; 1357 max_index = new_info.max_index; 1358 } else { 1359 u_vbuf_get_minmax_index(mgr->pipe, &new_info, 1360 &min_index, &max_index); 1361 } 1362 1363 assert(min_index <= max_index); 1364 1365 start_vertex = min_index + new_info.index_bias; 1366 num_vertices = max_index + 1 - min_index; 1367 1368 /* Primitive restart doesn't work when unrolling indices. 1369 * We would have to break this drawing operation into several ones. */ 1370 /* Use some heuristic to see if unrolling indices improves 1371 * performance. */ 1372 if (!info->indirect && 1373 !new_info.primitive_restart && 1374 num_vertices > new_info.count*2 && 1375 num_vertices - new_info.count > 32 && 1376 !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { 1377 unroll_indices = TRUE; 1378 user_vb_mask &= ~(mgr->nonzero_stride_vb_mask & 1379 mgr->ve->noninstance_vb_mask_any); 1380 } 1381 } else { 1382 /* Nothing to do for per-vertex attribs. */ 1383 start_vertex = 0; 1384 num_vertices = 0; 1385 min_index = 0; 1386 } 1387 } else { 1388 start_vertex = new_info.start; 1389 num_vertices = new_info.count; 1390 min_index = 0; 1391 } 1392 1393 /* Translate vertices with non-native layouts or formats. */ 1394 if (unroll_indices || 1395 incompatible_vb_mask || 1396 mgr->ve->incompatible_elem_mask) { 1397 if (!u_vbuf_translate_begin(mgr, &new_info, start_vertex, num_vertices, 1398 min_index, unroll_indices)) { 1399 debug_warn_once("u_vbuf_translate_begin() failed"); 1400 return; 1401 } 1402 1403 if (unroll_indices) { 1404 new_info.index_size = 0; 1405 new_info.index_bias = 0; 1406 new_info.min_index = 0; 1407 new_info.max_index = new_info.count - 1; 1408 new_info.start = 0; 1409 } 1410 1411 user_vb_mask &= ~(incompatible_vb_mask | 1412 mgr->ve->incompatible_vb_mask_all); 1413 } 1414 1415 /* Upload user buffers. */ 1416 if (user_vb_mask) { 1417 if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, 1418 new_info.start_instance, 1419 new_info.instance_count) != PIPE_OK) { 1420 debug_warn_once("u_vbuf_upload_buffers() failed"); 1421 return; 1422 } 1423 1424 mgr->dirty_real_vb_mask |= user_vb_mask; 1425 } 1426 1427 /* 1428 if (unroll_indices) { 1429 printf("unrolling indices: start_vertex = %i, num_vertices = %i\n", 1430 start_vertex, num_vertices); 1431 util_dump_draw_info(stdout, info); 1432 printf("\n"); 1433 } 1434 1435 unsigned i; 1436 for (i = 0; i < mgr->nr_vertex_buffers; i++) { 1437 printf("input %i: ", i); 1438 util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i); 1439 printf("\n"); 1440 } 1441 for (i = 0; i < mgr->nr_real_vertex_buffers; i++) { 1442 printf("real %i: ", i); 1443 util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i); 1444 printf("\n"); 1445 } 1446 */ 1447 1448 u_upload_unmap(pipe->stream_uploader); 1449 u_vbuf_set_driver_vertex_buffers(mgr); 1450 1451 pipe->draw_vbo(pipe, &new_info); 1452 1453 if (mgr->using_translate) { 1454 u_vbuf_translate_end(mgr); 1455 } 1456} 1457 1458void u_vbuf_save_vertex_elements(struct u_vbuf *mgr) 1459{ 1460 assert(!mgr->ve_saved); 1461 mgr->ve_saved = mgr->ve; 1462} 1463 1464void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr) 1465{ 1466 if (mgr->ve != mgr->ve_saved) { 1467 struct pipe_context *pipe = mgr->pipe; 1468 1469 mgr->ve = mgr->ve_saved; 1470 pipe->bind_vertex_elements_state(pipe, 1471 mgr->ve ? mgr->ve->driver_cso : NULL); 1472 } 1473 mgr->ve_saved = NULL; 1474} 1475 1476void u_vbuf_save_vertex_buffer0(struct u_vbuf *mgr) 1477{ 1478 pipe_vertex_buffer_reference(&mgr->vertex_buffer0_saved, 1479 &mgr->vertex_buffer[0]); 1480} 1481 1482void u_vbuf_restore_vertex_buffer0(struct u_vbuf *mgr) 1483{ 1484 u_vbuf_set_vertex_buffers(mgr, 0, 1, &mgr->vertex_buffer0_saved); 1485 pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved); 1486} 1487