1/* 2 * Copyright 2003 VMware, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26#include "main/arrayobj.h" 27#include "main/bufferobj.h" 28#include "main/context.h" 29#include "main/enums.h" 30#include "main/macros.h" 31#include "main/glformats.h" 32#include "nir.h" 33 34#include "brw_draw.h" 35#include "brw_defines.h" 36#include "brw_context.h" 37#include "brw_state.h" 38 39#include "intel_batchbuffer.h" 40#include "intel_buffer_objects.h" 41 42static const GLuint double_types_float[5] = { 43 0, 44 ISL_FORMAT_R64_FLOAT, 45 ISL_FORMAT_R64G64_FLOAT, 46 ISL_FORMAT_R64G64B64_FLOAT, 47 ISL_FORMAT_R64G64B64A64_FLOAT 48}; 49 50static const GLuint double_types_passthru[5] = { 51 0, 52 ISL_FORMAT_R64_PASSTHRU, 53 ISL_FORMAT_R64G64_PASSTHRU, 54 ISL_FORMAT_R64G64B64_PASSTHRU, 55 ISL_FORMAT_R64G64B64A64_PASSTHRU 56}; 57 58static const GLuint float_types[5] = { 59 0, 60 ISL_FORMAT_R32_FLOAT, 61 ISL_FORMAT_R32G32_FLOAT, 62 ISL_FORMAT_R32G32B32_FLOAT, 63 ISL_FORMAT_R32G32B32A32_FLOAT 64}; 65 66static const GLuint half_float_types[5] = { 67 0, 68 ISL_FORMAT_R16_FLOAT, 69 ISL_FORMAT_R16G16_FLOAT, 70 ISL_FORMAT_R16G16B16_FLOAT, 71 ISL_FORMAT_R16G16B16A16_FLOAT 72}; 73 74static const GLuint fixed_point_types[5] = { 75 0, 76 ISL_FORMAT_R32_SFIXED, 77 ISL_FORMAT_R32G32_SFIXED, 78 ISL_FORMAT_R32G32B32_SFIXED, 79 ISL_FORMAT_R32G32B32A32_SFIXED, 80}; 81 82static const GLuint uint_types_direct[5] = { 83 0, 84 ISL_FORMAT_R32_UINT, 85 ISL_FORMAT_R32G32_UINT, 86 ISL_FORMAT_R32G32B32_UINT, 87 ISL_FORMAT_R32G32B32A32_UINT 88}; 89 90static const GLuint uint_types_norm[5] = { 91 0, 92 ISL_FORMAT_R32_UNORM, 93 ISL_FORMAT_R32G32_UNORM, 94 ISL_FORMAT_R32G32B32_UNORM, 95 ISL_FORMAT_R32G32B32A32_UNORM 96}; 97 98static const GLuint uint_types_scale[5] = { 99 0, 100 ISL_FORMAT_R32_USCALED, 101 ISL_FORMAT_R32G32_USCALED, 102 ISL_FORMAT_R32G32B32_USCALED, 103 ISL_FORMAT_R32G32B32A32_USCALED 104}; 105 106static const GLuint int_types_direct[5] = { 107 0, 108 ISL_FORMAT_R32_SINT, 109 ISL_FORMAT_R32G32_SINT, 110 ISL_FORMAT_R32G32B32_SINT, 111 ISL_FORMAT_R32G32B32A32_SINT 112}; 113 114static const GLuint int_types_norm[5] = { 115 0, 116 ISL_FORMAT_R32_SNORM, 117 ISL_FORMAT_R32G32_SNORM, 118 ISL_FORMAT_R32G32B32_SNORM, 119 ISL_FORMAT_R32G32B32A32_SNORM 120}; 121 122static const GLuint int_types_scale[5] = { 123 0, 124 ISL_FORMAT_R32_SSCALED, 125 ISL_FORMAT_R32G32_SSCALED, 126 ISL_FORMAT_R32G32B32_SSCALED, 127 ISL_FORMAT_R32G32B32A32_SSCALED 128}; 129 130static const GLuint ushort_types_direct[5] = { 131 0, 132 ISL_FORMAT_R16_UINT, 133 ISL_FORMAT_R16G16_UINT, 134 ISL_FORMAT_R16G16B16_UINT, 135 ISL_FORMAT_R16G16B16A16_UINT 136}; 137 138static const GLuint ushort_types_norm[5] = { 139 0, 140 ISL_FORMAT_R16_UNORM, 141 ISL_FORMAT_R16G16_UNORM, 142 ISL_FORMAT_R16G16B16_UNORM, 143 ISL_FORMAT_R16G16B16A16_UNORM 144}; 145 146static const GLuint ushort_types_scale[5] = { 147 0, 148 ISL_FORMAT_R16_USCALED, 149 ISL_FORMAT_R16G16_USCALED, 150 ISL_FORMAT_R16G16B16_USCALED, 151 ISL_FORMAT_R16G16B16A16_USCALED 152}; 153 154static const GLuint short_types_direct[5] = { 155 0, 156 ISL_FORMAT_R16_SINT, 157 ISL_FORMAT_R16G16_SINT, 158 ISL_FORMAT_R16G16B16_SINT, 159 ISL_FORMAT_R16G16B16A16_SINT 160}; 161 162static const GLuint short_types_norm[5] = { 163 0, 164 ISL_FORMAT_R16_SNORM, 165 ISL_FORMAT_R16G16_SNORM, 166 ISL_FORMAT_R16G16B16_SNORM, 167 ISL_FORMAT_R16G16B16A16_SNORM 168}; 169 170static const GLuint short_types_scale[5] = { 171 0, 172 ISL_FORMAT_R16_SSCALED, 173 ISL_FORMAT_R16G16_SSCALED, 174 ISL_FORMAT_R16G16B16_SSCALED, 175 ISL_FORMAT_R16G16B16A16_SSCALED 176}; 177 178static const GLuint ubyte_types_direct[5] = { 179 0, 180 ISL_FORMAT_R8_UINT, 181 ISL_FORMAT_R8G8_UINT, 182 ISL_FORMAT_R8G8B8_UINT, 183 ISL_FORMAT_R8G8B8A8_UINT 184}; 185 186static const GLuint ubyte_types_norm[5] = { 187 0, 188 ISL_FORMAT_R8_UNORM, 189 ISL_FORMAT_R8G8_UNORM, 190 ISL_FORMAT_R8G8B8_UNORM, 191 ISL_FORMAT_R8G8B8A8_UNORM 192}; 193 194static const GLuint ubyte_types_scale[5] = { 195 0, 196 ISL_FORMAT_R8_USCALED, 197 ISL_FORMAT_R8G8_USCALED, 198 ISL_FORMAT_R8G8B8_USCALED, 199 ISL_FORMAT_R8G8B8A8_USCALED 200}; 201 202static const GLuint byte_types_direct[5] = { 203 0, 204 ISL_FORMAT_R8_SINT, 205 ISL_FORMAT_R8G8_SINT, 206 ISL_FORMAT_R8G8B8_SINT, 207 ISL_FORMAT_R8G8B8A8_SINT 208}; 209 210static const GLuint byte_types_norm[5] = { 211 0, 212 ISL_FORMAT_R8_SNORM, 213 ISL_FORMAT_R8G8_SNORM, 214 ISL_FORMAT_R8G8B8_SNORM, 215 ISL_FORMAT_R8G8B8A8_SNORM 216}; 217 218static const GLuint byte_types_scale[5] = { 219 0, 220 ISL_FORMAT_R8_SSCALED, 221 ISL_FORMAT_R8G8_SSCALED, 222 ISL_FORMAT_R8G8B8_SSCALED, 223 ISL_FORMAT_R8G8B8A8_SSCALED 224}; 225 226static GLuint 227double_types(int size, GLboolean doubles) 228{ 229 /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): 230 * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats, 231 * 64-bit components are stored in the URB without any conversion." 232 * Also included on BDW PRM, Volume 7, page 470, table "Source Element 233 * Formats Supported in VF Unit" 234 * 235 * Previous PRMs don't include those references, so for gen7 we can't use 236 * PASSTHRU formats directly. But in any case, we prefer to return passthru 237 * even in that case, because that reflects what we want to achieve, even 238 * if we would need to workaround on gen < 8. 239 */ 240 return (doubles 241 ? double_types_passthru[size] 242 : double_types_float[size]); 243} 244 245/** 246 * Given vertex array type/size/format/normalized info, return 247 * the appopriate hardware surface type. 248 * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. 249 */ 250unsigned 251brw_get_vertex_surface_type(struct brw_context *brw, 252 const struct gl_vertex_format *glformat) 253{ 254 int size = glformat->Size; 255 const struct gen_device_info *devinfo = &brw->screen->devinfo; 256 const bool is_ivybridge_or_older = 257 devinfo->gen <= 7 && !devinfo->is_baytrail && !devinfo->is_haswell; 258 259 if (unlikely(INTEL_DEBUG & DEBUG_VERTS)) 260 fprintf(stderr, "type %s size %d normalized %d\n", 261 _mesa_enum_to_string(glformat->Type), 262 glformat->Size, glformat->Normalized); 263 264 if (glformat->Integer) { 265 assert(glformat->Format == GL_RGBA); /* sanity check */ 266 switch (glformat->Type) { 267 case GL_INT: return int_types_direct[size]; 268 case GL_SHORT: 269 if (is_ivybridge_or_older && size == 3) 270 return short_types_direct[4]; 271 else 272 return short_types_direct[size]; 273 case GL_BYTE: 274 if (is_ivybridge_or_older && size == 3) 275 return byte_types_direct[4]; 276 else 277 return byte_types_direct[size]; 278 case GL_UNSIGNED_INT: return uint_types_direct[size]; 279 case GL_UNSIGNED_SHORT: 280 if (is_ivybridge_or_older && size == 3) 281 return ushort_types_direct[4]; 282 else 283 return ushort_types_direct[size]; 284 case GL_UNSIGNED_BYTE: 285 if (is_ivybridge_or_older && size == 3) 286 return ubyte_types_direct[4]; 287 else 288 return ubyte_types_direct[size]; 289 default: unreachable("not reached"); 290 } 291 } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { 292 return ISL_FORMAT_R11G11B10_FLOAT; 293 } else if (glformat->Normalized) { 294 switch (glformat->Type) { 295 case GL_DOUBLE: return double_types(size, glformat->Doubles); 296 case GL_FLOAT: return float_types[size]; 297 case GL_HALF_FLOAT: 298 case GL_HALF_FLOAT_OES: 299 if (devinfo->gen < 6 && size == 3) 300 return half_float_types[4]; 301 else 302 return half_float_types[size]; 303 case GL_INT: return int_types_norm[size]; 304 case GL_SHORT: return short_types_norm[size]; 305 case GL_BYTE: return byte_types_norm[size]; 306 case GL_UNSIGNED_INT: return uint_types_norm[size]; 307 case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; 308 case GL_UNSIGNED_BYTE: 309 if (glformat->Format == GL_BGRA) { 310 /* See GL_EXT_vertex_array_bgra */ 311 assert(size == 4); 312 return ISL_FORMAT_B8G8R8A8_UNORM; 313 } 314 else { 315 return ubyte_types_norm[size]; 316 } 317 case GL_FIXED: 318 if (devinfo->gen >= 8 || devinfo->is_haswell) 319 return fixed_point_types[size]; 320 321 /* This produces GL_FIXED inputs as values between INT32_MIN and 322 * INT32_MAX, which will be scaled down by 1/65536 by the VS. 323 */ 324 return int_types_scale[size]; 325 /* See GL_ARB_vertex_type_2_10_10_10_rev. 326 * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd 327 * like to use here, so upload everything as UINT and fix 328 * it in the shader 329 */ 330 case GL_INT_2_10_10_10_REV: 331 assert(size == 4); 332 if (devinfo->gen >= 8 || devinfo->is_haswell) { 333 return glformat->Format == GL_BGRA 334 ? ISL_FORMAT_B10G10R10A2_SNORM 335 : ISL_FORMAT_R10G10B10A2_SNORM; 336 } 337 return ISL_FORMAT_R10G10B10A2_UINT; 338 case GL_UNSIGNED_INT_2_10_10_10_REV: 339 assert(size == 4); 340 if (devinfo->gen >= 8 || devinfo->is_haswell) { 341 return glformat->Format == GL_BGRA 342 ? ISL_FORMAT_B10G10R10A2_UNORM 343 : ISL_FORMAT_R10G10B10A2_UNORM; 344 } 345 return ISL_FORMAT_R10G10B10A2_UINT; 346 default: unreachable("not reached"); 347 } 348 } 349 else { 350 /* See GL_ARB_vertex_type_2_10_10_10_rev. 351 * W/A: the hardware doesn't really support the formats we'd 352 * like to use here, so upload everything as UINT and fix 353 * it in the shader 354 */ 355 if (glformat->Type == GL_INT_2_10_10_10_REV) { 356 assert(size == 4); 357 if (devinfo->gen >= 8 || devinfo->is_haswell) { 358 return glformat->Format == GL_BGRA 359 ? ISL_FORMAT_B10G10R10A2_SSCALED 360 : ISL_FORMAT_R10G10B10A2_SSCALED; 361 } 362 return ISL_FORMAT_R10G10B10A2_UINT; 363 } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { 364 assert(size == 4); 365 if (devinfo->gen >= 8 || devinfo->is_haswell) { 366 return glformat->Format == GL_BGRA 367 ? ISL_FORMAT_B10G10R10A2_USCALED 368 : ISL_FORMAT_R10G10B10A2_USCALED; 369 } 370 return ISL_FORMAT_R10G10B10A2_UINT; 371 } 372 assert(glformat->Format == GL_RGBA); /* sanity check */ 373 switch (glformat->Type) { 374 case GL_DOUBLE: return double_types(size, glformat->Doubles); 375 case GL_FLOAT: return float_types[size]; 376 case GL_HALF_FLOAT: 377 case GL_HALF_FLOAT_OES: 378 if (devinfo->gen < 6 && size == 3) 379 return half_float_types[4]; 380 else 381 return half_float_types[size]; 382 case GL_INT: return int_types_scale[size]; 383 case GL_SHORT: return short_types_scale[size]; 384 case GL_BYTE: return byte_types_scale[size]; 385 case GL_UNSIGNED_INT: return uint_types_scale[size]; 386 case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; 387 case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; 388 case GL_FIXED: 389 if (devinfo->gen >= 8 || devinfo->is_haswell) 390 return fixed_point_types[size]; 391 392 /* This produces GL_FIXED inputs as values between INT32_MIN and 393 * INT32_MAX, which will be scaled down by 1/65536 by the VS. 394 */ 395 return int_types_scale[size]; 396 default: unreachable("not reached"); 397 } 398 } 399} 400 401static void 402copy_array_to_vbo_array(struct brw_context *brw, 403 struct brw_vertex_element *element, 404 int min, int max, 405 struct brw_vertex_buffer *buffer, 406 GLuint dst_stride) 407{ 408 const struct gl_vertex_buffer_binding *glbinding = element->glbinding; 409 const struct gl_array_attributes *glattrib = element->glattrib; 410 const struct gl_vertex_format *glformat = &glattrib->Format; 411 const int src_stride = glbinding->Stride; 412 413 /* If the source stride is zero, we just want to upload the current 414 * attribute once and set the buffer's stride to 0. There's no need 415 * to replicate it out. 416 */ 417 if (src_stride == 0) { 418 brw_upload_data(&brw->upload, glattrib->Ptr, glformat->_ElementSize, 419 glformat->_ElementSize, &buffer->bo, &buffer->offset); 420 421 buffer->stride = 0; 422 buffer->size = glformat->_ElementSize; 423 return; 424 } 425 426 const unsigned char *src = glattrib->Ptr + min * src_stride; 427 int count = max - min + 1; 428 GLuint size = count * dst_stride; 429 uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride, 430 &buffer->bo, &buffer->offset); 431 432 /* The GL 4.5 spec says: 433 * "If any enabled array’s buffer binding is zero when DrawArrays or 434 * one of the other drawing commands defined in section 10.4 is called, 435 * the result is undefined." 436 * 437 * In this case, let's the dst with undefined values 438 */ 439 if (src != NULL) { 440 if (dst_stride == src_stride) { 441 memcpy(dst, src, size); 442 } else { 443 while (count--) { 444 memcpy(dst, src, dst_stride); 445 src += src_stride; 446 dst += dst_stride; 447 } 448 } 449 } 450 buffer->stride = dst_stride; 451 buffer->size = size; 452} 453 454void 455brw_prepare_vertices(struct brw_context *brw) 456{ 457 const struct gen_device_info *devinfo = &brw->screen->devinfo; 458 struct gl_context *ctx = &brw->ctx; 459 /* BRW_NEW_VERTEX_PROGRAM */ 460 const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX]; 461 /* BRW_NEW_VS_PROG_DATA */ 462 const struct brw_vs_prog_data *vs_prog_data = 463 brw_vs_prog_data(brw->vs.base.prog_data); 464 GLbitfield64 vs_inputs = 465 nir_get_single_slot_attribs_mask(vs_prog_data->inputs_read, 466 vp->DualSlotInputs); 467 const unsigned char *ptr = NULL; 468 GLuint interleaved = 0; 469 unsigned int min_index = brw->vb.min_index + brw->basevertex; 470 unsigned int max_index = brw->vb.max_index + brw->basevertex; 471 unsigned i; 472 int delta, j; 473 474 struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; 475 GLuint nr_uploads = 0; 476 477 /* _NEW_POLYGON 478 * 479 * On gen6+, edge flags don't end up in the VUE (either in or out of the 480 * VS). Instead, they're uploaded as the last vertex element, and the data 481 * is passed sideband through the fixed function units. So, we need to 482 * prepare the vertex buffer for it, but it's not present in inputs_read. 483 */ 484 if (devinfo->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL || 485 ctx->Polygon.BackMode != GL_FILL)) { 486 vs_inputs |= VERT_BIT_EDGEFLAG; 487 } 488 489 if (0) 490 fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index); 491 492 /* Accumulate the list of enabled arrays. */ 493 brw->vb.nr_enabled = 0; 494 while (vs_inputs) { 495 const unsigned index = ffsll(vs_inputs) - 1; 496 assert(index < 64); 497 498 struct brw_vertex_element *input = &brw->vb.inputs[index]; 499 input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(index)) != 0; 500 vs_inputs &= ~BITFIELD64_BIT(index); 501 brw->vb.enabled[brw->vb.nr_enabled++] = input; 502 } 503 504 if (brw->vb.nr_enabled == 0) 505 return; 506 507 if (brw->vb.nr_buffers) 508 return; 509 510 /* The range of data in a given buffer represented as [min, max) */ 511 struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX]; 512 uint32_t buffer_range_start[VERT_ATTRIB_MAX]; 513 uint32_t buffer_range_end[VERT_ATTRIB_MAX]; 514 515 for (i = j = 0; i < brw->vb.nr_enabled; i++) { 516 struct brw_vertex_element *input = brw->vb.enabled[i]; 517 const struct gl_vertex_buffer_binding *glbinding = input->glbinding; 518 const struct gl_array_attributes *glattrib = input->glattrib; 519 520 if (_mesa_is_bufferobj(glbinding->BufferObj)) { 521 struct intel_buffer_object *intel_buffer = 522 intel_buffer_object(glbinding->BufferObj); 523 524 const uint32_t offset = _mesa_draw_binding_offset(glbinding) + 525 _mesa_draw_attributes_relative_offset(glattrib); 526 527 /* Start with the worst case */ 528 uint32_t start = 0; 529 uint32_t range = intel_buffer->Base.Size; 530 if (glbinding->InstanceDivisor) { 531 if (brw->num_instances) { 532 start = offset + glbinding->Stride * brw->baseinstance; 533 range = (glbinding->Stride * ((brw->num_instances - 1) / 534 glbinding->InstanceDivisor) + 535 glattrib->Format._ElementSize); 536 } 537 } else { 538 if (brw->vb.index_bounds_valid) { 539 start = offset + min_index * glbinding->Stride; 540 range = (glbinding->Stride * (max_index - min_index) + 541 glattrib->Format._ElementSize); 542 } 543 } 544 545 /* If we have a VB set to be uploaded for this buffer object 546 * already, reuse that VB state so that we emit fewer 547 * relocations. 548 */ 549 unsigned k; 550 for (k = 0; k < i; k++) { 551 struct brw_vertex_element *other = brw->vb.enabled[k]; 552 const struct gl_vertex_buffer_binding *obind = other->glbinding; 553 const struct gl_array_attributes *oattrib = other->glattrib; 554 const uint32_t ooffset = _mesa_draw_binding_offset(obind) + 555 _mesa_draw_attributes_relative_offset(oattrib); 556 if (glbinding->BufferObj == obind->BufferObj && 557 glbinding->Stride == obind->Stride && 558 glbinding->InstanceDivisor == obind->InstanceDivisor && 559 (offset - ooffset) < glbinding->Stride) 560 { 561 input->buffer = brw->vb.enabled[k]->buffer; 562 input->offset = offset - ooffset; 563 564 buffer_range_start[input->buffer] = 565 MIN2(buffer_range_start[input->buffer], start); 566 buffer_range_end[input->buffer] = 567 MAX2(buffer_range_end[input->buffer], start + range); 568 break; 569 } 570 } 571 if (k == i) { 572 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; 573 574 /* Named buffer object: Just reference its contents directly. */ 575 buffer->offset = offset; 576 buffer->stride = glbinding->Stride; 577 buffer->step_rate = glbinding->InstanceDivisor; 578 buffer->size = glbinding->BufferObj->Size - offset; 579 580 enabled_buffer[j] = intel_buffer; 581 buffer_range_start[j] = start; 582 buffer_range_end[j] = start + range; 583 584 input->buffer = j++; 585 input->offset = 0; 586 } 587 } else { 588 /* Queue the buffer object up to be uploaded in the next pass, 589 * when we've decided if we're doing interleaved or not. 590 */ 591 if (nr_uploads == 0) { 592 interleaved = glbinding->Stride; 593 ptr = glattrib->Ptr; 594 } 595 else if (interleaved != glbinding->Stride || 596 glbinding->InstanceDivisor != 0 || 597 glattrib->Ptr < ptr || 598 (uintptr_t)(glattrib->Ptr - ptr) + 599 glattrib->Format._ElementSize > interleaved) 600 { 601 /* If our stride is different from the first attribute's stride, 602 * or if we are using an instance divisor or if the first 603 * attribute's stride didn't cover our element, disable the 604 * interleaved upload optimization. The second case can most 605 * commonly occur in cases where there is a single vertex and, for 606 * example, the data is stored on the application's stack. 607 * 608 * NOTE: This will also disable the optimization in cases where 609 * the data is in a different order than the array indices. 610 * Something like: 611 * 612 * float data[...]; 613 * glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]); 614 * glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]); 615 */ 616 interleaved = 0; 617 } 618 619 upload[nr_uploads++] = input; 620 } 621 } 622 623 /* Now that we've set up all of the buffers, we walk through and reference 624 * each of them. We do this late so that we get the right size in each 625 * buffer and don't reference too little data. 626 */ 627 for (i = 0; i < j; i++) { 628 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; 629 if (buffer->bo) 630 continue; 631 632 const uint32_t start = buffer_range_start[i]; 633 const uint32_t range = buffer_range_end[i] - buffer_range_start[i]; 634 635 buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, 636 range, false); 637 brw_bo_reference(buffer->bo); 638 } 639 640 /* If we need to upload all the arrays, then we can trim those arrays to 641 * only the used elements [min_index, max_index] so long as we adjust all 642 * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias. 643 */ 644 brw->vb.start_vertex_bias = 0; 645 delta = min_index; 646 if (nr_uploads == brw->vb.nr_enabled) { 647 brw->vb.start_vertex_bias = -delta; 648 delta = 0; 649 } 650 651 /* Handle any arrays to be uploaded. */ 652 if (nr_uploads > 1) { 653 if (interleaved) { 654 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; 655 /* All uploads are interleaved, so upload the arrays together as 656 * interleaved. First, upload the contents and set up upload[0]. 657 */ 658 copy_array_to_vbo_array(brw, upload[0], min_index, max_index, 659 buffer, interleaved); 660 buffer->offset -= delta * interleaved; 661 buffer->size += delta * interleaved; 662 buffer->step_rate = 0; 663 664 for (i = 0; i < nr_uploads; i++) { 665 const struct gl_array_attributes *glattrib = upload[i]->glattrib; 666 /* Then, just point upload[i] at upload[0]'s buffer. */ 667 upload[i]->offset = ((const unsigned char *)glattrib->Ptr - ptr); 668 upload[i]->buffer = j; 669 } 670 j++; 671 672 nr_uploads = 0; 673 } 674 } 675 /* Upload non-interleaved arrays */ 676 for (i = 0; i < nr_uploads; i++) { 677 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; 678 const struct gl_vertex_buffer_binding *glbinding = upload[i]->glbinding; 679 const struct gl_array_attributes *glattrib = upload[i]->glattrib; 680 if (glbinding->InstanceDivisor == 0) { 681 copy_array_to_vbo_array(brw, upload[i], min_index, max_index, 682 buffer, glattrib->Format._ElementSize); 683 } else { 684 /* This is an instanced attribute, since its InstanceDivisor 685 * is not zero. Therefore, its data will be stepped after the 686 * instanced draw has been run InstanceDivisor times. 687 */ 688 uint32_t instanced_attr_max_index = 689 (brw->num_instances - 1) / glbinding->InstanceDivisor; 690 copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, 691 buffer, glattrib->Format._ElementSize); 692 } 693 buffer->offset -= delta * buffer->stride; 694 buffer->size += delta * buffer->stride; 695 buffer->step_rate = glbinding->InstanceDivisor; 696 upload[i]->buffer = j++; 697 upload[i]->offset = 0; 698 } 699 700 brw->vb.nr_buffers = j; 701} 702 703void 704brw_prepare_shader_draw_parameters(struct brw_context *brw) 705{ 706 const struct brw_vs_prog_data *vs_prog_data = 707 brw_vs_prog_data(brw->vs.base.prog_data); 708 709 /* For non-indirect draws, upload the shader draw parameters */ 710 if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) && 711 brw->draw.draw_params_bo == NULL) { 712 brw_upload_data(&brw->upload, 713 &brw->draw.params, sizeof(brw->draw.params), 4, 714 &brw->draw.draw_params_bo, 715 &brw->draw.draw_params_offset); 716 } 717 718 if (vs_prog_data->uses_drawid || vs_prog_data->uses_is_indexed_draw) { 719 brw_upload_data(&brw->upload, 720 &brw->draw.derived_params, sizeof(brw->draw.derived_params), 4, 721 &brw->draw.derived_draw_params_bo, 722 &brw->draw.derived_draw_params_offset); 723 } 724} 725 726static void 727brw_upload_indices(struct brw_context *brw) 728{ 729 const struct _mesa_index_buffer *index_buffer = brw->ib.ib; 730 GLuint ib_size; 731 struct brw_bo *old_bo = brw->ib.bo; 732 struct gl_buffer_object *bufferobj; 733 GLuint offset; 734 GLuint ib_type_size; 735 736 if (index_buffer == NULL) 737 return; 738 739 ib_type_size = index_buffer->index_size; 740 ib_size = index_buffer->count ? ib_type_size * index_buffer->count : 741 index_buffer->obj->Size; 742 bufferobj = index_buffer->obj; 743 744 /* Turn into a proper VBO: 745 */ 746 if (!_mesa_is_bufferobj(bufferobj)) { 747 /* Get new bufferobj, offset: 748 */ 749 brw_upload_data(&brw->upload, index_buffer->ptr, ib_size, ib_type_size, 750 &brw->ib.bo, &offset); 751 brw->ib.size = brw->ib.bo->size; 752 } else { 753 offset = (GLuint) (unsigned long) index_buffer->ptr; 754 755 struct brw_bo *bo = 756 intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj), 757 offset, ib_size, false); 758 if (bo != brw->ib.bo) { 759 brw_bo_unreference(brw->ib.bo); 760 brw->ib.bo = bo; 761 brw->ib.size = bufferobj->Size; 762 brw_bo_reference(bo); 763 } 764 } 765 766 /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading 767 * the index buffer state when we're just moving the start index 768 * of our drawing. 769 */ 770 brw->ib.start_vertex_offset = offset / ib_type_size; 771 772 if (brw->ib.bo != old_bo) 773 brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; 774 775 if (index_buffer->index_size != brw->ib.index_size) { 776 brw->ib.index_size = index_buffer->index_size; 777 brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; 778 } 779 780 /* We need to re-emit an index buffer state each time 781 * when cut index flag is changed 782 */ 783 if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) { 784 brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index; 785 brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; 786 } 787} 788 789const struct brw_tracked_state brw_indices = { 790 .dirty = { 791 .mesa = 0, 792 .brw = BRW_NEW_BLORP | 793 BRW_NEW_INDICES, 794 }, 795 .emit = brw_upload_indices, 796}; 797