/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/arrayobj.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/glformats.h"
#include "nir.h"

#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"

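/* Each table below maps a vertex component count (1..4) to the corresponding
 * ISL vertex format; index 0 is unused so the tables can be indexed directly
 * by the attribute's Size.
 */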
static const GLuint double_types_float[5] = {
   0,
   ISL_FORMAT_R64_FLOAT,
   ISL_FORMAT_R64G64_FLOAT,
   ISL_FORMAT_R64G64B64_FLOAT,
   ISL_FORMAT_R64G64B64A64_FLOAT
};

static const GLuint double_types_passthru[5] = {
   0,
   ISL_FORMAT_R64_PASSTHRU,
   ISL_FORMAT_R64G64_PASSTHRU,
   ISL_FORMAT_R64G64B64_PASSTHRU,
   ISL_FORMAT_R64G64B64A64_PASSTHRU
};

static const GLuint float_types[5] = {
   0,
   ISL_FORMAT_R32_FLOAT,
   ISL_FORMAT_R32G32_FLOAT,
   ISL_FORMAT_R32G32B32_FLOAT,
   ISL_FORMAT_R32G32B32A32_FLOAT
};

static const GLuint half_float_types[5] = {
   0,
   ISL_FORMAT_R16_FLOAT,
   ISL_FORMAT_R16G16_FLOAT,
   ISL_FORMAT_R16G16B16_FLOAT,
   ISL_FORMAT_R16G16B16A16_FLOAT
};

static const GLuint fixed_point_types[5] = {
   0,
   ISL_FORMAT_R32_SFIXED,
   ISL_FORMAT_R32G32_SFIXED,
   ISL_FORMAT_R32G32B32_SFIXED,
   ISL_FORMAT_R32G32B32A32_SFIXED
};

static const GLuint uint_types_direct[5] = {
   0,
   ISL_FORMAT_R32_UINT,
   ISL_FORMAT_R32G32_UINT,
   ISL_FORMAT_R32G32B32_UINT,
   ISL_FORMAT_R32G32B32A32_UINT
};

static const GLuint uint_types_norm[5] = {
   0,
   ISL_FORMAT_R32_UNORM,
   ISL_FORMAT_R32G32_UNORM,
   ISL_FORMAT_R32G32B32_UNORM,
   ISL_FORMAT_R32G32B32A32_UNORM
};

static const GLuint uint_types_scale[5] = {
   0,
   ISL_FORMAT_R32_USCALED,
   ISL_FORMAT_R32G32_USCALED,
   ISL_FORMAT_R32G32B32_USCALED,
   ISL_FORMAT_R32G32B32A32_USCALED
};

static const GLuint int_types_direct[5] = {
   0,
   ISL_FORMAT_R32_SINT,
   ISL_FORMAT_R32G32_SINT,
   ISL_FORMAT_R32G32B32_SINT,
   ISL_FORMAT_R32G32B32A32_SINT
};

static const GLuint int_types_norm[5] = {
   0,
   ISL_FORMAT_R32_SNORM,
   ISL_FORMAT_R32G32_SNORM,
   ISL_FORMAT_R32G32B32_SNORM,
   ISL_FORMAT_R32G32B32A32_SNORM
};

static const GLuint int_types_scale[5] = {
   0,
   ISL_FORMAT_R32_SSCALED,
   ISL_FORMAT_R32G32_SSCALED,
   ISL_FORMAT_R32G32B32_SSCALED,
   ISL_FORMAT_R32G32B32A32_SSCALED
};

static const GLuint ushort_types_direct[5] = {
   0,
   ISL_FORMAT_R16_UINT,
   ISL_FORMAT_R16G16_UINT,
   ISL_FORMAT_R16G16B16_UINT,
   ISL_FORMAT_R16G16B16A16_UINT
};

static const GLuint ushort_types_norm[5] = {
   0,
   ISL_FORMAT_R16_UNORM,
   ISL_FORMAT_R16G16_UNORM,
   ISL_FORMAT_R16G16B16_UNORM,
   ISL_FORMAT_R16G16B16A16_UNORM
};

static const GLuint ushort_types_scale[5] = {
   0,
   ISL_FORMAT_R16_USCALED,
   ISL_FORMAT_R16G16_USCALED,
   ISL_FORMAT_R16G16B16_USCALED,
   ISL_FORMAT_R16G16B16A16_USCALED
};

static const GLuint short_types_direct[5] = {
   0,
   ISL_FORMAT_R16_SINT,
   ISL_FORMAT_R16G16_SINT,
   ISL_FORMAT_R16G16B16_SINT,
   ISL_FORMAT_R16G16B16A16_SINT
};

static const GLuint short_types_norm[5] = {
   0,
   ISL_FORMAT_R16_SNORM,
   ISL_FORMAT_R16G16_SNORM,
   ISL_FORMAT_R16G16B16_SNORM,
   ISL_FORMAT_R16G16B16A16_SNORM
};

static const GLuint short_types_scale[5] = {
   0,
   ISL_FORMAT_R16_SSCALED,
   ISL_FORMAT_R16G16_SSCALED,
   ISL_FORMAT_R16G16B16_SSCALED,
   ISL_FORMAT_R16G16B16A16_SSCALED
};

static const GLuint ubyte_types_direct[5] = {
   0,
   ISL_FORMAT_R8_UINT,
   ISL_FORMAT_R8G8_UINT,
   ISL_FORMAT_R8G8B8_UINT,
   ISL_FORMAT_R8G8B8A8_UINT
};

static const GLuint ubyte_types_norm[5] = {
   0,
   ISL_FORMAT_R8_UNORM,
   ISL_FORMAT_R8G8_UNORM,
   ISL_FORMAT_R8G8B8_UNORM,
   ISL_FORMAT_R8G8B8A8_UNORM
};

static const GLuint ubyte_types_scale[5] = {
   0,
   ISL_FORMAT_R8_USCALED,
   ISL_FORMAT_R8G8_USCALED,
   ISL_FORMAT_R8G8B8_USCALED,
   ISL_FORMAT_R8G8B8A8_USCALED
};

static const GLuint byte_types_direct[5] = {
   0,
   ISL_FORMAT_R8_SINT,
   ISL_FORMAT_R8G8_SINT,
   ISL_FORMAT_R8G8B8_SINT,
   ISL_FORMAT_R8G8B8A8_SINT
};

static const GLuint byte_types_norm[5] = {
   0,
   ISL_FORMAT_R8_SNORM,
   ISL_FORMAT_R8G8_SNORM,
   ISL_FORMAT_R8G8B8_SNORM,
   ISL_FORMAT_R8G8B8A8_SNORM
};

static const GLuint byte_types_scale[5] = {
   0,
   ISL_FORMAT_R8_SSCALED,
   ISL_FORMAT_R8G8_SSCALED,
   ISL_FORMAT_R8G8B8_SSCALED,
   ISL_FORMAT_R8G8B8A8_SSCALED
};

static GLuint
double_types(int size, GLboolean doubles)
{
   /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
    * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    * 64-bit components are stored in the URB without any conversion."
    * Also included in the BDW PRM, Volume 7, page 470, table "Source Element
    * Formats Supported in VF Unit".
    *
    * Earlier PRMs don't include those references, so on gen7 we can't use
    * the PASSTHRU formats directly. Even so, we prefer to return the
    * passthru format here, because it reflects what we want to achieve,
    * even though gen < 8 then needs a workaround.
    */
   return (doubles
           ? double_types_passthru[size]
           : double_types_float[size]);
}

/**
 * Given vertex array type/size/format/normalized info, return
 * the appropriate hardware surface type.
 * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
 */
unsigned
brw_get_vertex_surface_type(struct brw_context *brw,
                            const struct gl_vertex_format *glformat)
{
   int size = glformat->Size;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const bool is_ivybridge_or_older =
      devinfo->gen <= 7 && !devinfo->is_baytrail && !devinfo->is_haswell;

   if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
      fprintf(stderr, "type %s size %d normalized %d\n",
              _mesa_enum_to_string(glformat->Type),
              glformat->Size, glformat->Normalized);

   if (glformat->Integer) {
      assert(glformat->Format == GL_RGBA); /* sanity check */
      switch (glformat->Type) {
      case GL_INT: return int_types_direct[size];
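      /* Ivybridge and older lack 3-component 8-bit and 16-bit integer
       * vertex formats in the VF, so promote those to the 4-component
       * equivalent; the extra component is simply ignored by the shader.
       */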
      case GL_SHORT:
         if (is_ivybridge_or_older && size == 3)
            return short_types_direct[4];
         else
            return short_types_direct[size];
      case GL_BYTE:
         if (is_ivybridge_or_older && size == 3)
            return byte_types_direct[4];
         else
            return byte_types_direct[size];
      case GL_UNSIGNED_INT: return uint_types_direct[size];
      case GL_UNSIGNED_SHORT:
         if (is_ivybridge_or_older && size == 3)
            return ushort_types_direct[4];
         else
            return ushort_types_direct[size];
      case GL_UNSIGNED_BYTE:
         if (is_ivybridge_or_older && size == 3)
            return ubyte_types_direct[4];
         else
            return ubyte_types_direct[size];
      default: unreachable("not reached");
      }
   } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
      return ISL_FORMAT_R11G11B10_FLOAT;
   } else if (glformat->Normalized) {
      switch (glformat->Type) {
      case GL_DOUBLE: return double_types(size, glformat->Doubles);
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT:
      case GL_HALF_FLOAT_OES:
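         /* Gen4/5 can't source 3-component half-float vertex formats, so
          * fall back to the 4-component variant there.
          */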
         if (devinfo->gen < 6 && size == 3)
            return half_float_types[4];
         else
            return half_float_types[size];
      case GL_INT: return int_types_norm[size];
      case GL_SHORT: return short_types_norm[size];
      case GL_BYTE: return byte_types_norm[size];
      case GL_UNSIGNED_INT: return uint_types_norm[size];
      case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
      case GL_UNSIGNED_BYTE:
         if (glformat->Format == GL_BGRA) {
            /* See GL_EXT_vertex_array_bgra */
            assert(size == 4);
            return ISL_FORMAT_B8G8R8A8_UNORM;
         } else {
            return ubyte_types_norm[size];
         }
      case GL_FIXED:
         if (devinfo->gen >= 8 || devinfo->is_haswell)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: Pre-Haswell, the hardware doesn't really support the formats
       * we'd like to use here, so upload everything as UINT and fix it
       * in the shader.
       */
      case GL_INT_2_10_10_10_REV:
         assert(size == 4);
         if (devinfo->gen >= 8 || devinfo->is_haswell) {
            return glformat->Format == GL_BGRA
               ? ISL_FORMAT_B10G10R10A2_SNORM
               : ISL_FORMAT_R10G10B10A2_SNORM;
         }
         return ISL_FORMAT_R10G10B10A2_UINT;
      case GL_UNSIGNED_INT_2_10_10_10_REV:
         assert(size == 4);
         if (devinfo->gen >= 8 || devinfo->is_haswell) {
            return glformat->Format == GL_BGRA
               ? ISL_FORMAT_B10G10R10A2_UNORM
               : ISL_FORMAT_R10G10B10A2_UNORM;
         }
         return ISL_FORMAT_R10G10B10A2_UINT;
      default: unreachable("not reached");
      }
   } else {
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: the hardware doesn't really support the formats we'd
       * like to use here, so upload everything as UINT and fix
       * it in the shader.
       */
      if (glformat->Type == GL_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (devinfo->gen >= 8 || devinfo->is_haswell) {
            return glformat->Format == GL_BGRA
               ? ISL_FORMAT_B10G10R10A2_SSCALED
               : ISL_FORMAT_R10G10B10A2_SSCALED;
         }
         return ISL_FORMAT_R10G10B10A2_UINT;
      } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (devinfo->gen >= 8 || devinfo->is_haswell) {
            return glformat->Format == GL_BGRA
               ? ISL_FORMAT_B10G10R10A2_USCALED
               : ISL_FORMAT_R10G10B10A2_USCALED;
         }
         return ISL_FORMAT_R10G10B10A2_UINT;
      }
      assert(glformat->Format == GL_RGBA); /* sanity check */
      switch (glformat->Type) {
      case GL_DOUBLE: return double_types(size, glformat->Doubles);
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT:
      case GL_HALF_FLOAT_OES:
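         /* Same gen4/5 fallback to the 4-component half-float format as in
          * the normalized path above.
          */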
         if (devinfo->gen < 6 && size == 3)
            return half_float_types[4];
         else
            return half_float_types[size];
      case GL_INT: return int_types_scale[size];
      case GL_SHORT: return short_types_scale[size];
      case GL_BYTE: return byte_types_scale[size];
      case GL_UNSIGNED_INT: return uint_types_scale[size];
      case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
      case GL_FIXED:
         if (devinfo->gen >= 8 || devinfo->is_haswell)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      default: unreachable("not reached");
      }
   }
}

static void
copy_array_to_vbo_array(struct brw_context *brw,
                        struct brw_vertex_element *element,
                        int min, int max,
                        struct brw_vertex_buffer *buffer,
                        GLuint dst_stride)
{
   const struct gl_vertex_buffer_binding *glbinding = element->glbinding;
   const struct gl_array_attributes *glattrib = element->glattrib;
   const struct gl_vertex_format *glformat = &glattrib->Format;
   const int src_stride = glbinding->Stride;

   /* If the source stride is zero, we just want to upload the current
    * attribute once and set the buffer's stride to 0.  There's no need
    * to replicate it out.
    */
   if (src_stride == 0) {
      brw_upload_data(&brw->upload, glattrib->Ptr, glformat->_ElementSize,
                      glformat->_ElementSize, &buffer->bo, &buffer->offset);

      buffer->stride = 0;
      buffer->size = glformat->_ElementSize;
      return;
   }

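   /* Otherwise, copy only the used elements [min, max] into the upload
    * buffer, repacking each element to dst_stride.
    */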
   const unsigned char *src = glattrib->Ptr + min * src_stride;
   int count = max - min + 1;
   GLuint size = count * dst_stride;
   uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride,
                                   &buffer->bo, &buffer->offset);

   /* The GL 4.5 spec says:
    *      "If any enabled array's buffer binding is zero when DrawArrays or
    *      one of the other drawing commands defined in section 10.4 is called,
    *      the result is undefined."
    *
    * In that case, just leave dst filled with undefined values.
    */
   if (src != NULL) {
      if (dst_stride == src_stride) {
         memcpy(dst, src, size);
      } else {
         while (count--) {
            memcpy(dst, src, dst_stride);
            src += src_stride;
            dst += dst_stride;
         }
      }
   }
   buffer->stride = dst_stride;
   buffer->size = size;
}

void
brw_prepare_vertices(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VERTEX_PROGRAM */
   const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs =
      nir_get_single_slot_attribs_mask(vs_prog_data->inputs_read,
                                       vp->DualSlotInputs);
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (devinfo->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                             ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      const unsigned index = ffsll(vs_inputs) - 1;
      assert(index < 64);

      struct brw_vertex_element *input = &brw->vb.inputs[index];
      input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(index)) != 0;
      vs_inputs &= ~BITFIELD64_BIT(index);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

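   /* The vertex buffer setup from an earlier draw is still valid, so there
    * is nothing to re-upload.
    */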
   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_vertex_buffer_binding *glbinding = input->glbinding;
      const struct gl_array_attributes *glattrib = input->glattrib;

      if (_mesa_is_bufferobj(glbinding->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glbinding->BufferObj);

         const uint32_t offset = _mesa_draw_binding_offset(glbinding) +
            _mesa_draw_attributes_relative_offset(glattrib);

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
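         /* An instanced attribute steps once every InstanceDivisor
          * instances, so the last element that can be read lies
          * Stride * ((num_instances - 1) / InstanceDivisor) bytes past the
          * start of the array.
          */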
         if (glbinding->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glbinding->Stride * brw->baseinstance;
               range = (glbinding->Stride * ((brw->num_instances - 1) /
                                             glbinding->InstanceDivisor) +
                        glattrib->Format._ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glbinding->Stride;
               range = (glbinding->Stride * (max_index - min_index) +
                        glattrib->Format._ElementSize);
            }
         }

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         unsigned k;
         for (k = 0; k < i; k++) {
            struct brw_vertex_element *other = brw->vb.enabled[k];
            const struct gl_vertex_buffer_binding *obind = other->glbinding;
            const struct gl_array_attributes *oattrib = other->glattrib;
            const uint32_t ooffset = _mesa_draw_binding_offset(obind) +
               _mesa_draw_attributes_relative_offset(oattrib);
            if (glbinding->BufferObj == obind->BufferObj &&
                glbinding->Stride == obind->Stride &&
                glbinding->InstanceDivisor == obind->InstanceDivisor &&
                (offset - ooffset) < glbinding->Stride)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = offset - ooffset;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = offset;
            buffer->stride = glbinding->Stride;
            buffer->step_rate = glbinding->InstanceDivisor;
            buffer->size = glbinding->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

            input->buffer = j++;
            input->offset = 0;
         }
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glbinding->Stride;
            ptr = glattrib->Ptr;
         }
         else if (interleaved != glbinding->Stride ||
                  glbinding->InstanceDivisor != 0 ||
                  glattrib->Ptr < ptr ||
                  (uintptr_t)(glattrib->Ptr - ptr) +
                  glattrib->Format._ElementSize > interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if we are using an instance divisor or if the first
             * attribute's stride didn't cover our element, disable the
             * interleaved upload optimization.  The second case can most
             * commonly occur in cases where there is a single vertex and, for
             * example, the data is stored on the application's stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start,
                                          range, false);
      brw_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;
         buffer->step_rate = 0;

         for (i = 0; i < nr_uploads; i++) {
            const struct gl_array_attributes *glattrib = upload[i]->glattrib;
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset = ((const unsigned char *)glattrib->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      const struct gl_vertex_buffer_binding *glbinding = upload[i]->glbinding;
      const struct gl_array_attributes *glattrib = upload[i]->glattrib;
      if (glbinding->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, glattrib->Format._ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / glbinding->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, glattrib->Format._ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = glbinding->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}

void
brw_prepare_shader_draw_parameters(struct brw_context *brw)
{
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);

   /* For non-indirect draws, upload the shader draw parameters */
   if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
       brw->draw.draw_params_bo == NULL) {
      brw_upload_data(&brw->upload,
                      &brw->draw.params, sizeof(brw->draw.params), 4,
                      &brw->draw.draw_params_bo,
                      &brw->draw.draw_params_offset);
   }

   if (vs_prog_data->uses_drawid || vs_prog_data->uses_is_indexed_draw) {
      brw_upload_data(&brw->upload,
                      &brw->draw.derived_params,
                      sizeof(brw->draw.derived_params), 4,
                      &brw->draw.derived_draw_params_bo,
                      &brw->draw.derived_draw_params_offset);
   }
}

static void
brw_upload_indices(struct brw_context *brw)
{
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   struct brw_bo *old_bo = brw->ib.bo;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = index_buffer->index_size;
   ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
                                   index_buffer->obj->Size;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO: */
   if (!_mesa_is_bufferobj(bufferobj)) {
      /* Get new bufferobj, offset: */
      brw_upload_data(&brw->upload, index_buffer->ptr, ib_size, ib_type_size,
                      &brw->ib.bo, &offset);
      brw->ib.size = brw->ib.bo->size;
   } else {
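      /* The index data already lives in a buffer object, so ptr is an
       * offset into that buffer rather than a user-space pointer.
       */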
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
                                offset, ib_size, false);
      if (bo != brw->ib.bo) {
         brw_bo_unreference(brw->ib.bo);
         brw->ib.bo = bo;
         brw->ib.size = bufferobj->Size;
         brw_bo_reference(bo);
      }
   }

   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
    * the index buffer state when we're just moving the start index
    * of our drawing.
    */
   brw->ib.start_vertex_offset = offset / ib_type_size;

   if (brw->ib.bo != old_bo)
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;

   if (index_buffer->index_size != brw->ib.index_size) {
      brw->ib.index_size = index_buffer->index_size;
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
   }

   /* We also need to re-emit index buffer state whenever the cut index
    * flag changes.
    */
   if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) {
      brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index;
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
   }
}

const struct brw_tracked_state brw_indices = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_INDICES,
   },
   .emit = brw_upload_indices,
};