u_vbuf.c revision 7ec681f3
1/**************************************************************************
2 *
3 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * This module uploads user buffers and translates the vertex buffers which
30 * contain incompatible vertices (i.e. not supported by the driver/hardware)
31 * into compatible ones, based on the Gallium CAPs.
32 *
33 * It does not upload index buffers.
34 *
35 * The module heavily uses bitmasks to represent per-buffer and
36 * per-vertex-element flags to avoid looping over the list of buffers just
37 * to see if there's a non-zero stride, or user buffer, or unsupported format,
38 * etc.
39 *
40 * There are 3 categories of vertex elements, which are processed separately:
41 * - per-vertex attribs (stride != 0, instance_divisor == 0)
42 * - instanced attribs (stride != 0, instance_divisor > 0)
43 * - constant attribs (stride == 0)
44 *
45 * All needed uploads and translations are performed for every draw command,
46 * but only the subset of vertices needed for that draw command is uploaded
47 * or translated. (The module never translates whole buffers.)
48 *
49 *
50 * The module consists of two main parts:
51 *
52 *
53 * 1) Translate (u_vbuf_translate_begin/end)
54 *
55 * This is pretty much a vertex fetch fallback. It translates vertices from
56 * one vertex buffer to another in an unused vertex buffer slot. It does
57 * whatever is needed to make the vertices readable by the hardware (changes
58 * vertex formats and aligns offsets and strides). The translate module is
59 * used here.
60 *
61 * Each of the 3 categories is translated to a separate buffer.
62 * Only the [min_index, max_index] range is translated. For instanced attribs,
63 * the range is [start_instance, start_instance+instance_count]. For constant
64 * attribs, the range is [0, 1].
65 *
66 *
67 * 2) User buffer uploading (u_vbuf_upload_buffers)
68 *
69 * Only the [min_index, max_index] range is uploaded (just like Translate)
70 * with a single memcpy.
71 *
72 * This method works best for non-indexed draw operations or indexed draw
73 * operations where the [min_index, max_index] range is not much bigger
74 * than the vertex count.
75 *
76 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
77 * the per-vertex attribs are uploaded via the translate module, all packed
78 * into one vertex buffer, and the indexed draw call is turned into
79 * a non-indexed one in the process. This adds complexity to the translate
80 * part, but it prevents badly behaved apps from bringing your frame
81 * rate down.
82 *
83 *
84 * If there is nothing to do, it forwards every command to the driver.
85 * The module also has its own CSO cache of vertex element states.
86 */
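
/* Example (illustrative sketch only, not part of this file): how a driver can
 * wire up u_vbuf using the entry points declared in u_vbuf.h.  The "ctx"
 * wrapper struct, the needs64b flag and the velems/buffers/info/draw
 * variables are assumptions made for the example.
 *
 *    struct u_vbuf_caps caps;
 *    u_vbuf_get_caps(screen, &caps, needs64b);
 *    if (caps.fallback_always || caps.fallback_only_for_user_vbuffers)
 *       ctx->vbuf = u_vbuf_create(pipe, &caps);   // only when a fallback can be needed
 *
 *    // Later, when binding state and drawing through the fallback:
 *    u_vbuf_set_vertex_elements(ctx->vbuf, &velems);
 *    u_vbuf_set_vertex_buffers(ctx->vbuf, 0, num_buffers, 0, false, buffers);
 *    u_vbuf_draw_vbo(ctx->vbuf, &info, drawid_offset, indirect, draw);
 */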
87
88#include "util/u_vbuf.h"
89
90#include "util/u_dump.h"
91#include "util/format/u_format.h"
92#include "util/u_helpers.h"
93#include "util/u_inlines.h"
94#include "util/u_memory.h"
95#include "indices/u_primconvert.h"
96#include "util/u_prim_restart.h"
97#include "util/u_screen.h"
98#include "util/u_upload_mgr.h"
99#include "translate/translate.h"
100#include "translate/translate_cache.h"
101#include "cso_cache/cso_cache.h"
102#include "cso_cache/cso_hash.h"
103
104struct u_vbuf_elements {
105   unsigned count;
106   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
107
108   unsigned src_format_size[PIPE_MAX_ATTRIBS];
109
110   /* If (velem[i].src_format != native_format[i]), the vertex buffer
111    * referenced by the vertex element cannot be used for rendering and
112    * its vertex data must be translated to native_format[i]. */
113   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
114   unsigned native_format_size[PIPE_MAX_ATTRIBS];
115
116   /* Which buffers are used by the vertex element state. */
117   uint32_t used_vb_mask;
118   /* An element is incompatible for one of two reasons:
119    * - src_format != native_format, as discussed above.
120    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
121   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
122   /* Which buffers have at least one incompatible vertex element
123    * referencing them. */
124   uint32_t incompatible_vb_mask_any;
125   /* Which buffers have only incompatible vertex elements referencing them. */
126   uint32_t incompatible_vb_mask_all;
127   /* Which buffers have at least one compatible vertex element
128    * referencing them. */
129   uint32_t compatible_vb_mask_any;
130   /* Which buffers have only compatible vertex elements referencing them. */
131   uint32_t compatible_vb_mask_all;
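   /* Illustrative example of how the *_any / *_all masks relate: if vertex
    * elements 0 and 2 both read buffer 3 and only element 0 is incompatible,
    * bit 3 ends up set in incompatible_vb_mask_any and compatible_vb_mask_any,
    * but in neither incompatible_vb_mask_all nor compatible_vb_mask_all. */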
132
133   /* Which buffers have at least one non-instanced vertex element
134    * referencing them. */
135   uint32_t noninstance_vb_mask_any;
136
137   /* Which buffers are used by multiple vertex attribs. */
138   uint32_t interleaved_vb_mask;
139
140   void *driver_cso;
141};
142
143enum {
144   VB_VERTEX = 0,
145   VB_INSTANCE = 1,
146   VB_CONST = 2,
147   VB_NUM = 3
148};
149
150struct u_vbuf {
151   struct u_vbuf_caps caps;
152   bool has_signed_vb_offset;
153
154   struct pipe_context *pipe;
155   struct translate_cache *translate_cache;
156   struct cso_cache cso_cache;
157
158   struct primconvert_context *pc;
159   bool flatshade_first;
160
161   /* This is what was set in set_vertex_buffers.
162    * May contain user buffers. */
163   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
164   uint32_t enabled_vb_mask;
165
166   /* Vertex buffers for the driver.
167    * There are usually no user buffers. */
168   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
169   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
170                                   call of set_vertex_buffers */
171
172   /* Vertex elements. */
173   struct u_vbuf_elements *ve, *ve_saved;
174
175   /* Vertex elements used for the translate fallback. */
176   struct cso_velems_state fallback_velems;
177   /* True if the translate fallback vertex element state is currently
178    * bound (and therefore used for rendering). */
179   boolean using_translate;
180   /* The vertex buffer slot indices where translated vertices are
181    * stored, one slot per category. */
182   unsigned fallback_vbs[VB_NUM];
183   unsigned fallback_vbs_mask;
184
185   /* Which buffer is a user buffer. */
186   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
187   /* Which buffer is incompatible (unaligned). */
188   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
189   /* Which buffer has a non-zero stride. */
190   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
191   /* Which buffers are allowed (supported by hardware). */
192   uint32_t allowed_vb_mask;
193};
194
195static void *
196u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
197                              const struct pipe_vertex_element *attribs);
198static void u_vbuf_delete_vertex_elements(void *ctx, void *state,
199                                          enum cso_cache_type type);
200
201static const struct {
202   enum pipe_format from, to;
203} vbuf_format_fallbacks[] = {
204   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
205   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
206   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
207   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
208   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
209   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
210   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
211   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
212   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
213   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
214   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
215   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
216   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
217   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
218   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
219   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
220   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
221   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
222   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
223   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
224   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
225   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
226   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
227   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
228   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
229   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
230   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
231   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
232   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
233   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
234   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
235   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
236   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
237   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
238   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
239   { PIPE_FORMAT_R16G16B16_SINT,       PIPE_FORMAT_R32G32B32_SINT },
240   { PIPE_FORMAT_R16G16B16_UINT,       PIPE_FORMAT_R32G32B32_UINT },
241   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
242   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
243   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
244   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
245   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
246   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
247   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
248   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
249   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
250   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
251   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
252   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
253   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
254   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
255   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
256   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
257   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
258   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
259   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
260   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
261   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
262   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
263   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
264   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
265   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
266};
267
268void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
269                     bool needs64b)
270{
271   unsigned i;
272
273   memset(caps, 0, sizeof(*caps));
274
275   /* I'd rather have a bitfield of which formats are supported and a static
276    * table of the translations indexed by format, but without C99 designated
277    * initializers we can't easily make a sparsely-populated table indexed by
278    * format.  So we construct the sparse table here.
279    */
280   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
281      caps->format_translation[i] = i;
282
283   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
284      enum pipe_format format = vbuf_format_fallbacks[i].from;
285      unsigned comp_bits = util_format_get_component_bits(format, 0, 0);
286
287      if ((comp_bits > 32) && !needs64b)
288         continue;
289
290      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0,
291                                       PIPE_BIND_VERTEX_BUFFER)) {
292         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
293         caps->fallback_always = true;
294      }
295   }
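   /* For example (illustrative): on a screen without R16G16B16_FLOAT vertex
    * fetch support, the loop above leaves
    * caps->format_translation[PIPE_FORMAT_R16G16B16_FLOAT] ==
    * PIPE_FORMAT_R32G32B32_FLOAT; for supported formats the identity mapping
    * set up by the first loop is kept. */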
296
297   caps->buffer_offset_unaligned =
298      !screen->get_param(screen,
299                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
300   caps->buffer_stride_unaligned =
301     !screen->get_param(screen,
302                        PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
303   caps->velem_src_offset_unaligned =
304      !screen->get_param(screen,
305                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
306   caps->user_vertex_buffers =
307      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
308   caps->max_vertex_buffers =
309      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
310
311   if (screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART) ||
312       screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX)) {
313      caps->rewrite_restart_index = screen->get_param(screen, PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART);
314      caps->supported_restart_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART);
315      caps->supported_restart_modes |= BITFIELD_BIT(PIPE_PRIM_PATCHES);
316      if (caps->supported_restart_modes != BITFIELD_MASK(PIPE_PRIM_MAX))
317         caps->fallback_always = true;
318      caps->fallback_always |= caps->rewrite_restart_index;
319   }
320   caps->supported_prim_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES);
321   if (caps->supported_prim_modes != BITFIELD_MASK(PIPE_PRIM_MAX))
322      caps->fallback_always = true;
323
324   if (!screen->is_format_supported(screen, PIPE_FORMAT_R8_UINT, PIPE_BUFFER, 0, 0, PIPE_BIND_INDEX_BUFFER))
325      caps->fallback_always = caps->rewrite_ubyte_ibs = true;
326
327   /* OpenGL 2.0 requires a minimum of 16 vertex buffers */
328   if (caps->max_vertex_buffers < 16)
329      caps->fallback_always = true;
330
331   if (!caps->buffer_offset_unaligned ||
332       !caps->buffer_stride_unaligned ||
333       !caps->velem_src_offset_unaligned)
334      caps->fallback_always = true;
335
336   if (!caps->fallback_always && !caps->user_vertex_buffers)
337      caps->fallback_only_for_user_vbuffers = true;
338}
339
340struct u_vbuf *
341u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps)
342{
343   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);
344
345   mgr->caps = *caps;
346   mgr->pipe = pipe;
347   if (caps->rewrite_ubyte_ibs || caps->rewrite_restart_index ||
348       /* require all but patches */
349       ((caps->supported_prim_modes & caps->supported_restart_modes & BITFIELD_MASK(PIPE_PRIM_MAX))) !=
350                                      BITFIELD_MASK(PIPE_PRIM_MAX)) {
351      struct primconvert_config cfg;
352      cfg.fixed_prim_restart = caps->rewrite_restart_index;
353      cfg.primtypes_mask = caps->supported_prim_modes;
354      cfg.restart_primtypes_mask = caps->supported_restart_modes;
355      mgr->pc = util_primconvert_create_config(pipe, &cfg);
356   }
357   mgr->translate_cache = translate_cache_create();
358   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
359   mgr->allowed_vb_mask = u_bit_consecutive(0, mgr->caps.max_vertex_buffers);
360
361   mgr->has_signed_vb_offset =
362      pipe->screen->get_param(pipe->screen,
363                              PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);
364
365   cso_cache_init(&mgr->cso_cache, pipe);
366   cso_cache_set_delete_cso_callback(&mgr->cso_cache,
367                                     u_vbuf_delete_vertex_elements, pipe);
368
369   return mgr;
370}
371
372/* u_vbuf uses its own caching for vertex elements, because it needs to keep
373 * its own preprocessed state per vertex element CSO. */
374static struct u_vbuf_elements *
375u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr,
376                                    const struct cso_velems_state *velems)
377{
378   struct pipe_context *pipe = mgr->pipe;
379   unsigned key_size, hash_key;
380   struct cso_hash_iter iter;
381   struct u_vbuf_elements *ve;
382
383   /* The count needs to be included in the stored state data too. */
384   key_size = sizeof(struct pipe_vertex_element) * velems->count +
385              sizeof(unsigned);
386   hash_key = cso_construct_key((void*)velems, key_size);
387   iter = cso_find_state_template(&mgr->cso_cache, hash_key, CSO_VELEMENTS,
388                                  (void*)velems, key_size);
389
390   if (cso_hash_iter_is_null(iter)) {
391      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
392      memcpy(&cso->state, velems, key_size);
393      cso->data = u_vbuf_create_vertex_elements(mgr, velems->count,
394                                                velems->velems);
395
396      iter = cso_insert_state(&mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
397      ve = cso->data;
398   } else {
399      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
400   }
401
402   assert(ve);
403
404   if (ve != mgr->ve)
405      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);
406
407   return ve;
408}
409
410void u_vbuf_set_vertex_elements(struct u_vbuf *mgr,
411                                const struct cso_velems_state *velems)
412{
413   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, velems);
414}
415
416void u_vbuf_set_flatshade_first(struct u_vbuf *mgr, bool flatshade_first)
417{
418   mgr->flatshade_first = flatshade_first;
419}
420
421void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr)
422{
423   mgr->ve = NULL;
424}
425
426void u_vbuf_destroy(struct u_vbuf *mgr)
427{
428   struct pipe_screen *screen = mgr->pipe->screen;
429   unsigned i;
430   const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
431                                                    PIPE_SHADER_CAP_MAX_INPUTS);
432
433   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, 0, num_vb, false, NULL);
434
435   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
436      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
437   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
438      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);
439
440   if (mgr->pc)
441      util_primconvert_destroy(mgr->pc);
442
443   translate_cache_destroy(mgr->translate_cache);
444   cso_cache_delete(&mgr->cso_cache);
445   FREE(mgr);
446}
447
448static enum pipe_error
449u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
450                         const struct pipe_draw_info *info,
451                         const struct pipe_draw_start_count_bias *draw,
452                         unsigned vb_mask, unsigned out_vb,
453                         int start_vertex, unsigned num_vertices,
454                         int min_index, boolean unroll_indices)
455{
456   struct translate *tr;
457   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
458   struct pipe_resource *out_buffer = NULL;
459   uint8_t *out_map;
460   unsigned out_offset, mask;
461
462   /* Get a translate object. */
463   tr = translate_cache_find(mgr->translate_cache, key);
464
465   /* Map buffers we want to translate. */
466   mask = vb_mask;
467   while (mask) {
468      struct pipe_vertex_buffer *vb;
469      unsigned offset;
470      uint8_t *map;
471      unsigned i = u_bit_scan(&mask);
472
473      vb = &mgr->vertex_buffer[i];
474      offset = vb->buffer_offset + vb->stride * start_vertex;
475
476      if (vb->is_user_buffer) {
477         map = (uint8_t*)vb->buffer.user + offset;
478      } else {
479         unsigned size = vb->stride ? num_vertices * vb->stride
480                                    : sizeof(double)*4;
481
482         if (!vb->buffer.resource) {
483            static uint64_t dummy_buf[4] = { 0 };
484            tr->set_buffer(tr, i, dummy_buf, 0, 0);
485            continue;
486         }
487
488         if (vb->stride) {
489            /* The stride cannot be used to calculate the map size of the buffer:
490             * it only determines the bytes between elements, not the size of the
491             * elements themselves, so if stride < element_size, the mapped size
492             * would be too small and the conversion would overrun the mapping.
493             *
494             * Instead, add the size of the largest possible attribute to the final
495             * attribute's offset to ensure the map is large enough.
496             */
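            /* Illustrative numbers: with stride = 4 and a 16-byte attrib,
             * num_vertices * stride would leave the last element 12 bytes
             * short, so the MAX2 below pads the mapped range with the size
             * of the largest possible attrib instead. */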
497            unsigned last_offset = size - vb->stride;
498            size = MAX2(size, last_offset + sizeof(double)*4);
499         }
500
501         if (offset + size > vb->buffer.resource->width0) {
502            /* Don't try to map past end of buffer.  This often happens when
503             * we're translating an attribute that's at offset > 0 from the
504             * start of the vertex.  If we'd subtract attrib's offset from
505             * the size, this probably wouldn't happen.
506             */
507            size = vb->buffer.resource->width0 - offset;
508
509            /* Also adjust num_vertices.  A common user error is to call
510             * glDrawRangeElements() with incorrect 'end' argument.  The 'end
511             * value should be the max index value, but people often
512             * accidentally add one to this value.  This adjustment avoids
513             * crashing (by reading past the end of a hardware buffer mapping)
514             * when people do that.
515             */
516            num_vertices = (size + vb->stride - 1) / vb->stride;
517         }
518
519         map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size,
520                                     PIPE_MAP_READ, &vb_transfer[i]);
521      }
522
523      /* Subtract min_index so that indexing with the index buffer works. */
524      if (unroll_indices) {
525         map -= (ptrdiff_t)vb->stride * min_index;
526      }
527
528      tr->set_buffer(tr, i, map, vb->stride, info->max_index);
529   }
530
531   /* Translate. */
532   if (unroll_indices) {
533      struct pipe_transfer *transfer = NULL;
534      const unsigned offset = draw->start * info->index_size;
535      uint8_t *map;
536
537      /* Create and map the output buffer. */
538      u_upload_alloc(mgr->pipe->stream_uploader, 0,
539                     key->output_stride * draw->count, 4,
540                     &out_offset, &out_buffer,
541                     (void**)&out_map);
542      if (!out_buffer)
543         return PIPE_ERROR_OUT_OF_MEMORY;
544
545      if (info->has_user_indices) {
546         map = (uint8_t*)info->index.user + offset;
547      } else {
548         map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
549                                     draw->count * info->index_size,
550                                     PIPE_MAP_READ, &transfer);
551      }
552
553      switch (info->index_size) {
554      case 4:
555         tr->run_elts(tr, (unsigned*)map, draw->count, 0, 0, out_map);
556         break;
557      case 2:
558         tr->run_elts16(tr, (uint16_t*)map, draw->count, 0, 0, out_map);
559         break;
560      case 1:
561         tr->run_elts8(tr, map, draw->count, 0, 0, out_map);
562         break;
563      }
564
565      if (transfer) {
566         pipe_buffer_unmap(mgr->pipe, transfer);
567      }
568   } else {
569      /* Create and map the output buffer. */
570      u_upload_alloc(mgr->pipe->stream_uploader,
571                     mgr->has_signed_vb_offset ?
572                        0 : key->output_stride * start_vertex,
573                     key->output_stride * num_vertices, 4,
574                     &out_offset, &out_buffer,
575                     (void**)&out_map);
576      if (!out_buffer)
577         return PIPE_ERROR_OUT_OF_MEMORY;
578
579      out_offset -= key->output_stride * start_vertex;
580
581      tr->run(tr, 0, num_vertices, 0, 0, out_map);
582   }
583
584   /* Unmap all buffers. */
585   mask = vb_mask;
586   while (mask) {
587      unsigned i = u_bit_scan(&mask);
588
589      if (vb_transfer[i]) {
590         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
591      }
592   }
593
594   /* Setup the new vertex buffer. */
595   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
596   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;
597
598   /* Move the buffer reference. */
599   pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
600   mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
601   mgr->real_vertex_buffer[out_vb].is_user_buffer = false;
602
603   return PIPE_OK;
604}
605
606static boolean
607u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
608                                    unsigned mask[VB_NUM])
609{
610   unsigned type;
611   unsigned fallback_vbs[VB_NUM];
612   /* Set the bit for each buffer which is incompatible or isn't enabled. */
613   uint32_t unused_vb_mask =
614      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
615      ~mgr->enabled_vb_mask;
616   uint32_t unused_vb_mask_orig;
617   boolean insufficient_buffers = false;
618
619   /* No vertex buffers available at all */
620   if (!unused_vb_mask)
621      return FALSE;
622
623   memset(fallback_vbs, ~0, sizeof(fallback_vbs));
624   mgr->fallback_vbs_mask = 0;
625
626   /* Find free slots for each type if needed. */
627   unused_vb_mask_orig = unused_vb_mask;
628   for (type = 0; type < VB_NUM; type++) {
629      if (mask[type]) {
630         uint32_t index;
631
632         if (!unused_vb_mask) {
633            insufficient_buffers = true;
634            break;
635         }
636
637         index = ffs(unused_vb_mask) - 1;
638         fallback_vbs[type] = index;
639         mgr->fallback_vbs_mask |= 1 << index;
640         unused_vb_mask &= ~(1 << index);
641         /*printf("found slot=%i for type=%i\n", index, type);*/
642      }
643   }
644
645   if (insufficient_buffers) {
646      /* There aren't enough free vertex buffer slots for all attrib types,
647       * so they will have to share one buffer. */
648      uint32_t index = ffs(unused_vb_mask_orig) - 1;
649      /* When sharing one vertex buffer use per-vertex frequency for everything. */
650      fallback_vbs[VB_VERTEX] = index;
651      mgr->fallback_vbs_mask = 1 << index;
652      mask[VB_VERTEX] = mask[VB_VERTEX] | mask[VB_CONST] | mask[VB_INSTANCE];
653      mask[VB_CONST] = 0;
654      mask[VB_INSTANCE] = 0;
655   }
656
657   for (type = 0; type < VB_NUM; type++) {
658      if (mask[type]) {
659         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
660      }
661   }
662
663   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
664   return TRUE;
665}
666
667static boolean
668u_vbuf_translate_begin(struct u_vbuf *mgr,
669                       const struct pipe_draw_info *info,
670                       const struct pipe_draw_start_count_bias *draw,
671                       int start_vertex, unsigned num_vertices,
672                       int min_index, boolean unroll_indices)
673{
674   unsigned mask[VB_NUM] = {0};
675   struct translate_key key[VB_NUM];
676   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
677   unsigned i, type;
678   const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
679                                         mgr->ve->used_vb_mask;
680
681   const int start[VB_NUM] = {
682      start_vertex,           /* VERTEX */
683      info->start_instance,   /* INSTANCE */
684      0                       /* CONST */
685   };
686
687   const unsigned num[VB_NUM] = {
688      num_vertices,           /* VERTEX */
689      info->instance_count,   /* INSTANCE */
690      1                       /* CONST */
691   };
692
693   memset(key, 0, sizeof(key));
694   memset(elem_index, ~0, sizeof(elem_index));
695
696   /* See if there are vertex attribs of each type to translate and
697    * which ones. */
698   for (i = 0; i < mgr->ve->count; i++) {
699      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;
700
701      if (!mgr->vertex_buffer[vb_index].stride) {
702         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
703             !(incompatible_vb_mask & (1 << vb_index))) {
704            continue;
705         }
706         mask[VB_CONST] |= 1 << vb_index;
707      } else if (mgr->ve->ve[i].instance_divisor) {
708         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
709             !(incompatible_vb_mask & (1 << vb_index))) {
710            continue;
711         }
712         mask[VB_INSTANCE] |= 1 << vb_index;
713      } else {
714         if (!unroll_indices &&
715             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
716             !(incompatible_vb_mask & (1 << vb_index))) {
717            continue;
718         }
719         mask[VB_VERTEX] |= 1 << vb_index;
720      }
721   }
722
723   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
724
725   /* Find free vertex buffer slots. */
726   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
727      return FALSE;
728   }
729
730   /* Initialize the translate keys. */
731   for (i = 0; i < mgr->ve->count; i++) {
732      struct translate_key *k;
733      struct translate_element *te;
734      enum pipe_format output_format = mgr->ve->native_format[i];
735      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
736      bit = 1 << vb_index;
737
738      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
739          !(incompatible_vb_mask & (1 << vb_index)) &&
740          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
741         continue;
742      }
743
744      /* Set 'type' to the category we will translate:
745       * per-vertex, per-instance, or constant attribs. */
746      for (type = 0; type < VB_NUM; type++) {
747         if (mask[type] & bit) {
748            break;
749         }
750      }
751      assert(type < VB_NUM);
752      if (mgr->ve->ve[i].src_format != output_format)
753         assert(translate_is_output_format_supported(output_format));
754      /*printf("velem=%i type=%i\n", i, type);*/
755
756      /* Add the vertex element. */
757      k = &key[type];
758      elem_index[type][i] = k->nr_elements;
759
760      te = &k->element[k->nr_elements];
761      te->type = TRANSLATE_ELEMENT_NORMAL;
762      te->instance_divisor = 0;
763      te->input_buffer = vb_index;
764      te->input_format = mgr->ve->ve[i].src_format;
765      te->input_offset = mgr->ve->ve[i].src_offset;
766      te->output_format = output_format;
767      te->output_offset = k->output_stride;
768
769      k->output_stride += mgr->ve->native_format_size[i];
770      k->nr_elements++;
771   }
772
773   /* Translate buffers. */
774   for (type = 0; type < VB_NUM; type++) {
775      if (key[type].nr_elements) {
776         enum pipe_error err;
777         err = u_vbuf_translate_buffers(mgr, &key[type], info, draw,
778                                        mask[type], mgr->fallback_vbs[type],
779                                        start[type], num[type], min_index,
780                                        unroll_indices && type == VB_VERTEX);
781         if (err != PIPE_OK)
782            return FALSE;
783
784         /* Fixup the stride for constant attribs. */
785         if (type == VB_CONST) {
786            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
787         }
788      }
789   }
790
791   /* Setup new vertex elements. */
792   for (i = 0; i < mgr->ve->count; i++) {
793      for (type = 0; type < VB_NUM; type++) {
794         if (elem_index[type][i] < key[type].nr_elements) {
795            struct translate_element *te = &key[type].element[elem_index[type][i]];
796            mgr->fallback_velems.velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
797            mgr->fallback_velems.velems[i].src_format = te->output_format;
798            mgr->fallback_velems.velems[i].src_offset = te->output_offset;
799            mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type];
800
801            /* elem_index[type][i] can only be set for one type. */
802            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
803            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0u);
804            break;
805         }
806      }
807      /* No translating, just copy the original vertex element over. */
808      if (type == VB_NUM) {
809         memcpy(&mgr->fallback_velems.velems[i], &mgr->ve->ve[i],
810                sizeof(struct pipe_vertex_element));
811      }
812   }
813
814   mgr->fallback_velems.count = mgr->ve->count;
815
816   u_vbuf_set_vertex_elements_internal(mgr, &mgr->fallback_velems);
817   mgr->using_translate = TRUE;
818   return TRUE;
819}
820
821static void u_vbuf_translate_end(struct u_vbuf *mgr)
822{
823   unsigned i;
824
825   /* Restore vertex elements. */
826   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
827   mgr->using_translate = FALSE;
828
829   /* Unreference the now-unused VBOs. */
830   for (i = 0; i < VB_NUM; i++) {
831      unsigned vb = mgr->fallback_vbs[i];
832      if (vb != ~0u) {
833         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
834         mgr->fallback_vbs[i] = ~0;
835      }
836   }
837   /* This will cause the buffer to be unbound in the driver later. */
838   mgr->dirty_real_vb_mask |= mgr->fallback_vbs_mask;
839   mgr->fallback_vbs_mask = 0;
840}
841
842static void *
843u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
844                              const struct pipe_vertex_element *attribs)
845{
846   struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS];
847   util_lower_uint64_vertex_elements(&attribs, &count, tmp);
848
849   struct pipe_context *pipe = mgr->pipe;
850   unsigned i;
851   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
852   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
853   uint32_t used_buffers = 0;
854
855   ve->count = count;
856
857   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
858   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);
859
860   /* Set the best native format in case the original format is not
861    * supported. */
862   for (i = 0; i < count; i++) {
863      enum pipe_format format = ve->ve[i].src_format;
864      unsigned vb_index_bit = 1 << ve->ve[i].vertex_buffer_index;
865
866      ve->src_format_size[i] = util_format_get_blocksize(format);
867
868      if (used_buffers & vb_index_bit)
869         ve->interleaved_vb_mask |= vb_index_bit;
870
871      used_buffers |= vb_index_bit;
872
873      if (!ve->ve[i].instance_divisor) {
874         ve->noninstance_vb_mask_any |= vb_index_bit;
875      }
876
877      format = mgr->caps.format_translation[format];
878
879      driver_attribs[i].src_format = format;
880      ve->native_format[i] = format;
881      ve->native_format_size[i] =
882            util_format_get_blocksize(ve->native_format[i]);
883
884      if (ve->ve[i].src_format != format ||
885          (!mgr->caps.velem_src_offset_unaligned &&
886           ve->ve[i].src_offset % 4 != 0)) {
887         ve->incompatible_elem_mask |= 1 << i;
888         ve->incompatible_vb_mask_any |= vb_index_bit;
889      } else {
890         ve->compatible_vb_mask_any |= vb_index_bit;
891      }
892   }
893
894   if (used_buffers & ~mgr->allowed_vb_mask) {
895      /* More vertex buffers are used than the hardware supports.  In
896       * principle, we only need to make sure that fewer vertex buffers are
897       * used, and mark some of the excess vertex buffers as incompatible.
898       * For now, mark all vertex buffers as incompatible.
899       */
900      ve->incompatible_vb_mask_any = used_buffers;
901      ve->compatible_vb_mask_any = 0;
902      ve->incompatible_elem_mask = u_bit_consecutive(0, count);
903   }
904
905   ve->used_vb_mask = used_buffers;
906   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
907   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
908
909   /* Align the formats and offsets to the size of a DWORD if needed. */
910   if (!mgr->caps.velem_src_offset_unaligned) {
911      for (i = 0; i < count; i++) {
912         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
913         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
914      }
915   }
916
917   /* Only create driver CSO if no incompatible elements */
918   if (!ve->incompatible_elem_mask) {
919      ve->driver_cso =
920         pipe->create_vertex_elements_state(pipe, count, driver_attribs);
921   }
922
923   return ve;
924}
925
926static void u_vbuf_delete_vertex_elements(void *ctx, void *state,
927                                          enum cso_cache_type type)
928{
929   struct pipe_context *pipe = (struct pipe_context*)ctx;
930   struct cso_velements *cso = (struct cso_velements*)state;
931   struct u_vbuf_elements *ve = (struct u_vbuf_elements*)cso->data;
932
933   if (ve->driver_cso)
934      pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
935   FREE(ve);
936   FREE(cso);
937}
938
939void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
940                               unsigned start_slot, unsigned count,
941                               unsigned unbind_num_trailing_slots,
942                               bool take_ownership,
943                               const struct pipe_vertex_buffer *bufs)
944{
945   unsigned i;
946   /* which buffers are enabled */
947   uint32_t enabled_vb_mask = 0;
948   /* which buffers are in user memory */
949   uint32_t user_vb_mask = 0;
950   /* which buffers are incompatible with the driver */
951   uint32_t incompatible_vb_mask = 0;
952   /* which buffers have a non-zero stride */
953   uint32_t nonzero_stride_vb_mask = 0;
954   const uint32_t mask =
955      ~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot);
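   /* Illustrative numbers: start_slot = 2, count = 3,
    * unbind_num_trailing_slots = 1 gives mask = ~0x3cu, i.e. bits 2..5 are
    * cleared below and rewritten by this call. */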
956
957   /* Zero out the bits we are going to rewrite completely. */
958   mgr->user_vb_mask &= mask;
959   mgr->incompatible_vb_mask &= mask;
960   mgr->nonzero_stride_vb_mask &= mask;
961   mgr->enabled_vb_mask &= mask;
962
963   if (!bufs) {
964      struct pipe_context *pipe = mgr->pipe;
965      /* Unbind. */
966      unsigned total_count = count + unbind_num_trailing_slots;
967      mgr->dirty_real_vb_mask &= mask;
968
969      for (i = 0; i < total_count; i++) {
970         unsigned dst_index = start_slot + i;
971
972         pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
973         pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
974      }
975
976      pipe->set_vertex_buffers(pipe, start_slot, count,
977                               unbind_num_trailing_slots, false, NULL);
978      return;
979   }
980
981   for (i = 0; i < count; i++) {
982      unsigned dst_index = start_slot + i;
983      const struct pipe_vertex_buffer *vb = &bufs[i];
984      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
985      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];
986
987      if (!vb->buffer.resource) {
988         pipe_vertex_buffer_unreference(orig_vb);
989         pipe_vertex_buffer_unreference(real_vb);
990         continue;
991      }
992
993      if (take_ownership) {
994         pipe_vertex_buffer_unreference(orig_vb);
995         memcpy(orig_vb, vb, sizeof(*vb));
996      } else {
997         pipe_vertex_buffer_reference(orig_vb, vb);
998      }
999
1000      if (vb->stride) {
1001         nonzero_stride_vb_mask |= 1 << dst_index;
1002      }
1003      enabled_vb_mask |= 1 << dst_index;
1004
1005      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
1006          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
1007         incompatible_vb_mask |= 1 << dst_index;
1008         real_vb->buffer_offset = vb->buffer_offset;
1009         real_vb->stride = vb->stride;
1010         pipe_vertex_buffer_unreference(real_vb);
1011         real_vb->is_user_buffer = false;
1012         continue;
1013      }
1014
1015      if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
1016         user_vb_mask |= 1 << dst_index;
1017         real_vb->buffer_offset = vb->buffer_offset;
1018         real_vb->stride = vb->stride;
1019         pipe_vertex_buffer_unreference(real_vb);
1020         real_vb->is_user_buffer = false;
1021         continue;
1022      }
1023
1024      pipe_vertex_buffer_reference(real_vb, vb);
1025   }
1026
1027   for (i = 0; i < unbind_num_trailing_slots; i++) {
1028      unsigned dst_index = start_slot + count + i;
1029
1030      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
1031      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
1032   }
1033
1034   mgr->user_vb_mask |= user_vb_mask;
1035   mgr->incompatible_vb_mask |= incompatible_vb_mask;
1036   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
1037   mgr->enabled_vb_mask |= enabled_vb_mask;
1038
1039   /* All changed buffers are marked as dirty, even the NULL ones,
1040    * which will cause the NULL buffers to be unbound in the driver later. */
1041   mgr->dirty_real_vb_mask |= ~mask;
1042}
1043
1044static ALWAYS_INLINE bool
1045get_upload_offset_size(struct u_vbuf *mgr,
1046                       const struct pipe_vertex_buffer *vb,
1047                       struct u_vbuf_elements *ve,
1048                       const struct pipe_vertex_element *velem,
1049                       unsigned vb_index, unsigned velem_index,
1050                       int start_vertex, unsigned num_vertices,
1051                       int start_instance, unsigned num_instances,
1052                       unsigned *offset, unsigned *size)
1053{
1054   /* Skip the buffers generated by translate. */
1055   if ((1 << vb_index) & mgr->fallback_vbs_mask || !vb->is_user_buffer)
1056      return false;
1057
1058   unsigned instance_div = velem->instance_divisor;
1059   *offset = vb->buffer_offset + velem->src_offset;
1060
1061   if (!vb->stride) {
1062      /* Constant attrib. */
1063      *size = ve->src_format_size[velem_index];
1064   } else if (instance_div) {
1065      /* Per-instance attrib. */
1066
1067      /* Figure out how many instances we'll render given instance_div.  We
1068       * can't use the typical div_round_up() pattern because the CTS uses
1069       * instance_div = ~0 for a test, which overflows div_round_up()'s
1070       * addition.
1071       */
1072      unsigned count = num_instances / instance_div;
1073      if (count * instance_div != num_instances)
1074         count++;
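      /* Illustrative check: num_instances = 10 with instance_div = ~0u gives
       * count = 0 from the division, then 0 * ~0u != 10 bumps count to 1,
       * whereas div_round_up() would have overflowed on the addition. */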
1075
1076      *offset += vb->stride * start_instance;
1077      *size = vb->stride * (count - 1) + ve->src_format_size[velem_index];
1078   } else {
1079      /* Per-vertex attrib. */
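      /* Illustrative numbers: stride = 12, src_format_size = 12,
       * start_vertex = 5, num_vertices = 100 gives offset += 60 and
       * size = 99 * 12 + 12 = 1200 bytes. */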
1080      *offset += vb->stride * start_vertex;
1081      *size = vb->stride * (num_vertices - 1) + ve->src_format_size[velem_index];
1082   }
1083   return true;
1084}
1085
1086
1087static enum pipe_error
1088u_vbuf_upload_buffers(struct u_vbuf *mgr,
1089                      int start_vertex, unsigned num_vertices,
1090                      int start_instance, unsigned num_instances)
1091{
1092   unsigned i;
1093   struct u_vbuf_elements *ve = mgr->ve;
1094   unsigned nr_velems = ve->count;
1095   const struct pipe_vertex_element *velems =
1096         mgr->using_translate ? mgr->fallback_velems.velems : ve->ve;
1097
1098   /* Faster path when no vertex attribs are interleaved. */
1099   if ((ve->interleaved_vb_mask & mgr->user_vb_mask) == 0) {
1100      for (i = 0; i < nr_velems; i++) {
1101         const struct pipe_vertex_element *velem = &velems[i];
1102         unsigned index = velem->vertex_buffer_index;
1103         struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
1104         unsigned offset, size;
1105
1106         if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex,
1107                                     num_vertices, start_instance, num_instances,
1108                                     &offset, &size))
1109            continue;
1110
1111         struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[index];
1112         const uint8_t *ptr = mgr->vertex_buffer[index].buffer.user;
1113
1114         u_upload_data(mgr->pipe->stream_uploader,
1115                       mgr->has_signed_vb_offset ? 0 : offset,
1116                       size, 4, ptr + offset, &real_vb->buffer_offset,
1117                       &real_vb->buffer.resource);
1118         if (!real_vb->buffer.resource)
1119            return PIPE_ERROR_OUT_OF_MEMORY;
1120
1121         real_vb->buffer_offset -= offset;
1122      }
1123      return PIPE_OK;
1124   }
1125
1126   unsigned start_offset[PIPE_MAX_ATTRIBS];
1127   unsigned end_offset[PIPE_MAX_ATTRIBS];
1128   uint32_t buffer_mask = 0;
1129
1130   /* Slower path supporting interleaved vertex attribs using 2 loops. */
1131   /* Determine how much data needs to be uploaded. */
1132   for (i = 0; i < nr_velems; i++) {
1133      const struct pipe_vertex_element *velem = &velems[i];
1134      unsigned index = velem->vertex_buffer_index;
1135      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
1136      unsigned first, size, index_bit;
1137
1138      if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex,
1139                                  num_vertices, start_instance, num_instances,
1140                                  &first, &size))
1141         continue;
1142
1143      index_bit = 1 << index;
1144
1145      /* Update offsets. */
1146      if (!(buffer_mask & index_bit)) {
1147         start_offset[index] = first;
1148         end_offset[index] = first + size;
1149      } else {
1150         if (first < start_offset[index])
1151            start_offset[index] = first;
1152         if (first + size > end_offset[index])
1153            end_offset[index] = first + size;
1154      }
1155
1156      buffer_mask |= index_bit;
1157   }
1158
1159   /* Upload buffers. */
1160   while (buffer_mask) {
1161      unsigned start, end;
1162      struct pipe_vertex_buffer *real_vb;
1163      const uint8_t *ptr;
1164
1165      i = u_bit_scan(&buffer_mask);
1166
1167      start = start_offset[i];
1168      end = end_offset[i];
1169      assert(start < end);
1170
1171      real_vb = &mgr->real_vertex_buffer[i];
1172      ptr = mgr->vertex_buffer[i].buffer.user;
1173
1174      u_upload_data(mgr->pipe->stream_uploader,
1175                    mgr->has_signed_vb_offset ? 0 : start,
1176                    end - start, 4,
1177                    ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource);
1178      if (!real_vb->buffer.resource)
1179         return PIPE_ERROR_OUT_OF_MEMORY;
1180
1181      real_vb->buffer_offset -= start;
1182   }
1183
1184   return PIPE_OK;
1185}
1186
1187static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
1188{
1189   /* See if there are any per-vertex attribs which will be uploaded or
1190    * translated. Use bitmasks to get the info instead of looping over vertex
1191    * elements. */
1192   return (mgr->ve->used_vb_mask &
1193           ((mgr->user_vb_mask |
1194             mgr->incompatible_vb_mask |
1195             mgr->ve->incompatible_vb_mask_any) &
1196            mgr->ve->noninstance_vb_mask_any &
1197            mgr->nonzero_stride_vb_mask)) != 0;
1198}
1199
1200static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
1201{
1202   /* Return true if there are hw buffers which don't need to be translated.
1203    *
1204    * We could query whether each buffer is busy, but that would
1205    * be way more costly than this. */
1206   return (mgr->ve->used_vb_mask &
1207           (~mgr->user_vb_mask &
1208            ~mgr->incompatible_vb_mask &
1209            mgr->ve->compatible_vb_mask_all &
1210            mgr->ve->noninstance_vb_mask_any &
1211            mgr->nonzero_stride_vb_mask)) != 0;
1212}
1213
1214static void
1215u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info,
1216                               unsigned count,
1217                               const void *indices, unsigned *out_min_index,
1218                               unsigned *out_max_index)
1219{
1220   if (!count) {
1221      *out_min_index = 0;
1222      *out_max_index = 0;
1223      return;
1224   }
1225
1226   switch (info->index_size) {
1227   case 4: {
1228      const unsigned *ui_indices = (const unsigned*)indices;
1229      unsigned max = 0;
1230      unsigned min = ~0u;
1231      if (info->primitive_restart) {
1232         for (unsigned i = 0; i < count; i++) {
1233            if (ui_indices[i] != info->restart_index) {
1234               if (ui_indices[i] > max) max = ui_indices[i];
1235               if (ui_indices[i] < min) min = ui_indices[i];
1236            }
1237         }
1238      }
1239      else {
1240         for (unsigned i = 0; i < count; i++) {
1241            if (ui_indices[i] > max) max = ui_indices[i];
1242            if (ui_indices[i] < min) min = ui_indices[i];
1243         }
1244      }
1245      *out_min_index = min;
1246      *out_max_index = max;
1247      break;
1248   }
1249   case 2: {
1250      const unsigned short *us_indices = (const unsigned short*)indices;
1251      unsigned short max = 0;
1252      unsigned short min = ~((unsigned short)0);
1253      if (info->primitive_restart) {
1254         for (unsigned i = 0; i < count; i++) {
1255            if (us_indices[i] != info->restart_index) {
1256               if (us_indices[i] > max) max = us_indices[i];
1257               if (us_indices[i] < min) min = us_indices[i];
1258            }
1259         }
1260      }
1261      else {
1262         for (unsigned i = 0; i < count; i++) {
1263            if (us_indices[i] > max) max = us_indices[i];
1264            if (us_indices[i] < min) min = us_indices[i];
1265         }
1266      }
1267      *out_min_index = min;
1268      *out_max_index = max;
1269      break;
1270   }
1271   case 1: {
1272      const unsigned char *ub_indices = (const unsigned char*)indices;
1273      unsigned char max = 0;
1274      unsigned char min = ~((unsigned char)0);
1275      if (info->primitive_restart) {
1276         for (unsigned i = 0; i < count; i++) {
1277            if (ub_indices[i] != info->restart_index) {
1278               if (ub_indices[i] > max) max = ub_indices[i];
1279               if (ub_indices[i] < min) min = ub_indices[i];
1280            }
1281         }
1282      }
1283      else {
1284         for (unsigned i = 0; i < count; i++) {
1285            if (ub_indices[i] > max) max = ub_indices[i];
1286            if (ub_indices[i] < min) min = ub_indices[i];
1287         }
1288      }
1289      *out_min_index = min;
1290      *out_max_index = max;
1291      break;
1292   }
1293   default:
1294      unreachable("bad index size");
1295   }
1296}
1297
1298void u_vbuf_get_minmax_index(struct pipe_context *pipe,
1299                             const struct pipe_draw_info *info,
1300                             const struct pipe_draw_start_count_bias *draw,
1301                             unsigned *out_min_index, unsigned *out_max_index)
1302{
1303   struct pipe_transfer *transfer = NULL;
1304   const void *indices;
1305
1306   if (info->has_user_indices) {
1307      indices = (uint8_t*)info->index.user +
1308                draw->start * info->index_size;
1309   } else {
1310      indices = pipe_buffer_map_range(pipe, info->index.resource,
1311                                      draw->start * info->index_size,
1312                                      draw->count * info->index_size,
1313                                      PIPE_MAP_READ, &transfer);
1314   }
1315
1316   u_vbuf_get_minmax_index_mapped(info, draw->count, indices,
1317                                  out_min_index, out_max_index);
1318
1319   if (transfer) {
1320      pipe_buffer_unmap(pipe, transfer);
1321   }
1322}
1323
1324static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
1325{
1326   struct pipe_context *pipe = mgr->pipe;
1327   unsigned start_slot, count;
1328
1329   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
1330   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);
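   /* Illustrative example: dirty_real_vb_mask = 0b10110 gives start_slot = 1
    * and count = 4, so slots 1..4 are re-sent in one call even though slot 3
    * itself isn't dirty. */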
1331
1332   if (mgr->dirty_real_vb_mask == mgr->enabled_vb_mask &&
1333       mgr->dirty_real_vb_mask == mgr->user_vb_mask) {
1334      /* Fast path that allows us to transfer the VBO references to the driver
1335       * to skip atomic reference counting there. These are freshly uploaded
1336       * user buffers that can be discarded after this call.
1337       */
1338      pipe->set_vertex_buffers(pipe, start_slot, count, 0, true,
1339                               mgr->real_vertex_buffer + start_slot);
1340
1341      /* We don't own the VBO references now. Set them to NULL. */
1342      for (unsigned i = 0; i < count; i++) {
1343         assert(!mgr->real_vertex_buffer[start_slot + i].is_user_buffer);
1344         mgr->real_vertex_buffer[start_slot + i].buffer.resource = NULL;
1345      }
1346   } else {
1347      /* Slow path where we have to keep VBO references. */
1348      pipe->set_vertex_buffers(pipe, start_slot, count, 0, false,
1349                               mgr->real_vertex_buffer + start_slot);
1350   }
1351   mgr->dirty_real_vb_mask = 0;
1352}
1353
1354static void
1355u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
1356                               unsigned drawid_offset,
1357                               unsigned *indirect_data, unsigned stride,
1358                               unsigned draw_count)
1359{
1360   /* Increase refcount to be able to use take_index_buffer_ownership with
1361    * all draws.
1362    */
1363   if (draw_count > 1 && info->take_index_buffer_ownership)
1364      p_atomic_add(&info->index.resource->reference.count, draw_count - 1);
1365
1366   assert(info->index_size);
1367
1368   for (unsigned i = 0; i < draw_count; i++) {
1369      struct pipe_draw_start_count_bias draw;
1370      unsigned offset = i * stride / 4;
1371
1372      draw.count = indirect_data[offset + 0];
1373      info->instance_count = indirect_data[offset + 1];
1374      draw.start = indirect_data[offset + 2];
1375      draw.index_bias = indirect_data[offset + 3];
1376      info->start_instance = indirect_data[offset + 4];
1377
1378      u_vbuf_draw_vbo(mgr, info, drawid_offset, NULL, draw);
1379   }
1380}
1381
1382void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
1383                     unsigned drawid_offset,
1384                     const struct pipe_draw_indirect_info *indirect,
1385                     const struct pipe_draw_start_count_bias draw)
1386{
1387   struct pipe_context *pipe = mgr->pipe;
1388   int start_vertex;
1389   unsigned min_index;
1390   unsigned num_vertices;
1391   boolean unroll_indices = FALSE;
1392   const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
1393   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
1394   const uint32_t incompatible_vb_mask =
1395      mgr->incompatible_vb_mask & used_vb_mask;
1396   struct pipe_draw_info new_info;
1397   struct pipe_draw_start_count_bias new_draw;
1398   unsigned fixed_restart_index = info->index_size ? util_prim_restart_index_from_size(info->index_size) : 0;
1399
1400   /* Normal draw. No fallback and no user buffers. */
1401   if (!incompatible_vb_mask &&
1402       !mgr->ve->incompatible_elem_mask &&
1403       !user_vb_mask &&
1404       (info->index_size != 1 || !mgr->caps.rewrite_ubyte_ibs) &&
1405       (!info->primitive_restart ||
1406        info->restart_index == fixed_restart_index ||
1407        !mgr->caps.rewrite_restart_index) &&
1408       (!info->primitive_restart || mgr->caps.supported_restart_modes & BITFIELD_BIT(info->mode)) &&
1409       mgr->caps.supported_prim_modes & BITFIELD_BIT(info->mode)) {
1410
1411      /* Set vertex buffers if needed. */
1412      if (mgr->dirty_real_vb_mask & used_vb_mask) {
1413         u_vbuf_set_driver_vertex_buffers(mgr);
1414      }
1415
1416      pipe->draw_vbo(pipe, info, drawid_offset, indirect, &draw, 1);
1417      return;
1418   }
1419
1420   new_info = *info;
1421   new_draw = draw;
1422
1423   /* Handle indirect (multi)draws. */
1424   if (indirect && indirect->buffer) {
1425      unsigned draw_count = 0;
1426
1427      /* Get the number of draws. */
1428      if (indirect->indirect_draw_count) {
1429         pipe_buffer_read(pipe, indirect->indirect_draw_count,
1430                          indirect->indirect_draw_count_offset,
1431                          4, &draw_count);
1432      } else {
1433         draw_count = indirect->draw_count;
1434      }
1435
1436      if (!draw_count)
1437         goto cleanup;
1438
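      /* Total size of all records: the first draw_count-1 records are
       * indirect->stride bytes apart, and the last one is 5 dwords for
       * indexed draws (count, instance_count, start, index_bias,
       * start_instance) or 4 dwords for non-indexed draws.
       */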
1439      unsigned data_size = (draw_count - 1) * indirect->stride +
1440                           (new_info.index_size ? 20 : 16);
1441      unsigned *data = malloc(data_size);
1442      if (!data)
1443         goto cleanup; /* report an error? */
1444
1445      /* Read the used buffer range only once, because the read can be
1446       * uncached.
1447       */
1448      pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
1449                       data);
1450
1451      if (info->index_size) {
1452         /* Indexed multidraw. */
1453         unsigned index_bias0 = data[3];
1454         bool index_bias_same = true;
1455
1456         /* If we invoke the translate path, we have to split the multidraw. */
1457         if (incompatible_vb_mask ||
1458             mgr->ve->incompatible_elem_mask) {
1459            u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data,
1460                                           indirect->stride, draw_count);
1461            free(data);
1462            return;
1463         }
1464
1465         /* See if index_bias is the same for all draws. */
1466         for (unsigned i = 1; i < draw_count; i++) {
1467            if (data[i * indirect->stride / 4 + 3] != index_bias0) {
1468               index_bias_same = false;
1469               break;
1470            }
1471         }
1472
1473         /* Split the multidraw if index_bias is different. */
1474         if (!index_bias_same) {
1475            u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data,
1476                                           indirect->stride, draw_count);
1477            free(data);
1478            return;
1479         }
1480
1481         /* If we don't need the translate path and index_bias is the same
1482          * for all draws, we can process the multidraw with the time
1483          * complexity of a single draw call (except for the index range
1484          * computation). We only need to compute the index range covering
1485          * all draw calls of the multidraw.
1486          *
1487          * The driver won't look at these values because indirect != NULL;
1488          * they only determine the user buffer bounds to upload.
1489          */
1490         new_draw.index_bias = index_bias0;
1491         new_info.index_bounds_valid = true;
1492         new_info.min_index = ~0u;
1493         new_info.max_index = 0;
1494         new_info.start_instance = ~0u;
1495         unsigned end_instance = 0;
1496
1497         struct pipe_transfer *transfer = NULL;
1498         const uint8_t *indices;
1499
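         /* Get a CPU-readable view of the indices: either the user pointer
          * or a read mapping of the whole index buffer.
          */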
1500         if (info->has_user_indices) {
1501            indices = (uint8_t*)info->index.user;
1502         } else {
1503            indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
1504                                                PIPE_MAP_READ, &transfer);
1505         }
1506
1507         for (unsigned i = 0; i < draw_count; i++) {
1508            unsigned offset = i * indirect->stride / 4;
1509            unsigned start = data[offset + 2];
1510            unsigned count = data[offset + 0];
1511            unsigned start_instance = data[offset + 4];
1512            unsigned instance_count = data[offset + 1];
1513
1514            if (!count || !instance_count)
1515               continue;
1516
1517            /* Update the ranges of instances. */
1518            new_info.start_instance = MIN2(new_info.start_instance,
1519                                           start_instance);
1520            end_instance = MAX2(end_instance, start_instance + instance_count);
1521
1522            /* Update the index range. */
1523            unsigned min, max;
1524            u_vbuf_get_minmax_index_mapped(&new_info, count,
1525                                           indices +
1526                                           new_info.index_size * start,
1527                                           &min, &max);
1528
1529            new_info.min_index = MIN2(new_info.min_index, min);
1530            new_info.max_index = MAX2(new_info.max_index, max);
1531         }
1532         free(data);
1533
1534         if (transfer)
1535            pipe_buffer_unmap(pipe, transfer);
1536
1537         /* Set the final instance count. */
1538         new_info.instance_count = end_instance - new_info.start_instance;
1539
1540         if (new_info.start_instance == ~0u || !new_info.instance_count)
1541            goto cleanup;
1542      } else {
1543         /* Non-indexed multidraw.
1544          *
1545          * Keep the draw call indirect and compute the minimums & maximums,
1546          * which determine the user buffer bounds to upload. The driver
1547          * won't look at these values because indirect != NULL.
1548          *
1549          * This processes the whole multidraw with the time complexity of
1550          * a single draw call.
1551          */
1552         new_draw.start = ~0u;
1553         new_info.start_instance = ~0u;
1554         unsigned end_vertex = 0;
1555         unsigned end_instance = 0;
1556
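         /* Each record is a dword array laid out as
          * {count, instance_count, start, start_instance}.
          */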
1557         for (unsigned i = 0; i < draw_count; i++) {
1558            unsigned offset = i * indirect->stride / 4;
1559            unsigned start = data[offset + 2];
1560            unsigned count = data[offset + 0];
1561            unsigned start_instance = data[offset + 3];
1562            unsigned instance_count = data[offset + 1];
1563
1564            new_draw.start = MIN2(new_draw.start, start);
1565            new_info.start_instance = MIN2(new_info.start_instance,
1566                                           start_instance);
1567
1568            end_vertex = MAX2(end_vertex, start + count);
1569            end_instance = MAX2(end_instance, start_instance + instance_count);
1570         }
1571         free(data);
1572
1573         /* Set the final counts. */
1574         new_draw.count = end_vertex - new_draw.start;
1575         new_info.instance_count = end_instance - new_info.start_instance;
1576
1577         if (new_draw.start == ~0u || !new_draw.count || !new_info.instance_count)
1578            goto cleanup;
1579      }
1580   } else {
1581      if ((!indirect && !new_draw.count) || !new_info.instance_count)
1582         goto cleanup;
1583   }
1584
1585   if (new_info.index_size) {
1586      /* See if anything needs to be done for per-vertex attribs. */
1587      if (u_vbuf_need_minmax_index(mgr)) {
1588         unsigned max_index;
1589
1590         if (new_info.index_bounds_valid) {
1591            min_index = new_info.min_index;
1592            max_index = new_info.max_index;
1593         } else {
1594            u_vbuf_get_minmax_index(mgr->pipe, &new_info, &new_draw,
1595                                    &min_index, &max_index);
1596         }
1597
1598         assert(min_index <= max_index);
1599
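         /* The vertices actually fetched are [min_index + index_bias,
          * max_index + index_bias], so only that range needs to be
          * uploaded or translated.
          */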
1600         start_vertex = min_index + new_draw.index_bias;
1601         num_vertices = max_index + 1 - min_index;
1602
1603         /* Primitive restart doesn't work when unrolling indices;
1604          * we would have to split this draw into several ones. */
1605         /* Use a heuristic to decide whether unrolling indices is likely
1606          * to improve performance. */
1607         if (!indirect &&
1608             !new_info.primitive_restart &&
1609             util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) &&
1610             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
1611            unroll_indices = TRUE;
1612            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
1613                              mgr->ve->noninstance_vb_mask_any);
1614         }
1615      } else {
1616         /* Nothing to do for per-vertex attribs. */
1617         start_vertex = 0;
1618         num_vertices = 0;
1619         min_index = 0;
1620      }
1621   } else {
1622      start_vertex = new_draw.start;
1623      num_vertices = new_draw.count;
1624      min_index = 0;
1625   }
1626
1627   /* Translate vertices with non-native layouts or formats. */
1628   if (unroll_indices ||
1629       incompatible_vb_mask ||
1630       mgr->ve->incompatible_elem_mask) {
1631      if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw,
1632                                  start_vertex, num_vertices,
1633                                  min_index, unroll_indices)) {
1634         debug_warn_once("u_vbuf_translate_begin() failed");
1635         goto cleanup;
1636      }
1637
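      /* After unrolling, the indices have been baked into the translated
       * vertex buffer, so the draw becomes a non-indexed draw starting at
       * vertex 0.
       */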
1638      if (unroll_indices) {
1639         new_info.index_size = 0;
1640         new_draw.index_bias = 0;
1641         new_info.index_bounds_valid = true;
1642         new_info.min_index = 0;
1643         new_info.max_index = new_draw.count - 1;
1644         new_draw.start = 0;
1645      }
1646
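      /* Buffers consumed by the translate path have already been uploaded
       * there, so drop them from the user-upload mask.
       */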
1647      user_vb_mask &= ~(incompatible_vb_mask |
1648                        mgr->ve->incompatible_vb_mask_all);
1649   }
1650
1651   /* Upload user buffers. */
1652   if (user_vb_mask) {
1653      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
1654                                new_info.start_instance,
1655                                new_info.instance_count) != PIPE_OK) {
1656         debug_warn_once("u_vbuf_upload_buffers() failed");
1657         goto cleanup;
1658      }
1659
1660      mgr->dirty_real_vb_mask |= user_vb_mask;
1661   }
1662
1663   /*
1664   if (unroll_indices) {
1665      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
1666             start_vertex, num_vertices);
1667      util_dump_draw_info(stdout, info);
1668      printf("\n");
1669   }
1670
1671   unsigned i;
1672   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
1673      printf("input %i: ", i);
1674      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
1675      printf("\n");
1676   }
1677   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
1678      printf("real %i: ", i);
1679      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
1680      printf("\n");
1681   }
1682   */
1683
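   /* Unmap the stream uploader so the freshly uploaded vertex data is
    * visible to the driver before the buffers are bound and drawn.
    */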
1684   u_upload_unmap(pipe->stream_uploader);
1685   if (mgr->dirty_real_vb_mask)
1686      u_vbuf_set_driver_vertex_buffers(mgr);
1687
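   /* If the driver can't consume 8-bit indices, this restart index/mode,
    * or this primitive mode directly, go through util_primconvert.
    */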
1688   if ((new_info.index_size == 1 && mgr->caps.rewrite_ubyte_ibs) ||
1689       (new_info.primitive_restart &&
1690        ((new_info.restart_index != fixed_restart_index && mgr->caps.rewrite_restart_index) ||
1691        !(mgr->caps.supported_restart_modes & BITFIELD_BIT(new_info.mode)))) ||
1692       !(mgr->caps.supported_prim_modes & BITFIELD_BIT(new_info.mode))) {
1693      util_primconvert_save_flatshade_first(mgr->pc, mgr->flatshade_first);
1694      util_primconvert_draw_vbo(mgr->pc, &new_info, drawid_offset, indirect, &new_draw, 1);
1695   } else
1696      pipe->draw_vbo(pipe, &new_info, drawid_offset, indirect, &new_draw, 1);
1697
1698   if (mgr->using_translate) {
1699      u_vbuf_translate_end(mgr);
1700   }
1701   return;
1702
1703cleanup:
1704   if (info->take_index_buffer_ownership) {
1705      struct pipe_resource *indexbuf = info->index.resource;
1706      pipe_resource_reference(&indexbuf, NULL);
1707   }
1708}
1709
1710void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
1711{
1712   assert(!mgr->ve_saved);
1713   mgr->ve_saved = mgr->ve;
1714}
1715
1716void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
1717{
1718   if (mgr->ve != mgr->ve_saved) {
1719      struct pipe_context *pipe = mgr->pipe;
1720
1721      mgr->ve = mgr->ve_saved;
1722      pipe->bind_vertex_elements_state(pipe,
1723                                       mgr->ve ? mgr->ve->driver_cso : NULL);
1724   }
1725   mgr->ve_saved = NULL;
1726}
1727