1af69d88dSmrg/**************************************************************************
2af69d88dSmrg *
3af69d88dSmrg * Copyright 2011 Marek Olšák <maraeo@gmail.com>
4af69d88dSmrg * All Rights Reserved.
5af69d88dSmrg *
6af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
7af69d88dSmrg * copy of this software and associated documentation files (the
8af69d88dSmrg * "Software"), to deal in the Software without restriction, including
9af69d88dSmrg * without limitation the rights to use, copy, modify, merge, publish,
10af69d88dSmrg * distribute, sub license, and/or sell copies of the Software, and to
11af69d88dSmrg * permit persons to whom the Software is furnished to do so, subject to
12af69d88dSmrg * the following conditions:
13af69d88dSmrg *
14af69d88dSmrg * The above copyright notice and this permission notice (including the
15af69d88dSmrg * next paragraph) shall be included in all copies or substantial portions
16af69d88dSmrg * of the Software.
17af69d88dSmrg *
18af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19af69d88dSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21af69d88dSmrg * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
22af69d88dSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23af69d88dSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24af69d88dSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25af69d88dSmrg *
26af69d88dSmrg **************************************************************************/
27af69d88dSmrg
28af69d88dSmrg/**
29af69d88dSmrg * This module uploads user buffers and translates the vertex buffers which
30af69d88dSmrg * contain incompatible vertices (i.e. not supported by the driver/hardware)
31af69d88dSmrg * into compatible ones, based on the Gallium CAPs.
32af69d88dSmrg *
33af69d88dSmrg * It does not upload index buffers.
34af69d88dSmrg *
35af69d88dSmrg * The module heavily uses bitmasks to represent per-buffer and
36af69d88dSmrg * per-vertex-element flags to avoid looping over the list of buffers just
37af69d88dSmrg * to see if there's a non-zero stride, or user buffer, or unsupported format,
38af69d88dSmrg * etc.
39af69d88dSmrg *
40af69d88dSmrg * There are 3 categories of vertex elements, which are processed separately:
41af69d88dSmrg * - per-vertex attribs (stride != 0, instance_divisor == 0)
42af69d88dSmrg * - instanced attribs (stride != 0, instance_divisor > 0)
43af69d88dSmrg * - constant attribs (stride == 0)
44af69d88dSmrg *
45af69d88dSmrg * All needed uploads and translations are performed every draw command, but
46af69d88dSmrg * only the subset of vertices needed for that draw command is uploaded or
47af69d88dSmrg * translated. (the module never translates whole buffers)
48af69d88dSmrg *
49af69d88dSmrg *
50af69d88dSmrg * The module consists of two main parts:
51af69d88dSmrg *
52af69d88dSmrg *
53af69d88dSmrg * 1) Translate (u_vbuf_translate_begin/end)
54af69d88dSmrg *
55af69d88dSmrg * This is pretty much a vertex fetch fallback. It translates vertices from
56af69d88dSmrg * one vertex buffer to another in an unused vertex buffer slot. It does
57af69d88dSmrg * whatever is needed to make the vertices readable by the hardware (changes
58af69d88dSmrg * vertex formats and aligns offsets and strides). The translate module is
59af69d88dSmrg * used here.
60af69d88dSmrg *
61af69d88dSmrg * Each of the 3 categories is translated to a separate buffer.
62af69d88dSmrg * Only the [min_index, max_index] range is translated. For instanced attribs,
63af69d88dSmrg * the range is [start_instance, start_instance+instance_count]. For constant
64af69d88dSmrg * attribs, the range is [0, 1].
65af69d88dSmrg *
66af69d88dSmrg *
67af69d88dSmrg * 2) User buffer uploading (u_vbuf_upload_buffers)
68af69d88dSmrg *
69af69d88dSmrg * Only the [min_index, max_index] range is uploaded (just like Translate)
70af69d88dSmrg * with a single memcpy.
71af69d88dSmrg *
 * This method works best for non-indexed draw operations, or for indexed
 * draw operations where the [min_index, max_index] range is not much bigger
 * than the vertex count.
75af69d88dSmrg *
76af69d88dSmrg * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
77af69d88dSmrg * the per-vertex attribs are uploaded via the translate module, all packed
78af69d88dSmrg * into one vertex buffer, and the indexed draw call is turned into
79af69d88dSmrg * a non-indexed one in the process. This adds additional complexity
80af69d88dSmrg * to the translate part, but it prevents bad apps from bringing your frame
81af69d88dSmrg * rate down.
82af69d88dSmrg *
83af69d88dSmrg *
84af69d88dSmrg * If there is nothing to do, it forwards every command to the driver.
85af69d88dSmrg * The module also has its own CSO cache of vertex element states.
86af69d88dSmrg */
87af69d88dSmrg
88af69d88dSmrg#include "util/u_vbuf.h"
89af69d88dSmrg
90af69d88dSmrg#include "util/u_dump.h"
917ec681f3Smrg#include "util/format/u_format.h"
927ec681f3Smrg#include "util/u_helpers.h"
93af69d88dSmrg#include "util/u_inlines.h"
94af69d88dSmrg#include "util/u_memory.h"
957ec681f3Smrg#include "indices/u_primconvert.h"
967ec681f3Smrg#include "util/u_prim_restart.h"
977ec681f3Smrg#include "util/u_screen.h"
98af69d88dSmrg#include "util/u_upload_mgr.h"
99af69d88dSmrg#include "translate/translate.h"
100af69d88dSmrg#include "translate/translate_cache.h"
101af69d88dSmrg#include "cso_cache/cso_cache.h"
102af69d88dSmrg#include "cso_cache/cso_hash.h"
103af69d88dSmrg
/* Preprocessed per-CSO state for one vertex element state object.
 * Created by u_vbuf_create_vertex_elements() and cached by u_vbuf. */
struct u_vbuf_elements {
   /* Number of valid entries in ve[]. */
   unsigned count;
   /* The vertex element state as supplied by the caller. */
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   /* Byte size of each element's source format (filled at creation). */
   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* This might mean two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib  */
   /* Which buffer has at least one vertex element referencing it
    * incompatible. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffer has all vertex elements referencing it incompatible. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffer has at least one vertex element referencing it
    * compatible. */
   uint32_t compatible_vb_mask_any;
   /* Which buffer has all vertex elements referencing it compatible. */
   uint32_t compatible_vb_mask_all;

   /* Which buffer has at least one vertex element referencing it
    * non-instanced. */
   uint32_t noninstance_vb_mask_any;

   /* Which buffers are used by multiple vertex attribs. */
   uint32_t interleaved_vb_mask;

   /* The CSO returned by the driver's create_vertex_elements_state(). */
   void *driver_cso;
};
142af69d88dSmrg
/* The three attrib categories described in the file header, each translated
 * into its own fallback vertex buffer slot (see fallback_vbs[]):
 * - VB_VERTEX:   per-vertex attribs (stride != 0, instance_divisor == 0)
 * - VB_INSTANCE: instanced attribs  (stride != 0, instance_divisor > 0)
 * - VB_CONST:    constant attribs   (stride == 0)
 */
enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};
149af69d88dSmrg
/* The vertex buffer manager context, one per pipe_context. */
struct u_vbuf {
   /* Capabilities captured from the screen at creation time. */
   struct u_vbuf_caps caps;
   /* Whether PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET is set; controls how
    * upload offsets are biased in u_vbuf_translate_buffers(). */
   bool has_signed_vb_offset;

   struct pipe_context *pipe;
   /* Cache of translate objects used for the vertex fetch fallback. */
   struct translate_cache *translate_cache;
   /* Cache of vertex element CSOs (see u_vbuf_set_vertex_elements_internal). */
   struct cso_cache cso_cache;

   /* Primitive-conversion context; non-NULL only when some primitive or
    * restart mode must be emulated (see u_vbuf_create). */
   struct primconvert_context *pc;
   /* Provoking-vertex convention as set by u_vbuf_set_flatshade_first(). */
   bool flatshade_first;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct cso_velems_state fallback_velems;
   /* If non-NULL, this is a vertex element state used for the translate
    * fallback and therefore used for rendering too. */
   boolean using_translate;
   /* The vertex buffer slot index where translated vertices have been
    * stored in. */
   unsigned fallback_vbs[VB_NUM];
   /* Bitmask of the slots currently held in fallback_vbs[]. */
   unsigned fallback_vbs_mask;

   /* Which buffer is a user buffer. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer is incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer has a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers are allowed (supported by hardware). */
   uint32_t allowed_vb_mask;
};
194af69d88dSmrg
195af69d88dSmrgstatic void *
196af69d88dSmrgu_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
197af69d88dSmrg                              const struct pipe_vertex_element *attribs);
1987ec681f3Smrgstatic void u_vbuf_delete_vertex_elements(void *ctx, void *state,
1997ec681f3Smrg                                          enum cso_cache_type type);
200af69d88dSmrg
/* Vertex format fallback table: if a "from" format is not supported as a
 * vertex buffer format by the screen, u_vbuf_get_caps() redirects it to the
 * corresponding "to" format and the translate fallback converts the data. */
static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SINT,       PIPE_FORMAT_R32G32B32_SINT },
   { PIPE_FORMAT_R16G16B16_UINT,       PIPE_FORMAT_R32G32B32_UINT },
   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
};
267af69d88dSmrg
2687ec681f3Smrgvoid u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
2697ec681f3Smrg                     bool needs64b)
270af69d88dSmrg{
27101e04c3fSmrg   unsigned i;
2727ec681f3Smrg
2737ec681f3Smrg   memset(caps, 0, sizeof(*caps));
27401e04c3fSmrg
27501e04c3fSmrg   /* I'd rather have a bitfield of which formats are supported and a static
27601e04c3fSmrg    * table of the translations indexed by format, but since we don't have C99
27701e04c3fSmrg    * we can't easily make a sparsely-populated table indexed by format.  So,
27801e04c3fSmrg    * we construct the sparse table here.
27901e04c3fSmrg    */
28001e04c3fSmrg   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
28101e04c3fSmrg      caps->format_translation[i] = i;
28201e04c3fSmrg
28301e04c3fSmrg   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
28401e04c3fSmrg      enum pipe_format format = vbuf_format_fallbacks[i].from;
2857ec681f3Smrg      unsigned comp_bits = util_format_get_component_bits(format, 0, 0);
2867ec681f3Smrg
2877ec681f3Smrg      if ((comp_bits > 32) && !needs64b)
2887ec681f3Smrg         continue;
28901e04c3fSmrg
29001e04c3fSmrg      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0,
29101e04c3fSmrg                                       PIPE_BIND_VERTEX_BUFFER)) {
29201e04c3fSmrg         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
2937ec681f3Smrg         caps->fallback_always = true;
29401e04c3fSmrg      }
29501e04c3fSmrg   }
296af69d88dSmrg
297af69d88dSmrg   caps->buffer_offset_unaligned =
298af69d88dSmrg      !screen->get_param(screen,
29901e04c3fSmrg                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
300af69d88dSmrg   caps->buffer_stride_unaligned =
30101e04c3fSmrg     !screen->get_param(screen,
302af69d88dSmrg                        PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
303af69d88dSmrg   caps->velem_src_offset_unaligned =
304af69d88dSmrg      !screen->get_param(screen,
30501e04c3fSmrg                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
306af69d88dSmrg   caps->user_vertex_buffers =
307af69d88dSmrg      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
3087ec681f3Smrg   caps->max_vertex_buffers =
3097ec681f3Smrg      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
3107ec681f3Smrg
3117ec681f3Smrg   if (screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART) ||
3127ec681f3Smrg       screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX)) {
3137ec681f3Smrg      caps->rewrite_restart_index = screen->get_param(screen, PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART);
3147ec681f3Smrg      caps->supported_restart_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART);
3157ec681f3Smrg      caps->supported_restart_modes |= BITFIELD_BIT(PIPE_PRIM_PATCHES);
3167ec681f3Smrg      if (caps->supported_restart_modes != BITFIELD_MASK(PIPE_PRIM_MAX))
3177ec681f3Smrg         caps->fallback_always = true;
3187ec681f3Smrg      caps->fallback_always |= caps->rewrite_restart_index;
3197ec681f3Smrg   }
3207ec681f3Smrg   caps->supported_prim_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES);
3217ec681f3Smrg   if (caps->supported_prim_modes != BITFIELD_MASK(PIPE_PRIM_MAX))
3227ec681f3Smrg      caps->fallback_always = true;
3237ec681f3Smrg
3247ec681f3Smrg   if (!screen->is_format_supported(screen, PIPE_FORMAT_R8_UINT, PIPE_BUFFER, 0, 0, PIPE_BIND_INDEX_BUFFER))
3257ec681f3Smrg      caps->fallback_always = caps->rewrite_ubyte_ibs = true;
3267ec681f3Smrg
3277ec681f3Smrg   /* OpenGL 2.0 requires a minimum of 16 vertex buffers */
3287ec681f3Smrg   if (caps->max_vertex_buffers < 16)
3297ec681f3Smrg      caps->fallback_always = true;
33001e04c3fSmrg
33101e04c3fSmrg   if (!caps->buffer_offset_unaligned ||
33201e04c3fSmrg       !caps->buffer_stride_unaligned ||
3337ec681f3Smrg       !caps->velem_src_offset_unaligned)
3347ec681f3Smrg      caps->fallback_always = true;
33501e04c3fSmrg
3367ec681f3Smrg   if (!caps->fallback_always && !caps->user_vertex_buffers)
3377ec681f3Smrg      caps->fallback_only_for_user_vbuffers = true;
338af69d88dSmrg}
339af69d88dSmrg
340af69d88dSmrgstruct u_vbuf *
34101e04c3fSmrgu_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps)
342af69d88dSmrg{
343af69d88dSmrg   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);
344af69d88dSmrg
345af69d88dSmrg   mgr->caps = *caps;
346af69d88dSmrg   mgr->pipe = pipe;
3477ec681f3Smrg   if (caps->rewrite_ubyte_ibs || caps->rewrite_restart_index ||
3487ec681f3Smrg       /* require all but patches */
3497ec681f3Smrg       ((caps->supported_prim_modes & caps->supported_restart_modes & BITFIELD_MASK(PIPE_PRIM_MAX))) !=
3507ec681f3Smrg                                      BITFIELD_MASK(PIPE_PRIM_MAX)) {
3517ec681f3Smrg      struct primconvert_config cfg;
3527ec681f3Smrg      cfg.fixed_prim_restart = caps->rewrite_restart_index;
3537ec681f3Smrg      cfg.primtypes_mask = caps->supported_prim_modes;
3547ec681f3Smrg      cfg.restart_primtypes_mask = caps->supported_restart_modes;
3557ec681f3Smrg      mgr->pc = util_primconvert_create_config(pipe, &cfg);
3567ec681f3Smrg   }
357af69d88dSmrg   mgr->translate_cache = translate_cache_create();
358af69d88dSmrg   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
3597ec681f3Smrg   mgr->allowed_vb_mask = u_bit_consecutive(0, mgr->caps.max_vertex_buffers);
360af69d88dSmrg
36101e04c3fSmrg   mgr->has_signed_vb_offset =
36201e04c3fSmrg      pipe->screen->get_param(pipe->screen,
36301e04c3fSmrg                              PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);
364af69d88dSmrg
3657ec681f3Smrg   cso_cache_init(&mgr->cso_cache, pipe);
3667ec681f3Smrg   cso_cache_set_delete_cso_callback(&mgr->cso_cache,
3677ec681f3Smrg                                     u_vbuf_delete_vertex_elements, pipe);
3687ec681f3Smrg
369af69d88dSmrg   return mgr;
370af69d88dSmrg}
371af69d88dSmrg
372af69d88dSmrg/* u_vbuf uses its own caching for vertex elements, because it needs to keep
373af69d88dSmrg * its own preprocessed state per vertex element CSO. */
374af69d88dSmrgstatic struct u_vbuf_elements *
3757ec681f3Smrgu_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr,
3767ec681f3Smrg                                    const struct cso_velems_state *velems)
377af69d88dSmrg{
378af69d88dSmrg   struct pipe_context *pipe = mgr->pipe;
379af69d88dSmrg   unsigned key_size, hash_key;
380af69d88dSmrg   struct cso_hash_iter iter;
381af69d88dSmrg   struct u_vbuf_elements *ve;
382af69d88dSmrg
383af69d88dSmrg   /* need to include the count into the stored state data too. */
3847ec681f3Smrg   key_size = sizeof(struct pipe_vertex_element) * velems->count +
3857ec681f3Smrg              sizeof(unsigned);
3867ec681f3Smrg   hash_key = cso_construct_key((void*)velems, key_size);
3877ec681f3Smrg   iter = cso_find_state_template(&mgr->cso_cache, hash_key, CSO_VELEMENTS,
3887ec681f3Smrg                                  (void*)velems, key_size);
389af69d88dSmrg
390af69d88dSmrg   if (cso_hash_iter_is_null(iter)) {
391af69d88dSmrg      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
3927ec681f3Smrg      memcpy(&cso->state, velems, key_size);
3937ec681f3Smrg      cso->data = u_vbuf_create_vertex_elements(mgr, velems->count,
3947ec681f3Smrg                                                velems->velems);
395af69d88dSmrg
3967ec681f3Smrg      iter = cso_insert_state(&mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
397af69d88dSmrg      ve = cso->data;
398af69d88dSmrg   } else {
399af69d88dSmrg      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
400af69d88dSmrg   }
401af69d88dSmrg
402af69d88dSmrg   assert(ve);
403af69d88dSmrg
404af69d88dSmrg   if (ve != mgr->ve)
40501e04c3fSmrg      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);
40601e04c3fSmrg
407af69d88dSmrg   return ve;
408af69d88dSmrg}
409af69d88dSmrg
/* Bind a vertex element state: looks up / creates the cached u_vbuf state
 * and remembers it as the current one. */
void u_vbuf_set_vertex_elements(struct u_vbuf *mgr,
                                const struct cso_velems_state *velems)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, velems);
}
4157ec681f3Smrg
/* Record the flat-shading provoking-vertex convention; presumably consumed
 * by the primitive-conversion path (mgr->pc) — verify against callers. */
void u_vbuf_set_flatshade_first(struct u_vbuf *mgr, bool flatshade_first)
{
   mgr->flatshade_first = flatshade_first;
}
4207ec681f3Smrg
/* Forget the currently bound vertex element state without touching the
 * driver binding. */
void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr)
{
   mgr->ve = NULL;
}
425af69d88dSmrg
426af69d88dSmrgvoid u_vbuf_destroy(struct u_vbuf *mgr)
427af69d88dSmrg{
428af69d88dSmrg   struct pipe_screen *screen = mgr->pipe->screen;
429af69d88dSmrg   unsigned i;
43001e04c3fSmrg   const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
43101e04c3fSmrg                                                    PIPE_SHADER_CAP_MAX_INPUTS);
432af69d88dSmrg
4337ec681f3Smrg   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, 0, num_vb, false, NULL);
434af69d88dSmrg
43501e04c3fSmrg   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
43601e04c3fSmrg      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
43701e04c3fSmrg   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
43801e04c3fSmrg      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);
43901e04c3fSmrg
4407ec681f3Smrg   if (mgr->pc)
4417ec681f3Smrg      util_primconvert_destroy(mgr->pc);
442af69d88dSmrg
443af69d88dSmrg   translate_cache_destroy(mgr->translate_cache);
4447ec681f3Smrg   cso_cache_delete(&mgr->cso_cache);
445af69d88dSmrg   FREE(mgr);
446af69d88dSmrg}
447af69d88dSmrg
/**
 * The translate fallback: convert vertices from the buffers selected by
 * vb_mask into one freshly-uploaded buffer, which is installed in
 * real_vertex_buffer[out_vb].
 *
 * \param key            describes the input/output vertex layout for the
 *                       translate module
 * \param info           the draw info (index size, index buffer, max_index)
 * \param draw           the start/count of this draw
 * \param vb_mask        bitmask of vertex buffer slots to read from
 * \param out_vb         slot that receives the translated buffer
 * \param start_vertex   first vertex to translate
 * \param num_vertices   number of vertices to translate
 * \param min_index      smallest index used (only relevant for unrolling)
 * \param unroll_indices if true, fetch through the index buffer and emit
 *                       a linear (non-indexed) vertex stream
 * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the upload failed
 */
static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         const struct pipe_draw_info *info,
                         const struct pipe_draw_start_count_bias *draw,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int min_index, boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   mask = vb_mask;
   while (mask) {
      struct pipe_vertex_buffer *vb;
      unsigned offset;
      uint8_t *map;
      unsigned i = u_bit_scan(&mask);

      vb = &mgr->vertex_buffer[i];
      /* Byte offset of the first vertex we need. */
      offset = vb->buffer_offset + vb->stride * start_vertex;

      if (vb->is_user_buffer) {
         /* User memory can be read directly, no mapping needed. */
         map = (uint8_t*)vb->buffer.user + offset;
      } else {
         /* stride == 0 means a single constant attrib; sizeof(double)*4 is
          * the largest possible attrib size. */
         unsigned size = vb->stride ? num_vertices * vb->stride
                                    : sizeof(double)*4;

         if (!vb->buffer.resource) {
            /* No buffer bound: feed the translate module zeros instead. */
            static uint64_t dummy_buf[4] = { 0 };
            tr->set_buffer(tr, i, dummy_buf, 0, 0);
            continue;
         }

         if (vb->stride) {
            /* the stride cannot be used to calculate the map size of the buffer,
             * as it only determines the bytes between elements, not the size of elements
             * themselves, meaning that if stride < element_size, the mapped size will
             * be too small and conversion will overrun the map buffer
             *
             * instead, add the size of the largest possible attribute to the final attribute's offset
             * in order to ensure the map is large enough
             */
            unsigned last_offset = size - vb->stride;
            size = MAX2(size, last_offset + sizeof(double)*4);
         }

         if (offset + size > vb->buffer.resource->width0) {
            /* Don't try to map past end of buffer.  This often happens when
             * we're translating an attribute that's at offset > 0 from the
             * start of the vertex.  If we'd subtract attrib's offset from
             * the size, this probably wouldn't happen.
             */
            size = vb->buffer.resource->width0 - offset;

            /* Also adjust num_vertices.  A common user error is to call
             * glDrawRangeElements() with incorrect 'end' argument.  The 'end
             * value should be the max index value, but people often
             * accidentally add one to this value.  This adjustment avoids
             * crashing (by reading past the end of a hardware buffer mapping)
             * when people do that.
             */
            num_vertices = (size + vb->stride - 1) / vb->stride;
         }

         map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size,
                                     PIPE_MAP_READ, &vb_transfer[i]);
      }

      /* Subtract min_index so that indexing with the index buffer works. */
      if (unroll_indices) {
         map -= (ptrdiff_t)vb->stride * min_index;
      }

      tr->set_buffer(tr, i, map, vb->stride, info->max_index);
   }

   /* Translate. */
   if (unroll_indices) {
      /* Indexed path: read each index and emit a linear vertex stream. */
      struct pipe_transfer *transfer = NULL;
      const unsigned offset = draw->start * info->index_size;
      uint8_t *map;

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader, 0,
                     key->output_stride * draw->count, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      /* Map (or directly address) the index buffer. */
      if (info->has_user_indices) {
         map = (uint8_t*)info->index.user + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
                                     draw->count * info->index_size,
                                     PIPE_MAP_READ, &transfer);
      }

      /* Pick the run_elts variant matching the index size. */
      switch (info->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, draw->count, 0, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, draw->count, 0, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, draw->count, 0, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader,
                     mgr->has_signed_vb_offset ?
                        0 : key->output_stride * start_vertex,
                     key->output_stride * num_vertices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      /* Bias the offset so vertex start_vertex lands at out_offset. */
      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, 0, out_map);
   }

   /* Unmap all buffers. */
   mask = vb_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Setup the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
   mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
   mgr->real_vertex_buffer[out_vb].is_user_buffer = false;

   return PIPE_OK;
}
605af69d88dSmrg
606af69d88dSmrgstatic boolean
607af69d88dSmrgu_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
608af69d88dSmrg                                    unsigned mask[VB_NUM])
609af69d88dSmrg{
610af69d88dSmrg   unsigned type;
611af69d88dSmrg   unsigned fallback_vbs[VB_NUM];
612af69d88dSmrg   /* Set the bit for each buffer which is incompatible, or isn't set. */
613af69d88dSmrg   uint32_t unused_vb_mask =
614af69d88dSmrg      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
615af69d88dSmrg      ~mgr->enabled_vb_mask;
6167ec681f3Smrg   uint32_t unused_vb_mask_orig;
6177ec681f3Smrg   boolean insufficient_buffers = false;
6187ec681f3Smrg
6197ec681f3Smrg   /* No vertex buffers available at all */
6207ec681f3Smrg   if (!unused_vb_mask)
6217ec681f3Smrg      return FALSE;
622af69d88dSmrg
623af69d88dSmrg   memset(fallback_vbs, ~0, sizeof(fallback_vbs));
6247ec681f3Smrg   mgr->fallback_vbs_mask = 0;
625af69d88dSmrg
626af69d88dSmrg   /* Find free slots for each type if needed. */
6277ec681f3Smrg   unused_vb_mask_orig = unused_vb_mask;
628af69d88dSmrg   for (type = 0; type < VB_NUM; type++) {
629af69d88dSmrg      if (mask[type]) {
630af69d88dSmrg         uint32_t index;
631af69d88dSmrg
632af69d88dSmrg         if (!unused_vb_mask) {
6337ec681f3Smrg            insufficient_buffers = true;
6347ec681f3Smrg            break;
635af69d88dSmrg         }
636af69d88dSmrg
637af69d88dSmrg         index = ffs(unused_vb_mask) - 1;
638af69d88dSmrg         fallback_vbs[type] = index;
6397ec681f3Smrg         mgr->fallback_vbs_mask |= 1 << index;
64001e04c3fSmrg         unused_vb_mask &= ~(1 << index);
641af69d88dSmrg         /*printf("found slot=%i for type=%i\n", index, type);*/
642af69d88dSmrg      }
643af69d88dSmrg   }
644af69d88dSmrg
6457ec681f3Smrg   if (insufficient_buffers) {
6467ec681f3Smrg      /* not enough vbs for all types supported by the hardware, they will have to share one
6477ec681f3Smrg       * buffer */
6487ec681f3Smrg      uint32_t index = ffs(unused_vb_mask_orig) - 1;
6497ec681f3Smrg      /* When sharing one vertex buffer use per-vertex frequency for everything. */
6507ec681f3Smrg      fallback_vbs[VB_VERTEX] = index;
6517ec681f3Smrg      mgr->fallback_vbs_mask = 1 << index;
6527ec681f3Smrg      mask[VB_VERTEX] = mask[VB_VERTEX] | mask[VB_CONST] | mask[VB_INSTANCE];
6537ec681f3Smrg      mask[VB_CONST] = 0;
6547ec681f3Smrg      mask[VB_INSTANCE] = 0;
6557ec681f3Smrg   }
6567ec681f3Smrg
657af69d88dSmrg   for (type = 0; type < VB_NUM; type++) {
658af69d88dSmrg      if (mask[type]) {
659af69d88dSmrg         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
660af69d88dSmrg      }
661af69d88dSmrg   }
662af69d88dSmrg
663af69d88dSmrg   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
664af69d88dSmrg   return TRUE;
665af69d88dSmrg}
666af69d88dSmrg
667af69d88dSmrgstatic boolean
668af69d88dSmrgu_vbuf_translate_begin(struct u_vbuf *mgr,
66901e04c3fSmrg                       const struct pipe_draw_info *info,
6707ec681f3Smrg                       const struct pipe_draw_start_count_bias *draw,
671af69d88dSmrg                       int start_vertex, unsigned num_vertices,
67201e04c3fSmrg                       int min_index, boolean unroll_indices)
673af69d88dSmrg{
674af69d88dSmrg   unsigned mask[VB_NUM] = {0};
675af69d88dSmrg   struct translate_key key[VB_NUM];
676af69d88dSmrg   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
677af69d88dSmrg   unsigned i, type;
67801e04c3fSmrg   const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
67901e04c3fSmrg                                         mgr->ve->used_vb_mask;
680af69d88dSmrg
68101e04c3fSmrg   const int start[VB_NUM] = {
68201e04c3fSmrg      start_vertex,           /* VERTEX */
68301e04c3fSmrg      info->start_instance,   /* INSTANCE */
68401e04c3fSmrg      0                       /* CONST */
685af69d88dSmrg   };
686af69d88dSmrg
68701e04c3fSmrg   const unsigned num[VB_NUM] = {
68801e04c3fSmrg      num_vertices,           /* VERTEX */
68901e04c3fSmrg      info->instance_count,   /* INSTANCE */
69001e04c3fSmrg      1                       /* CONST */
691af69d88dSmrg   };
692af69d88dSmrg
693af69d88dSmrg   memset(key, 0, sizeof(key));
694af69d88dSmrg   memset(elem_index, ~0, sizeof(elem_index));
695af69d88dSmrg
696af69d88dSmrg   /* See if there are vertex attribs of each type to translate and
697af69d88dSmrg    * which ones. */
698af69d88dSmrg   for (i = 0; i < mgr->ve->count; i++) {
699af69d88dSmrg      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;
700af69d88dSmrg
701af69d88dSmrg      if (!mgr->vertex_buffer[vb_index].stride) {
702af69d88dSmrg         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
703af69d88dSmrg             !(incompatible_vb_mask & (1 << vb_index))) {
704af69d88dSmrg            continue;
705af69d88dSmrg         }
706af69d88dSmrg         mask[VB_CONST] |= 1 << vb_index;
707af69d88dSmrg      } else if (mgr->ve->ve[i].instance_divisor) {
708af69d88dSmrg         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
709af69d88dSmrg             !(incompatible_vb_mask & (1 << vb_index))) {
710af69d88dSmrg            continue;
711af69d88dSmrg         }
712af69d88dSmrg         mask[VB_INSTANCE] |= 1 << vb_index;
713af69d88dSmrg      } else {
714af69d88dSmrg         if (!unroll_indices &&
715af69d88dSmrg             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
716af69d88dSmrg             !(incompatible_vb_mask & (1 << vb_index))) {
717af69d88dSmrg            continue;
718af69d88dSmrg         }
719af69d88dSmrg         mask[VB_VERTEX] |= 1 << vb_index;
720af69d88dSmrg      }
721af69d88dSmrg   }
722af69d88dSmrg
723af69d88dSmrg   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
724af69d88dSmrg
725af69d88dSmrg   /* Find free vertex buffer slots. */
726af69d88dSmrg   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
727af69d88dSmrg      return FALSE;
728af69d88dSmrg   }
729af69d88dSmrg
730af69d88dSmrg   /* Initialize the translate keys. */
731af69d88dSmrg   for (i = 0; i < mgr->ve->count; i++) {
732af69d88dSmrg      struct translate_key *k;
733af69d88dSmrg      struct translate_element *te;
73401e04c3fSmrg      enum pipe_format output_format = mgr->ve->native_format[i];
735af69d88dSmrg      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
736af69d88dSmrg      bit = 1 << vb_index;
737af69d88dSmrg
738af69d88dSmrg      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
739af69d88dSmrg          !(incompatible_vb_mask & (1 << vb_index)) &&
740af69d88dSmrg          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
741af69d88dSmrg         continue;
742af69d88dSmrg      }
743af69d88dSmrg
744af69d88dSmrg      /* Set type to what we will translate.
745af69d88dSmrg       * Whether vertex, instance, or constant attribs. */
746af69d88dSmrg      for (type = 0; type < VB_NUM; type++) {
747af69d88dSmrg         if (mask[type] & bit) {
748af69d88dSmrg            break;
749af69d88dSmrg         }
750af69d88dSmrg      }
751af69d88dSmrg      assert(type < VB_NUM);
75201e04c3fSmrg      if (mgr->ve->ve[i].src_format != output_format)
75301e04c3fSmrg         assert(translate_is_output_format_supported(output_format));
754af69d88dSmrg      /*printf("velem=%i type=%i\n", i, type);*/
755af69d88dSmrg
756af69d88dSmrg      /* Add the vertex element. */
757af69d88dSmrg      k = &key[type];
758af69d88dSmrg      elem_index[type][i] = k->nr_elements;
759af69d88dSmrg
760af69d88dSmrg      te = &k->element[k->nr_elements];
761af69d88dSmrg      te->type = TRANSLATE_ELEMENT_NORMAL;
762af69d88dSmrg      te->instance_divisor = 0;
763af69d88dSmrg      te->input_buffer = vb_index;
764af69d88dSmrg      te->input_format = mgr->ve->ve[i].src_format;
765af69d88dSmrg      te->input_offset = mgr->ve->ve[i].src_offset;
76601e04c3fSmrg      te->output_format = output_format;
767af69d88dSmrg      te->output_offset = k->output_stride;
768af69d88dSmrg
769af69d88dSmrg      k->output_stride += mgr->ve->native_format_size[i];
770af69d88dSmrg      k->nr_elements++;
771af69d88dSmrg   }
772af69d88dSmrg
773af69d88dSmrg   /* Translate buffers. */
774af69d88dSmrg   for (type = 0; type < VB_NUM; type++) {
775af69d88dSmrg      if (key[type].nr_elements) {
776af69d88dSmrg         enum pipe_error err;
7777ec681f3Smrg         err = u_vbuf_translate_buffers(mgr, &key[type], info, draw,
7787ec681f3Smrg                                        mask[type], mgr->fallback_vbs[type],
77901e04c3fSmrg                                        start[type], num[type], min_index,
780af69d88dSmrg                                        unroll_indices && type == VB_VERTEX);
781af69d88dSmrg         if (err != PIPE_OK)
782af69d88dSmrg            return FALSE;
783af69d88dSmrg
784af69d88dSmrg         /* Fixup the stride for constant attribs. */
785af69d88dSmrg         if (type == VB_CONST) {
786af69d88dSmrg            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
787af69d88dSmrg         }
788af69d88dSmrg      }
789af69d88dSmrg   }
790af69d88dSmrg
791af69d88dSmrg   /* Setup new vertex elements. */
792af69d88dSmrg   for (i = 0; i < mgr->ve->count; i++) {
793af69d88dSmrg      for (type = 0; type < VB_NUM; type++) {
794af69d88dSmrg         if (elem_index[type][i] < key[type].nr_elements) {
795af69d88dSmrg            struct translate_element *te = &key[type].element[elem_index[type][i]];
7967ec681f3Smrg            mgr->fallback_velems.velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
7977ec681f3Smrg            mgr->fallback_velems.velems[i].src_format = te->output_format;
7987ec681f3Smrg            mgr->fallback_velems.velems[i].src_offset = te->output_offset;
7997ec681f3Smrg            mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type];
800af69d88dSmrg
801af69d88dSmrg            /* elem_index[type][i] can only be set for one type. */
80201e04c3fSmrg            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
80301e04c3fSmrg            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0u);
804af69d88dSmrg            break;
805af69d88dSmrg         }
806af69d88dSmrg      }
807af69d88dSmrg      /* No translating, just copy the original vertex element over. */
808af69d88dSmrg      if (type == VB_NUM) {
8097ec681f3Smrg         memcpy(&mgr->fallback_velems.velems[i], &mgr->ve->ve[i],
810af69d88dSmrg                sizeof(struct pipe_vertex_element));
811af69d88dSmrg      }
812af69d88dSmrg   }
813af69d88dSmrg
8147ec681f3Smrg   mgr->fallback_velems.count = mgr->ve->count;
8157ec681f3Smrg
8167ec681f3Smrg   u_vbuf_set_vertex_elements_internal(mgr, &mgr->fallback_velems);
817af69d88dSmrg   mgr->using_translate = TRUE;
818af69d88dSmrg   return TRUE;
819af69d88dSmrg}
820af69d88dSmrg
821af69d88dSmrgstatic void u_vbuf_translate_end(struct u_vbuf *mgr)
822af69d88dSmrg{
823af69d88dSmrg   unsigned i;
824af69d88dSmrg
825af69d88dSmrg   /* Restore vertex elements. */
826af69d88dSmrg   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
827af69d88dSmrg   mgr->using_translate = FALSE;
828af69d88dSmrg
829af69d88dSmrg   /* Unreference the now-unused VBOs. */
830af69d88dSmrg   for (i = 0; i < VB_NUM; i++) {
831af69d88dSmrg      unsigned vb = mgr->fallback_vbs[i];
83201e04c3fSmrg      if (vb != ~0u) {
83301e04c3fSmrg         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
834af69d88dSmrg         mgr->fallback_vbs[i] = ~0;
835af69d88dSmrg      }
836af69d88dSmrg   }
8377ec681f3Smrg   /* This will cause the buffer to be unbound in the driver later. */
8387ec681f3Smrg   mgr->dirty_real_vb_mask |= mgr->fallback_vbs_mask;
8397ec681f3Smrg   mgr->fallback_vbs_mask = 0;
840af69d88dSmrg}
841af69d88dSmrg
842af69d88dSmrgstatic void *
843af69d88dSmrgu_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
844af69d88dSmrg                              const struct pipe_vertex_element *attribs)
845af69d88dSmrg{
8467ec681f3Smrg   struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS];
8477ec681f3Smrg   util_lower_uint64_vertex_elements(&attribs, &count, tmp);
8487ec681f3Smrg
849af69d88dSmrg   struct pipe_context *pipe = mgr->pipe;
850af69d88dSmrg   unsigned i;
851af69d88dSmrg   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
852af69d88dSmrg   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
853af69d88dSmrg   uint32_t used_buffers = 0;
854af69d88dSmrg
855af69d88dSmrg   ve->count = count;
856af69d88dSmrg
857af69d88dSmrg   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
858af69d88dSmrg   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);
859af69d88dSmrg
860af69d88dSmrg   /* Set the best native format in case the original format is not
861af69d88dSmrg    * supported. */
862af69d88dSmrg   for (i = 0; i < count; i++) {
863af69d88dSmrg      enum pipe_format format = ve->ve[i].src_format;
8647ec681f3Smrg      unsigned vb_index_bit = 1 << ve->ve[i].vertex_buffer_index;
865af69d88dSmrg
866af69d88dSmrg      ve->src_format_size[i] = util_format_get_blocksize(format);
867af69d88dSmrg
8687ec681f3Smrg      if (used_buffers & vb_index_bit)
8697ec681f3Smrg         ve->interleaved_vb_mask |= vb_index_bit;
8707ec681f3Smrg
8717ec681f3Smrg      used_buffers |= vb_index_bit;
872af69d88dSmrg
873af69d88dSmrg      if (!ve->ve[i].instance_divisor) {
8747ec681f3Smrg         ve->noninstance_vb_mask_any |= vb_index_bit;
875af69d88dSmrg      }
876af69d88dSmrg
87701e04c3fSmrg      format = mgr->caps.format_translation[format];
878af69d88dSmrg
879af69d88dSmrg      driver_attribs[i].src_format = format;
880af69d88dSmrg      ve->native_format[i] = format;
881af69d88dSmrg      ve->native_format_size[i] =
882af69d88dSmrg            util_format_get_blocksize(ve->native_format[i]);
883af69d88dSmrg
884af69d88dSmrg      if (ve->ve[i].src_format != format ||
885af69d88dSmrg          (!mgr->caps.velem_src_offset_unaligned &&
886af69d88dSmrg           ve->ve[i].src_offset % 4 != 0)) {
887af69d88dSmrg         ve->incompatible_elem_mask |= 1 << i;
8887ec681f3Smrg         ve->incompatible_vb_mask_any |= vb_index_bit;
889af69d88dSmrg      } else {
8907ec681f3Smrg         ve->compatible_vb_mask_any |= vb_index_bit;
891af69d88dSmrg      }
892af69d88dSmrg   }
893af69d88dSmrg
8947ec681f3Smrg   if (used_buffers & ~mgr->allowed_vb_mask) {
8957ec681f3Smrg      /* More vertex buffers are used than the hardware supports.  In
8967ec681f3Smrg       * principle, we only need to make sure that less vertex buffers are
8977ec681f3Smrg       * used, and mark some of the latter vertex buffers as incompatible.
8987ec681f3Smrg       * For now, mark all vertex buffers as incompatible.
8997ec681f3Smrg       */
9007ec681f3Smrg      ve->incompatible_vb_mask_any = used_buffers;
9017ec681f3Smrg      ve->compatible_vb_mask_any = 0;
9027ec681f3Smrg      ve->incompatible_elem_mask = u_bit_consecutive(0, count);
9037ec681f3Smrg   }
9047ec681f3Smrg
905af69d88dSmrg   ve->used_vb_mask = used_buffers;
906af69d88dSmrg   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
907af69d88dSmrg   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
908af69d88dSmrg
90901e04c3fSmrg   /* Align the formats and offsets to the size of DWORD if needed. */
910af69d88dSmrg   if (!mgr->caps.velem_src_offset_unaligned) {
911af69d88dSmrg      for (i = 0; i < count; i++) {
912af69d88dSmrg         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
91301e04c3fSmrg         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
914af69d88dSmrg      }
915af69d88dSmrg   }
916af69d88dSmrg
9177ec681f3Smrg   /* Only create driver CSO if no incompatible elements */
9187ec681f3Smrg   if (!ve->incompatible_elem_mask) {
9197ec681f3Smrg      ve->driver_cso =
9207ec681f3Smrg         pipe->create_vertex_elements_state(pipe, count, driver_attribs);
9217ec681f3Smrg   }
9227ec681f3Smrg
923af69d88dSmrg   return ve;
924af69d88dSmrg}
925af69d88dSmrg
9267ec681f3Smrgstatic void u_vbuf_delete_vertex_elements(void *ctx, void *state,
9277ec681f3Smrg                                          enum cso_cache_type type)
928af69d88dSmrg{
9297ec681f3Smrg   struct pipe_context *pipe = (struct pipe_context*)ctx;
9307ec681f3Smrg   struct cso_velements *cso = (struct cso_velements*)state;
9317ec681f3Smrg   struct u_vbuf_elements *ve = (struct u_vbuf_elements*)cso->data;
932af69d88dSmrg
9337ec681f3Smrg   if (ve->driver_cso)
9347ec681f3Smrg      pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
935af69d88dSmrg   FREE(ve);
9367ec681f3Smrg   FREE(cso);
937af69d88dSmrg}
938af69d88dSmrg
939af69d88dSmrgvoid u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
940af69d88dSmrg                               unsigned start_slot, unsigned count,
9417ec681f3Smrg                               unsigned unbind_num_trailing_slots,
9427ec681f3Smrg                               bool take_ownership,
943af69d88dSmrg                               const struct pipe_vertex_buffer *bufs)
944af69d88dSmrg{
945af69d88dSmrg   unsigned i;
946af69d88dSmrg   /* which buffers are enabled */
947af69d88dSmrg   uint32_t enabled_vb_mask = 0;
948af69d88dSmrg   /* which buffers are in user memory */
949af69d88dSmrg   uint32_t user_vb_mask = 0;
950af69d88dSmrg   /* which buffers are incompatible with the driver */
951af69d88dSmrg   uint32_t incompatible_vb_mask = 0;
952af69d88dSmrg   /* which buffers have a non-zero stride */
953af69d88dSmrg   uint32_t nonzero_stride_vb_mask = 0;
9547ec681f3Smrg   const uint32_t mask =
9557ec681f3Smrg      ~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot);
956af69d88dSmrg
957af69d88dSmrg   /* Zero out the bits we are going to rewrite completely. */
958af69d88dSmrg   mgr->user_vb_mask &= mask;
959af69d88dSmrg   mgr->incompatible_vb_mask &= mask;
960af69d88dSmrg   mgr->nonzero_stride_vb_mask &= mask;
961af69d88dSmrg   mgr->enabled_vb_mask &= mask;
962af69d88dSmrg
963af69d88dSmrg   if (!bufs) {
964af69d88dSmrg      struct pipe_context *pipe = mgr->pipe;
965af69d88dSmrg      /* Unbind. */
9667ec681f3Smrg      unsigned total_count = count + unbind_num_trailing_slots;
967af69d88dSmrg      mgr->dirty_real_vb_mask &= mask;
968af69d88dSmrg
9697ec681f3Smrg      for (i = 0; i < total_count; i++) {
970af69d88dSmrg         unsigned dst_index = start_slot + i;
971af69d88dSmrg
97201e04c3fSmrg         pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
97301e04c3fSmrg         pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
974af69d88dSmrg      }
975af69d88dSmrg
9767ec681f3Smrg      pipe->set_vertex_buffers(pipe, start_slot, count,
9777ec681f3Smrg                               unbind_num_trailing_slots, false, NULL);
978af69d88dSmrg      return;
979af69d88dSmrg   }
980af69d88dSmrg
981af69d88dSmrg   for (i = 0; i < count; i++) {
982af69d88dSmrg      unsigned dst_index = start_slot + i;
983af69d88dSmrg      const struct pipe_vertex_buffer *vb = &bufs[i];
984af69d88dSmrg      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
985af69d88dSmrg      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];
986af69d88dSmrg
98701e04c3fSmrg      if (!vb->buffer.resource) {
98801e04c3fSmrg         pipe_vertex_buffer_unreference(orig_vb);
98901e04c3fSmrg         pipe_vertex_buffer_unreference(real_vb);
990af69d88dSmrg         continue;
991af69d88dSmrg      }
992af69d88dSmrg
9937ec681f3Smrg      if (take_ownership) {
9947ec681f3Smrg         pipe_vertex_buffer_unreference(orig_vb);
9957ec681f3Smrg         memcpy(orig_vb, vb, sizeof(*vb));
9967ec681f3Smrg      } else {
9977ec681f3Smrg         pipe_vertex_buffer_reference(orig_vb, vb);
9987ec681f3Smrg      }
999af69d88dSmrg
1000af69d88dSmrg      if (vb->stride) {
1001af69d88dSmrg         nonzero_stride_vb_mask |= 1 << dst_index;
1002af69d88dSmrg      }
1003af69d88dSmrg      enabled_vb_mask |= 1 << dst_index;
1004af69d88dSmrg
1005af69d88dSmrg      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
1006af69d88dSmrg          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
1007af69d88dSmrg         incompatible_vb_mask |= 1 << dst_index;
100801e04c3fSmrg         real_vb->buffer_offset = vb->buffer_offset;
100901e04c3fSmrg         real_vb->stride = vb->stride;
101001e04c3fSmrg         pipe_vertex_buffer_unreference(real_vb);
101101e04c3fSmrg         real_vb->is_user_buffer = false;
1012af69d88dSmrg         continue;
1013af69d88dSmrg      }
1014af69d88dSmrg
101501e04c3fSmrg      if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
1016af69d88dSmrg         user_vb_mask |= 1 << dst_index;
101701e04c3fSmrg         real_vb->buffer_offset = vb->buffer_offset;
101801e04c3fSmrg         real_vb->stride = vb->stride;
101901e04c3fSmrg         pipe_vertex_buffer_unreference(real_vb);
102001e04c3fSmrg         real_vb->is_user_buffer = false;
1021af69d88dSmrg         continue;
1022af69d88dSmrg      }
1023af69d88dSmrg
102401e04c3fSmrg      pipe_vertex_buffer_reference(real_vb, vb);
1025af69d88dSmrg   }
1026af69d88dSmrg
10277ec681f3Smrg   for (i = 0; i < unbind_num_trailing_slots; i++) {
10287ec681f3Smrg      unsigned dst_index = start_slot + count + i;
10297ec681f3Smrg
10307ec681f3Smrg      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
10317ec681f3Smrg      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
10327ec681f3Smrg   }
10337ec681f3Smrg
1034af69d88dSmrg   mgr->user_vb_mask |= user_vb_mask;
1035af69d88dSmrg   mgr->incompatible_vb_mask |= incompatible_vb_mask;
1036af69d88dSmrg   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
1037af69d88dSmrg   mgr->enabled_vb_mask |= enabled_vb_mask;
1038af69d88dSmrg
1039af69d88dSmrg   /* All changed buffers are marked as dirty, even the NULL ones,
1040af69d88dSmrg    * which will cause the NULL buffers to be unbound in the driver later. */
1041af69d88dSmrg   mgr->dirty_real_vb_mask |= ~mask;
1042af69d88dSmrg}
1043af69d88dSmrg
10447ec681f3Smrgstatic ALWAYS_INLINE bool
10457ec681f3Smrgget_upload_offset_size(struct u_vbuf *mgr,
10467ec681f3Smrg                       const struct pipe_vertex_buffer *vb,
10477ec681f3Smrg                       struct u_vbuf_elements *ve,
10487ec681f3Smrg                       const struct pipe_vertex_element *velem,
10497ec681f3Smrg                       unsigned vb_index, unsigned velem_index,
10507ec681f3Smrg                       int start_vertex, unsigned num_vertices,
10517ec681f3Smrg                       int start_instance, unsigned num_instances,
10527ec681f3Smrg                       unsigned *offset, unsigned *size)
10537ec681f3Smrg{
10547ec681f3Smrg   /* Skip the buffers generated by translate. */
10557ec681f3Smrg   if ((1 << vb_index) & mgr->fallback_vbs_mask || !vb->is_user_buffer)
10567ec681f3Smrg      return false;
10577ec681f3Smrg
10587ec681f3Smrg   unsigned instance_div = velem->instance_divisor;
10597ec681f3Smrg   *offset = vb->buffer_offset + velem->src_offset;
10607ec681f3Smrg
10617ec681f3Smrg   if (!vb->stride) {
10627ec681f3Smrg      /* Constant attrib. */
10637ec681f3Smrg      *size = ve->src_format_size[velem_index];
10647ec681f3Smrg   } else if (instance_div) {
10657ec681f3Smrg      /* Per-instance attrib. */
10667ec681f3Smrg
10677ec681f3Smrg      /* Figure out how many instances we'll render given instance_div.  We
10687ec681f3Smrg       * can't use the typical div_round_up() pattern because the CTS uses
10697ec681f3Smrg       * instance_div = ~0 for a test, which overflows div_round_up()'s
10707ec681f3Smrg       * addition.
10717ec681f3Smrg       */
10727ec681f3Smrg      unsigned count = num_instances / instance_div;
10737ec681f3Smrg      if (count * instance_div != num_instances)
10747ec681f3Smrg         count++;
10757ec681f3Smrg
10767ec681f3Smrg      *offset += vb->stride * start_instance;
10777ec681f3Smrg      *size = vb->stride * (count - 1) + ve->src_format_size[velem_index];
10787ec681f3Smrg   } else {
10797ec681f3Smrg      /* Per-vertex attrib. */
10807ec681f3Smrg      *offset += vb->stride * start_vertex;
10817ec681f3Smrg      *size = vb->stride * (num_vertices - 1) + ve->src_format_size[velem_index];
10827ec681f3Smrg   }
10837ec681f3Smrg   return true;
10847ec681f3Smrg}
10857ec681f3Smrg
10867ec681f3Smrg
1087af69d88dSmrgstatic enum pipe_error
1088af69d88dSmrgu_vbuf_upload_buffers(struct u_vbuf *mgr,
1089af69d88dSmrg                      int start_vertex, unsigned num_vertices,
1090af69d88dSmrg                      int start_instance, unsigned num_instances)
1091af69d88dSmrg{
1092af69d88dSmrg   unsigned i;
10937ec681f3Smrg   struct u_vbuf_elements *ve = mgr->ve;
10947ec681f3Smrg   unsigned nr_velems = ve->count;
109501e04c3fSmrg   const struct pipe_vertex_element *velems =
10967ec681f3Smrg         mgr->using_translate ? mgr->fallback_velems.velems : ve->ve;
10977ec681f3Smrg
10987ec681f3Smrg   /* Faster path when no vertex attribs are interleaved. */
10997ec681f3Smrg   if ((ve->interleaved_vb_mask & mgr->user_vb_mask) == 0) {
11007ec681f3Smrg      for (i = 0; i < nr_velems; i++) {
11017ec681f3Smrg         const struct pipe_vertex_element *velem = &velems[i];
11027ec681f3Smrg         unsigned index = velem->vertex_buffer_index;
11037ec681f3Smrg         struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
11047ec681f3Smrg         unsigned offset, size;
11057ec681f3Smrg
11067ec681f3Smrg         if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex,
11077ec681f3Smrg                                     num_vertices, start_instance, num_instances,
11087ec681f3Smrg                                     &offset, &size))
11097ec681f3Smrg            continue;
11107ec681f3Smrg
11117ec681f3Smrg         struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[index];
11127ec681f3Smrg         const uint8_t *ptr = mgr->vertex_buffer[index].buffer.user;
11137ec681f3Smrg
11147ec681f3Smrg         u_upload_data(mgr->pipe->stream_uploader,
11157ec681f3Smrg                       mgr->has_signed_vb_offset ? 0 : offset,
11167ec681f3Smrg                       size, 4, ptr + offset, &real_vb->buffer_offset,
11177ec681f3Smrg                       &real_vb->buffer.resource);
11187ec681f3Smrg         if (!real_vb->buffer.resource)
11197ec681f3Smrg            return PIPE_ERROR_OUT_OF_MEMORY;
11207ec681f3Smrg
11217ec681f3Smrg         real_vb->buffer_offset -= offset;
11227ec681f3Smrg      }
11237ec681f3Smrg      return PIPE_OK;
11247ec681f3Smrg   }
11257ec681f3Smrg
1126af69d88dSmrg   unsigned start_offset[PIPE_MAX_ATTRIBS];
1127af69d88dSmrg   unsigned end_offset[PIPE_MAX_ATTRIBS];
1128af69d88dSmrg   uint32_t buffer_mask = 0;
1129af69d88dSmrg
11307ec681f3Smrg   /* Slower path supporting interleaved vertex attribs using 2 loops. */
1131af69d88dSmrg   /* Determine how much data needs to be uploaded. */
1132af69d88dSmrg   for (i = 0; i < nr_velems; i++) {
113301e04c3fSmrg      const struct pipe_vertex_element *velem = &velems[i];
1134af69d88dSmrg      unsigned index = velem->vertex_buffer_index;
1135af69d88dSmrg      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
11367ec681f3Smrg      unsigned first, size, index_bit;
1137af69d88dSmrg
11387ec681f3Smrg      if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex,
11397ec681f3Smrg                                  num_vertices, start_instance, num_instances,
11407ec681f3Smrg                                  &first, &size))
1141af69d88dSmrg         continue;
1142af69d88dSmrg
1143af69d88dSmrg      index_bit = 1 << index;
1144af69d88dSmrg
1145af69d88dSmrg      /* Update offsets. */
1146af69d88dSmrg      if (!(buffer_mask & index_bit)) {
1147af69d88dSmrg         start_offset[index] = first;
1148af69d88dSmrg         end_offset[index] = first + size;
1149af69d88dSmrg      } else {
1150af69d88dSmrg         if (first < start_offset[index])
1151af69d88dSmrg            start_offset[index] = first;
1152af69d88dSmrg         if (first + size > end_offset[index])
1153af69d88dSmrg            end_offset[index] = first + size;
1154af69d88dSmrg      }
1155af69d88dSmrg
1156af69d88dSmrg      buffer_mask |= index_bit;
1157af69d88dSmrg   }
1158af69d88dSmrg
1159af69d88dSmrg   /* Upload buffers. */
1160af69d88dSmrg   while (buffer_mask) {
1161af69d88dSmrg      unsigned start, end;
1162af69d88dSmrg      struct pipe_vertex_buffer *real_vb;
1163af69d88dSmrg      const uint8_t *ptr;
1164af69d88dSmrg
1165af69d88dSmrg      i = u_bit_scan(&buffer_mask);
1166af69d88dSmrg
1167af69d88dSmrg      start = start_offset[i];
1168af69d88dSmrg      end = end_offset[i];
1169af69d88dSmrg      assert(start < end);
1170af69d88dSmrg
1171af69d88dSmrg      real_vb = &mgr->real_vertex_buffer[i];
117201e04c3fSmrg      ptr = mgr->vertex_buffer[i].buffer.user;
1173af69d88dSmrg
117401e04c3fSmrg      u_upload_data(mgr->pipe->stream_uploader,
117501e04c3fSmrg                    mgr->has_signed_vb_offset ? 0 : start,
117601e04c3fSmrg                    end - start, 4,
117701e04c3fSmrg                    ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource);
117801e04c3fSmrg      if (!real_vb->buffer.resource)
117901e04c3fSmrg         return PIPE_ERROR_OUT_OF_MEMORY;
1180af69d88dSmrg
1181af69d88dSmrg      real_vb->buffer_offset -= start;
1182af69d88dSmrg   }
1183af69d88dSmrg
1184af69d88dSmrg   return PIPE_OK;
1185af69d88dSmrg}
1186af69d88dSmrg
118701e04c3fSmrgstatic boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
1188af69d88dSmrg{
1189af69d88dSmrg   /* See if there are any per-vertex attribs which will be uploaded or
1190af69d88dSmrg    * translated. Use bitmasks to get the info instead of looping over vertex
1191af69d88dSmrg    * elements. */
1192af69d88dSmrg   return (mgr->ve->used_vb_mask &
119301e04c3fSmrg           ((mgr->user_vb_mask |
119401e04c3fSmrg             mgr->incompatible_vb_mask |
1195af69d88dSmrg             mgr->ve->incompatible_vb_mask_any) &
119601e04c3fSmrg            mgr->ve->noninstance_vb_mask_any &
119701e04c3fSmrg            mgr->nonzero_stride_vb_mask)) != 0;
1198af69d88dSmrg}
1199af69d88dSmrg
120001e04c3fSmrgstatic boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
1201af69d88dSmrg{
1202af69d88dSmrg   /* Return true if there are hw buffers which don't need to be translated.
1203af69d88dSmrg    *
1204af69d88dSmrg    * We could query whether each buffer is busy, but that would
1205af69d88dSmrg    * be way more costly than this. */
1206af69d88dSmrg   return (mgr->ve->used_vb_mask &
120701e04c3fSmrg           (~mgr->user_vb_mask &
120801e04c3fSmrg            ~mgr->incompatible_vb_mask &
120901e04c3fSmrg            mgr->ve->compatible_vb_mask_all &
121001e04c3fSmrg            mgr->ve->noninstance_vb_mask_any &
1211af69d88dSmrg            mgr->nonzero_stride_vb_mask)) != 0;
1212af69d88dSmrg}
1213af69d88dSmrg
121401e04c3fSmrgstatic void
121501e04c3fSmrgu_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info,
12167ec681f3Smrg                               unsigned count,
121701e04c3fSmrg                               const void *indices, unsigned *out_min_index,
121801e04c3fSmrg                               unsigned *out_max_index)
1219af69d88dSmrg{
12207ec681f3Smrg   if (!count) {
12217ec681f3Smrg      *out_min_index = 0;
12227ec681f3Smrg      *out_max_index = 0;
12237ec681f3Smrg      return;
12247ec681f3Smrg   }
1225af69d88dSmrg
122601e04c3fSmrg   switch (info->index_size) {
1227af69d88dSmrg   case 4: {
1228af69d88dSmrg      const unsigned *ui_indices = (const unsigned*)indices;
12297ec681f3Smrg      unsigned max = 0;
12307ec681f3Smrg      unsigned min = ~0u;
123101e04c3fSmrg      if (info->primitive_restart) {
12327ec681f3Smrg         for (unsigned i = 0; i < count; i++) {
123301e04c3fSmrg            if (ui_indices[i] != info->restart_index) {
123401e04c3fSmrg               if (ui_indices[i] > max) max = ui_indices[i];
123501e04c3fSmrg               if (ui_indices[i] < min) min = ui_indices[i];
1236af69d88dSmrg            }
1237af69d88dSmrg         }
1238af69d88dSmrg      }
1239af69d88dSmrg      else {
12407ec681f3Smrg         for (unsigned i = 0; i < count; i++) {
124101e04c3fSmrg            if (ui_indices[i] > max) max = ui_indices[i];
124201e04c3fSmrg            if (ui_indices[i] < min) min = ui_indices[i];
1243af69d88dSmrg         }
1244af69d88dSmrg      }
12457ec681f3Smrg      *out_min_index = min;
12467ec681f3Smrg      *out_max_index = max;
1247af69d88dSmrg      break;
1248af69d88dSmrg   }
1249af69d88dSmrg   case 2: {
1250af69d88dSmrg      const unsigned short *us_indices = (const unsigned short*)indices;
12517ec681f3Smrg      unsigned short max = 0;
12527ec681f3Smrg      unsigned short min = ~((unsigned short)0);
125301e04c3fSmrg      if (info->primitive_restart) {
12547ec681f3Smrg         for (unsigned i = 0; i < count; i++) {
125501e04c3fSmrg            if (us_indices[i] != info->restart_index) {
125601e04c3fSmrg               if (us_indices[i] > max) max = us_indices[i];
125701e04c3fSmrg               if (us_indices[i] < min) min = us_indices[i];
1258af69d88dSmrg            }
1259af69d88dSmrg         }
1260af69d88dSmrg      }
1261af69d88dSmrg      else {
12627ec681f3Smrg         for (unsigned i = 0; i < count; i++) {
126301e04c3fSmrg            if (us_indices[i] > max) max = us_indices[i];
126401e04c3fSmrg            if (us_indices[i] < min) min = us_indices[i];
1265af69d88dSmrg         }
1266af69d88dSmrg      }
12677ec681f3Smrg      *out_min_index = min;
12687ec681f3Smrg      *out_max_index = max;
1269af69d88dSmrg      break;
1270af69d88dSmrg   }
1271af69d88dSmrg   case 1: {
1272af69d88dSmrg      const unsigned char *ub_indices = (const unsigned char*)indices;
12737ec681f3Smrg      unsigned char max = 0;
12747ec681f3Smrg      unsigned char min = ~((unsigned char)0);
127501e04c3fSmrg      if (info->primitive_restart) {
12767ec681f3Smrg         for (unsigned i = 0; i < count; i++) {
127701e04c3fSmrg            if (ub_indices[i] != info->restart_index) {
127801e04c3fSmrg               if (ub_indices[i] > max) max = ub_indices[i];
127901e04c3fSmrg               if (ub_indices[i] < min) min = ub_indices[i];
1280af69d88dSmrg            }
1281af69d88dSmrg         }
1282af69d88dSmrg      }
1283af69d88dSmrg      else {
12847ec681f3Smrg         for (unsigned i = 0; i < count; i++) {
128501e04c3fSmrg            if (ub_indices[i] > max) max = ub_indices[i];
128601e04c3fSmrg            if (ub_indices[i] < min) min = ub_indices[i];
1287af69d88dSmrg         }
1288af69d88dSmrg      }
12897ec681f3Smrg      *out_min_index = min;
12907ec681f3Smrg      *out_max_index = max;
1291af69d88dSmrg      break;
1292af69d88dSmrg   }
1293af69d88dSmrg   default:
12947ec681f3Smrg      unreachable("bad index size");
1295af69d88dSmrg   }
129601e04c3fSmrg}
129701e04c3fSmrg
1298361fc4cbSmayavoid u_vbuf_get_minmax_index(struct pipe_context *pipe,
1299361fc4cbSmaya                             const struct pipe_draw_info *info,
13007ec681f3Smrg                             const struct pipe_draw_start_count_bias *draw,
1301361fc4cbSmaya                             unsigned *out_min_index, unsigned *out_max_index)
130201e04c3fSmrg{
130301e04c3fSmrg   struct pipe_transfer *transfer = NULL;
130401e04c3fSmrg   const void *indices;
130501e04c3fSmrg
130601e04c3fSmrg   if (info->has_user_indices) {
130701e04c3fSmrg      indices = (uint8_t*)info->index.user +
13087ec681f3Smrg                draw->start * info->index_size;
130901e04c3fSmrg   } else {
131001e04c3fSmrg      indices = pipe_buffer_map_range(pipe, info->index.resource,
13117ec681f3Smrg                                      draw->start * info->index_size,
13127ec681f3Smrg                                      draw->count * info->index_size,
13137ec681f3Smrg                                      PIPE_MAP_READ, &transfer);
131401e04c3fSmrg   }
131501e04c3fSmrg
13167ec681f3Smrg   u_vbuf_get_minmax_index_mapped(info, draw->count, indices,
13177ec681f3Smrg                                  out_min_index, out_max_index);
131801e04c3fSmrg
1319af69d88dSmrg   if (transfer) {
1320af69d88dSmrg      pipe_buffer_unmap(pipe, transfer);
1321af69d88dSmrg   }
1322af69d88dSmrg}
1323af69d88dSmrg
1324af69d88dSmrgstatic void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
1325af69d88dSmrg{
1326af69d88dSmrg   struct pipe_context *pipe = mgr->pipe;
1327af69d88dSmrg   unsigned start_slot, count;
1328af69d88dSmrg
1329af69d88dSmrg   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
1330af69d88dSmrg   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);
1331af69d88dSmrg
13327ec681f3Smrg   if (mgr->dirty_real_vb_mask == mgr->enabled_vb_mask &&
13337ec681f3Smrg       mgr->dirty_real_vb_mask == mgr->user_vb_mask) {
13347ec681f3Smrg      /* Fast path that allows us to transfer the VBO references to the driver
13357ec681f3Smrg       * to skip atomic reference counting there. These are freshly uploaded
13367ec681f3Smrg       * user buffers that can be discarded after this call.
13377ec681f3Smrg       */
13387ec681f3Smrg      pipe->set_vertex_buffers(pipe, start_slot, count, 0, true,
13397ec681f3Smrg                               mgr->real_vertex_buffer + start_slot);
13407ec681f3Smrg
13417ec681f3Smrg      /* We don't own the VBO references now. Set them to NULL. */
13427ec681f3Smrg      for (unsigned i = 0; i < count; i++) {
13437ec681f3Smrg         assert(!mgr->real_vertex_buffer[start_slot + i].is_user_buffer);
13447ec681f3Smrg         mgr->real_vertex_buffer[start_slot + i].buffer.resource = NULL;
13457ec681f3Smrg      }
13467ec681f3Smrg   } else {
13477ec681f3Smrg      /* Slow path where we have to keep VBO references. */
13487ec681f3Smrg      pipe->set_vertex_buffers(pipe, start_slot, count, 0, false,
13497ec681f3Smrg                               mgr->real_vertex_buffer + start_slot);
13507ec681f3Smrg   }
1351af69d88dSmrg   mgr->dirty_real_vb_mask = 0;
1352af69d88dSmrg}
1353af69d88dSmrg
135401e04c3fSmrgstatic void
135501e04c3fSmrgu_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
13567ec681f3Smrg                               unsigned drawid_offset,
135701e04c3fSmrg                               unsigned *indirect_data, unsigned stride,
135801e04c3fSmrg                               unsigned draw_count)
135901e04c3fSmrg{
13607ec681f3Smrg   /* Increase refcount to be able to use take_index_buffer_ownership with
13617ec681f3Smrg    * all draws.
13627ec681f3Smrg    */
13637ec681f3Smrg   if (draw_count > 1 && info->take_index_buffer_ownership)
13647ec681f3Smrg      p_atomic_add(&info->index.resource->reference.count, draw_count - 1);
13657ec681f3Smrg
136601e04c3fSmrg   assert(info->index_size);
136701e04c3fSmrg
136801e04c3fSmrg   for (unsigned i = 0; i < draw_count; i++) {
13697ec681f3Smrg      struct pipe_draw_start_count_bias draw;
137001e04c3fSmrg      unsigned offset = i * stride / 4;
137101e04c3fSmrg
13727ec681f3Smrg      draw.count = indirect_data[offset + 0];
137301e04c3fSmrg      info->instance_count = indirect_data[offset + 1];
13747ec681f3Smrg      draw.start = indirect_data[offset + 2];
13757ec681f3Smrg      draw.index_bias = indirect_data[offset + 3];
137601e04c3fSmrg      info->start_instance = indirect_data[offset + 4];
137701e04c3fSmrg
13787ec681f3Smrg      u_vbuf_draw_vbo(mgr, info, drawid_offset, NULL, draw);
137901e04c3fSmrg   }
138001e04c3fSmrg}
138101e04c3fSmrg
/* Main draw entry point: uploads user vertex buffers, translates vertex
 * layouts/formats and index/primitive configurations the driver cannot
 * consume, then forwards the (possibly rewritten) draw to the driver or
 * to primconvert.
 */
void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias draw)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex;
   unsigned min_index;
   unsigned num_vertices;
   boolean unroll_indices = FALSE;
   /* Only buffers referenced by the bound vertex elements matter. */
   const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
   const uint32_t incompatible_vb_mask =
      mgr->incompatible_vb_mask & used_vb_mask;
   struct pipe_draw_info new_info;
   struct pipe_draw_start_count_bias new_draw;
   /* The canonical restart index for this index size (e.g. all-ones);
    * drivers that only support this value need restart-index rewriting. */
   unsigned fixed_restart_index = info->index_size ? util_prim_restart_index_from_size(info->index_size) : 0;

   /* Normal draw. No fallback and no user buffers. */
   if (!incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask &&
       (info->index_size != 1 || !mgr->caps.rewrite_ubyte_ibs) &&
       (!info->primitive_restart ||
        info->restart_index == fixed_restart_index ||
        !mgr->caps.rewrite_restart_index) &&
       (!info->primitive_restart || mgr->caps.supported_restart_modes & BITFIELD_BIT(info->mode)) &&
       mgr->caps.supported_prim_modes & BITFIELD_BIT(info->mode)) {

      /* Set vertex buffers if needed. */
      if (mgr->dirty_real_vb_mask & used_vb_mask) {
         u_vbuf_set_driver_vertex_buffers(mgr);
      }

      pipe->draw_vbo(pipe, info, drawid_offset, indirect, &draw, 1);
      return;
   }

   /* Work on mutable copies; the caller's info/draw stay untouched. */
   new_info = *info;
   new_draw = draw;

   /* Handle indirect (multi)draws. */
   if (indirect && indirect->buffer) {
      unsigned draw_count = 0;

      /* Get the number of draws. */
      if (indirect->indirect_draw_count) {
         pipe_buffer_read(pipe, indirect->indirect_draw_count,
                          indirect->indirect_draw_count_offset,
                          4, &draw_count);
      } else {
         draw_count = indirect->draw_count;
      }

      if (!draw_count)
         goto cleanup;

      /* Last command may be tightly packed: stride bytes per command
       * except the final one, which is 5 dwords (indexed) or 4 dwords. */
      unsigned data_size = (draw_count - 1) * indirect->stride +
                           (new_info.index_size ? 20 : 16);
      unsigned *data = malloc(data_size);
      if (!data)
         goto cleanup; /* report an error? */

      /* Read the used buffer range only once, because the read can be
       * uncached.
       */
      pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
                       data);

      if (info->index_size) {
         /* Indexed multidraw. */
         unsigned index_bias0 = data[3];
         bool index_bias_same = true;

         /* If we invoke the translate path, we have to split the multidraw. */
         if (incompatible_vb_mask ||
             mgr->ve->incompatible_elem_mask) {
            u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data,
                                           indirect->stride, draw_count);
            free(data);
            return;
         }

         /* See if index_bias is the same for all draws. */
         for (unsigned i = 1; i < draw_count; i++) {
            if (data[i * indirect->stride / 4 + 3] != index_bias0) {
               index_bias_same = false;
               break;
            }
         }

         /* Split the multidraw if index_bias is different. */
         if (!index_bias_same) {
            u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data,
                                           indirect->stride, draw_count);
            free(data);
            return;
         }

         /* If we don't need to use the translate path and index_bias is
          * the same, we can process the multidraw with the time complexity
          * equal to 1 draw call (except for the index range computation).
          * We only need to compute the index range covering all draw calls
          * of the multidraw.
          *
          * The driver will not look at these values because indirect != NULL.
          * These values determine the user buffer bounds to upload.
          */
         new_draw.index_bias = index_bias0;
         new_info.index_bounds_valid = true;
         new_info.min_index = ~0u;
         new_info.max_index = 0;
         new_info.start_instance = ~0u;
         unsigned end_instance = 0;

         struct pipe_transfer *transfer = NULL;
         const uint8_t *indices;

         if (info->has_user_indices) {
            indices = (uint8_t*)info->index.user;
         } else {
            indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
                                                PIPE_MAP_READ, &transfer);
         }

         /* Accumulate index and instance ranges over all sub-draws. */
         for (unsigned i = 0; i < draw_count; i++) {
            unsigned offset = i * indirect->stride / 4;
            unsigned start = data[offset + 2];
            unsigned count = data[offset + 0];
            unsigned start_instance = data[offset + 4];
            unsigned instance_count = data[offset + 1];

            /* Empty sub-draws contribute nothing to the bounds. */
            if (!count || !instance_count)
               continue;

            /* Update the ranges of instances. */
            new_info.start_instance = MIN2(new_info.start_instance,
                                           start_instance);
            end_instance = MAX2(end_instance, start_instance + instance_count);

            /* Update the index range. */
            unsigned min, max;
            u_vbuf_get_minmax_index_mapped(&new_info, count,
                                           indices +
                                           new_info.index_size * start,
                                           &min, &max);

            new_info.min_index = MIN2(new_info.min_index, min);
            new_info.max_index = MAX2(new_info.max_index, max);
         }
         free(data);

         if (transfer)
            pipe_buffer_unmap(pipe, transfer);

         /* Set the final instance count. */
         new_info.instance_count = end_instance - new_info.start_instance;

         /* start_instance == ~0u means every sub-draw was empty. */
         if (new_info.start_instance == ~0u || !new_info.instance_count)
            goto cleanup;
      } else {
         /* Non-indexed multidraw.
          *
          * Keep the draw call indirect and compute minimums & maximums,
          * which will determine the user buffer bounds to upload, but
          * the driver will not look at these values because indirect != NULL.
          *
          * This efficiently processes the multidraw with the time complexity
          * equal to 1 draw call.
          */
         new_draw.start = ~0u;
         new_info.start_instance = ~0u;
         unsigned end_vertex = 0;
         unsigned end_instance = 0;

         for (unsigned i = 0; i < draw_count; i++) {
            /* Non-indexed indirect command layout (dwords):
             * [0] count, [1] instance_count, [2] start, [3] start_instance. */
            unsigned offset = i * indirect->stride / 4;
            unsigned start = data[offset + 2];
            unsigned count = data[offset + 0];
            unsigned start_instance = data[offset + 3];
            unsigned instance_count = data[offset + 1];

            new_draw.start = MIN2(new_draw.start, start);
            new_info.start_instance = MIN2(new_info.start_instance,
                                           start_instance);

            end_vertex = MAX2(end_vertex, start + count);
            end_instance = MAX2(end_instance, start_instance + instance_count);
         }
         free(data);

         /* Set the final counts. */
         new_draw.count = end_vertex - new_draw.start;
         new_info.instance_count = end_instance - new_info.start_instance;

         if (new_draw.start == ~0u || !new_draw.count || !new_info.instance_count)
            goto cleanup;
      }
   } else {
      /* Direct draw: nothing to do for empty draws. */
      if ((!indirect && !new_draw.count) || !new_info.instance_count)
         goto cleanup;
   }

   if (new_info.index_size) {
      /* See if anything needs to be done for per-vertex attribs. */
      if (u_vbuf_need_minmax_index(mgr)) {
         unsigned max_index;

         if (new_info.index_bounds_valid) {
            min_index = new_info.min_index;
            max_index = new_info.max_index;
         } else {
            /* Scan the index buffer to find the referenced vertex range. */
            u_vbuf_get_minmax_index(mgr->pipe, &new_info, &new_draw,
                                    &min_index, &max_index);
         }

         assert(min_index <= max_index);

         start_vertex = min_index + new_draw.index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to break this drawing operation into several ones. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
         if (!indirect &&
             !new_info.primitive_restart &&
             util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) &&
             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
            unroll_indices = TRUE;
            /* Unrolled per-vertex buffers are produced by translate, so
             * they no longer count as user uploads. */
            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                              mgr->ve->noninstance_vb_mask_any);
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      /* Non-indexed: the draw parameters directly give the vertex range. */
      start_vertex = new_draw.start;
      num_vertices = new_draw.count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       incompatible_vb_mask ||
       mgr->ve->incompatible_elem_mask) {
      if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw,
                                  start_vertex, num_vertices,
                                  min_index, unroll_indices)) {
         debug_warn_once("u_vbuf_translate_begin() failed");
         goto cleanup;
      }

      if (unroll_indices) {
         /* The draw is now non-indexed over the unrolled vertices. */
         new_info.index_size = 0;
         new_draw.index_bias = 0;
         new_info.index_bounds_valid = true;
         new_info.min_index = 0;
         new_info.max_index = new_draw.count - 1;
         new_draw.start = 0;
      }

      /* Translated buffers are no longer user uploads. */
      user_vb_mask &= ~(incompatible_vb_mask |
                        mgr->ve->incompatible_vb_mask_all);
   }

   /* Upload user buffers. */
   if (user_vb_mask) {
      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                new_info.start_instance,
                                new_info.instance_count) != PIPE_OK) {
         debug_warn_once("u_vbuf_upload_buffers() failed");
         goto cleanup;
      }

      mgr->dirty_real_vb_mask |= user_vb_mask;
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
      printf("\n");
   }
   */

   u_upload_unmap(pipe->stream_uploader);
   if (mgr->dirty_real_vb_mask)
      u_vbuf_set_driver_vertex_buffers(mgr);

   /* Route through primconvert if the index size, restart configuration,
    * or primitive mode is still unsupported by the driver. */
   if ((new_info.index_size == 1 && mgr->caps.rewrite_ubyte_ibs) ||
       (new_info.primitive_restart &&
        ((new_info.restart_index != fixed_restart_index && mgr->caps.rewrite_restart_index) ||
        !(mgr->caps.supported_restart_modes & BITFIELD_BIT(new_info.mode)))) ||
       !(mgr->caps.supported_prim_modes & BITFIELD_BIT(new_info.mode))) {
      util_primconvert_save_flatshade_first(mgr->pc, mgr->flatshade_first);
      util_primconvert_draw_vbo(mgr->pc, &new_info, drawid_offset, indirect, &new_draw, 1);
   } else
      pipe->draw_vbo(pipe, &new_info, drawid_offset, indirect, &new_draw, 1);

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
   return;

cleanup:
   /* The draw was skipped; still honor ownership of the index buffer by
    * dropping the reference the caller handed to us. */
   if (info->take_index_buffer_ownership) {
      struct pipe_resource *indexbuf = info->index.resource;
      pipe_resource_reference(&indexbuf, NULL);
   }
}
1709af69d88dSmrg
1710af69d88dSmrgvoid u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
1711af69d88dSmrg{
1712af69d88dSmrg   assert(!mgr->ve_saved);
1713af69d88dSmrg   mgr->ve_saved = mgr->ve;
1714af69d88dSmrg}
1715af69d88dSmrg
1716af69d88dSmrgvoid u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
1717af69d88dSmrg{
1718af69d88dSmrg   if (mgr->ve != mgr->ve_saved) {
1719af69d88dSmrg      struct pipe_context *pipe = mgr->pipe;
1720af69d88dSmrg
1721af69d88dSmrg      mgr->ve = mgr->ve_saved;
1722af69d88dSmrg      pipe->bind_vertex_elements_state(pipe,
1723af69d88dSmrg                                       mgr->ve ? mgr->ve->driver_cso : NULL);
1724af69d88dSmrg   }
1725af69d88dSmrg   mgr->ve_saved = NULL;
1726af69d88dSmrg}
1727