1848b8605Smrg/**************************************************************************
2848b8605Smrg *
3848b8605Smrg * Copyright 2011 Marek Olšák <maraeo@gmail.com>
4848b8605Smrg * All Rights Reserved.
5848b8605Smrg *
6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7848b8605Smrg * copy of this software and associated documentation files (the
8848b8605Smrg * "Software"), to deal in the Software without restriction, including
9848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish,
10848b8605Smrg * distribute, sub license, and/or sell copies of the Software, and to
11848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to
12848b8605Smrg * the following conditions:
13848b8605Smrg *
14848b8605Smrg * The above copyright notice and this permission notice (including the
15848b8605Smrg * next paragraph) shall be included in all copies or substantial portions
16848b8605Smrg * of the Software.
17848b8605Smrg *
18848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21848b8605Smrg * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
22848b8605Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23848b8605Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24848b8605Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25848b8605Smrg *
26848b8605Smrg **************************************************************************/
27848b8605Smrg
28848b8605Smrg/**
29848b8605Smrg * This module uploads user buffers and translates the vertex buffers which
30848b8605Smrg * contain incompatible vertices (i.e. not supported by the driver/hardware)
31848b8605Smrg * into compatible ones, based on the Gallium CAPs.
32848b8605Smrg *
33848b8605Smrg * It does not upload index buffers.
34848b8605Smrg *
35848b8605Smrg * The module heavily uses bitmasks to represent per-buffer and
36848b8605Smrg * per-vertex-element flags to avoid looping over the list of buffers just
37848b8605Smrg * to see if there's a non-zero stride, or user buffer, or unsupported format,
38848b8605Smrg * etc.
39848b8605Smrg *
40848b8605Smrg * There are 3 categories of vertex elements, which are processed separately:
41848b8605Smrg * - per-vertex attribs (stride != 0, instance_divisor == 0)
42848b8605Smrg * - instanced attribs (stride != 0, instance_divisor > 0)
43848b8605Smrg * - constant attribs (stride == 0)
44848b8605Smrg *
45848b8605Smrg * All needed uploads and translations are performed every draw command, but
46848b8605Smrg * only the subset of vertices needed for that draw command is uploaded or
47848b8605Smrg * translated. (the module never translates whole buffers)
48848b8605Smrg *
49848b8605Smrg *
50848b8605Smrg * The module consists of two main parts:
51848b8605Smrg *
52848b8605Smrg *
53848b8605Smrg * 1) Translate (u_vbuf_translate_begin/end)
54848b8605Smrg *
55848b8605Smrg * This is pretty much a vertex fetch fallback. It translates vertices from
56848b8605Smrg * one vertex buffer to another in an unused vertex buffer slot. It does
57848b8605Smrg * whatever is needed to make the vertices readable by the hardware (changes
58848b8605Smrg * vertex formats and aligns offsets and strides). The translate module is
59848b8605Smrg * used here.
60848b8605Smrg *
61848b8605Smrg * Each of the 3 categories is translated to a separate buffer.
62848b8605Smrg * Only the [min_index, max_index] range is translated. For instanced attribs,
63848b8605Smrg * the range is [start_instance, start_instance+instance_count]. For constant
64848b8605Smrg * attribs, the range is [0, 1].
65848b8605Smrg *
66848b8605Smrg *
67848b8605Smrg * 2) User buffer uploading (u_vbuf_upload_buffers)
68848b8605Smrg *
69848b8605Smrg * Only the [min_index, max_index] range is uploaded (just like Translate)
70848b8605Smrg * with a single memcpy.
71848b8605Smrg *
72848b8605Smrg * This method works best for non-indexed draw operations or indexed draw
73848b8605Smrg * operations where the [min_index, max_index] range is not being way bigger
74848b8605Smrg * than the vertex count.
75848b8605Smrg *
76848b8605Smrg * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
77848b8605Smrg * the per-vertex attribs are uploaded via the translate module, all packed
78848b8605Smrg * into one vertex buffer, and the indexed draw call is turned into
79848b8605Smrg * a non-indexed one in the process. This adds additional complexity
80848b8605Smrg * to the translate part, but it prevents bad apps from bringing your frame
81848b8605Smrg * rate down.
82848b8605Smrg *
83848b8605Smrg *
84848b8605Smrg * If there is nothing to do, it forwards every command to the driver.
85848b8605Smrg * The module also has its own CSO cache of vertex element states.
86848b8605Smrg */
87848b8605Smrg
88848b8605Smrg#include "util/u_vbuf.h"
89848b8605Smrg
90848b8605Smrg#include "util/u_dump.h"
91848b8605Smrg#include "util/u_format.h"
92848b8605Smrg#include "util/u_inlines.h"
93848b8605Smrg#include "util/u_memory.h"
94848b8605Smrg#include "util/u_upload_mgr.h"
95848b8605Smrg#include "translate/translate.h"
96848b8605Smrg#include "translate/translate_cache.h"
97848b8605Smrg#include "cso_cache/cso_cache.h"
98848b8605Smrg#include "cso_cache/cso_hash.h"
99848b8605Smrg
/* Preprocessed per-CSO vertex element state kept by u_vbuf.
 * The uint32_t masks below are bitfields: one bit per vertex buffer slot
 * (or per attrib where noted), used to answer "is anything incompatible"
 * questions without looping over all elements. */
struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   /* Byte size of each element's source format (indexed like ve[]). */
   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* This might mean two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib  */
   /* Which buffer has at least one vertex element referencing it
    * incompatible. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffer has all vertex elements referencing it incompatible. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffer has at least one vertex element referencing it
    * compatible. */
   uint32_t compatible_vb_mask_any;
   /* Which buffer has all vertex elements referencing it compatible. */
   uint32_t compatible_vb_mask_all;

   /* Which buffer has at least one vertex element referencing it
    * non-instanced. */
   uint32_t noninstance_vb_mask_any;

   /* Driver's own vertex element CSO, bound when this state is active. */
   void *driver_cso;
};
135848b8605Smrg
/* The three vertex-element categories handled by the translate fallback;
 * each category is translated into its own fallback vertex buffer. */
enum {
   VB_VERTEX = 0,   /* per-vertex attribs: stride != 0, divisor == 0 */
   VB_INSTANCE = 1, /* instanced attribs: stride != 0, divisor > 0 */
   VB_CONST = 2,    /* constant attribs: stride == 0 */
   VB_NUM = 3
};
142848b8605Smrg
/* The vertex buffer manager instance; one per pipe_context. */
struct u_vbuf {
   struct u_vbuf_caps caps;
   /* Whether the driver accepts negative buffer_offset values
    * (PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET). */
   bool has_signed_vb_offset;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Saved vertex buffer. */
   struct pipe_vertex_buffer vertex_buffer0_saved;

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
   /* If non-NULL, this is a vertex element state used for the translate
    * fallback and therefore used for rendering too. */
   boolean using_translate;
   /* The vertex buffer slot index where translated vertices have been
    * stored in. */
   unsigned fallback_vbs[VB_NUM];

   /* Which buffer is a user buffer. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer is incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer has a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
};
184848b8605Smrg
/* Forward declarations: create/delete callbacks for the vertex-element
 * CSO cache used by u_vbuf_set_vertex_elements_internal(). */
static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);
189848b8605Smrg
/* Table of vertex format fallbacks: if the screen can't fetch 'from' out of
 * a vertex buffer, u_vbuf_get_caps() records a translation to 'to' (always
 * a 32-bit-float format with the same component count, which every driver
 * is expected to support). */
static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
};
254848b8605Smrg
255b8e80941Smrgboolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
256b8e80941Smrg                        unsigned flags)
257848b8605Smrg{
258b8e80941Smrg   unsigned i;
259b8e80941Smrg   boolean fallback = FALSE;
260b8e80941Smrg
261b8e80941Smrg   /* I'd rather have a bitfield of which formats are supported and a static
262b8e80941Smrg    * table of the translations indexed by format, but since we don't have C99
263b8e80941Smrg    * we can't easily make a sparsely-populated table indexed by format.  So,
264b8e80941Smrg    * we construct the sparse table here.
265b8e80941Smrg    */
266b8e80941Smrg   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
267b8e80941Smrg      caps->format_translation[i] = i;
268b8e80941Smrg
269b8e80941Smrg   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
270b8e80941Smrg      enum pipe_format format = vbuf_format_fallbacks[i].from;
271b8e80941Smrg
272b8e80941Smrg      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0,
273b8e80941Smrg                                       PIPE_BIND_VERTEX_BUFFER)) {
274b8e80941Smrg         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
275b8e80941Smrg         fallback = TRUE;
276b8e80941Smrg      }
277b8e80941Smrg   }
278848b8605Smrg
279848b8605Smrg   caps->buffer_offset_unaligned =
280848b8605Smrg      !screen->get_param(screen,
281b8e80941Smrg                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
282848b8605Smrg   caps->buffer_stride_unaligned =
283b8e80941Smrg     !screen->get_param(screen,
284848b8605Smrg                        PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
285848b8605Smrg   caps->velem_src_offset_unaligned =
286848b8605Smrg      !screen->get_param(screen,
287b8e80941Smrg                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
288848b8605Smrg   caps->user_vertex_buffers =
289848b8605Smrg      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
290b8e80941Smrg
291b8e80941Smrg   if (!caps->buffer_offset_unaligned ||
292b8e80941Smrg       !caps->buffer_stride_unaligned ||
293b8e80941Smrg       !caps->velem_src_offset_unaligned ||
294b8e80941Smrg       (!(flags & U_VBUF_FLAG_NO_USER_VBOS) && !caps->user_vertex_buffers)) {
295b8e80941Smrg      fallback = TRUE;
296b8e80941Smrg   }
297b8e80941Smrg
298b8e80941Smrg   return fallback;
299848b8605Smrg}
300848b8605Smrg
301848b8605Smrgstruct u_vbuf *
302b8e80941Smrgu_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps)
303848b8605Smrg{
304848b8605Smrg   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);
305848b8605Smrg
306848b8605Smrg   mgr->caps = *caps;
307848b8605Smrg   mgr->pipe = pipe;
308848b8605Smrg   mgr->cso_cache = cso_cache_create();
309848b8605Smrg   mgr->translate_cache = translate_cache_create();
310848b8605Smrg   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
311848b8605Smrg
312b8e80941Smrg   mgr->has_signed_vb_offset =
313b8e80941Smrg      pipe->screen->get_param(pipe->screen,
314b8e80941Smrg                              PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);
315848b8605Smrg
316848b8605Smrg   return mgr;
317848b8605Smrg}
318848b8605Smrg
319848b8605Smrg/* u_vbuf uses its own caching for vertex elements, because it needs to keep
320848b8605Smrg * its own preprocessed state per vertex element CSO. */
/* Look up (or create and cache) the preprocessed u_vbuf_elements state for
 * the given vertex element array, and bind the corresponding driver CSO if
 * it differs from the currently bound one.
 *
 * u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count,
                                    const struct pipe_vertex_element *states)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;
   struct cso_velems_state velems_state;

   /* need to include the count into the stored state data too.
    * Only the first 'count' elements of velems_state.velems are
    * initialized, so key_size covers exactly the meaningful bytes. */
   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
   velems_state.count = count;
   memcpy(velems_state.velems, states,
          sizeof(struct pipe_vertex_element) * count);
   hash_key = cso_construct_key((void*)&velems_state, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)&velems_state, key_size);

   if (cso_hash_iter_is_null(iter)) {
      /* Cache miss: build the preprocessed state and insert it. */
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, &velems_state, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, count, states);
      cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
      cso->context = (void*)mgr;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   /* Only rebind in the driver when the state actually changed. */
   if (ve != mgr->ve)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);

   return ve;
}
360848b8605Smrg
361848b8605Smrgvoid u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
362848b8605Smrg                               const struct pipe_vertex_element *states)
363848b8605Smrg{
364848b8605Smrg   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
365848b8605Smrg}
366848b8605Smrg
367848b8605Smrgvoid u_vbuf_destroy(struct u_vbuf *mgr)
368848b8605Smrg{
369848b8605Smrg   struct pipe_screen *screen = mgr->pipe->screen;
370848b8605Smrg   unsigned i;
371b8e80941Smrg   const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
372b8e80941Smrg                                                    PIPE_SHADER_CAP_MAX_INPUTS);
373848b8605Smrg
374848b8605Smrg   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);
375848b8605Smrg
376b8e80941Smrg   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
377b8e80941Smrg      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
378b8e80941Smrg   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
379b8e80941Smrg      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);
380b8e80941Smrg
381b8e80941Smrg   pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved);
382848b8605Smrg
383848b8605Smrg   translate_cache_destroy(mgr->translate_cache);
384848b8605Smrg   cso_cache_delete(mgr->cso_cache);
385848b8605Smrg   FREE(mgr);
386848b8605Smrg}
387848b8605Smrg
388848b8605Smrgstatic enum pipe_error
389848b8605Smrgu_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
390b8e80941Smrg                         const struct pipe_draw_info *info,
391848b8605Smrg                         unsigned vb_mask, unsigned out_vb,
392848b8605Smrg                         int start_vertex, unsigned num_vertices,
393b8e80941Smrg                         int min_index, boolean unroll_indices)
394848b8605Smrg{
395848b8605Smrg   struct translate *tr;
396848b8605Smrg   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
397848b8605Smrg   struct pipe_resource *out_buffer = NULL;
398848b8605Smrg   uint8_t *out_map;
399848b8605Smrg   unsigned out_offset, mask;
400848b8605Smrg
401848b8605Smrg   /* Get a translate object. */
402848b8605Smrg   tr = translate_cache_find(mgr->translate_cache, key);
403848b8605Smrg
404848b8605Smrg   /* Map buffers we want to translate. */
405848b8605Smrg   mask = vb_mask;
406848b8605Smrg   while (mask) {
407848b8605Smrg      struct pipe_vertex_buffer *vb;
408848b8605Smrg      unsigned offset;
409848b8605Smrg      uint8_t *map;
410848b8605Smrg      unsigned i = u_bit_scan(&mask);
411848b8605Smrg
412848b8605Smrg      vb = &mgr->vertex_buffer[i];
413848b8605Smrg      offset = vb->buffer_offset + vb->stride * start_vertex;
414848b8605Smrg
415b8e80941Smrg      if (vb->is_user_buffer) {
416b8e80941Smrg         map = (uint8_t*)vb->buffer.user + offset;
417848b8605Smrg      } else {
418848b8605Smrg         unsigned size = vb->stride ? num_vertices * vb->stride
419848b8605Smrg                                    : sizeof(double)*4;
420848b8605Smrg
421b8e80941Smrg         if (!vb->buffer.resource)
422b8e80941Smrg            continue;
423b8e80941Smrg
424b8e80941Smrg         if (offset + size > vb->buffer.resource->width0) {
425b8e80941Smrg            /* Don't try to map past end of buffer.  This often happens when
426b8e80941Smrg             * we're translating an attribute that's at offset > 0 from the
427b8e80941Smrg             * start of the vertex.  If we'd subtract attrib's offset from
428b8e80941Smrg             * the size, this probably wouldn't happen.
429b8e80941Smrg             */
430b8e80941Smrg            size = vb->buffer.resource->width0 - offset;
431b8e80941Smrg
432b8e80941Smrg            /* Also adjust num_vertices.  A common user error is to call
433b8e80941Smrg             * glDrawRangeElements() with incorrect 'end' argument.  The 'end
434b8e80941Smrg             * value should be the max index value, but people often
435b8e80941Smrg             * accidentally add one to this value.  This adjustment avoids
436b8e80941Smrg             * crashing (by reading past the end of a hardware buffer mapping)
437b8e80941Smrg             * when people do that.
438b8e80941Smrg             */
439b8e80941Smrg            num_vertices = (size + vb->stride - 1) / vb->stride;
440848b8605Smrg         }
441848b8605Smrg
442b8e80941Smrg         map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size,
443848b8605Smrg                                     PIPE_TRANSFER_READ, &vb_transfer[i]);
444848b8605Smrg      }
445848b8605Smrg
446848b8605Smrg      /* Subtract min_index so that indexing with the index buffer works. */
447848b8605Smrg      if (unroll_indices) {
448848b8605Smrg         map -= (ptrdiff_t)vb->stride * min_index;
449848b8605Smrg      }
450848b8605Smrg
451b8e80941Smrg      tr->set_buffer(tr, i, map, vb->stride, info->max_index);
452848b8605Smrg   }
453848b8605Smrg
454848b8605Smrg   /* Translate. */
455848b8605Smrg   if (unroll_indices) {
456848b8605Smrg      struct pipe_transfer *transfer = NULL;
457b8e80941Smrg      const unsigned offset = info->start * info->index_size;
458848b8605Smrg      uint8_t *map;
459848b8605Smrg
460848b8605Smrg      /* Create and map the output buffer. */
461b8e80941Smrg      u_upload_alloc(mgr->pipe->stream_uploader, 0,
462b8e80941Smrg                     key->output_stride * info->count, 4,
463b8e80941Smrg                     &out_offset, &out_buffer,
464b8e80941Smrg                     (void**)&out_map);
465b8e80941Smrg      if (!out_buffer)
466b8e80941Smrg         return PIPE_ERROR_OUT_OF_MEMORY;
467b8e80941Smrg
468b8e80941Smrg      if (info->has_user_indices) {
469b8e80941Smrg         map = (uint8_t*)info->index.user + offset;
470848b8605Smrg      } else {
471b8e80941Smrg         map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
472b8e80941Smrg                                     info->count * info->index_size,
473848b8605Smrg                                     PIPE_TRANSFER_READ, &transfer);
474848b8605Smrg      }
475848b8605Smrg
476b8e80941Smrg      switch (info->index_size) {
477848b8605Smrg      case 4:
478b8e80941Smrg         tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map);
479848b8605Smrg         break;
480848b8605Smrg      case 2:
481b8e80941Smrg         tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map);
482848b8605Smrg         break;
483848b8605Smrg      case 1:
484b8e80941Smrg         tr->run_elts8(tr, map, info->count, 0, 0, out_map);
485848b8605Smrg         break;
486848b8605Smrg      }
487848b8605Smrg
488848b8605Smrg      if (transfer) {
489848b8605Smrg         pipe_buffer_unmap(mgr->pipe, transfer);
490848b8605Smrg      }
491848b8605Smrg   } else {
492848b8605Smrg      /* Create and map the output buffer. */
493b8e80941Smrg      u_upload_alloc(mgr->pipe->stream_uploader,
494b8e80941Smrg                     mgr->has_signed_vb_offset ?
495b8e80941Smrg                        0 : key->output_stride * start_vertex,
496b8e80941Smrg                     key->output_stride * num_vertices, 4,
497b8e80941Smrg                     &out_offset, &out_buffer,
498b8e80941Smrg                     (void**)&out_map);
499b8e80941Smrg      if (!out_buffer)
500b8e80941Smrg         return PIPE_ERROR_OUT_OF_MEMORY;
501848b8605Smrg
502848b8605Smrg      out_offset -= key->output_stride * start_vertex;
503848b8605Smrg
504848b8605Smrg      tr->run(tr, 0, num_vertices, 0, 0, out_map);
505848b8605Smrg   }
506848b8605Smrg
507848b8605Smrg   /* Unmap all buffers. */
508848b8605Smrg   mask = vb_mask;
509848b8605Smrg   while (mask) {
510848b8605Smrg      unsigned i = u_bit_scan(&mask);
511848b8605Smrg
512848b8605Smrg      if (vb_transfer[i]) {
513848b8605Smrg         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
514848b8605Smrg      }
515848b8605Smrg   }
516848b8605Smrg
517848b8605Smrg   /* Setup the new vertex buffer. */
518848b8605Smrg   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
519848b8605Smrg   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;
520848b8605Smrg
521848b8605Smrg   /* Move the buffer reference. */
522b8e80941Smrg   pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
523b8e80941Smrg   mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
524b8e80941Smrg   mgr->real_vertex_buffer[out_vb].is_user_buffer = false;
525848b8605Smrg
526848b8605Smrg   return PIPE_OK;
527848b8605Smrg}
528848b8605Smrg
529848b8605Smrgstatic boolean
530848b8605Smrgu_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
531848b8605Smrg                                    unsigned mask[VB_NUM])
532848b8605Smrg{
533848b8605Smrg   unsigned type;
534848b8605Smrg   unsigned fallback_vbs[VB_NUM];
535848b8605Smrg   /* Set the bit for each buffer which is incompatible, or isn't set. */
536848b8605Smrg   uint32_t unused_vb_mask =
537848b8605Smrg      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
538848b8605Smrg      ~mgr->enabled_vb_mask;
539848b8605Smrg
540848b8605Smrg   memset(fallback_vbs, ~0, sizeof(fallback_vbs));
541848b8605Smrg
542848b8605Smrg   /* Find free slots for each type if needed. */
543848b8605Smrg   for (type = 0; type < VB_NUM; type++) {
544848b8605Smrg      if (mask[type]) {
545848b8605Smrg         uint32_t index;
546848b8605Smrg
547848b8605Smrg         if (!unused_vb_mask) {
548848b8605Smrg            return FALSE;
549848b8605Smrg         }
550848b8605Smrg
551848b8605Smrg         index = ffs(unused_vb_mask) - 1;
552848b8605Smrg         fallback_vbs[type] = index;
553b8e80941Smrg         unused_vb_mask &= ~(1 << index);
554848b8605Smrg         /*printf("found slot=%i for type=%i\n", index, type);*/
555848b8605Smrg      }
556848b8605Smrg   }
557848b8605Smrg
558848b8605Smrg   for (type = 0; type < VB_NUM; type++) {
559848b8605Smrg      if (mask[type]) {
560848b8605Smrg         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
561848b8605Smrg      }
562848b8605Smrg   }
563848b8605Smrg
564848b8605Smrg   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
565848b8605Smrg   return TRUE;
566848b8605Smrg}
567848b8605Smrg
static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       const struct pipe_draw_info *info,
                       int start_vertex, unsigned num_vertices,
                       int min_index, boolean unroll_indices)
{
   /* Per-category (VB_VERTEX/VB_INSTANCE/VB_CONST) bitmask of vertex-buffer
    * slots whose contents must be translated. */
   unsigned mask[VB_NUM] = {0};
   /* One translate key per category; filled below, then executed. */
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;
   /* Only buffers actually referenced by the bound vertex elements matter. */
   const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
                                         mgr->ve->used_vb_mask;

   /* First vertex / first instance / 0 to fetch, per category. */
   const int start[VB_NUM] = {
      start_vertex,           /* VERTEX */
      info->start_instance,   /* INSTANCE */
      0                       /* CONST */
   };

   /* How many vertices / instances / constants to translate, per category. */
   const unsigned num[VB_NUM] = {
      num_vertices,           /* VERTEX */
      info->instance_count,   /* INSTANCE */
      1                       /* CONST */
   };

   memset(key, 0, sizeof(key));
   /* ~0 marks "element i is not translated by this category"; it is later
    * compared against key[type].nr_elements, which it always exceeds. */
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         /* Zero stride => constant attrib.  Skip if both the element and its
          * buffer are already compatible with the driver. */
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         /* Per-instance attrib. */
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         /* Per-vertex attrib.  With unroll_indices, every per-vertex attrib
          * is translated even if it is otherwise compatible. */
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   /* Callers only invoke this when something needs translating. */
   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      enum pipe_format output_format = mgr->ve->native_format[i];
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      /* Skip elements that don't need translating (same conditions as the
       * classification loop above). */
      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set type to what we will translate.
       * Whether vertex, instance, or constant attribs. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      if (mgr->ve->ve[i].src_format != output_format)
         assert(translate_is_output_format_supported(output_format));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = output_format;
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type],
                                        mgr->fallback_vbs[type],
                                        start[type], num[type], min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fixup the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Setup new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            /* Element i was translated by this category: point the fallback
             * element at the translated (tightly packed) buffer. */
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems[i].src_format = te->output_format;
            mgr->fallback_velems[i].src_offset = te->output_offset;
            mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0u);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count,
                                       mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}
719848b8605Smrg
720848b8605Smrgstatic void u_vbuf_translate_end(struct u_vbuf *mgr)
721848b8605Smrg{
722848b8605Smrg   unsigned i;
723848b8605Smrg
724848b8605Smrg   /* Restore vertex elements. */
725848b8605Smrg   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
726848b8605Smrg   mgr->using_translate = FALSE;
727848b8605Smrg
728848b8605Smrg   /* Unreference the now-unused VBOs. */
729848b8605Smrg   for (i = 0; i < VB_NUM; i++) {
730848b8605Smrg      unsigned vb = mgr->fallback_vbs[i];
731b8e80941Smrg      if (vb != ~0u) {
732b8e80941Smrg         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
733848b8605Smrg         mgr->fallback_vbs[i] = ~0;
734848b8605Smrg
735848b8605Smrg         /* This will cause the buffer to be unbound in the driver later. */
736848b8605Smrg         mgr->dirty_real_vb_mask |= 1 << vb;
737848b8605Smrg      }
738848b8605Smrg   }
739848b8605Smrg}
740848b8605Smrg
741848b8605Smrgstatic void *
742848b8605Smrgu_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
743848b8605Smrg                              const struct pipe_vertex_element *attribs)
744848b8605Smrg{
745848b8605Smrg   struct pipe_context *pipe = mgr->pipe;
746848b8605Smrg   unsigned i;
747848b8605Smrg   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
748848b8605Smrg   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
749848b8605Smrg   uint32_t used_buffers = 0;
750848b8605Smrg
751848b8605Smrg   ve->count = count;
752848b8605Smrg
753848b8605Smrg   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
754848b8605Smrg   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);
755848b8605Smrg
756848b8605Smrg   /* Set the best native format in case the original format is not
757848b8605Smrg    * supported. */
758848b8605Smrg   for (i = 0; i < count; i++) {
759848b8605Smrg      enum pipe_format format = ve->ve[i].src_format;
760848b8605Smrg
761848b8605Smrg      ve->src_format_size[i] = util_format_get_blocksize(format);
762848b8605Smrg
763848b8605Smrg      used_buffers |= 1 << ve->ve[i].vertex_buffer_index;
764848b8605Smrg
765848b8605Smrg      if (!ve->ve[i].instance_divisor) {
766848b8605Smrg         ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
767848b8605Smrg      }
768848b8605Smrg
769b8e80941Smrg      format = mgr->caps.format_translation[format];
770848b8605Smrg
771848b8605Smrg      driver_attribs[i].src_format = format;
772848b8605Smrg      ve->native_format[i] = format;
773848b8605Smrg      ve->native_format_size[i] =
774848b8605Smrg            util_format_get_blocksize(ve->native_format[i]);
775848b8605Smrg
776848b8605Smrg      if (ve->ve[i].src_format != format ||
777848b8605Smrg          (!mgr->caps.velem_src_offset_unaligned &&
778848b8605Smrg           ve->ve[i].src_offset % 4 != 0)) {
779848b8605Smrg         ve->incompatible_elem_mask |= 1 << i;
780848b8605Smrg         ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
781848b8605Smrg      } else {
782848b8605Smrg         ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
783848b8605Smrg      }
784848b8605Smrg   }
785848b8605Smrg
786848b8605Smrg   ve->used_vb_mask = used_buffers;
787848b8605Smrg   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
788848b8605Smrg   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
789848b8605Smrg
790b8e80941Smrg   /* Align the formats and offsets to the size of DWORD if needed. */
791848b8605Smrg   if (!mgr->caps.velem_src_offset_unaligned) {
792848b8605Smrg      for (i = 0; i < count; i++) {
793848b8605Smrg         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
794b8e80941Smrg         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
795848b8605Smrg      }
796848b8605Smrg   }
797848b8605Smrg
798848b8605Smrg   ve->driver_cso =
799848b8605Smrg      pipe->create_vertex_elements_state(pipe, count, driver_attribs);
800848b8605Smrg   return ve;
801848b8605Smrg}
802848b8605Smrg
803848b8605Smrgstatic void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
804848b8605Smrg{
805848b8605Smrg   struct pipe_context *pipe = mgr->pipe;
806848b8605Smrg   struct u_vbuf_elements *ve = cso;
807848b8605Smrg
808848b8605Smrg   pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
809848b8605Smrg   FREE(ve);
810848b8605Smrg}
811848b8605Smrg
/* Bind (or unbind, when bufs == NULL) vertex buffers in the slot range
 * [start_slot, start_slot + count).  Keeps two copies: vertex_buffer[] holds
 * what the state tracker set, real_vertex_buffer[] holds what will actually
 * be given to the driver (incompatible/user buffers are left unreferenced
 * there and handled at draw time). */
void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
                               unsigned start_slot, unsigned count,
                               const struct pipe_vertex_buffer *bufs)
{
   unsigned i;
   /* which buffers are enabled */
   uint32_t enabled_vb_mask = 0;
   /* which buffers are in user memory */
   uint32_t user_vb_mask = 0;
   /* which buffers are incompatible with the driver */
   uint32_t incompatible_vb_mask = 0;
   /* which buffers have a non-zero stride */
   uint32_t nonzero_stride_vb_mask = 0;
   /* 1ull avoids undefined behavior when count == 32. */
   const uint32_t mask = ~(((1ull << count) - 1) << start_slot);

   /* Zero out the bits we are going to rewrite completely. */
   mgr->user_vb_mask &= mask;
   mgr->incompatible_vb_mask &= mask;
   mgr->nonzero_stride_vb_mask &= mask;
   mgr->enabled_vb_mask &= mask;

   if (!bufs) {
      struct pipe_context *pipe = mgr->pipe;
      /* Unbind. */
      mgr->dirty_real_vb_mask &= mask;

      for (i = 0; i < count; i++) {
         unsigned dst_index = start_slot + i;

         pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
         pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
      }

      pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
      return;
   }

   for (i = 0; i < count; i++) {
      unsigned dst_index = start_slot + i;
      const struct pipe_vertex_buffer *vb = &bufs[i];
      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];

      if (!vb->buffer.resource) {
         /* NULL buffer in the array: clear both copies for this slot. */
         pipe_vertex_buffer_unreference(orig_vb);
         pipe_vertex_buffer_unreference(real_vb);
         continue;
      }

      pipe_vertex_buffer_reference(orig_vb, vb);

      if (vb->stride) {
         nonzero_stride_vb_mask |= 1 << dst_index;
      }
      enabled_vb_mask |= 1 << dst_index;

      /* Driver can't consume this buffer directly (misaligned offset or
       * stride): remember offset/stride but don't hold a reference — the
       * translate path will produce a replacement at draw time. */
      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
         incompatible_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      /* User buffer the driver can't take directly: will be uploaded to a
       * real resource at draw time (u_vbuf_upload_buffers). */
      if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
         user_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      /* Fully compatible: pass the buffer through unchanged. */
      pipe_vertex_buffer_reference(real_vb, vb);
   }

   mgr->user_vb_mask |= user_vb_mask;
   mgr->incompatible_vb_mask |= incompatible_vb_mask;
   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
   mgr->enabled_vb_mask |= enabled_vb_mask;

   /* All changed buffers are marked as dirty, even the NULL ones,
    * which will cause the NULL buffers to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= ~mask;
}
899848b8605Smrg
/* Upload all bound user vertex buffers into real GPU resources.
 *
 * For every vertex element reading from a user buffer, the byte range that
 * the draw will touch is computed; overlapping ranges per buffer are merged,
 * then each buffer is uploaded once via the stream uploader into
 * real_vertex_buffer[].  Returns PIPE_ERROR_OUT_OF_MEMORY if an upload
 * fails, PIPE_OK otherwise. */
static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   const struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
   /* Per-buffer [start, end) byte ranges to upload; valid only where the
    * corresponding bit in buffer_mask is set. */
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS];
   uint32_t buffer_mask = 0;

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      const struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size, index_bit;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      if (!vb->is_user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */

         /* Figure out how many instances we'll render given instance_div.  We
          * can't use the typical div_round_up() pattern because the CTS uses
          * instance_div = ~0 for a test, which overflows div_round_up()'s
          * addition.
          */
         unsigned count = num_instances / instance_div;
         if (count * instance_div != num_instances)
            count++;

         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      index_bit = 1 << index;

      /* Update offsets.  Several elements may read the same buffer; grow the
       * range to cover the union of all of them. */
      if (!(buffer_mask & index_bit)) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }

      buffer_mask |= index_bit;
   }

   /* Upload buffers. */
   while (buffer_mask) {
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      i = u_bit_scan(&buffer_mask);

      start = start_offset[i];
      end = end_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].buffer.user;

      /* If the driver supports negative buffer offsets, upload only the used
       * range and compensate via buffer_offset below. */
      u_upload_data(mgr->pipe->stream_uploader,
                    mgr->has_signed_vb_offset ? 0 : start,
                    end - start, 4,
                    ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource);
      if (!real_vb->buffer.resource)
         return PIPE_ERROR_OUT_OF_MEMORY;

      /* Make vertex indices computed against the original base still land on
       * the right bytes in the uploaded copy. */
      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}
1000848b8605Smrg
1001b8e80941Smrgstatic boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
1002848b8605Smrg{
1003848b8605Smrg   /* See if there are any per-vertex attribs which will be uploaded or
1004848b8605Smrg    * translated. Use bitmasks to get the info instead of looping over vertex
1005848b8605Smrg    * elements. */
1006848b8605Smrg   return (mgr->ve->used_vb_mask &
1007b8e80941Smrg           ((mgr->user_vb_mask |
1008b8e80941Smrg             mgr->incompatible_vb_mask |
1009848b8605Smrg             mgr->ve->incompatible_vb_mask_any) &
1010b8e80941Smrg            mgr->ve->noninstance_vb_mask_any &
1011b8e80941Smrg            mgr->nonzero_stride_vb_mask)) != 0;
1012848b8605Smrg}
1013848b8605Smrg
1014b8e80941Smrgstatic boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
1015848b8605Smrg{
1016848b8605Smrg   /* Return true if there are hw buffers which don't need to be translated.
1017848b8605Smrg    *
1018848b8605Smrg    * We could query whether each buffer is busy, but that would
1019848b8605Smrg    * be way more costly than this. */
1020848b8605Smrg   return (mgr->ve->used_vb_mask &
1021b8e80941Smrg           (~mgr->user_vb_mask &
1022b8e80941Smrg            ~mgr->incompatible_vb_mask &
1023b8e80941Smrg            mgr->ve->compatible_vb_mask_all &
1024b8e80941Smrg            mgr->ve->noninstance_vb_mask_any &
1025848b8605Smrg            mgr->nonzero_stride_vb_mask)) != 0;
1026848b8605Smrg}
1027848b8605Smrg
1028b8e80941Smrgstatic void
1029b8e80941Smrgu_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info,
1030b8e80941Smrg                               const void *indices, unsigned *out_min_index,
1031b8e80941Smrg                               unsigned *out_max_index)
1032848b8605Smrg{
1033b8e80941Smrg   unsigned max = 0;
1034b8e80941Smrg   unsigned min = ~0u;
1035848b8605Smrg
1036b8e80941Smrg   switch (info->index_size) {
1037848b8605Smrg   case 4: {
1038848b8605Smrg      const unsigned *ui_indices = (const unsigned*)indices;
1039b8e80941Smrg      if (info->primitive_restart) {
1040b8e80941Smrg         for (unsigned i = 0; i < info->count; i++) {
1041b8e80941Smrg            if (ui_indices[i] != info->restart_index) {
1042b8e80941Smrg               if (ui_indices[i] > max) max = ui_indices[i];
1043b8e80941Smrg               if (ui_indices[i] < min) min = ui_indices[i];
1044848b8605Smrg            }
1045848b8605Smrg         }
1046848b8605Smrg      }
1047848b8605Smrg      else {
1048b8e80941Smrg         for (unsigned i = 0; i < info->count; i++) {
1049b8e80941Smrg            if (ui_indices[i] > max) max = ui_indices[i];
1050b8e80941Smrg            if (ui_indices[i] < min) min = ui_indices[i];
1051848b8605Smrg         }
1052848b8605Smrg      }
1053848b8605Smrg      break;
1054848b8605Smrg   }
1055848b8605Smrg   case 2: {
1056848b8605Smrg      const unsigned short *us_indices = (const unsigned short*)indices;
1057b8e80941Smrg      if (info->primitive_restart) {
1058b8e80941Smrg         for (unsigned i = 0; i < info->count; i++) {
1059b8e80941Smrg            if (us_indices[i] != info->restart_index) {
1060b8e80941Smrg               if (us_indices[i] > max) max = us_indices[i];
1061b8e80941Smrg               if (us_indices[i] < min) min = us_indices[i];
1062848b8605Smrg            }
1063848b8605Smrg         }
1064848b8605Smrg      }
1065848b8605Smrg      else {
1066b8e80941Smrg         for (unsigned i = 0; i < info->count; i++) {
1067b8e80941Smrg            if (us_indices[i] > max) max = us_indices[i];
1068b8e80941Smrg            if (us_indices[i] < min) min = us_indices[i];
1069848b8605Smrg         }
1070848b8605Smrg      }
1071848b8605Smrg      break;
1072848b8605Smrg   }
1073848b8605Smrg   case 1: {
1074848b8605Smrg      const unsigned char *ub_indices = (const unsigned char*)indices;
1075b8e80941Smrg      if (info->primitive_restart) {
1076b8e80941Smrg         for (unsigned i = 0; i < info->count; i++) {
1077b8e80941Smrg            if (ub_indices[i] != info->restart_index) {
1078b8e80941Smrg               if (ub_indices[i] > max) max = ub_indices[i];
1079b8e80941Smrg               if (ub_indices[i] < min) min = ub_indices[i];
1080848b8605Smrg            }
1081848b8605Smrg         }
1082848b8605Smrg      }
1083848b8605Smrg      else {
1084b8e80941Smrg         for (unsigned i = 0; i < info->count; i++) {
1085b8e80941Smrg            if (ub_indices[i] > max) max = ub_indices[i];
1086b8e80941Smrg            if (ub_indices[i] < min) min = ub_indices[i];
1087848b8605Smrg         }
1088848b8605Smrg      }
1089848b8605Smrg      break;
1090848b8605Smrg   }
1091848b8605Smrg   default:
1092848b8605Smrg      assert(0);
1093848b8605Smrg   }
1094848b8605Smrg
1095b8e80941Smrg   *out_min_index = min;
1096b8e80941Smrg   *out_max_index = max;
1097b8e80941Smrg}
1098b8e80941Smrg
1099b8e80941Smrgvoid u_vbuf_get_minmax_index(struct pipe_context *pipe,
1100b8e80941Smrg                             const struct pipe_draw_info *info,
1101b8e80941Smrg                             unsigned *out_min_index, unsigned *out_max_index)
1102b8e80941Smrg{
1103b8e80941Smrg   struct pipe_transfer *transfer = NULL;
1104b8e80941Smrg   const void *indices;
1105b8e80941Smrg
1106b8e80941Smrg   if (info->has_user_indices) {
1107b8e80941Smrg      indices = (uint8_t*)info->index.user +
1108b8e80941Smrg                info->start * info->index_size;
1109b8e80941Smrg   } else {
1110b8e80941Smrg      indices = pipe_buffer_map_range(pipe, info->index.resource,
1111b8e80941Smrg                                      info->start * info->index_size,
1112b8e80941Smrg                                      info->count * info->index_size,
1113b8e80941Smrg                                      PIPE_TRANSFER_READ, &transfer);
1114b8e80941Smrg   }
1115b8e80941Smrg
1116b8e80941Smrg   u_vbuf_get_minmax_index_mapped(info, indices, out_min_index, out_max_index);
1117b8e80941Smrg
1118848b8605Smrg   if (transfer) {
1119848b8605Smrg      pipe_buffer_unmap(pipe, transfer);
1120848b8605Smrg   }
1121848b8605Smrg}
1122848b8605Smrg
1123848b8605Smrgstatic void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
1124848b8605Smrg{
1125848b8605Smrg   struct pipe_context *pipe = mgr->pipe;
1126848b8605Smrg   unsigned start_slot, count;
1127848b8605Smrg
1128848b8605Smrg   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
1129848b8605Smrg   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);
1130848b8605Smrg
1131848b8605Smrg   pipe->set_vertex_buffers(pipe, start_slot, count,
1132848b8605Smrg                            mgr->real_vertex_buffer + start_slot);
1133848b8605Smrg   mgr->dirty_real_vb_mask = 0;
1134848b8605Smrg}
1135848b8605Smrg
1136b8e80941Smrgstatic void
1137b8e80941Smrgu_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
1138b8e80941Smrg                               unsigned *indirect_data, unsigned stride,
1139b8e80941Smrg                               unsigned draw_count)
1140b8e80941Smrg{
1141b8e80941Smrg   assert(info->index_size);
1142b8e80941Smrg   info->indirect = NULL;
1143b8e80941Smrg
1144b8e80941Smrg   for (unsigned i = 0; i < draw_count; i++) {
1145b8e80941Smrg      unsigned offset = i * stride / 4;
1146b8e80941Smrg
1147b8e80941Smrg      info->count = indirect_data[offset + 0];
1148b8e80941Smrg      info->instance_count = indirect_data[offset + 1];
1149b8e80941Smrg
1150b8e80941Smrg      if (!info->count || !info->instance_count)
1151b8e80941Smrg         continue;
1152b8e80941Smrg
1153b8e80941Smrg      info->start = indirect_data[offset + 2];
1154b8e80941Smrg      info->index_bias = indirect_data[offset + 3];
1155b8e80941Smrg      info->start_instance = indirect_data[offset + 4];
1156b8e80941Smrg
1157b8e80941Smrg      u_vbuf_draw_vbo(mgr, info);
1158b8e80941Smrg   }
1159b8e80941Smrg}
1160b8e80941Smrg
1161848b8605Smrgvoid u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
1162848b8605Smrg{
1163848b8605Smrg   struct pipe_context *pipe = mgr->pipe;
1164b8e80941Smrg   int start_vertex;
1165b8e80941Smrg   unsigned min_index;
1166848b8605Smrg   unsigned num_vertices;
1167848b8605Smrg   boolean unroll_indices = FALSE;
1168b8e80941Smrg   const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
1169848b8605Smrg   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
1170b8e80941Smrg   const uint32_t incompatible_vb_mask =
1171b8e80941Smrg      mgr->incompatible_vb_mask & used_vb_mask;
1172848b8605Smrg   struct pipe_draw_info new_info;
1173848b8605Smrg
1174848b8605Smrg   /* Normal draw. No fallback and no user buffers. */
1175848b8605Smrg   if (!incompatible_vb_mask &&
1176848b8605Smrg       !mgr->ve->incompatible_elem_mask &&
1177848b8605Smrg       !user_vb_mask) {
1178848b8605Smrg
1179848b8605Smrg      /* Set vertex buffers if needed. */
1180848b8605Smrg      if (mgr->dirty_real_vb_mask & used_vb_mask) {
1181848b8605Smrg         u_vbuf_set_driver_vertex_buffers(mgr);
1182848b8605Smrg      }
1183848b8605Smrg
1184848b8605Smrg      pipe->draw_vbo(pipe, info);
1185848b8605Smrg      return;
1186848b8605Smrg   }
1187848b8605Smrg
1188848b8605Smrg   new_info = *info;
1189848b8605Smrg
1190b8e80941Smrg   /* Handle indirect (multi)draws. */
1191848b8605Smrg   if (new_info.indirect) {
1192b8e80941Smrg      const struct pipe_draw_indirect_info *indirect = new_info.indirect;
1193b8e80941Smrg      unsigned draw_count = 0;
1194b8e80941Smrg
1195b8e80941Smrg      /* Get the number of draws. */
1196b8e80941Smrg      if (indirect->indirect_draw_count) {
1197b8e80941Smrg         pipe_buffer_read(pipe, indirect->indirect_draw_count,
1198b8e80941Smrg                          indirect->indirect_draw_count_offset,
1199b8e80941Smrg                          4, &draw_count);
1200b8e80941Smrg      } else {
1201b8e80941Smrg         draw_count = indirect->draw_count;
1202848b8605Smrg      }
1203848b8605Smrg
1204b8e80941Smrg      if (!draw_count)
1205b8e80941Smrg         return;
1206b8e80941Smrg
1207b8e80941Smrg      unsigned data_size = (draw_count - 1) * indirect->stride +
1208b8e80941Smrg                           (new_info.index_size ? 20 : 16);
1209b8e80941Smrg      unsigned *data = malloc(data_size);
1210b8e80941Smrg      if (!data)
1211b8e80941Smrg         return; /* report an error? */
1212b8e80941Smrg
1213b8e80941Smrg      /* Read the used buffer range only once, because the read can be
1214b8e80941Smrg       * uncached.
1215b8e80941Smrg       */
1216b8e80941Smrg      pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
1217b8e80941Smrg                       data);
1218b8e80941Smrg
1219b8e80941Smrg      if (info->index_size) {
1220b8e80941Smrg         /* Indexed multidraw. */
1221b8e80941Smrg         unsigned index_bias0 = data[3];
1222b8e80941Smrg         bool index_bias_same = true;
1223b8e80941Smrg
1224b8e80941Smrg         /* If we invoke the translate path, we have to split the multidraw. */
1225b8e80941Smrg         if (incompatible_vb_mask ||
1226b8e80941Smrg             mgr->ve->incompatible_elem_mask) {
1227b8e80941Smrg            u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
1228b8e80941Smrg                                           indirect->stride, draw_count);
1229b8e80941Smrg            free(data);
1230b8e80941Smrg            return;
1231b8e80941Smrg         }
1232b8e80941Smrg
1233b8e80941Smrg         /* See if index_bias is the same for all draws. */
1234b8e80941Smrg         for (unsigned i = 1; i < draw_count; i++) {
1235b8e80941Smrg            if (data[i * indirect->stride / 4 + 3] != index_bias0) {
1236b8e80941Smrg               index_bias_same = false;
1237b8e80941Smrg               break;
1238b8e80941Smrg            }
1239b8e80941Smrg         }
1240b8e80941Smrg
1241b8e80941Smrg         /* Split the multidraw if index_bias is different. */
1242b8e80941Smrg         if (!index_bias_same) {
1243b8e80941Smrg            u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
1244b8e80941Smrg                                           indirect->stride, draw_count);
1245b8e80941Smrg            free(data);
1246b8e80941Smrg            return;
1247b8e80941Smrg         }
1248b8e80941Smrg
1249b8e80941Smrg         /* If we don't need to use the translate path and index_bias is
1250b8e80941Smrg          * the same, we can process the multidraw with the time complexity
1251b8e80941Smrg          * equal to 1 draw call (except for the index range computation).
1252b8e80941Smrg          * We only need to compute the index range covering all draw calls
1253b8e80941Smrg          * of the multidraw.
1254b8e80941Smrg          *
1255b8e80941Smrg          * The driver will not look at these values because indirect != NULL.
1256b8e80941Smrg          * These values determine the user buffer bounds to upload.
1257b8e80941Smrg          */
1258b8e80941Smrg         new_info.index_bias = index_bias0;
1259b8e80941Smrg         new_info.min_index = ~0u;
1260b8e80941Smrg         new_info.max_index = 0;
1261b8e80941Smrg         new_info.start_instance = ~0u;
1262b8e80941Smrg         unsigned end_instance = 0;
1263b8e80941Smrg
1264b8e80941Smrg         struct pipe_transfer *transfer = NULL;
1265b8e80941Smrg         const uint8_t *indices;
1266b8e80941Smrg
1267b8e80941Smrg         if (info->has_user_indices) {
1268b8e80941Smrg            indices = (uint8_t*)info->index.user;
1269b8e80941Smrg         } else {
1270b8e80941Smrg            indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
1271b8e80941Smrg                                                PIPE_TRANSFER_READ, &transfer);
1272b8e80941Smrg         }
1273b8e80941Smrg
1274b8e80941Smrg         for (unsigned i = 0; i < draw_count; i++) {
1275b8e80941Smrg            unsigned offset = i * indirect->stride / 4;
1276b8e80941Smrg            unsigned start = data[offset + 2];
1277b8e80941Smrg            unsigned count = data[offset + 0];
1278b8e80941Smrg            unsigned start_instance = data[offset + 4];
1279b8e80941Smrg            unsigned instance_count = data[offset + 1];
1280b8e80941Smrg
1281b8e80941Smrg            if (!count || !instance_count)
1282b8e80941Smrg               continue;
1283b8e80941Smrg
1284b8e80941Smrg            /* Update the ranges of instances. */
1285b8e80941Smrg            new_info.start_instance = MIN2(new_info.start_instance,
1286b8e80941Smrg                                           start_instance);
1287b8e80941Smrg            end_instance = MAX2(end_instance, start_instance + instance_count);
1288b8e80941Smrg
1289b8e80941Smrg            /* Update the index range. */
1290b8e80941Smrg            unsigned min, max;
1291b8e80941Smrg            new_info.count = count; /* only used by get_minmax_index */
1292b8e80941Smrg            u_vbuf_get_minmax_index_mapped(&new_info,
1293b8e80941Smrg                                           indices +
1294b8e80941Smrg                                           new_info.index_size * start,
1295b8e80941Smrg                                           &min, &max);
1296b8e80941Smrg
1297b8e80941Smrg            new_info.min_index = MIN2(new_info.min_index, min);
1298b8e80941Smrg            new_info.max_index = MAX2(new_info.max_index, max);
1299b8e80941Smrg         }
1300b8e80941Smrg         free(data);
1301b8e80941Smrg
1302b8e80941Smrg         if (transfer)
1303b8e80941Smrg            pipe_buffer_unmap(pipe, transfer);
1304b8e80941Smrg
1305b8e80941Smrg         /* Set the final instance count. */
1306b8e80941Smrg         new_info.instance_count = end_instance - new_info.start_instance;
1307b8e80941Smrg
1308b8e80941Smrg         if (new_info.start_instance == ~0u || !new_info.instance_count)
1309b8e80941Smrg            return;
1310b8e80941Smrg      } else {
1311b8e80941Smrg         /* Non-indexed multidraw.
1312b8e80941Smrg          *
1313b8e80941Smrg          * Keep the draw call indirect and compute minimums & maximums,
1314b8e80941Smrg          * which will determine the user buffer bounds to upload, but
1315b8e80941Smrg          * the driver will not look at these values because indirect != NULL.
1316b8e80941Smrg          *
1317b8e80941Smrg          * This efficiently processes the multidraw with the time complexity
1318b8e80941Smrg          * equal to 1 draw call.
1319b8e80941Smrg          */
1320b8e80941Smrg         new_info.start = ~0u;
1321b8e80941Smrg         new_info.start_instance = ~0u;
1322b8e80941Smrg         unsigned end_vertex = 0;
1323b8e80941Smrg         unsigned end_instance = 0;
1324b8e80941Smrg
1325b8e80941Smrg         for (unsigned i = 0; i < draw_count; i++) {
1326b8e80941Smrg            unsigned offset = i * indirect->stride / 4;
1327b8e80941Smrg            unsigned start = data[offset + 2];
1328b8e80941Smrg            unsigned count = data[offset + 0];
1329b8e80941Smrg            unsigned start_instance = data[offset + 3];
1330b8e80941Smrg            unsigned instance_count = data[offset + 1];
1331b8e80941Smrg
1332b8e80941Smrg            new_info.start = MIN2(new_info.start, start);
1333b8e80941Smrg            new_info.start_instance = MIN2(new_info.start_instance,
1334b8e80941Smrg                                           start_instance);
1335b8e80941Smrg
1336b8e80941Smrg            end_vertex = MAX2(end_vertex, start + count);
1337b8e80941Smrg            end_instance = MAX2(end_instance, start_instance + instance_count);
1338b8e80941Smrg         }
1339b8e80941Smrg         free(data);
1340b8e80941Smrg
1341b8e80941Smrg         /* Set the final counts. */
1342b8e80941Smrg         new_info.count = end_vertex - new_info.start;
1343b8e80941Smrg         new_info.instance_count = end_instance - new_info.start_instance;
1344b8e80941Smrg
1345b8e80941Smrg         if (new_info.start == ~0u || !new_info.count || !new_info.instance_count)
1346b8e80941Smrg            return;
1347b8e80941Smrg      }
1348848b8605Smrg   }
1349848b8605Smrg
1350b8e80941Smrg   if (new_info.index_size) {
1351848b8605Smrg      /* See if anything needs to be done for per-vertex attribs. */
1352848b8605Smrg      if (u_vbuf_need_minmax_index(mgr)) {
1353b8e80941Smrg         unsigned max_index;
1354848b8605Smrg
1355b8e80941Smrg         if (new_info.max_index != ~0u) {
1356848b8605Smrg            min_index = new_info.min_index;
1357848b8605Smrg            max_index = new_info.max_index;
1358848b8605Smrg         } else {
1359b8e80941Smrg            u_vbuf_get_minmax_index(mgr->pipe, &new_info,
1360b8e80941Smrg                                    &min_index, &max_index);
1361848b8605Smrg         }
1362848b8605Smrg
1363848b8605Smrg         assert(min_index <= max_index);
1364848b8605Smrg
1365848b8605Smrg         start_vertex = min_index + new_info.index_bias;
1366848b8605Smrg         num_vertices = max_index + 1 - min_index;
1367848b8605Smrg
1368848b8605Smrg         /* Primitive restart doesn't work when unrolling indices.
1369848b8605Smrg          * We would have to break this drawing operation into several ones. */
1370848b8605Smrg         /* Use some heuristic to see if unrolling indices improves
1371848b8605Smrg          * performance. */
1372b8e80941Smrg         if (!info->indirect &&
1373b8e80941Smrg             !new_info.primitive_restart &&
1374848b8605Smrg             num_vertices > new_info.count*2 &&
1375848b8605Smrg             num_vertices - new_info.count > 32 &&
1376848b8605Smrg             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
1377848b8605Smrg            unroll_indices = TRUE;
1378848b8605Smrg            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
1379848b8605Smrg                              mgr->ve->noninstance_vb_mask_any);
1380848b8605Smrg         }
1381848b8605Smrg      } else {
1382848b8605Smrg         /* Nothing to do for per-vertex attribs. */
1383848b8605Smrg         start_vertex = 0;
1384848b8605Smrg         num_vertices = 0;
1385848b8605Smrg         min_index = 0;
1386848b8605Smrg      }
1387848b8605Smrg   } else {
1388848b8605Smrg      start_vertex = new_info.start;
1389848b8605Smrg      num_vertices = new_info.count;
1390848b8605Smrg      min_index = 0;
1391848b8605Smrg   }
1392848b8605Smrg
1393848b8605Smrg   /* Translate vertices with non-native layouts or formats. */
1394848b8605Smrg   if (unroll_indices ||
1395848b8605Smrg       incompatible_vb_mask ||
1396848b8605Smrg       mgr->ve->incompatible_elem_mask) {
1397b8e80941Smrg      if (!u_vbuf_translate_begin(mgr, &new_info, start_vertex, num_vertices,
1398b8e80941Smrg                                  min_index, unroll_indices)) {
1399848b8605Smrg         debug_warn_once("u_vbuf_translate_begin() failed");
1400848b8605Smrg         return;
1401848b8605Smrg      }
1402848b8605Smrg
1403848b8605Smrg      if (unroll_indices) {
1404b8e80941Smrg         new_info.index_size = 0;
1405848b8605Smrg         new_info.index_bias = 0;
1406848b8605Smrg         new_info.min_index = 0;
1407848b8605Smrg         new_info.max_index = new_info.count - 1;
1408848b8605Smrg         new_info.start = 0;
1409848b8605Smrg      }
1410848b8605Smrg
1411848b8605Smrg      user_vb_mask &= ~(incompatible_vb_mask |
1412848b8605Smrg                        mgr->ve->incompatible_vb_mask_all);
1413848b8605Smrg   }
1414848b8605Smrg
1415848b8605Smrg   /* Upload user buffers. */
1416848b8605Smrg   if (user_vb_mask) {
1417848b8605Smrg      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
1418848b8605Smrg                                new_info.start_instance,
1419848b8605Smrg                                new_info.instance_count) != PIPE_OK) {
1420848b8605Smrg         debug_warn_once("u_vbuf_upload_buffers() failed");
1421848b8605Smrg         return;
1422848b8605Smrg      }
1423848b8605Smrg
1424848b8605Smrg      mgr->dirty_real_vb_mask |= user_vb_mask;
1425848b8605Smrg   }
1426848b8605Smrg
1427848b8605Smrg   /*
1428848b8605Smrg   if (unroll_indices) {
1429848b8605Smrg      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
1430848b8605Smrg             start_vertex, num_vertices);
1431848b8605Smrg      util_dump_draw_info(stdout, info);
1432848b8605Smrg      printf("\n");
1433848b8605Smrg   }
1434848b8605Smrg
1435848b8605Smrg   unsigned i;
1436848b8605Smrg   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
1437848b8605Smrg      printf("input %i: ", i);
1438848b8605Smrg      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
1439848b8605Smrg      printf("\n");
1440848b8605Smrg   }
1441848b8605Smrg   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
1442848b8605Smrg      printf("real %i: ", i);
1443848b8605Smrg      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
1444848b8605Smrg      printf("\n");
1445848b8605Smrg   }
1446848b8605Smrg   */
1447848b8605Smrg
1448b8e80941Smrg   u_upload_unmap(pipe->stream_uploader);
1449848b8605Smrg   u_vbuf_set_driver_vertex_buffers(mgr);
1450848b8605Smrg
1451848b8605Smrg   pipe->draw_vbo(pipe, &new_info);
1452848b8605Smrg
1453848b8605Smrg   if (mgr->using_translate) {
1454848b8605Smrg      u_vbuf_translate_end(mgr);
1455848b8605Smrg   }
1456848b8605Smrg}
1457848b8605Smrg
/**
 * Save the currently-bound vertex elements state so it can be restored later
 * with u_vbuf_restore_vertex_elements().
 *
 * Only one level of save/restore is supported: saving again before a restore
 * would overwrite the previous save, hence the assertion below.
 */
void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
{
   assert(!mgr->ve_saved);
   mgr->ve_saved = mgr->ve;
}
1463848b8605Smrg
1464848b8605Smrgvoid u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
1465848b8605Smrg{
1466848b8605Smrg   if (mgr->ve != mgr->ve_saved) {
1467848b8605Smrg      struct pipe_context *pipe = mgr->pipe;
1468848b8605Smrg
1469848b8605Smrg      mgr->ve = mgr->ve_saved;
1470848b8605Smrg      pipe->bind_vertex_elements_state(pipe,
1471848b8605Smrg                                       mgr->ve ? mgr->ve->driver_cso : NULL);
1472848b8605Smrg   }
1473848b8605Smrg   mgr->ve_saved = NULL;
1474848b8605Smrg}
1475848b8605Smrg
/**
 * Save a reference to the vertex buffer bound at slot 0, for later restore
 * by u_vbuf_restore_vertex_buffer0().
 *
 * pipe_vertex_buffer_reference() takes its own reference on the buffer
 * resource, so the saved copy stays valid even if slot 0 is rebound in the
 * meantime.
 */
void u_vbuf_save_vertex_buffer0(struct u_vbuf *mgr)
{
   pipe_vertex_buffer_reference(&mgr->vertex_buffer0_saved,
                                &mgr->vertex_buffer[0]);
}
1481848b8605Smrg
1482b8e80941Smrgvoid u_vbuf_restore_vertex_buffer0(struct u_vbuf *mgr)
1483848b8605Smrg{
1484b8e80941Smrg   u_vbuf_set_vertex_buffers(mgr, 0, 1, &mgr->vertex_buffer0_saved);
1485b8e80941Smrg   pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved);
1486848b8605Smrg}
1487