1/**********************************************************
2 * Copyright 2008-2009 VMware, Inc.  All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26#include "pipe/p_compiler.h"
27#include "util/u_inlines.h"
28#include "pipe/p_defines.h"
29#include "util/u_helpers.h"
30#include "util/u_memory.h"
31#include "util/u_math.h"
32
33#include "svga_context.h"
34#include "svga_draw.h"
35#include "svga_draw_private.h"
36#include "svga_debug.h"
37#include "svga_screen.h"
38#include "svga_resource.h"
39#include "svga_resource_buffer.h"
40#include "svga_resource_texture.h"
41#include "svga_sampler_view.h"
42#include "svga_shader.h"
43#include "svga_surface.h"
44#include "svga_winsys.h"
45#include "svga_cmd.h"
46
47
48struct svga_hwtnl *
49svga_hwtnl_create(struct svga_context *svga)
50{
51   struct svga_hwtnl *hwtnl = CALLOC_STRUCT(svga_hwtnl);
52   if (!hwtnl)
53      goto fail;
54
55   hwtnl->svga = svga;
56
57   hwtnl->cmd.swc = svga->swc;
58
59   return hwtnl;
60
61fail:
62   return NULL;
63}
64
65
66void
67svga_hwtnl_destroy(struct svga_hwtnl *hwtnl)
68{
69   unsigned i, j;
70
71   for (i = 0; i < PIPE_PRIM_MAX; i++) {
72      for (j = 0; j < IDX_CACHE_MAX; j++) {
73         pipe_resource_reference(&hwtnl->index_cache[i][j].buffer, NULL);
74      }
75   }
76
77   for (i = 0; i < hwtnl->cmd.vbuf_count; i++)
78      pipe_vertex_buffer_unreference(&hwtnl->cmd.vbufs[i]);
79
80   for (i = 0; i < hwtnl->cmd.prim_count; i++)
81      pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
82
83   FREE(hwtnl);
84}
85
86
87void
88svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
89                         boolean flatshade, boolean flatshade_first)
90{
91   struct svga_screen *svgascreen = svga_screen(hwtnl->svga->pipe.screen);
92
93   /* User-specified PV */
94   hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST;
95
96   /* Device supported PV */
97   if (svgascreen->haveProvokingVertex) {
98      /* use the mode specified by the user */
99      hwtnl->hw_pv = hwtnl->api_pv;
100   }
101   else {
102      /* the device only support first provoking vertex */
103      hwtnl->hw_pv = PV_FIRST;
104   }
105}
106
107
108void
109svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode)
110{
111   hwtnl->api_fillmode = mode;
112}
113
114
115void
116svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl,
117                        unsigned count,
118                        const SVGA3dVertexDecl * decls,
119                        const unsigned *buffer_indexes,
120                        SVGA3dElementLayoutId layout_id)
121{
122   assert(hwtnl->cmd.prim_count == 0);
123   hwtnl->cmd.vdecl_count = count;
124   hwtnl->cmd.vdecl_layout_id = layout_id;
125   memcpy(hwtnl->cmd.vdecl, decls, count * sizeof(*decls));
126   memcpy(hwtnl->cmd.vdecl_buffer_index, buffer_indexes,
127          count * sizeof(unsigned));
128}
129
130
131/**
132 * Specify vertex buffers for hardware drawing.
133 */
134void
135svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
136                          unsigned count, struct pipe_vertex_buffer *buffers)
137{
138   struct pipe_vertex_buffer *dst = hwtnl->cmd.vbufs;
139   const struct pipe_vertex_buffer *src = buffers;
140   unsigned i;
141
142   for (i = 0; i < count; i++) {
143      pipe_vertex_buffer_reference(&dst[i], &src[i]);
144   }
145
146   /* release old buffer references */
147   for ( ; i < hwtnl->cmd.vbuf_count; i++) {
148      pipe_vertex_buffer_unreference(&dst[i]);
149      /* don't bother zeroing stride/offset fields */
150   }
151
152   hwtnl->cmd.vbuf_count = count;
153}
154
155
156/**
157 * Determine whether the specified buffer is referred in the primitive queue,
158 * for which no commands have been written yet.
159 */
160boolean
161svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
162                              struct pipe_resource *buffer)
163{
164   unsigned i;
165
166   if (svga_buffer_is_user_buffer(buffer)) {
167      return FALSE;
168   }
169
170   if (!hwtnl->cmd.prim_count) {
171      return FALSE;
172   }
173
174   for (i = 0; i < hwtnl->cmd.vbuf_count; ++i) {
175      if (hwtnl->cmd.vbufs[i].buffer.resource == buffer) {
176         return TRUE;
177      }
178   }
179
180   for (i = 0; i < hwtnl->cmd.prim_count; ++i) {
181      if (hwtnl->cmd.prim_ib[i] == buffer) {
182         return TRUE;
183      }
184   }
185
186   return FALSE;
187}
188
189
/**
 * Emit all queued primitives as one VGPU9 draw-primitives command.
 *
 * Steps, in order: re-validate stale backed sampler views, look up the
 * winsys surface of every vertex and index buffer, re-emit any bindings
 * flagged in svga->rebind, reserve the command with
 * SVGA3D_BeginDrawPrimitives(), fill in the vertex declarations and
 * primitive ranges with surface relocations, and commit.
 *
 * \return PIPE_OK on success; PIPE_ERROR_OUT_OF_MEMORY if a buffer handle
 *         could not be obtained; otherwise the error returned by a
 *         re-emit helper or by SVGA3D_BeginDrawPrimitives().
 *
 * On any error return the primitive queue is left untouched: prim_count
 * is only reset, and the queued index buffer references are only
 * dropped, on the success path.
 */
static enum pipe_error
draw_vgpu9(struct svga_hwtnl *hwtnl)
{
   struct svga_winsys_context *swc = hwtnl->cmd.swc;
   struct svga_context *svga = hwtnl->svga;
   enum pipe_error ret;
   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
   struct svga_winsys_surface *ib_handle[QSZ];
   struct svga_winsys_surface *handle;
   SVGA3dVertexDecl *vdecl;
   SVGA3dPrimitiveRange *prim;
   unsigned i;

   /* Re-validate those sampler views with backing copy
    * of texture whose original copy has been updated.
    * This is done here at draw time because the texture binding might not
    * have been modified, hence validation is not triggered at state update
    * time, and yet the texture might have been updated in another context,
    * so we need to re-validate the sampler view in order to update the
    * backing copy of the updated texture.
    */
   if (svga->state.hw_draw.num_backed_views) {
      for (i = 0; i < svga->state.hw_draw.num_views; i++) {
         struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
         struct svga_texture *tex = svga_texture(view->texture);
         struct svga_sampler_view *sv = view->v;
         /* only views with their own handle that are older than the texture */
         if (sv && tex && sv->handle != tex->handle && sv->age < tex->age)
            svga_validate_sampler_view(svga, view->v);
      }
   }

   /* One vertex buffer handle per vertex declaration; vb_handle[] is
    * indexed by declaration, not by vertex buffer slot.
    */
   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
      handle = svga_buffer_handle(svga, hwtnl->cmd.vbufs[j].buffer.resource,
                                  PIPE_BIND_VERTEX_BUFFER);
      if (!handle)
         return PIPE_ERROR_OUT_OF_MEMORY;

      vb_handle[i] = handle;
   }

   /* One index buffer handle per queued primitive; NULL for primitives
    * that have no index buffer.
    */
   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
      if (hwtnl->cmd.prim_ib[i]) {
         handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i],
                                     PIPE_BIND_INDEX_BUFFER);
         if (!handle)
            return PIPE_ERROR_OUT_OF_MEMORY;
      }
      else
         handle = NULL;

      ib_handle[i] = handle;
   }

   /* Re-emit whichever bindings are flagged as needing a rebind */
   if (svga->rebind.flags.rendertargets) {
      ret = svga_reemit_framebuffer_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   if (svga->rebind.flags.texture_samplers) {
      ret = svga_reemit_tss_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   if (svga->rebind.flags.vs) {
      ret = svga_reemit_vs_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   if (svga->rebind.flags.fs) {
      ret = svga_reemit_fs_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
            svga->curr.framebuffer.cbufs[0] ?
            svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
            hwtnl->cmd.prim_count);

   /* Reserve the command; vdecl and prim are set to point at the arrays
    * that are filled in below.
    */
   ret = SVGA3D_BeginDrawPrimitives(swc,
                                    &vdecl,
                                    hwtnl->cmd.vdecl_count,
                                    &prim, hwtnl->cmd.prim_count);
   if (ret != PIPE_OK)
      return ret;

   memcpy(vdecl,
          hwtnl->cmd.vdecl,
          hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);

   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
      /* check for 4-byte alignment */
      assert(vdecl[i].array.offset % 4 == 0);
      assert(vdecl[i].array.stride % 4 == 0);

      /* Given rangeHint is considered to be relative to indexBias, and
       * indexBias varies per primitive, we cannot accurately supply a
       * rangeHint when emitting more than one primitive per draw command.
       */
      if (hwtnl->cmd.prim_count == 1) {
         vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
         vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
      }
      else {
         vdecl[i].rangeHint.first = 0;
         vdecl[i].rangeHint.last = 0;
      }

      /* have the winsys fill in the surface id of the vertex buffer */
      swc->surface_relocation(swc,
                              &vdecl[i].array.surfaceId,
                              NULL, vb_handle[i], SVGA_RELOC_READ);
   }

   memcpy(prim,
          hwtnl->cmd.prim, hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);

   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
      swc->surface_relocation(swc,
                              &prim[i].indexArray.surfaceId,
                              NULL, ib_handle[i], SVGA_RELOC_READ);
      /* the queue's reference to the index buffer is no longer needed */
      pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
   }

   SVGA_FIFOCommitAll(swc);

   /* the primitive queue is now empty */
   hwtnl->cmd.prim_count = 0;

   return PIPE_OK;
}
327
328
329static SVGA3dSurfaceFormat
330xlate_index_format(unsigned indexWidth)
331{
332   if (indexWidth == 2) {
333      return SVGA3D_R16_UINT;
334   }
335   else if (indexWidth == 4) {
336      return SVGA3D_R32_UINT;
337   }
338   else {
339      assert(!"Bad indexWidth");
340      return SVGA3D_R32_UINT;
341   }
342}
343
344
345static enum pipe_error
346validate_sampler_resources(struct svga_context *svga)
347{
348   enum pipe_shader_type shader;
349
350   assert(svga_have_vgpu10(svga));
351
352   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) {
353      unsigned count = svga->curr.num_sampler_views[shader];
354      unsigned i;
355      struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
356      enum pipe_error ret;
357
358      /*
359       * Reference bound sampler resources to ensure pending updates are
360       * noticed by the device.
361       */
362      for (i = 0; i < count; i++) {
363         struct svga_pipe_sampler_view *sv =
364            svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
365
366         if (sv) {
367            if (sv->base.texture->target == PIPE_BUFFER) {
368               surfaces[i] = svga_buffer_handle(svga, sv->base.texture,
369                                                PIPE_BIND_SAMPLER_VIEW);
370            }
371            else {
372               surfaces[i] = svga_texture(sv->base.texture)->handle;
373            }
374         }
375         else {
376            surfaces[i] = NULL;
377         }
378      }
379
380      if (shader == PIPE_SHADER_FRAGMENT &&
381          svga->curr.rast->templ.poly_stipple_enable) {
382         const unsigned unit =
383            svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit;
384         struct svga_pipe_sampler_view *sv =
385            svga->polygon_stipple.sampler_view;
386
387         assert(sv);
388         surfaces[unit] = svga_texture(sv->base.texture)->handle;
389         count = MAX2(count, unit+1);
390      }
391
392      /* rebind the shader resources if needed */
393      if (svga->rebind.flags.texture_samplers) {
394         for (i = 0; i < count; i++) {
395            if (surfaces[i]) {
396               ret = svga->swc->resource_rebind(svga->swc,
397                                                surfaces[i],
398                                                NULL,
399                                                SVGA_RELOC_READ);
400               if (ret != PIPE_OK)
401                  return ret;
402            }
403         }
404      }
405   }
406   svga->rebind.flags.texture_samplers = FALSE;
407
408   return PIPE_OK;
409}
410
411
412static enum pipe_error
413validate_constant_buffers(struct svga_context *svga)
414{
415   enum pipe_shader_type shader;
416
417   assert(svga_have_vgpu10(svga));
418
419   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) {
420      enum pipe_error ret;
421      struct svga_buffer *buffer;
422
423      /* Rebind the default constant buffer if needed */
424      if (svga->rebind.flags.constbufs) {
425         buffer = svga_buffer(svga->state.hw_draw.constbuf[shader][0]);
426         if (buffer) {
427            ret = svga->swc->resource_rebind(svga->swc,
428                                             buffer->handle,
429                                             NULL,
430                                             SVGA_RELOC_READ);
431            if (ret != PIPE_OK)
432               return ret;
433         }
434      }
435
436      struct svga_winsys_surface *handle;
437      unsigned enabled_constbufs;
438
439      /*
440       * Reference other bound constant buffers to ensure pending updates are
441       * noticed by the device.
442       */
443      enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u;
444      while (enabled_constbufs) {
445         unsigned i = u_bit_scan(&enabled_constbufs);
446         buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer);
447
448         /* If the constant buffer has hw storage, get the buffer winsys handle.
449          * Rebind the resource if needed.
450          */
451         if (buffer && !buffer->use_swbuf)
452            handle = svga_buffer_handle(svga, &buffer->b,
453                                        PIPE_BIND_CONSTANT_BUFFER);
454         else
455            handle = svga->state.hw_draw.constbufoffsets[shader][i].handle;
456
457         if (svga->rebind.flags.constbufs && handle) {
458            ret = svga->swc->resource_rebind(svga->swc,
459                                             handle,
460                                             NULL,
461                                             SVGA_RELOC_READ);
462            if (ret != PIPE_OK)
463               return ret;
464         }
465      }
466   }
467   svga->rebind.flags.constbufs = FALSE;
468
469   return PIPE_OK;
470}
471
472
473/**
474 * Was the last command put into the command buffer a drawing command?
475 * We use this to determine if we can skip emitting buffer re-bind
476 * commands when we have a sequence of drawing commands that use the
477 * same vertex/index buffers with no intervening commands.
478 *
479 * The first drawing command will bind the vertex/index buffers.  If
480 * the immediately following command is also a drawing command using the
481 * same buffers, we shouldn't have to rebind them.
482 */
483static bool
484last_command_was_draw(const struct svga_context *svga)
485{
486   switch (SVGA3D_GetLastCommand(svga->swc)) {
487   case SVGA_3D_CMD_DX_DRAW:
488   case SVGA_3D_CMD_DX_DRAW_INDEXED:
489   case SVGA_3D_CMD_DX_DRAW_INSTANCED:
490   case SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED:
491   case SVGA_3D_CMD_DX_DRAW_AUTO:
492   case SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED_INDIRECT:
493   case SVGA_3D_CMD_DX_DRAW_INSTANCED_INDIRECT:
494      return true;
495   default:
496      return false;
497   }
498}
499
500
501/**
502 * A helper function to compare vertex buffers.
503 * They are equal if the vertex buffer attributes and the vertex buffer
504 * resources are identical.
505 */
506static boolean
507vertex_buffers_equal(unsigned count,
508                     SVGA3dVertexBuffer *pVBufAttr1,
509                     struct pipe_resource **pVBuf1,
510                     SVGA3dVertexBuffer *pVBufAttr2,
511                     struct pipe_resource **pVBuf2)
512{
513   return (memcmp(pVBufAttr1, pVBufAttr2,
514                  count * sizeof(*pVBufAttr1)) == 0) &&
515          (memcmp(pVBuf1, pVBuf2, count * sizeof(*pVBuf1)) == 0);
516}
517
518
519/*
520 * Prepare the vertex buffers for a drawing command.
521 */
522static enum pipe_error
523validate_vertex_buffers(struct svga_hwtnl *hwtnl,
524                   const struct pipe_stream_output_target *so_vertex_count)
525{
526   struct svga_context *svga = hwtnl->svga;
527   struct pipe_resource *vbuffers[SVGA3D_INPUTREG_MAX];
528   struct svga_winsys_surface *vbuffer_handles[SVGA3D_INPUTREG_MAX];
529   struct svga_winsys_surface *so_vertex_count_handle;
530   const unsigned vbuf_count = so_vertex_count ? 1 : hwtnl->cmd.vbuf_count;
531   int last_vbuf = -1;
532   unsigned i;
533
534   assert(svga_have_vgpu10(svga));
535
536   /* Get handle for each referenced vertex buffer, unless we're using a
537    * stream-out buffer to specify the drawing information (DrawAuto).
538    */
539   if (so_vertex_count) {
540      i = 0;
541   }
542   else {
543      for (i = 0; i < vbuf_count; i++) {
544         struct svga_buffer *sbuf =
545            svga_buffer(hwtnl->cmd.vbufs[i].buffer.resource);
546
547         if (sbuf) {
548            vbuffer_handles[i] = svga_buffer_handle(svga, &sbuf->b,
549                                                    PIPE_BIND_VERTEX_BUFFER);
550            assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
551            if (vbuffer_handles[i] == NULL)
552               return PIPE_ERROR_OUT_OF_MEMORY;
553            vbuffers[i] = &sbuf->b;
554            last_vbuf = i;
555         }
556         else {
557            vbuffers[i] = NULL;
558            vbuffer_handles[i] = NULL;
559         }
560      }
561   }
562
563   for (; i < svga->state.hw_draw.num_vbuffers; i++) {
564      vbuffers[i] = NULL;
565      vbuffer_handles[i] = NULL;
566   }
567
568   /* Get handle for each referenced vertex buffer */
569   for (i = 0; i < vbuf_count; i++) {
570      struct svga_buffer *sbuf =
571         svga_buffer(hwtnl->cmd.vbufs[i].buffer.resource);
572
573      if (sbuf) {
574         vbuffer_handles[i] = svga_buffer_handle(svga, &sbuf->b,
575                                                 PIPE_BIND_VERTEX_BUFFER);
576         assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
577         if (vbuffer_handles[i] == NULL)
578            return PIPE_ERROR_OUT_OF_MEMORY;
579         vbuffers[i] = &sbuf->b;
580         last_vbuf = i;
581      }
582      else {
583         vbuffers[i] = NULL;
584         vbuffer_handles[i] = NULL;
585      }
586   }
587
588   for (; i < svga->state.hw_draw.num_vbuffers; i++) {
589      vbuffers[i] = NULL;
590      vbuffer_handles[i] = NULL;
591   }
592
593   /* setup vertex attribute input layout */
594   if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
595      enum pipe_error ret =
596         SVGA3D_vgpu10_SetInputLayout(svga->swc,
597                                      hwtnl->cmd.vdecl_layout_id);
598      if (ret != PIPE_OK)
599         return ret;
600
601      svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
602   }
603
604   /* Get handle for the stream out buffer */
605   if (so_vertex_count) {
606      so_vertex_count_handle = svga_buffer_handle(svga,
607                                                  so_vertex_count->buffer,
608                                                  (PIPE_BIND_VERTEX_BUFFER |
609                                                   PIPE_BIND_STREAM_OUTPUT));
610      if (!so_vertex_count_handle)
611         return PIPE_ERROR_OUT_OF_MEMORY;
612   }
613   else {
614      so_vertex_count_handle = NULL;
615   }
616
617   /* setup vertex buffers */
618   {
619      SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS];
620
621      if (so_vertex_count) {
622         /* Set IA slot0 input buffer to the SO buffer */
623         assert(vbuf_count == 1);
624         vbuffer_attrs[0].stride = hwtnl->cmd.vbufs[0].stride;
625         vbuffer_attrs[0].offset = hwtnl->cmd.vbufs[0].buffer_offset;
626         vbuffer_attrs[0].sid = 0;
627         vbuffers[0] = so_vertex_count->buffer;
628         vbuffer_handles[0] = so_vertex_count_handle;
629      }
630      else {
631         for (i = 0; i < vbuf_count; i++) {
632            vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride;
633            vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
634            vbuffer_attrs[i].sid = 0;
635         }
636      }
637
638      /* If any of the vertex buffer state has changed, issue
639       * the SetVertexBuffers command. Otherwise, we will just
640       * need to rebind the resources.
641       */
642      if (vbuf_count != svga->state.hw_draw.num_vbuffers ||
643          !vertex_buffers_equal(vbuf_count,
644                                vbuffer_attrs,
645                                vbuffers,
646                                svga->state.hw_draw.vbuffer_attrs,
647                                svga->state.hw_draw.vbuffers)) {
648
649         unsigned num_vbuffers;
650
651         /* get the max of the current bound vertex buffers count and
652          * the to-be-bound vertex buffers count, so as to unbind
653          * the unused vertex buffers.
654          */
655         num_vbuffers = MAX2(vbuf_count, svga->state.hw_draw.num_vbuffers);
656
657         /* Zero-out the old buffers we want to unbind (the number of loop
658          * iterations here is typically very small, and often zero.)
659          */
660         for (i = vbuf_count; i < num_vbuffers; i++) {
661            vbuffer_attrs[i].sid = 0;
662            vbuffer_attrs[i].stride = 0;
663            vbuffer_attrs[i].offset = 0;
664            vbuffer_handles[i] = NULL;
665         }
666
667         if (num_vbuffers > 0) {
668            SVGA3dVertexBuffer *pbufAttrs = vbuffer_attrs;
669            struct svga_winsys_surface **pbufHandles = vbuffer_handles;
670            unsigned numVBuf = 0;
671
672            /* Loop through the vertex buffer lists to only emit
673             * those vertex buffers that are not already in the
674             * corresponding entries in the device's vertex buffer list.
675             */
676            for (i = 0; i < num_vbuffers; i++) {
677               boolean emit =
678                  vertex_buffers_equal(1,
679                                       &vbuffer_attrs[i],
680                                       &vbuffers[i],
681                                       &svga->state.hw_draw.vbuffer_attrs[i],
682                                       &svga->state.hw_draw.vbuffers[i]);
683
684               if (!emit && i == num_vbuffers-1) {
685                  /* Include the last vertex buffer in the next emit
686                   * if it is different.
687                   */
688                  emit = TRUE;
689                  numVBuf++;
690                  i++;
691               }
692
693               if (emit) {
694                  /* numVBuf can only be 0 if the first vertex buffer
695                   * is the same as the one in the device's list.
696                   * In this case, there is nothing to send yet.
697                   */
698                  if (numVBuf) {
699                     enum pipe_error ret =
700                        SVGA3D_vgpu10_SetVertexBuffers(svga->swc,
701                                                       numVBuf,
702                                                       i - numVBuf,
703                                                       pbufAttrs, pbufHandles);
704                     if (ret != PIPE_OK)
705                        return ret;
706                  }
707                  pbufAttrs += (numVBuf + 1);
708                  pbufHandles += (numVBuf + 1);
709                  numVBuf = 0;
710               }
711               else
712                  numVBuf++;
713            }
714
715            /* save the number of vertex buffers sent to the device, not
716             * including trailing unbound vertex buffers.
717             */
718            svga->state.hw_draw.num_vbuffers = last_vbuf + 1;
719            memcpy(svga->state.hw_draw.vbuffer_attrs, vbuffer_attrs,
720                   num_vbuffers * sizeof(vbuffer_attrs[0]));
721            for (i = 0; i < num_vbuffers; i++) {
722               pipe_resource_reference(&svga->state.hw_draw.vbuffers[i],
723                                       vbuffers[i]);
724            }
725         }
726      }
727      else {
728         /* Even though we can avoid emitting the redundant SetVertexBuffers
729          * command, we still need to reference the vertex buffers surfaces.
730          */
731         for (i = 0; i < vbuf_count; i++) {
732            if (vbuffer_handles[i] && !last_command_was_draw(svga)) {
733               enum pipe_error ret =
734                  svga->swc->resource_rebind(svga->swc, vbuffer_handles[i],
735                                             NULL, SVGA_RELOC_READ);
736               if (ret != PIPE_OK)
737                  return ret;
738            }
739         }
740      }
741   }
742
743   return PIPE_OK;
744}
745
746
747/*
748 * Prepare the index buffer for a drawing command.
749 */
750static enum pipe_error
751validate_index_buffer(struct svga_hwtnl *hwtnl,
752                      const SVGA3dPrimitiveRange *range,
753                      struct pipe_resource *ib)
754{
755   struct svga_context *svga = hwtnl->svga;
756   struct svga_winsys_surface *ib_handle =
757      svga_buffer_handle(svga, ib, PIPE_BIND_INDEX_BUFFER);
758
759   if (!ib_handle)
760      return PIPE_ERROR_OUT_OF_MEMORY;
761
762   struct svga_buffer *sbuf = svga_buffer(ib);
763   assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
764   (void) sbuf; /* silence unused var warning */
765
766   SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);
767
768   if (ib != svga->state.hw_draw.ib ||
769       indexFormat != svga->state.hw_draw.ib_format ||
770       range->indexArray.offset != svga->state.hw_draw.ib_offset) {
771
772      assert(indexFormat != SVGA3D_FORMAT_INVALID);
773      enum pipe_error ret =
774         SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
775                                      indexFormat,
776                                      range->indexArray.offset);
777      if (ret != PIPE_OK)
778         return ret;
779
780      pipe_resource_reference(&svga->state.hw_draw.ib, ib);
781      svga->state.hw_draw.ib_format = indexFormat;
782      svga->state.hw_draw.ib_offset = range->indexArray.offset;
783   }
784   else {
785      /* Even though we can avoid emitting the redundant SetIndexBuffer
786       * command, we still need to reference the index buffer surface.
787       */
788      if (!last_command_was_draw(svga)) {
789         enum pipe_error ret = svga->swc->resource_rebind(svga->swc,
790                                                          ib_handle,
791                                                          NULL,
792                                                          SVGA_RELOC_READ);
793         if (ret != PIPE_OK)
794            return ret;
795      }
796   }
797
798   return PIPE_OK;
799}
800
801
/**
 * Emit one VGPU10 drawing command together with the state it needs.
 *
 * \param hwtnl            the hwtnl object; its primitive queue must be
 *                         empty on entry (asserted)
 * \param range            supplies the primitive type, the index bias and,
 *                         for indexed draws, the index width and offset
 * \param vcount           vertex/index count passed to the draw command
 * \param min_index        not referenced by this function
 * \param max_index        not referenced by this function
 * \param ib               index buffer, or NULL for non-indexed drawing
 * \param start_instance   first instance, used by the instanced commands
 * \param instance_count   instanced commands are used when this is > 1
 * \param indirect         if non-NULL, the draw arguments come from this
 *                         buffer at indirect->offset
 * \param so_vertex_count  if non-NULL and \p ib is NULL, a DrawAuto
 *                         command is issued
 *
 * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the indirect
 *         buffer handle could not be obtained, otherwise the first error
 *         returned by a validation helper or command emission.
 */
static enum pipe_error
draw_vgpu10(struct svga_hwtnl *hwtnl,
            const SVGA3dPrimitiveRange *range,
            unsigned vcount,
            unsigned min_index, unsigned max_index,
            struct pipe_resource *ib,
            unsigned start_instance, unsigned instance_count,
            const struct pipe_draw_indirect_info *indirect,
            const struct pipe_stream_output_target *so_vertex_count)
{
   struct svga_context *svga = hwtnl->svga;
   struct svga_winsys_surface *indirect_handle;
   enum pipe_error ret;

   assert(svga_have_vgpu10(svga));
   assert(hwtnl->cmd.prim_count == 0);

   /* We need to reemit all the current resource bindings along with the Draw
    * command to be sure that the referenced resources are available for the
    * Draw command, just in case the surfaces associated with the resources
    * are paged out.
    */
   if (svga->rebind.val) {
      ret = svga_rebind_framebuffer_bindings(svga);
      if (ret != PIPE_OK)
         return ret;

      ret = svga_rebind_shaders(svga);
      if (ret != PIPE_OK)
         return ret;

      /* Rebind stream output targets */
      ret = svga_rebind_stream_output_targets(svga);
      if (ret != PIPE_OK)
         return ret;

      /* No need to explicitly rebind index buffer and vertex buffers here.
       * Even if the same index buffer or vertex buffers are referenced for this
       * draw and we skip emitting the redundant set command, we will still
       * reference the associated resources.
       */
   }

   /* Sampler resources, constant buffers, vertex buffers and the index
    * buffer (if any) are validated in this order before the draw.
    */
   ret = validate_sampler_resources(svga);
   if (ret != PIPE_OK)
      return ret;

   ret = validate_constant_buffers(svga);
   if (ret != PIPE_OK)
      return ret;

   ret = validate_vertex_buffers(hwtnl, so_vertex_count);
   if (ret != PIPE_OK)
      return ret;

   if (ib) {
      ret = validate_index_buffer(hwtnl, range, ib);
      if (ret != PIPE_OK)
         return ret;
   }

   /* Get the handle of the indirect-arguments buffer, if one is used */
   if (indirect) {
      indirect_handle = svga_buffer_handle(svga, indirect->buffer,
                                           PIPE_BIND_COMMAND_ARGS_BUFFER);
      if (!indirect_handle)
         return PIPE_ERROR_OUT_OF_MEMORY;
   }
   else {
      indirect_handle = NULL;
   }

   /* Set primitive type (line, tri, etc) */
   if (svga->state.hw_draw.topology != range->primType) {
      ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.topology = range->primType;
   }

   if (ib) {
      /* indexed drawing */
      if (indirect) {
         ret = SVGA3D_sm5_DrawIndexedInstancedIndirect(svga->swc,
                                                       indirect_handle,
                                                       indirect->offset);
      }
      else if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
                                                  vcount,
                                                  instance_count,
                                                  0, /* startIndexLocation */
                                                  range->indexBias,
                                                  start_instance);
      }
      else {
         /* non-instanced drawing */
         ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
                                         vcount,
                                         0,      /* startIndexLocation */
                                         range->indexBias);
      }
      if (ret != PIPE_OK) {
         return ret;
      }
   }
   else {
      /* non-indexed drawing */
      if (svga->state.hw_draw.ib_format != SVGA3D_FORMAT_INVALID ||
          svga->state.hw_draw.ib != NULL) {
         /* Unbind previously bound index buffer */
         ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, NULL,
                                            SVGA3D_FORMAT_INVALID, 0);
         if (ret != PIPE_OK)
            return ret;
         /* record that no index buffer is bound any more */
         pipe_resource_reference(&svga->state.hw_draw.ib, NULL);
         svga->state.hw_draw.ib_format = SVGA3D_FORMAT_INVALID;
      }

      assert(svga->state.hw_draw.ib == NULL);

      /* so_vertex_count takes precedence over indirect, which takes
       * precedence over instancing.
       */
      if (so_vertex_count) {
         /* Stream-output drawing */
         ret = SVGA3D_vgpu10_DrawAuto(svga->swc);
      }
      else if (indirect) {
         ret = SVGA3D_sm5_DrawInstancedIndirect(svga->swc,
                                                indirect_handle,
                                                indirect->offset);
      }
      else if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
                                           vcount,
                                           instance_count,
                                           range->indexBias,
                                           start_instance);
      }
      else {
         /* non-instanced */
         ret = SVGA3D_vgpu10_Draw(svga->swc,
                                  vcount,
                                  range->indexBias);
      }
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   hwtnl->cmd.prim_count = 0;

   return PIPE_OK;
}
954
955
956
957/**
958 * Emit any pending drawing commands to the command buffer.
959 * When we receive VGPU9 drawing commands we accumulate them and don't
960 * immediately emit them into the command buffer.
961 * This function needs to be called before we change state that could
962 * effect those pending draws.
963 */
964enum pipe_error
965svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
966{
967   enum pipe_error ret = PIPE_OK;
968
969   SVGA_STATS_TIME_PUSH(svga_sws(hwtnl->svga), SVGA_STATS_TIME_HWTNLFLUSH);
970
971   if (!svga_have_vgpu10(hwtnl->svga) && hwtnl->cmd.prim_count) {
972      /* we only queue up primitive for VGPU9 */
973      ret = draw_vgpu9(hwtnl);
974   }
975
976   SVGA_STATS_TIME_POP(svga_screen(hwtnl->svga->pipe.screen)->sws);
977   return ret;
978}
979
980
981void
982svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias)
983{
984   hwtnl->index_bias = index_bias;
985}
986
987
988
989/***********************************************************************
990 * Internal functions:
991 */
992
993/**
994 * For debugging only.
995 */
996static void
997check_draw_params(struct svga_hwtnl *hwtnl,
998                  const SVGA3dPrimitiveRange *range,
999                  unsigned min_index, unsigned max_index,
1000                  struct pipe_resource *ib)
1001{
1002   unsigned i;
1003
1004   assert(!svga_have_vgpu10(hwtnl->svga));
1005
1006   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
1007      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
1008      const struct pipe_vertex_buffer *vb = &hwtnl->cmd.vbufs[j];
1009      unsigned size = vb->buffer.resource ? vb->buffer.resource->width0 : 0;
1010      unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
1011      unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
1012      int index_bias = (int) range->indexBias + hwtnl->index_bias;
1013      unsigned width;
1014
1015      if (size == 0)
1016         continue;
1017
1018      assert(vb);
1019      assert(size);
1020      assert(offset < size);
1021      assert(min_index <= max_index);
1022      (void) width;
1023      (void) stride;
1024      (void) offset;
1025      (void) size;
1026
1027      switch (hwtnl->cmd.vdecl[i].identity.type) {
1028      case SVGA3D_DECLTYPE_FLOAT1:
1029         width = 4;
1030         break;
1031      case SVGA3D_DECLTYPE_FLOAT2:
1032         width = 4 * 2;
1033         break;
1034      case SVGA3D_DECLTYPE_FLOAT3:
1035         width = 4 * 3;
1036         break;
1037      case SVGA3D_DECLTYPE_FLOAT4:
1038         width = 4 * 4;
1039         break;
1040      case SVGA3D_DECLTYPE_D3DCOLOR:
1041         width = 4;
1042         break;
1043      case SVGA3D_DECLTYPE_UBYTE4:
1044         width = 1 * 4;
1045         break;
1046      case SVGA3D_DECLTYPE_SHORT2:
1047         width = 2 * 2;
1048         break;
1049      case SVGA3D_DECLTYPE_SHORT4:
1050         width = 2 * 4;
1051         break;
1052      case SVGA3D_DECLTYPE_UBYTE4N:
1053         width = 1 * 4;
1054         break;
1055      case SVGA3D_DECLTYPE_SHORT2N:
1056         width = 2 * 2;
1057         break;
1058      case SVGA3D_DECLTYPE_SHORT4N:
1059         width = 2 * 4;
1060         break;
1061      case SVGA3D_DECLTYPE_USHORT2N:
1062         width = 2 * 2;
1063         break;
1064      case SVGA3D_DECLTYPE_USHORT4N:
1065         width = 2 * 4;
1066         break;
1067      case SVGA3D_DECLTYPE_UDEC3:
1068         width = 4;
1069         break;
1070      case SVGA3D_DECLTYPE_DEC3N:
1071         width = 4;
1072         break;
1073      case SVGA3D_DECLTYPE_FLOAT16_2:
1074         width = 2 * 2;
1075         break;
1076      case SVGA3D_DECLTYPE_FLOAT16_4:
1077         width = 2 * 4;
1078         break;
1079      default:
1080         assert(0);
1081         width = 0;
1082         break;
1083      }
1084
1085      if (index_bias >= 0) {
1086         assert(offset + index_bias * stride + width <= size);
1087      }
1088
1089      /*
1090       * min_index/max_index are merely conservative guesses, so we can't
1091       * make buffer overflow detection based on their values.
1092       */
1093   }
1094
1095   assert(range->indexWidth == range->indexArray.stride);
1096
1097   if (ib) {
1098      ASSERTED unsigned size = ib->width0;
1099      ASSERTED unsigned offset = range->indexArray.offset;
1100      ASSERTED unsigned stride = range->indexArray.stride;
1101      ASSERTED unsigned count;
1102
1103      assert(size);
1104      assert(offset < size);
1105      assert(stride);
1106
1107      switch (range->primType) {
1108      case SVGA3D_PRIMITIVE_POINTLIST:
1109         count = range->primitiveCount;
1110         break;
1111      case SVGA3D_PRIMITIVE_LINELIST:
1112         count = range->primitiveCount * 2;
1113         break;
1114      case SVGA3D_PRIMITIVE_LINESTRIP:
1115         count = range->primitiveCount + 1;
1116         break;
1117      case SVGA3D_PRIMITIVE_TRIANGLELIST:
1118         count = range->primitiveCount * 3;
1119         break;
1120      case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
1121         count = range->primitiveCount + 2;
1122         break;
1123      case SVGA3D_PRIMITIVE_TRIANGLEFAN:
1124         count = range->primitiveCount + 2;
1125         break;
1126      default:
1127         assert(0);
1128         count = 0;
1129         break;
1130      }
1131
1132      assert(offset + count * stride <= size);
1133   }
1134}
1135
1136
1137/**
1138 * All drawing filters down into this function, either directly
1139 * on the hardware path or after doing software vertex processing.
1140 * \param indirect  if non-null, get the vertex count, first vertex, etc.
1141 *                  from a buffer.
1142 * \param so_vertex_count  if non-null, get the vertex count from a
1143 *                         stream-output target.
1144 */
1145enum pipe_error
1146svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
1147                const SVGA3dPrimitiveRange *range,
1148                unsigned vcount,
1149                unsigned min_index, unsigned max_index,
1150                struct pipe_resource *ib,
1151                unsigned start_instance, unsigned instance_count,
1152                const struct pipe_draw_indirect_info *indirect,
1153                const struct pipe_stream_output_target *so_vertex_count)
1154{
1155   enum pipe_error ret = PIPE_OK;
1156
1157   SVGA_STATS_TIME_PUSH(svga_sws(hwtnl->svga), SVGA_STATS_TIME_HWTNLPRIM);
1158
1159   if (svga_have_vgpu10(hwtnl->svga)) {
1160      /* draw immediately */
1161      SVGA_RETRY(hwtnl->svga, draw_vgpu10(hwtnl, range, vcount, min_index,
1162                                          max_index, ib, start_instance,
1163                                          instance_count, indirect,
1164                                          so_vertex_count));
1165   }
1166   else {
1167      /* batch up drawing commands */
1168      assert(indirect == NULL);
1169#ifdef DEBUG
1170      check_draw_params(hwtnl, range, min_index, max_index, ib);
1171      assert(start_instance == 0);
1172      assert(instance_count <= 1);
1173#else
1174      (void) check_draw_params;
1175#endif
1176
1177      if (hwtnl->cmd.prim_count + 1 >= QSZ) {
1178         ret = svga_hwtnl_flush(hwtnl);
1179         if (ret != PIPE_OK)
1180            goto done;
1181      }
1182
1183      /* min/max indices are relative to bias */
1184      hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
1185      hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
1186
1187      hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
1188      hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
1189
1190      pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
1191      hwtnl->cmd.prim_count++;
1192   }
1193
1194done:
1195   SVGA_STATS_TIME_POP(svga_screen(hwtnl->svga->pipe.screen)->sws);
1196   return ret;
1197}
1198
1199
1200/**
1201 * Return TRUE if there are pending primitives.
1202 */
1203boolean
1204svga_hwtnl_has_pending_prim(struct svga_hwtnl *hwtnl)
1205{
1206   return hwtnl->cmd.prim_count > 0;
1207}
1208